diff options
Diffstat (limited to 'recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch')
| -rw-r--r-- | recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | 1759 |
1 files changed, 1759 insertions, 0 deletions
diff --git a/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch new file mode 100644 index 0000000000..a58dd24416 --- /dev/null +++ b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | |||
| @@ -0,0 +1,1759 @@ | |||
| 1 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 2 | |||
| 3 | Backport code hoisting improvements from mainline: | ||
| 4 | |||
| 5 | 2010-07-28 Jakub Jelinek <jakub@redhat.com> | ||
| 6 | PR debug/45105 | ||
| 7 | * gcc.dg/pr45105.c: New test. | ||
| 8 | |||
| 9 | 2010-07-28 Jakub Jelinek <jakub@redhat.com> | ||
| 10 | PR debug/45105 | ||
| 11 | * gcse.c (hoist_code): Use FOR_BB_INSNS macro. | ||
| 12 | |||
| 13 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 14 | PR rtl-optimization/45107 | ||
| 15 | * gcc.dg/pr45107.c: New test. | ||
| 16 | |||
| 17 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 18 | PR rtl-optimization/45107 | ||
| 19 | * gcse.c (hash_scan_set): Use max_distance for gcse-las. | ||
| 20 | |||
| 21 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 22 | PR rtl-optimization/45101 | ||
| 23 | * gcc.dg/pr45101.c: New test. | ||
| 24 | |||
| 25 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 26 | PR rtl-optimization/45101 | ||
| 27 | * gcse.c (hash_scan_set): Fix argument ordering of insert_expr_in_table | ||
| 28 | for gcse-las. | ||
| 29 | |||
| 30 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 31 | PR rtl-optimization/40956 | ||
| 32 | PR target/42495 | ||
| 33 | PR middle-end/42574 | ||
| 34 | * gcc.target/arm/pr40956.c, gcc.target/arm/pr42495.c, | ||
| 35 | * gcc.target/arm/pr42574.c: Add tests. | ||
| 36 | |||
| 37 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 38 | * config/arm/arm.c (params.h): Include. | ||
| 39 | (arm_override_options): Tune gcse-unrestricted-cost. | ||
| 40 | * config/arm/t-arm (arm.o): Define dependencies. | ||
| 41 | |||
| 42 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 43 | PR target/42495 | ||
| 44 | PR middle-end/42574 | ||
| 45 | * basic-block.h (get_dominated_to_depth): Declare. | ||
| 46 | * dominance.c (get_dominated_to_depth): New function, use | ||
| 47 | get_all_dominated_blocks as a base. | ||
| 48 | (get_all_dominated_blocks): Use get_dominated_to_depth. | ||
| 49 | * gcse.c (occr_t, VEC (occr_t, heap)): Define. | ||
| 50 | (hoist_exprs): Remove. | ||
| 51 | (alloc_code_hoist_mem, free_code_hoist_mem): Update. | ||
| 52 | (compute_code_hoist_vbeinout): Add debug print outs. | ||
| 53 | (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. | ||
| 54 | * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid | ||
| 55 | quadratic behavior. | ||
| 56 | * params.h (MAX_HOIST_DEPTH): New macro. | ||
| 57 | * doc/invoke.texi (max-hoist-depth): Document. | ||
| 58 | |||
| 59 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 60 | PR rtl-optimization/40956 | ||
| 61 | * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple | ||
| 62 | constants. | ||
| 63 | |||
| 64 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 65 | PR target/42495 | ||
| 66 | PR middle-end/42574 | ||
| 67 | * config/arm/arm.c (legitimize_pic_address): Use | ||
| 68 | gen_calculate_pic_address pattern to emit calculation of PIC address. | ||
| 69 | (will_be_in_index_register): New function. | ||
| 70 | (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) | ||
| 71 | (thumb1_legitimate_address_p): Use it provided !strict_p. | ||
| 72 | * config/arm/arm.md (calculate_pic_address): New expand and split. | ||
| 73 | |||
| 74 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 75 | PR target/42495 | ||
| 76 | PR middle-end/42574 | ||
| 77 | * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants. | ||
| 78 | * config/arm/arm.md (define_split "J", define_split "K"): Make | ||
| 79 | IRA/reload friendly. | ||
| 80 | |||
| 81 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 82 | * gcse.c (insert_insn_end_basic_block): Update signature, remove | ||
| 83 | unused checks. | ||
| 84 | (pre_edge_insert, hoist_code): Update. | ||
| 85 | |||
| 86 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 87 | PR target/42495 | ||
| 88 | PR middle-end/42574 | ||
| 89 | * gcse.c (hoist_expr_reaches_here_p): Remove excessive check. | ||
| 90 | |||
| 91 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 92 | * gcse.c (hoist_code): Generate new pseudo for every new set insn. | ||
| 93 | |||
| 94 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 95 | PR rtl-optimization/40956 | ||
| 96 | PR target/42495 | ||
| 97 | PR middle-end/42574 | ||
| 98 | * gcse.c (compute_code_hoist_vbeinout): Consider more expressions | ||
| 99 | for hoisting. | ||
| 100 | (hoist_code): Count occurrences in current block too. | ||
| 101 | |||
| 102 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 103 | * gcse.c (struct expr:max_distance): New field. | ||
| 104 | (doing_code_hoisting_p): New static variable. | ||
| 105 | (want_to_gcse_p): Change signature. Allow constrained hoisting of | ||
| 106 | simple expressions, don't change behavior for PRE. Set max_distance. | ||
| 107 | (insert_expr_in_table): Set new max_distance field. | ||
| 108 | (hash_scan_set): Update. | ||
| 109 | (hoist_expr_reaches_here_p): Stop search after max_distance | ||
| 110 | instructions. | ||
| 111 | (find_occr_in_bb): New static function. Use it in ... | ||
| 112 | (hoist_code): Calculate sizes of basic block before any changes are | ||
| 113 | done. Pass max_distance to hoist_expr_reaches_here_p. | ||
| 114 | (one_code_hoisting_pass): Set doing_code_hoisting_p. | ||
| 115 | * params.def (PARAM_GCSE_COST_DISTANCE_RATIO,) | ||
| 116 | (PARAM_GCSE_UNRESTRICTED_COST): New parameters. | ||
| 117 | * params.h (GCSE_COST_DISTANCE_RATIO, GCSE_UNRESTRICTED_COST): New | ||
| 118 | macros. | ||
| 119 | * doc/invoke.texi (gcse-cost-distance-ratio, gcse-unrestricted-cost): | ||
| 120 | Document. | ||
| 121 | |||
| 122 | 2010-07-27 Jeff Law <law@redhat.com> | ||
| 123 | Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 124 | * gcse.c (compute_transpout, transpout): Remove, move logic | ||
| 125 | to prune_expressions. | ||
| 126 | (compute_pre_data): Move pruning of trapping expressions ... | ||
| 127 | (prune_expressions): ... here. New static function. | ||
| 128 | (compute_code_hoist_data): Use it. | ||
| 129 | (alloc_code_hoist_mem, free_code_hoist_mem, hoist_code): Update. | ||
| 130 | |||
| 131 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
| 132 | * dbgcnt.def (hoist_insn): New debug counter. | ||
| 133 | * gcse.c (hoist_code): Use it. | ||
| 134 | |||
| 135 | 2010-07-28 Julian Brown <julian@codesourcery.com> | ||
| 136 | |||
| 137 | Backport from FSF mainline: | ||
| 138 | |||
| 139 | === modified file 'gcc/basic-block.h' | ||
| 140 | --- old/gcc/basic-block.h 2010-04-02 18:54:46 +0000 | ||
| 141 | +++ new/gcc/basic-block.h 2010-08-16 09:41:58 +0000 | ||
| 142 | @@ -932,6 +932,8 @@ | ||
| 143 | extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction, | ||
| 144 | basic_block *, | ||
| 145 | unsigned); | ||
| 146 | +extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction, | ||
| 147 | + basic_block, int); | ||
| 148 | extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction, | ||
| 149 | basic_block); | ||
| 150 | extern void add_to_dominance_info (enum cdi_direction, basic_block); | ||
| 151 | |||
| 152 | === modified file 'gcc/config/arm/arm.c' | ||
| 153 | --- old/gcc/config/arm/arm.c 2010-08-13 15:37:39 +0000 | ||
| 154 | +++ new/gcc/config/arm/arm.c 2010-08-16 09:41:58 +0000 | ||
| 155 | @@ -56,6 +56,7 @@ | ||
| 156 | #include "df.h" | ||
| 157 | #include "intl.h" | ||
| 158 | #include "libfuncs.h" | ||
| 159 | +#include "params.h" | ||
| 160 | |||
| 161 | /* Forward definitions of types. */ | ||
| 162 | typedef struct minipool_node Mnode; | ||
| 163 | @@ -1902,6 +1903,14 @@ | ||
| 164 | flag_reorder_blocks = 1; | ||
| 165 | } | ||
| 166 | |||
| 167 | + if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST) | ||
| 168 | + && flag_pic) | ||
| 169 | + /* Hoisting PIC address calculations more aggressively provides a small, | ||
| 170 | + but measurable, size reduction for PIC code. Therefore, we decrease | ||
| 171 | + the bar for unrestricted expression hoisting to the cost of PIC address | ||
| 172 | + calculation, which is 2 instructions. */ | ||
| 173 | + set_param_value ("gcse-unrestricted-cost", 2); | ||
| 174 | + | ||
| 175 | /* Register global variables with the garbage collector. */ | ||
| 176 | arm_add_gc_roots (); | ||
| 177 | |||
| 178 | @@ -5070,17 +5079,13 @@ | ||
| 179 | if (GET_CODE (orig) == SYMBOL_REF | ||
| 180 | || GET_CODE (orig) == LABEL_REF) | ||
| 181 | { | ||
| 182 | - rtx pic_ref, address; | ||
| 183 | rtx insn; | ||
| 184 | |||
| 185 | if (reg == 0) | ||
| 186 | { | ||
| 187 | gcc_assert (can_create_pseudo_p ()); | ||
| 188 | reg = gen_reg_rtx (Pmode); | ||
| 189 | - address = gen_reg_rtx (Pmode); | ||
| 190 | } | ||
| 191 | - else | ||
| 192 | - address = reg; | ||
| 193 | |||
| 194 | /* VxWorks does not impose a fixed gap between segments; the run-time | ||
| 195 | gap can be different from the object-file gap. We therefore can't | ||
| 196 | @@ -5096,18 +5101,21 @@ | ||
| 197 | insn = arm_pic_static_addr (orig, reg); | ||
| 198 | else | ||
| 199 | { | ||
| 200 | + rtx pat; | ||
| 201 | + rtx mem; | ||
| 202 | + | ||
| 203 | /* If this function doesn't have a pic register, create one now. */ | ||
| 204 | require_pic_register (); | ||
| 205 | |||
| 206 | - if (TARGET_32BIT) | ||
| 207 | - emit_insn (gen_pic_load_addr_32bit (address, orig)); | ||
| 208 | - else /* TARGET_THUMB1 */ | ||
| 209 | - emit_insn (gen_pic_load_addr_thumb1 (address, orig)); | ||
| 210 | - | ||
| 211 | - pic_ref = gen_const_mem (Pmode, | ||
| 212 | - gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, | ||
| 213 | - address)); | ||
| 214 | - insn = emit_move_insn (reg, pic_ref); | ||
| 215 | + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); | ||
| 216 | + | ||
| 217 | + /* Make the MEM as close to a constant as possible. */ | ||
| 218 | + mem = SET_SRC (pat); | ||
| 219 | + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); | ||
| 220 | + MEM_READONLY_P (mem) = 1; | ||
| 221 | + MEM_NOTRAP_P (mem) = 1; | ||
| 222 | + | ||
| 223 | + insn = emit_insn (pat); | ||
| 224 | } | ||
| 225 | |||
| 226 | /* Put a REG_EQUAL note on this insn, so that it can be optimized | ||
| 227 | @@ -5387,6 +5395,15 @@ | ||
| 228 | return FALSE; | ||
| 229 | } | ||
| 230 | |||
| 231 | +/* Return true if X will surely end up in an index register after next | ||
| 232 | + splitting pass. */ | ||
| 233 | +static bool | ||
| 234 | +will_be_in_index_register (const_rtx x) | ||
| 235 | +{ | ||
| 236 | + /* arm.md: calculate_pic_address will split this into a register. */ | ||
| 237 | + return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; | ||
| 238 | +} | ||
| 239 | + | ||
| 240 | /* Return nonzero if X is a valid ARM state address operand. */ | ||
| 241 | int | ||
| 242 | arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, | ||
| 243 | @@ -5444,8 +5461,9 @@ | ||
| 244 | rtx xop1 = XEXP (x, 1); | ||
| 245 | |||
| 246 | return ((arm_address_register_rtx_p (xop0, strict_p) | ||
| 247 | - && GET_CODE(xop1) == CONST_INT | ||
| 248 | - && arm_legitimate_index_p (mode, xop1, outer, strict_p)) | ||
| 249 | + && ((GET_CODE(xop1) == CONST_INT | ||
| 250 | + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) | ||
| 251 | + || (!strict_p && will_be_in_index_register (xop1)))) | ||
| 252 | || (arm_address_register_rtx_p (xop1, strict_p) | ||
| 253 | && arm_legitimate_index_p (mode, xop0, outer, strict_p))); | ||
| 254 | } | ||
| 255 | @@ -5531,7 +5549,8 @@ | ||
| 256 | rtx xop1 = XEXP (x, 1); | ||
| 257 | |||
| 258 | return ((arm_address_register_rtx_p (xop0, strict_p) | ||
| 259 | - && thumb2_legitimate_index_p (mode, xop1, strict_p)) | ||
| 260 | + && (thumb2_legitimate_index_p (mode, xop1, strict_p) | ||
| 261 | + || (!strict_p && will_be_in_index_register (xop1)))) | ||
| 262 | || (arm_address_register_rtx_p (xop1, strict_p) | ||
| 263 | && thumb2_legitimate_index_p (mode, xop0, strict_p))); | ||
| 264 | } | ||
| 265 | @@ -5834,7 +5853,8 @@ | ||
| 266 | && XEXP (x, 0) != frame_pointer_rtx | ||
| 267 | && XEXP (x, 1) != frame_pointer_rtx | ||
| 268 | && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) | ||
| 269 | - && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) | ||
| 270 | + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) | ||
| 271 | + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) | ||
| 272 | return 1; | ||
| 273 | |||
| 274 | /* REG+const has 5-7 bit offset for non-SP registers. */ | ||
| 275 | @@ -6413,12 +6433,16 @@ | ||
| 276 | |||
| 277 | case CONST_INT: | ||
| 278 | if (outer == SET) | ||
| 279 | - { | ||
| 280 | - if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | ||
| 281 | - return 0; | ||
| 282 | - if (thumb_shiftable_const (INTVAL (x))) | ||
| 283 | - return COSTS_N_INSNS (2); | ||
| 284 | - return COSTS_N_INSNS (3); | ||
| 285 | + { | ||
| 286 | + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | ||
| 287 | + return COSTS_N_INSNS (1); | ||
| 288 | + /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ | ||
| 289 | + if (INTVAL (x) >= -255 && INTVAL (x) <= -1) | ||
| 290 | + return COSTS_N_INSNS (2); | ||
| 291 | + /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ | ||
| 292 | + if (thumb_shiftable_const (INTVAL (x))) | ||
| 293 | + return COSTS_N_INSNS (2); | ||
| 294 | + return COSTS_N_INSNS (3); | ||
| 295 | } | ||
| 296 | else if ((outer == PLUS || outer == COMPARE) | ||
| 297 | && INTVAL (x) < 256 && INTVAL (x) > -256) | ||
| 298 | @@ -7110,6 +7134,12 @@ | ||
| 299 | a single register, otherwise it costs one insn per word. */ | ||
| 300 | if (REG_P (XEXP (x, 0))) | ||
| 301 | *total = COSTS_N_INSNS (1); | ||
| 302 | + else if (flag_pic | ||
| 303 | + && GET_CODE (XEXP (x, 0)) == PLUS | ||
| 304 | + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) | ||
| 305 | + /* This will be split into two instructions. | ||
| 306 | + See arm.md:calculate_pic_address. */ | ||
| 307 | + *total = COSTS_N_INSNS (2); | ||
| 308 | else | ||
| 309 | *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); | ||
| 310 | return true; | ||
| 311 | |||
| 312 | === modified file 'gcc/config/arm/arm.md' | ||
| 313 | --- old/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000 | ||
| 314 | +++ new/gcc/config/arm/arm.md 2010-08-16 09:41:58 +0000 | ||
| 315 | @@ -5290,17 +5290,21 @@ | ||
| 316 | [(set (match_operand:SI 0 "register_operand" "") | ||
| 317 | (match_operand:SI 1 "const_int_operand" ""))] | ||
| 318 | "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" | ||
| 319 | - [(set (match_dup 0) (match_dup 1)) | ||
| 320 | - (set (match_dup 0) (neg:SI (match_dup 0)))] | ||
| 321 | - "operands[1] = GEN_INT (- INTVAL (operands[1]));" | ||
| 322 | + [(set (match_dup 2) (match_dup 1)) | ||
| 323 | + (set (match_dup 0) (neg:SI (match_dup 2)))] | ||
| 324 | + " | ||
| 325 | + { | ||
| 326 | + operands[1] = GEN_INT (- INTVAL (operands[1])); | ||
| 327 | + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; | ||
| 328 | + }" | ||
| 329 | ) | ||
| 330 | |||
| 331 | (define_split | ||
| 332 | [(set (match_operand:SI 0 "register_operand" "") | ||
| 333 | (match_operand:SI 1 "const_int_operand" ""))] | ||
| 334 | "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" | ||
| 335 | - [(set (match_dup 0) (match_dup 1)) | ||
| 336 | - (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] | ||
| 337 | + [(set (match_dup 2) (match_dup 1)) | ||
| 338 | + (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] | ||
| 339 | " | ||
| 340 | { | ||
| 341 | unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; | ||
| 342 | @@ -5311,12 +5315,13 @@ | ||
| 343 | if ((val & (mask << i)) == val) | ||
| 344 | break; | ||
| 345 | |||
| 346 | - /* Shouldn't happen, but we don't want to split if the shift is zero. */ | ||
| 347 | + /* Don't split if the shift is zero. */ | ||
| 348 | if (i == 0) | ||
| 349 | FAIL; | ||
| 350 | |||
| 351 | operands[1] = GEN_INT (val >> i); | ||
| 352 | - operands[2] = GEN_INT (i); | ||
| 353 | + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; | ||
| 354 | + operands[3] = GEN_INT (i); | ||
| 355 | }" | ||
| 356 | ) | ||
| 357 | |||
| 358 | @@ -5325,6 +5330,34 @@ | ||
| 359 | ;; we use an unspec. The offset will be loaded from a constant pool entry, | ||
| 360 | ;; since that is the only type of relocation we can use. | ||
| 361 | |||
| 362 | +;; Wrap calculation of the whole PIC address in a single pattern for the | ||
| 363 | +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of | ||
| 364 | +;; a PIC address involves two loads from memory, so we want to CSE it | ||
| 365 | +;; as often as possible. | ||
| 366 | +;; This pattern will be split into one of the pic_load_addr_* patterns | ||
| 367 | +;; and a move after GCSE optimizations. | ||
| 368 | +;; | ||
| 369 | +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. | ||
| 370 | +(define_expand "calculate_pic_address" | ||
| 371 | + [(set (match_operand:SI 0 "register_operand" "") | ||
| 372 | + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") | ||
| 373 | + (unspec:SI [(match_operand:SI 2 "" "")] | ||
| 374 | + UNSPEC_PIC_SYM))))] | ||
| 375 | + "flag_pic" | ||
| 376 | +) | ||
| 377 | + | ||
| 378 | +;; Split calculate_pic_address into pic_load_addr_* and a move. | ||
| 379 | +(define_split | ||
| 380 | + [(set (match_operand:SI 0 "register_operand" "") | ||
| 381 | + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") | ||
| 382 | + (unspec:SI [(match_operand:SI 2 "" "")] | ||
| 383 | + UNSPEC_PIC_SYM))))] | ||
| 384 | + "flag_pic" | ||
| 385 | + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) | ||
| 386 | + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] | ||
| 387 | + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" | ||
| 388 | +) | ||
| 389 | + | ||
| 390 | ;; The rather odd constraints on the following are to force reload to leave | ||
| 391 | ;; the insn alone, and to force the minipool generation pass to then move | ||
| 392 | ;; the GOT symbol to memory. | ||
| 393 | |||
| 394 | === modified file 'gcc/config/arm/t-arm' | ||
| 395 | --- old/gcc/config/arm/t-arm 2009-06-21 19:48:15 +0000 | ||
| 396 | +++ new/gcc/config/arm/t-arm 2010-08-16 09:41:58 +0000 | ||
| 397 | @@ -45,6 +45,15 @@ | ||
| 398 | $(srcdir)/config/arm/arm-cores.def > \ | ||
| 399 | $(srcdir)/config/arm/arm-tune.md | ||
| 400 | |||
| 401 | +arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ | ||
| 402 | + $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ | ||
| 403 | + insn-config.h conditions.h output.h \ | ||
| 404 | + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ | ||
| 405 | + $(EXPR_H) $(OPTABS_H) toplev.h $(RECOG_H) $(CGRAPH_H) \ | ||
| 406 | + $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \ | ||
| 407 | + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ | ||
| 408 | + intl.h libfuncs.h $(PARAMS_H) | ||
| 409 | + | ||
| 410 | arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ | ||
| 411 | coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) | ||
| 412 | $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ | ||
| 413 | |||
| 414 | === modified file 'gcc/dbgcnt.def' | ||
| 415 | --- old/gcc/dbgcnt.def 2009-11-25 10:55:54 +0000 | ||
| 416 | +++ new/gcc/dbgcnt.def 2010-08-16 09:41:58 +0000 | ||
| 417 | @@ -158,6 +158,7 @@ | ||
| 418 | DEBUG_COUNTER (global_alloc_at_func) | ||
| 419 | DEBUG_COUNTER (global_alloc_at_reg) | ||
| 420 | DEBUG_COUNTER (hoist) | ||
| 421 | +DEBUG_COUNTER (hoist_insn) | ||
| 422 | DEBUG_COUNTER (ia64_sched2) | ||
| 423 | DEBUG_COUNTER (if_conversion) | ||
| 424 | DEBUG_COUNTER (if_after_combine) | ||
| 425 | |||
| 426 | === modified file 'gcc/doc/invoke.texi' | ||
| 427 | --- old/gcc/doc/invoke.texi 2010-08-05 15:20:54 +0000 | ||
| 428 | +++ new/gcc/doc/invoke.texi 2010-08-16 09:41:58 +0000 | ||
| 429 | @@ -8086,6 +8086,29 @@ | ||
| 430 | vectorization needs to be greater than the value specified by this option | ||
| 431 | to allow vectorization. The default value is 0. | ||
| 432 | |||
| 433 | +@item gcse-cost-distance-ratio | ||
| 434 | +Scaling factor in calculation of maximum distance an expression | ||
| 435 | +can be moved by GCSE optimizations. This is currently supported only in | ||
| 436 | +code hoisting pass. The bigger the ratio, the more aggressive code hoisting | ||
| 437 | +will be with simple expressions, i.e., the expressions which have cost | ||
| 438 | +less than @option{gcse-unrestricted-cost}. Specifying 0 will disable | ||
| 439 | +hoisting of simple expressions. The default value is 10. | ||
| 440 | + | ||
| 441 | +@item gcse-unrestricted-cost | ||
| 442 | +Cost, roughly measured as the cost of a single typical machine | ||
| 443 | +instruction, at which GCSE optimizations will not constrain | ||
| 444 | +the distance an expression can travel. This is currently | ||
| 445 | +supported only in code hoisting pass. The lesser the cost, | ||
| 446 | +the more aggressive code hoisting will be. Specifying 0 will | ||
| 447 | +allow all expressions to travel unrestricted distances. | ||
| 448 | +The default value is 3. | ||
| 449 | + | ||
| 450 | +@item max-hoist-depth | ||
| 451 | +The depth of search in the dominator tree for expressions to hoist. | ||
| 452 | +This is used to avoid quadratic behavior in hoisting algorithm. | ||
| 453 | +The value of 0 will avoid limiting the search, but may slow down compilation | ||
| 454 | +of huge functions. The default value is 30. | ||
| 455 | + | ||
| 456 | @item max-unrolled-insns | ||
| 457 | The maximum number of instructions that a loop should have if that loop | ||
| 458 | is unrolled, and if the loop is unrolled, it determines how many times | ||
| 459 | |||
| 460 | === modified file 'gcc/dominance.c' | ||
| 461 | --- old/gcc/dominance.c 2010-04-02 18:54:46 +0000 | ||
| 462 | +++ new/gcc/dominance.c 2010-08-16 09:41:58 +0000 | ||
| 463 | @@ -782,16 +782,20 @@ | ||
| 464 | } | ||
| 465 | |||
| 466 | /* Returns the list of basic blocks including BB dominated by BB, in the | ||
| 467 | - direction DIR. The vector will be sorted in preorder. */ | ||
| 468 | + direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will | ||
| 469 | + produce a vector containing all dominated blocks. The vector will be sorted | ||
| 470 | + in preorder. */ | ||
| 471 | |||
| 472 | VEC (basic_block, heap) * | ||
| 473 | -get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) | ||
| 474 | +get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth) | ||
| 475 | { | ||
| 476 | VEC(basic_block, heap) *bbs = NULL; | ||
| 477 | unsigned i; | ||
| 478 | + unsigned next_level_start; | ||
| 479 | |||
| 480 | i = 0; | ||
| 481 | VEC_safe_push (basic_block, heap, bbs, bb); | ||
| 482 | + next_level_start = 1; /* = VEC_length (basic_block, bbs); */ | ||
| 483 | |||
| 484 | do | ||
| 485 | { | ||
| 486 | @@ -802,12 +806,24 @@ | ||
| 487 | son; | ||
| 488 | son = next_dom_son (dir, son)) | ||
| 489 | VEC_safe_push (basic_block, heap, bbs, son); | ||
| 490 | + | ||
| 491 | + if (i == next_level_start && --depth) | ||
| 492 | + next_level_start = VEC_length (basic_block, bbs); | ||
| 493 | } | ||
| 494 | - while (i < VEC_length (basic_block, bbs)); | ||
| 495 | + while (i < next_level_start); | ||
| 496 | |||
| 497 | return bbs; | ||
| 498 | } | ||
| 499 | |||
| 500 | +/* Returns the list of basic blocks including BB dominated by BB, in the | ||
| 501 | + direction DIR. The vector will be sorted in preorder. */ | ||
| 502 | + | ||
| 503 | +VEC (basic_block, heap) * | ||
| 504 | +get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) | ||
| 505 | +{ | ||
| 506 | + return get_dominated_to_depth (dir, bb, 0); | ||
| 507 | +} | ||
| 508 | + | ||
| 509 | /* Redirect all edges pointing to BB to TO. */ | ||
| 510 | void | ||
| 511 | redirect_immediate_dominators (enum cdi_direction dir, basic_block bb, | ||
| 512 | |||
| 513 | === modified file 'gcc/gcse.c' | ||
| 514 | --- old/gcc/gcse.c 2010-03-16 10:50:42 +0000 | ||
| 515 | +++ new/gcc/gcse.c 2010-08-16 09:41:58 +0000 | ||
| 516 | @@ -296,6 +296,12 @@ | ||
| 517 | The value is the newly created pseudo-reg to record a copy of the | ||
| 518 | expression in all the places that reach the redundant copy. */ | ||
| 519 | rtx reaching_reg; | ||
| 520 | + /* Maximum distance in instructions this expression can travel. | ||
| 521 | + We avoid moving simple expressions for more than a few instructions | ||
| 522 | + to keep register pressure under control. | ||
| 523 | + A value of "0" removes restrictions on how far the expression can | ||
| 524 | + travel. */ | ||
| 525 | + int max_distance; | ||
| 526 | }; | ||
| 527 | |||
| 528 | /* Occurrence of an expression. | ||
| 529 | @@ -317,6 +323,10 @@ | ||
| 530 | char copied_p; | ||
| 531 | }; | ||
| 532 | |||
| 533 | +typedef struct occr *occr_t; | ||
| 534 | +DEF_VEC_P (occr_t); | ||
| 535 | +DEF_VEC_ALLOC_P (occr_t, heap); | ||
| 536 | + | ||
| 537 | /* Expression and copy propagation hash tables. | ||
| 538 | Each hash table is an array of buckets. | ||
| 539 | ??? It is known that if it were an array of entries, structure elements | ||
| 540 | @@ -419,6 +429,9 @@ | ||
| 541 | /* Number of global copies propagated. */ | ||
| 542 | static int global_copy_prop_count; | ||
| 543 | |||
| 544 | +/* Doing code hoisting. */ | ||
| 545 | +static bool doing_code_hoisting_p = false; | ||
| 546 | + | ||
| 547 | /* For available exprs */ | ||
| 548 | static sbitmap *ae_kill; | ||
| 549 | |||
| 550 | @@ -432,12 +445,12 @@ | ||
| 551 | static void hash_scan_set (rtx, rtx, struct hash_table_d *); | ||
| 552 | static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); | ||
| 553 | static void hash_scan_call (rtx, rtx, struct hash_table_d *); | ||
| 554 | -static int want_to_gcse_p (rtx); | ||
| 555 | +static int want_to_gcse_p (rtx, int *); | ||
| 556 | static bool gcse_constant_p (const_rtx); | ||
| 557 | static int oprs_unchanged_p (const_rtx, const_rtx, int); | ||
| 558 | static int oprs_anticipatable_p (const_rtx, const_rtx); | ||
| 559 | static int oprs_available_p (const_rtx, const_rtx); | ||
| 560 | -static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, | ||
| 561 | +static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, | ||
| 562 | struct hash_table_d *); | ||
| 563 | static void insert_set_in_table (rtx, rtx, struct hash_table_d *); | ||
| 564 | static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); | ||
| 565 | @@ -462,7 +475,6 @@ | ||
| 566 | static void alloc_cprop_mem (int, int); | ||
| 567 | static void free_cprop_mem (void); | ||
| 568 | static void compute_transp (const_rtx, int, sbitmap *, int); | ||
| 569 | -static void compute_transpout (void); | ||
| 570 | static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, | ||
| 571 | struct hash_table_d *); | ||
| 572 | static void compute_cprop_data (void); | ||
| 573 | @@ -486,7 +498,7 @@ | ||
| 574 | static void compute_pre_data (void); | ||
| 575 | static int pre_expr_reaches_here_p (basic_block, struct expr *, | ||
| 576 | basic_block); | ||
| 577 | -static void insert_insn_end_basic_block (struct expr *, basic_block, int); | ||
| 578 | +static void insert_insn_end_basic_block (struct expr *, basic_block); | ||
| 579 | static void pre_insert_copy_insn (struct expr *, rtx); | ||
| 580 | static void pre_insert_copies (void); | ||
| 581 | static int pre_delete (void); | ||
| 582 | @@ -497,7 +509,8 @@ | ||
| 583 | static void free_code_hoist_mem (void); | ||
| 584 | static void compute_code_hoist_vbeinout (void); | ||
| 585 | static void compute_code_hoist_data (void); | ||
| 586 | -static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); | ||
| 587 | +static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, | ||
| 588 | + int, int *); | ||
| 589 | static int hoist_code (void); | ||
| 590 | static int one_code_hoisting_pass (void); | ||
| 591 | static rtx process_insert_insn (struct expr *); | ||
| 592 | @@ -755,7 +768,7 @@ | ||
| 593 | GCSE. */ | ||
| 594 | |||
| 595 | static int | ||
| 596 | -want_to_gcse_p (rtx x) | ||
| 597 | +want_to_gcse_p (rtx x, int *max_distance_ptr) | ||
| 598 | { | ||
| 599 | #ifdef STACK_REGS | ||
| 600 | /* On register stack architectures, don't GCSE constants from the | ||
| 601 | @@ -765,18 +778,67 @@ | ||
| 602 | x = avoid_constant_pool_reference (x); | ||
| 603 | #endif | ||
| 604 | |||
| 605 | + /* GCSE'ing constants: | ||
| 606 | + | ||
| 607 | + We do not specifically distinguish between constant and non-constant | ||
| 608 | + expressions in PRE and Hoist. We use rtx_cost below to limit | ||
| 609 | + the maximum distance simple expressions can travel. | ||
| 610 | + | ||
| 611 | + Nevertheless, constants are much easier to GCSE, and, hence, | ||
| 612 | + it is easy to overdo the optimizations. Usually, excessive PRE and | ||
| 613 | + Hoisting of constant leads to increased register pressure. | ||
| 614 | + | ||
| 615 | + RA can deal with this by rematerializing some of the constants. | ||
| 616 | + Therefore, it is important that the back-end generates sets of constants | ||
| 617 | + in a way that allows reload to rematerialize them under high register | ||
| 618 | + pressure, i.e., a pseudo register with REG_EQUAL to constant | ||
| 619 | + is set only once. Failing to do so will result in IRA/reload | ||
| 620 | + spilling such constants under high register pressure instead of | ||
| 621 | + rematerializing them. */ | ||
| 622 | + | ||
| 623 | switch (GET_CODE (x)) | ||
| 624 | { | ||
| 625 | case REG: | ||
| 626 | case SUBREG: | ||
| 627 | - case CONST_INT: | ||
| 628 | - case CONST_DOUBLE: | ||
| 629 | - case CONST_FIXED: | ||
| 630 | - case CONST_VECTOR: | ||
| 631 | case CALL: | ||
| 632 | return 0; | ||
| 633 | |||
| 634 | + case CONST_INT: | ||
| 635 | + case CONST_DOUBLE: | ||
| 636 | + case CONST_FIXED: | ||
| 637 | + case CONST_VECTOR: | ||
| 638 | + if (!doing_code_hoisting_p) | ||
| 639 | + /* Do not PRE constants. */ | ||
| 640 | + return 0; | ||
| 641 | + | ||
| 642 | + /* FALLTHRU */ | ||
| 643 | + | ||
| 644 | default: | ||
| 645 | + if (doing_code_hoisting_p) | ||
| 646 | + /* PRE doesn't implement max_distance restriction. */ | ||
| 647 | + { | ||
| 648 | + int cost; | ||
| 649 | + int max_distance; | ||
| 650 | + | ||
| 651 | + gcc_assert (!optimize_function_for_speed_p (cfun) | ||
| 652 | + && optimize_function_for_size_p (cfun)); | ||
| 653 | + cost = rtx_cost (x, SET, 0); | ||
| 654 | + | ||
| 655 | + if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) | ||
| 656 | + { | ||
| 657 | + max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; | ||
| 658 | + if (max_distance == 0) | ||
| 659 | + return 0; | ||
| 660 | + | ||
| 661 | + gcc_assert (max_distance > 0); | ||
| 662 | + } | ||
| 663 | + else | ||
| 664 | + max_distance = 0; | ||
| 665 | + | ||
| 666 | + if (max_distance_ptr) | ||
| 667 | + *max_distance_ptr = max_distance; | ||
| 668 | + } | ||
| 669 | + | ||
| 670 | return can_assign_to_reg_without_clobbers_p (x); | ||
| 671 | } | ||
| 672 | } | ||
| 673 | @@ -1090,11 +1152,14 @@ | ||
| 674 | It is only used if X is a CONST_INT. | ||
| 675 | |||
| 676 | ANTIC_P is nonzero if X is an anticipatable expression. | ||
| 677 | - AVAIL_P is nonzero if X is an available expression. */ | ||
| 678 | + AVAIL_P is nonzero if X is an available expression. | ||
| 679 | + | ||
| 680 | + MAX_DISTANCE is the maximum distance in instructions this expression can | ||
| 681 | + be moved. */ | ||
| 682 | |||
| 683 | static void | ||
| 684 | insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, | ||
| 685 | - int avail_p, struct hash_table_d *table) | ||
| 686 | + int avail_p, int max_distance, struct hash_table_d *table) | ||
| 687 | { | ||
| 688 | int found, do_not_record_p; | ||
| 689 | unsigned int hash; | ||
| 690 | @@ -1137,7 +1202,11 @@ | ||
| 691 | cur_expr->next_same_hash = NULL; | ||
| 692 | cur_expr->antic_occr = NULL; | ||
| 693 | cur_expr->avail_occr = NULL; | ||
| 694 | + gcc_assert (max_distance >= 0); | ||
| 695 | + cur_expr->max_distance = max_distance; | ||
| 696 | } | ||
| 697 | + else | ||
| 698 | + gcc_assert (cur_expr->max_distance == max_distance); | ||
| 699 | |||
| 700 | /* Now record the occurrence(s). */ | ||
| 701 | if (antic_p) | ||
| 702 | @@ -1238,6 +1307,8 @@ | ||
| 703 | cur_expr->next_same_hash = NULL; | ||
| 704 | cur_expr->antic_occr = NULL; | ||
| 705 | cur_expr->avail_occr = NULL; | ||
| 706 | + /* Not used for set_p tables. */ | ||
| 707 | + cur_expr->max_distance = 0; | ||
| 708 | } | ||
| 709 | |||
| 710 | /* Now record the occurrence. */ | ||
| 711 | @@ -1307,6 +1378,7 @@ | ||
| 712 | { | ||
| 713 | unsigned int regno = REGNO (dest); | ||
| 714 | rtx tmp; | ||
| 715 | + int max_distance = 0; | ||
| 716 | |||
| 717 | /* See if a REG_EQUAL note shows this equivalent to a simpler expression. | ||
| 718 | |||
| 719 | @@ -1329,7 +1401,7 @@ | ||
| 720 | && !REG_P (src) | ||
| 721 | && (table->set_p | ||
| 722 | ? gcse_constant_p (XEXP (note, 0)) | ||
| 723 | - : want_to_gcse_p (XEXP (note, 0)))) | ||
| 724 | + : want_to_gcse_p (XEXP (note, 0), NULL))) | ||
| 725 | src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); | ||
| 726 | |||
| 727 | /* Only record sets of pseudo-regs in the hash table. */ | ||
| 728 | @@ -1344,7 +1416,7 @@ | ||
| 729 | can't do the same thing at the rtl level. */ | ||
| 730 | && !can_throw_internal (insn) | ||
| 731 | /* Is SET_SRC something we want to gcse? */ | ||
| 732 | - && want_to_gcse_p (src) | ||
| 733 | + && want_to_gcse_p (src, &max_distance) | ||
| 734 | /* Don't CSE a nop. */ | ||
| 735 | && ! set_noop_p (pat) | ||
| 736 | /* Don't GCSE if it has attached REG_EQUIV note. | ||
| 737 | @@ -1368,7 +1440,8 @@ | ||
| 738 | int avail_p = (oprs_available_p (src, insn) | ||
| 739 | && ! JUMP_P (insn)); | ||
| 740 | |||
| 741 | - insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); | ||
| 742 | + insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, | ||
| 743 | + max_distance, table); | ||
| 744 | } | ||
| 745 | |||
| 746 | /* Record sets for constant/copy propagation. */ | ||
| 747 | @@ -1394,6 +1467,7 @@ | ||
| 748 | else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) | ||
| 749 | { | ||
| 750 | unsigned int regno = REGNO (src); | ||
| 751 | + int max_distance = 0; | ||
| 752 | |||
| 753 | /* Do not do this for constant/copy propagation. */ | ||
| 754 | if (! table->set_p | ||
| 755 | @@ -1405,7 +1479,7 @@ | ||
| 756 | do that easily for EH edges so disable GCSE on these for now. */ | ||
| 757 | && !can_throw_internal (insn) | ||
| 758 | /* Is SET_DEST something we want to gcse? */ | ||
| 759 | - && want_to_gcse_p (dest) | ||
| 760 | + && want_to_gcse_p (dest, &max_distance) | ||
| 761 | /* Don't CSE a nop. */ | ||
| 762 | && ! set_noop_p (pat) | ||
| 763 | /* Don't GCSE if it has attached REG_EQUIV note. | ||
| 764 | @@ -1427,7 +1501,7 @@ | ||
| 765 | |||
| 766 | /* Record the memory expression (DEST) in the hash table. */ | ||
| 767 | insert_expr_in_table (dest, GET_MODE (dest), insn, | ||
| 768 | - antic_p, avail_p, table); | ||
| 769 | + antic_p, avail_p, max_distance, table); | ||
| 770 | } | ||
| 771 | } | ||
| 772 | } | ||
| 773 | @@ -1513,8 +1587,8 @@ | ||
| 774 | if (flat_table[i] != 0) | ||
| 775 | { | ||
| 776 | expr = flat_table[i]; | ||
| 777 | - fprintf (file, "Index %d (hash value %d)\n ", | ||
| 778 | - expr->bitmap_index, hash_val[i]); | ||
| 779 | + fprintf (file, "Index %d (hash value %d; max distance %d)\n ", | ||
| 780 | + expr->bitmap_index, hash_val[i], expr->max_distance); | ||
| 781 | print_rtl (file, expr->expr); | ||
| 782 | fprintf (file, "\n"); | ||
| 783 | } | ||
| 784 | @@ -3168,11 +3242,6 @@ | ||
| 785 | /* Nonzero for expressions that are transparent in the block. */ | ||
| 786 | static sbitmap *transp; | ||
| 787 | |||
| 788 | -/* Nonzero for expressions that are transparent at the end of the block. | ||
| 789 | - This is only zero for expressions killed by abnormal critical edge | ||
| 790 | - created by a calls. */ | ||
| 791 | -static sbitmap *transpout; | ||
| 792 | - | ||
| 793 | /* Nonzero for expressions that are computed (available) in the block. */ | ||
| 794 | static sbitmap *comp; | ||
| 795 | |||
| 796 | @@ -3236,28 +3305,105 @@ | ||
| 797 | pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; | ||
| 798 | } | ||
| 799 | |||
| 800 | -/* Top level routine to do the dataflow analysis needed by PRE. */ | ||
| 801 | +/* Remove certain expressions from anticipatable and transparent | ||
| 802 | + sets of basic blocks that have incoming abnormal edge. | ||
| 803 | + For PRE remove potentially trapping expressions to avoid placing | ||
| 804 | + them on abnormal edges. For hoisting remove memory references that | ||
| 805 | + can be clobbered by calls. */ | ||
| 806 | |||
| 807 | static void | ||
| 808 | -compute_pre_data (void) | ||
| 809 | +prune_expressions (bool pre_p) | ||
| 810 | { | ||
| 811 | - sbitmap trapping_expr; | ||
| 812 | - basic_block bb; | ||
| 813 | + sbitmap prune_exprs; | ||
| 814 | unsigned int ui; | ||
| 815 | - | ||
| 816 | - compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
| 817 | - sbitmap_vector_zero (ae_kill, last_basic_block); | ||
| 818 | - | ||
| 819 | - /* Collect expressions which might trap. */ | ||
| 820 | - trapping_expr = sbitmap_alloc (expr_hash_table.n_elems); | ||
| 821 | - sbitmap_zero (trapping_expr); | ||
| 822 | + basic_block bb; | ||
| 823 | + | ||
| 824 | + prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); | ||
| 825 | + sbitmap_zero (prune_exprs); | ||
| 826 | for (ui = 0; ui < expr_hash_table.size; ui++) | ||
| 827 | { | ||
| 828 | struct expr *e; | ||
| 829 | for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) | ||
| 830 | - if (may_trap_p (e->expr)) | ||
| 831 | - SET_BIT (trapping_expr, e->bitmap_index); | ||
| 832 | - } | ||
| 833 | + { | ||
| 834 | + /* Note potentially trapping expressions. */ | ||
| 835 | + if (may_trap_p (e->expr)) | ||
| 836 | + { | ||
| 837 | + SET_BIT (prune_exprs, e->bitmap_index); | ||
| 838 | + continue; | ||
| 839 | + } | ||
| 840 | + | ||
| 841 | + if (!pre_p && MEM_P (e->expr)) | ||
| 842 | + /* Note memory references that can be clobbered by a call. | ||
| 843 | + We do not split abnormal edges in hoisting, so if | ||
| 844 | + a memory reference were hoisted along an abnormal edge, | ||
| 845 | + it would be placed /before/ the call. Therefore, only | ||
| 846 | + constant memory references can be hoisted along abnormal | ||
| 847 | + edges. */ | ||
| 848 | + { | ||
| 849 | + if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF | ||
| 850 | + && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0))) | ||
| 851 | + continue; | ||
| 852 | + | ||
| 853 | + if (MEM_READONLY_P (e->expr) | ||
| 854 | + && !MEM_VOLATILE_P (e->expr) | ||
| 855 | + && MEM_NOTRAP_P (e->expr)) | ||
| 856 | + /* Constant memory reference, e.g., a PIC address. */ | ||
| 857 | + continue; | ||
| 858 | + | ||
| 859 | + /* ??? Optimally, we would use interprocedural alias | ||
| 860 | + analysis to determine if this mem is actually killed | ||
| 861 | + by this call. */ | ||
| 862 | + | ||
| 863 | + SET_BIT (prune_exprs, e->bitmap_index); | ||
| 864 | + } | ||
| 865 | + } | ||
| 866 | + } | ||
| 867 | + | ||
| 868 | + FOR_EACH_BB (bb) | ||
| 869 | + { | ||
| 870 | + edge e; | ||
| 871 | + edge_iterator ei; | ||
| 872 | + | ||
| 873 | + /* If the current block is the destination of an abnormal edge, we | ||
| 874 | + kill all trapping (for PRE) and memory (for hoist) expressions | ||
| 875 | + because we won't be able to properly place the instruction on | ||
| 876 | + the edge. So make them neither anticipatable nor transparent. | ||
| 877 | + This is fairly conservative. | ||
| 878 | + | ||
| 879 | + ??? For hoisting it may be necessary to check for set-and-jump | ||
| 880 | + instructions here, not just for abnormal edges. The general problem | ||
| 881 | + is that when an expression cannot be placed right at the end of | ||
| 882 | + a basic block we should account for any side-effects of a subsequent | ||
| 883 | + jump instruction that could clobber the expression. It would | ||
| 884 | + be best to implement this check along the lines of | ||
| 885 | + hoist_expr_reaches_here_p where the target block is already known | ||
| 886 | + and, hence, there's no need to conservatively prune expressions on | ||
| 887 | + "intermediate" set-and-jump instructions. */ | ||
| 888 | + FOR_EACH_EDGE (e, ei, bb->preds) | ||
| 889 | + if ((e->flags & EDGE_ABNORMAL) | ||
| 890 | + && (pre_p || CALL_P (BB_END (e->src)))) | ||
| 891 | + { | ||
| 892 | + sbitmap_difference (antloc[bb->index], | ||
| 893 | + antloc[bb->index], prune_exprs); | ||
| 894 | + sbitmap_difference (transp[bb->index], | ||
| 895 | + transp[bb->index], prune_exprs); | ||
| 896 | + break; | ||
| 897 | + } | ||
| 898 | + } | ||
| 899 | + | ||
| 900 | + sbitmap_free (prune_exprs); | ||
| 901 | +} | ||
| 902 | + | ||
| 903 | +/* Top level routine to do the dataflow analysis needed by PRE. */ | ||
| 904 | + | ||
| 905 | +static void | ||
| 906 | +compute_pre_data (void) | ||
| 907 | +{ | ||
| 908 | + basic_block bb; | ||
| 909 | + | ||
| 910 | + compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
| 911 | + prune_expressions (true); | ||
| 912 | + sbitmap_vector_zero (ae_kill, last_basic_block); | ||
| 913 | |||
| 914 | /* Compute ae_kill for each basic block using: | ||
| 915 | |||
| 916 | @@ -3266,21 +3412,6 @@ | ||
| 917 | |||
| 918 | FOR_EACH_BB (bb) | ||
| 919 | { | ||
| 920 | - edge e; | ||
| 921 | - edge_iterator ei; | ||
| 922 | - | ||
| 923 | - /* If the current block is the destination of an abnormal edge, we | ||
| 924 | - kill all trapping expressions because we won't be able to properly | ||
| 925 | - place the instruction on the edge. So make them neither | ||
| 926 | - anticipatable nor transparent. This is fairly conservative. */ | ||
| 927 | - FOR_EACH_EDGE (e, ei, bb->preds) | ||
| 928 | - if (e->flags & EDGE_ABNORMAL) | ||
| 929 | - { | ||
| 930 | - sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr); | ||
| 931 | - sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr); | ||
| 932 | - break; | ||
| 933 | - } | ||
| 934 | - | ||
| 935 | sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); | ||
| 936 | sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); | ||
| 937 | } | ||
| 938 | @@ -3291,7 +3422,6 @@ | ||
| 939 | antloc = NULL; | ||
| 940 | sbitmap_vector_free (ae_kill); | ||
| 941 | ae_kill = NULL; | ||
| 942 | - sbitmap_free (trapping_expr); | ||
| 943 | } | ||
| 944 | |||
| 945 | /* PRE utilities */ | ||
| 946 | @@ -3406,14 +3536,10 @@ | ||
| 947 | |||
| 948 | /* Add EXPR to the end of basic block BB. | ||
| 949 | |||
| 950 | - This is used by both the PRE and code hoisting. | ||
| 951 | - | ||
| 952 | - For PRE, we want to verify that the expr is either transparent | ||
| 953 | - or locally anticipatable in the target block. This check makes | ||
| 954 | - no sense for code hoisting. */ | ||
| 955 | + This is used by both the PRE and code hoisting. */ | ||
| 956 | |||
| 957 | static void | ||
| 958 | -insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) | ||
| 959 | +insert_insn_end_basic_block (struct expr *expr, basic_block bb) | ||
| 960 | { | ||
| 961 | rtx insn = BB_END (bb); | ||
| 962 | rtx new_insn; | ||
| 963 | @@ -3440,12 +3566,6 @@ | ||
| 964 | #ifdef HAVE_cc0 | ||
| 965 | rtx note; | ||
| 966 | #endif | ||
| 967 | - /* It should always be the case that we can put these instructions | ||
| 968 | - anywhere in the basic block with performing PRE optimizations. | ||
| 969 | - Check this. */ | ||
| 970 | - gcc_assert (!NONJUMP_INSN_P (insn) || !pre | ||
| 971 | - || TEST_BIT (antloc[bb->index], expr->bitmap_index) | ||
| 972 | - || TEST_BIT (transp[bb->index], expr->bitmap_index)); | ||
| 973 | |||
| 974 | /* If this is a jump table, then we can't insert stuff here. Since | ||
| 975 | we know the previous real insn must be the tablejump, we insert | ||
| 976 | @@ -3482,15 +3602,7 @@ | ||
| 977 | /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, | ||
| 978 | we search backward and place the instructions before the first | ||
| 979 | parameter is loaded. Do this for everyone for consistency and a | ||
| 980 | - presumption that we'll get better code elsewhere as well. | ||
| 981 | - | ||
| 982 | - It should always be the case that we can put these instructions | ||
| 983 | - anywhere in the basic block with performing PRE optimizations. | ||
| 984 | - Check this. */ | ||
| 985 | - | ||
| 986 | - gcc_assert (!pre | ||
| 987 | - || TEST_BIT (antloc[bb->index], expr->bitmap_index) | ||
| 988 | - || TEST_BIT (transp[bb->index], expr->bitmap_index)); | ||
| 989 | + presumption that we'll get better code elsewhere as well. */ | ||
| 990 | |||
| 991 | /* Since different machines initialize their parameter registers | ||
| 992 | in different orders, assume nothing. Collect the set of all | ||
| 993 | @@ -3587,7 +3699,7 @@ | ||
| 994 | now. */ | ||
| 995 | |||
| 996 | if (eg->flags & EDGE_ABNORMAL) | ||
| 997 | - insert_insn_end_basic_block (index_map[j], bb, 0); | ||
| 998 | + insert_insn_end_basic_block (index_map[j], bb); | ||
| 999 | else | ||
| 1000 | { | ||
| 1001 | insn = process_insert_insn (index_map[j]); | ||
| 1002 | @@ -4046,61 +4158,12 @@ | ||
| 1003 | } | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | -/* Compute transparent outgoing information for each block. | ||
| 1007 | - | ||
| 1008 | - An expression is transparent to an edge unless it is killed by | ||
| 1009 | - the edge itself. This can only happen with abnormal control flow, | ||
| 1010 | - when the edge is traversed through a call. This happens with | ||
| 1011 | - non-local labels and exceptions. | ||
| 1012 | - | ||
| 1013 | - This would not be necessary if we split the edge. While this is | ||
| 1014 | - normally impossible for abnormal critical edges, with some effort | ||
| 1015 | - it should be possible with exception handling, since we still have | ||
| 1016 | - control over which handler should be invoked. But due to increased | ||
| 1017 | - EH table sizes, this may not be worthwhile. */ | ||
| 1018 | - | ||
| 1019 | -static void | ||
| 1020 | -compute_transpout (void) | ||
| 1021 | -{ | ||
| 1022 | - basic_block bb; | ||
| 1023 | - unsigned int i; | ||
| 1024 | - struct expr *expr; | ||
| 1025 | - | ||
| 1026 | - sbitmap_vector_ones (transpout, last_basic_block); | ||
| 1027 | - | ||
| 1028 | - FOR_EACH_BB (bb) | ||
| 1029 | - { | ||
| 1030 | - /* Note that flow inserted a nop at the end of basic blocks that | ||
| 1031 | - end in call instructions for reasons other than abnormal | ||
| 1032 | - control flow. */ | ||
| 1033 | - if (! CALL_P (BB_END (bb))) | ||
| 1034 | - continue; | ||
| 1035 | - | ||
| 1036 | - for (i = 0; i < expr_hash_table.size; i++) | ||
| 1037 | - for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash) | ||
| 1038 | - if (MEM_P (expr->expr)) | ||
| 1039 | - { | ||
| 1040 | - if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF | ||
| 1041 | - && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) | ||
| 1042 | - continue; | ||
| 1043 | - | ||
| 1044 | - /* ??? Optimally, we would use interprocedural alias | ||
| 1045 | - analysis to determine if this mem is actually killed | ||
| 1046 | - by this call. */ | ||
| 1047 | - RESET_BIT (transpout[bb->index], expr->bitmap_index); | ||
| 1048 | - } | ||
| 1049 | - } | ||
| 1050 | -} | ||
| 1051 | - | ||
| 1052 | /* Code Hoisting variables and subroutines. */ | ||
| 1053 | |||
| 1054 | /* Very busy expressions. */ | ||
| 1055 | static sbitmap *hoist_vbein; | ||
| 1056 | static sbitmap *hoist_vbeout; | ||
| 1057 | |||
| 1058 | -/* Hoistable expressions. */ | ||
| 1059 | -static sbitmap *hoist_exprs; | ||
| 1060 | - | ||
| 1061 | /* ??? We could compute post dominators and run this algorithm in | ||
| 1062 | reverse to perform tail merging, doing so would probably be | ||
| 1063 | more effective than the tail merging code in jump.c. | ||
| 1064 | @@ -4119,8 +4182,6 @@ | ||
| 1065 | |||
| 1066 | hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
| 1067 | hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
| 1068 | - hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
| 1069 | - transpout = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | /* Free vars used for code hoisting analysis. */ | ||
| 1073 | @@ -4134,8 +4195,6 @@ | ||
| 1074 | |||
| 1075 | sbitmap_vector_free (hoist_vbein); | ||
| 1076 | sbitmap_vector_free (hoist_vbeout); | ||
| 1077 | - sbitmap_vector_free (hoist_exprs); | ||
| 1078 | - sbitmap_vector_free (transpout); | ||
| 1079 | |||
| 1080 | free_dominance_info (CDI_DOMINATORS); | ||
| 1081 | } | ||
| 1082 | @@ -4166,8 +4225,15 @@ | ||
| 1083 | FOR_EACH_BB_REVERSE (bb) | ||
| 1084 | { | ||
| 1085 | if (bb->next_bb != EXIT_BLOCK_PTR) | ||
| 1086 | - sbitmap_intersection_of_succs (hoist_vbeout[bb->index], | ||
| 1087 | - hoist_vbein, bb->index); | ||
| 1088 | + { | ||
| 1089 | + sbitmap_intersection_of_succs (hoist_vbeout[bb->index], | ||
| 1090 | + hoist_vbein, bb->index); | ||
| 1091 | + | ||
| 1092 | + /* Include expressions in VBEout that are calculated | ||
| 1093 | + in BB and available at its end. */ | ||
| 1094 | + sbitmap_a_or_b (hoist_vbeout[bb->index], | ||
| 1095 | + hoist_vbeout[bb->index], comp[bb->index]); | ||
| 1096 | + } | ||
| 1097 | |||
| 1098 | changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], | ||
| 1099 | antloc[bb->index], | ||
| 1100 | @@ -4179,7 +4245,17 @@ | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | if (dump_file) | ||
| 1104 | - fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | ||
| 1105 | + { | ||
| 1106 | + fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | ||
| 1107 | + | ||
| 1108 | + FOR_EACH_BB (bb) | ||
| 1109 | + { | ||
| 1110 | + fprintf (dump_file, "vbein (%d): ", bb->index); | ||
| 1111 | + dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); | ||
| 1112 | + fprintf (dump_file, "vbeout(%d): ", bb->index); | ||
| 1113 | + dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); | ||
| 1114 | + } | ||
| 1115 | + } | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | /* Top level routine to do the dataflow analysis needed by code hoisting. */ | ||
| 1119 | @@ -4188,7 +4264,7 @@ | ||
| 1120 | compute_code_hoist_data (void) | ||
| 1121 | { | ||
| 1122 | compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
| 1123 | - compute_transpout (); | ||
| 1124 | + prune_expressions (false); | ||
| 1125 | compute_code_hoist_vbeinout (); | ||
| 1126 | calculate_dominance_info (CDI_DOMINATORS); | ||
| 1127 | if (dump_file) | ||
| 1128 | @@ -4197,6 +4273,8 @@ | ||
| 1129 | |||
| 1130 | /* Determine if the expression identified by EXPR_INDEX would | ||
| 1131 | reach BB unimpared if it was placed at the end of EXPR_BB. | ||
| 1132 | + Stop the search if the expression would need to be moved more | ||
| 1133 | + than DISTANCE instructions. | ||
| 1134 | |||
| 1135 | It's unclear exactly what Muchnick meant by "unimpared". It seems | ||
| 1136 | to me that the expression must either be computed or transparent in | ||
| 1137 | @@ -4209,12 +4287,24 @@ | ||
| 1138 | paths. */ | ||
| 1139 | |||
| 1140 | static int | ||
| 1141 | -hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) | ||
| 1142 | +hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, | ||
| 1143 | + char *visited, int distance, int *bb_size) | ||
| 1144 | { | ||
| 1145 | edge pred; | ||
| 1146 | edge_iterator ei; | ||
| 1147 | int visited_allocated_locally = 0; | ||
| 1148 | |||
| 1149 | + /* Terminate the search if distance, for which EXPR is allowed to move, | ||
| 1150 | + is exhausted. */ | ||
| 1151 | + if (distance > 0) | ||
| 1152 | + { | ||
| 1153 | + distance -= bb_size[bb->index]; | ||
| 1154 | + | ||
| 1155 | + if (distance <= 0) | ||
| 1156 | + return 0; | ||
| 1157 | + } | ||
| 1158 | + else | ||
| 1159 | + gcc_assert (distance == 0); | ||
| 1160 | |||
| 1161 | if (visited == NULL) | ||
| 1162 | { | ||
| 1163 | @@ -4233,9 +4323,6 @@ | ||
| 1164 | else if (visited[pred_bb->index]) | ||
| 1165 | continue; | ||
| 1166 | |||
| 1167 | - /* Does this predecessor generate this expression? */ | ||
| 1168 | - else if (TEST_BIT (comp[pred_bb->index], expr_index)) | ||
| 1169 | - break; | ||
| 1170 | else if (! TEST_BIT (transp[pred_bb->index], expr_index)) | ||
| 1171 | break; | ||
| 1172 | |||
| 1173 | @@ -4243,8 +4330,8 @@ | ||
| 1174 | else | ||
| 1175 | { | ||
| 1176 | visited[pred_bb->index] = 1; | ||
| 1177 | - if (! hoist_expr_reaches_here_p (expr_bb, expr_index, | ||
| 1178 | - pred_bb, visited)) | ||
| 1179 | + if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, | ||
| 1180 | + visited, distance, bb_size)) | ||
| 1181 | break; | ||
| 1182 | } | ||
| 1183 | } | ||
| 1184 | @@ -4254,20 +4341,33 @@ | ||
| 1185 | return (pred == NULL); | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | +/* Find the first occurrence in list OCCR whose insn is in BB, if any. */ | ||
| 1189 | +static struct occr * | ||
| 1190 | +find_occr_in_bb (struct occr *occr, basic_block bb) | ||
| 1191 | +{ | ||
| 1192 | + /* Find the right occurrence of this expression. */ | ||
| 1193 | + while (occr && BLOCK_FOR_INSN (occr->insn) != bb) | ||
| 1194 | + occr = occr->next; | ||
| 1195 | + | ||
| 1196 | + return occr; | ||
| 1197 | +} | ||
| 1198 | + | ||
| 1199 | /* Actually perform code hoisting. */ | ||
| 1200 | |||
| 1201 | static int | ||
| 1202 | hoist_code (void) | ||
| 1203 | { | ||
| 1204 | basic_block bb, dominated; | ||
| 1205 | + VEC (basic_block, heap) *dom_tree_walk; | ||
| 1206 | + unsigned int dom_tree_walk_index; | ||
| 1207 | VEC (basic_block, heap) *domby; | ||
| 1208 | unsigned int i,j; | ||
| 1209 | struct expr **index_map; | ||
| 1210 | struct expr *expr; | ||
| 1211 | + int *to_bb_head; | ||
| 1212 | + int *bb_size; | ||
| 1213 | int changed = 0; | ||
| 1214 | |||
| 1215 | - sbitmap_vector_zero (hoist_exprs, last_basic_block); | ||
| 1216 | - | ||
| 1217 | /* Compute a mapping from expression number (`bitmap_index') to | ||
| 1218 | hash table entry. */ | ||
| 1219 | |||
| 1220 | @@ -4276,28 +4376,98 @@ | ||
| 1221 | for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) | ||
| 1222 | index_map[expr->bitmap_index] = expr; | ||
| 1223 | |||
| 1224 | + /* Calculate sizes of basic blocks and note how far | ||
| 1225 | + each instruction is from the start of its block. We then use this | ||
| 1226 | + data to restrict distance an expression can travel. */ | ||
| 1227 | + | ||
| 1228 | + to_bb_head = XCNEWVEC (int, get_max_uid ()); | ||
| 1229 | + bb_size = XCNEWVEC (int, last_basic_block); | ||
| 1230 | + | ||
| 1231 | + FOR_EACH_BB (bb) | ||
| 1232 | + { | ||
| 1233 | + rtx insn; | ||
| 1234 | + int to_head; | ||
| 1235 | + | ||
| 1236 | + to_head = 0; | ||
| 1237 | + FOR_BB_INSNS (bb, insn) | ||
| 1238 | + { | ||
| 1239 | + /* Don't count debug instructions to avoid them affecting | ||
| 1240 | + decision choices. */ | ||
| 1241 | + if (NONDEBUG_INSN_P (insn)) | ||
| 1242 | + to_bb_head[INSN_UID (insn)] = to_head++; | ||
| 1243 | + } | ||
| 1244 | + | ||
| 1245 | + bb_size[bb->index] = to_head; | ||
| 1246 | + } | ||
| 1247 | + | ||
| 1248 | + gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 | ||
| 1249 | + && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest | ||
| 1250 | + == ENTRY_BLOCK_PTR->next_bb)); | ||
| 1251 | + | ||
| 1252 | + dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, | ||
| 1253 | + ENTRY_BLOCK_PTR->next_bb); | ||
| 1254 | + | ||
| 1255 | /* Walk over each basic block looking for potentially hoistable | ||
| 1256 | expressions, nothing gets hoisted from the entry block. */ | ||
| 1257 | - FOR_EACH_BB (bb) | ||
| 1258 | + for (dom_tree_walk_index = 0; | ||
| 1259 | + VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb); | ||
| 1260 | + dom_tree_walk_index++) | ||
| 1261 | { | ||
| 1262 | - int found = 0; | ||
| 1263 | - int insn_inserted_p; | ||
| 1264 | - | ||
| 1265 | - domby = get_dominated_by (CDI_DOMINATORS, bb); | ||
| 1266 | + domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); | ||
| 1267 | + | ||
| 1268 | + if (VEC_length (basic_block, domby) == 0) | ||
| 1269 | + continue; | ||
| 1270 | + | ||
| 1271 | /* Examine each expression that is very busy at the exit of this | ||
| 1272 | block. These are the potentially hoistable expressions. */ | ||
| 1273 | for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) | ||
| 1274 | { | ||
| 1275 | - int hoistable = 0; | ||
| 1276 | - | ||
| 1277 | - if (TEST_BIT (hoist_vbeout[bb->index], i) | ||
| 1278 | - && TEST_BIT (transpout[bb->index], i)) | ||
| 1279 | + if (TEST_BIT (hoist_vbeout[bb->index], i)) | ||
| 1280 | { | ||
| 1281 | + /* Current expression. */ | ||
| 1282 | + struct expr *expr = index_map[i]; | ||
| 1283 | + /* Number of occurrences of EXPR that can be hoisted to BB. */ | ||
| 1284 | + int hoistable = 0; | ||
| 1285 | + /* Basic blocks that have occurrences reachable from BB. */ | ||
| 1286 | + bitmap_head _from_bbs, *from_bbs = &_from_bbs; | ||
| 1287 | + /* Occurrences reachable from BB. */ | ||
| 1288 | + VEC (occr_t, heap) *occrs_to_hoist = NULL; | ||
| 1289 | + /* We want to insert the expression into BB only once, so | ||
| 1290 | + note when we've inserted it. */ | ||
| 1291 | + int insn_inserted_p; | ||
| 1292 | + occr_t occr; | ||
| 1293 | + | ||
| 1294 | + bitmap_initialize (from_bbs, 0); | ||
| 1295 | + | ||
| 1296 | + /* If an expression is computed in BB and is available at end of | ||
| 1297 | + BB, hoist all occurrences dominated by BB to BB. */ | ||
| 1298 | + if (TEST_BIT (comp[bb->index], i)) | ||
| 1299 | + { | ||
| 1300 | + occr = find_occr_in_bb (expr->antic_occr, bb); | ||
| 1301 | + | ||
| 1302 | + if (occr) | ||
| 1303 | + { | ||
| 1304 | + /* An occurrence might've been already deleted | ||
| 1305 | + while processing a dominator of BB. */ | ||
| 1306 | + if (occr->deleted_p) | ||
| 1307 | + gcc_assert (MAX_HOIST_DEPTH > 1); | ||
| 1308 | + else | ||
| 1309 | + { | ||
| 1310 | + gcc_assert (NONDEBUG_INSN_P (occr->insn)); | ||
| 1311 | + hoistable++; | ||
| 1312 | + } | ||
| 1313 | + } | ||
| 1314 | + else | ||
| 1315 | + hoistable++; | ||
| 1316 | + } | ||
| 1317 | + | ||
| 1318 | /* We've found a potentially hoistable expression, now | ||
| 1319 | we look at every block BB dominates to see if it | ||
| 1320 | computes the expression. */ | ||
| 1321 | for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | ||
| 1322 | { | ||
| 1323 | + int max_distance; | ||
| 1324 | + | ||
| 1325 | /* Ignore self dominance. */ | ||
| 1326 | if (bb == dominated) | ||
| 1327 | continue; | ||
| 1328 | @@ -4307,17 +4477,43 @@ | ||
| 1329 | if (!TEST_BIT (antloc[dominated->index], i)) | ||
| 1330 | continue; | ||
| 1331 | |||
| 1332 | + occr = find_occr_in_bb (expr->antic_occr, dominated); | ||
| 1333 | + gcc_assert (occr); | ||
| 1334 | + | ||
| 1335 | + /* An occurrence might've been already deleted | ||
| 1336 | + while processing a dominator of BB. */ | ||
| 1337 | + if (occr->deleted_p) | ||
| 1338 | + { | ||
| 1339 | + gcc_assert (MAX_HOIST_DEPTH > 1); | ||
| 1340 | + continue; | ||
| 1341 | + } | ||
| 1342 | + gcc_assert (NONDEBUG_INSN_P (occr->insn)); | ||
| 1343 | + | ||
| 1344 | + max_distance = expr->max_distance; | ||
| 1345 | + if (max_distance > 0) | ||
| 1346 | + /* Adjust MAX_DISTANCE to account for the fact that | ||
| 1347 | + OCCR won't have to travel all of DOMINATED, but | ||
| 1348 | + only part of it. */ | ||
| 1349 | + max_distance += (bb_size[dominated->index] | ||
| 1350 | + - to_bb_head[INSN_UID (occr->insn)]); | ||
| 1351 | + | ||
| 1352 | /* Note if the expression would reach the dominated block | ||
| 1353 | unimpared if it was placed at the end of BB. | ||
| 1354 | |||
| 1355 | Keep track of how many times this expression is hoistable | ||
| 1356 | from a dominated block into BB. */ | ||
| 1357 | - if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | ||
| 1358 | - hoistable++; | ||
| 1359 | + if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, | ||
| 1360 | + max_distance, bb_size)) | ||
| 1361 | + { | ||
| 1362 | + hoistable++; | ||
| 1363 | + VEC_safe_push (occr_t, heap, | ||
| 1364 | + occrs_to_hoist, occr); | ||
| 1365 | + bitmap_set_bit (from_bbs, dominated->index); | ||
| 1366 | + } | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | /* If we found more than one hoistable occurrence of this | ||
| 1370 | - expression, then note it in the bitmap of expressions to | ||
| 1371 | + expression, then note it in the vector of expressions to | ||
| 1372 | hoist. It makes no sense to hoist things which are computed | ||
| 1373 | in only one BB, and doing so tends to pessimize register | ||
| 1374 | allocation. One could increase this value to try harder | ||
| 1375 | @@ -4326,91 +4522,80 @@ | ||
| 1376 | the vast majority of hoistable expressions are only movable | ||
| 1377 | from two successors, so raising this threshold is likely | ||
| 1378 | to nullify any benefit we get from code hoisting. */ | ||
| 1379 | - if (hoistable > 1) | ||
| 1380 | - { | ||
| 1381 | - SET_BIT (hoist_exprs[bb->index], i); | ||
| 1382 | - found = 1; | ||
| 1383 | - } | ||
| 1384 | - } | ||
| 1385 | - } | ||
| 1386 | - /* If we found nothing to hoist, then quit now. */ | ||
| 1387 | - if (! found) | ||
| 1388 | - { | ||
| 1389 | - VEC_free (basic_block, heap, domby); | ||
| 1390 | - continue; | ||
| 1391 | - } | ||
| 1392 | - | ||
| 1393 | - /* Loop over all the hoistable expressions. */ | ||
| 1394 | - for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) | ||
| 1395 | - { | ||
| 1396 | - /* We want to insert the expression into BB only once, so | ||
| 1397 | - note when we've inserted it. */ | ||
| 1398 | - insn_inserted_p = 0; | ||
| 1399 | - | ||
| 1400 | - /* These tests should be the same as the tests above. */ | ||
| 1401 | - if (TEST_BIT (hoist_exprs[bb->index], i)) | ||
| 1402 | - { | ||
| 1403 | - /* We've found a potentially hoistable expression, now | ||
| 1404 | - we look at every block BB dominates to see if it | ||
| 1405 | - computes the expression. */ | ||
| 1406 | - for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | ||
| 1407 | - { | ||
| 1408 | - /* Ignore self dominance. */ | ||
| 1409 | - if (bb == dominated) | ||
| 1410 | - continue; | ||
| 1411 | - | ||
| 1412 | - /* We've found a dominated block, now see if it computes | ||
| 1413 | - the busy expression and whether or not moving that | ||
| 1414 | - expression to the "beginning" of that block is safe. */ | ||
| 1415 | - if (!TEST_BIT (antloc[dominated->index], i)) | ||
| 1416 | - continue; | ||
| 1417 | - | ||
| 1418 | - /* The expression is computed in the dominated block and | ||
| 1419 | - it would be safe to compute it at the start of the | ||
| 1420 | - dominated block. Now we have to determine if the | ||
| 1421 | - expression would reach the dominated block if it was | ||
| 1422 | - placed at the end of BB. */ | ||
| 1423 | - if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | ||
| 1424 | - { | ||
| 1425 | - struct expr *expr = index_map[i]; | ||
| 1426 | - struct occr *occr = expr->antic_occr; | ||
| 1427 | - rtx insn; | ||
| 1428 | - rtx set; | ||
| 1429 | - | ||
| 1430 | - /* Find the right occurrence of this expression. */ | ||
| 1431 | - while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) | ||
| 1432 | - occr = occr->next; | ||
| 1433 | - | ||
| 1434 | - gcc_assert (occr); | ||
| 1435 | - insn = occr->insn; | ||
| 1436 | - set = single_set (insn); | ||
| 1437 | - gcc_assert (set); | ||
| 1438 | - | ||
| 1439 | - /* Create a pseudo-reg to store the result of reaching | ||
| 1440 | - expressions into. Get the mode for the new pseudo | ||
| 1441 | - from the mode of the original destination pseudo. */ | ||
| 1442 | - if (expr->reaching_reg == NULL) | ||
| 1443 | - expr->reaching_reg | ||
| 1444 | - = gen_reg_rtx_and_attrs (SET_DEST (set)); | ||
| 1445 | - | ||
| 1446 | - gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); | ||
| 1447 | - delete_insn (insn); | ||
| 1448 | - occr->deleted_p = 1; | ||
| 1449 | - changed = 1; | ||
| 1450 | - gcse_subst_count++; | ||
| 1451 | - | ||
| 1452 | - if (!insn_inserted_p) | ||
| 1453 | - { | ||
| 1454 | - insert_insn_end_basic_block (index_map[i], bb, 0); | ||
| 1455 | - insn_inserted_p = 1; | ||
| 1456 | - } | ||
| 1457 | - } | ||
| 1458 | - } | ||
| 1459 | + if (hoistable > 1 && dbg_cnt (hoist_insn)) | ||
| 1460 | + { | ||
| 1461 | + /* If (hoistable != VEC_length), then there is | ||
| 1462 | + an occurrence of EXPR in BB itself. Don't waste | ||
| 1463 | + time looking for LCA in this case. */ | ||
| 1464 | + if ((unsigned) hoistable | ||
| 1465 | + == VEC_length (occr_t, occrs_to_hoist)) | ||
| 1466 | + { | ||
| 1467 | + basic_block lca; | ||
| 1468 | + | ||
| 1469 | + lca = nearest_common_dominator_for_set (CDI_DOMINATORS, | ||
| 1470 | + from_bbs); | ||
| 1471 | + if (lca != bb) | ||
| 1472 | + /* Punt, it's better to hoist these occurrences to | ||
| 1473 | + LCA. */ | ||
| 1474 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
| 1475 | + } | ||
| 1476 | + } | ||
| 1477 | + else | ||
| 1478 | + /* Punt, no point hoisting a single occurrence. */ | ||
| 1479 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
| 1480 | + | ||
| 1481 | + insn_inserted_p = 0; | ||
| 1482 | + | ||
| 1483 | + /* Walk through occurrences of I'th expressions we want | ||
| 1484 | + to hoist to BB and make the transformations. */ | ||
| 1485 | + for (j = 0; | ||
| 1486 | + VEC_iterate (occr_t, occrs_to_hoist, j, occr); | ||
| 1487 | + j++) | ||
| 1488 | + { | ||
| 1489 | + rtx insn; | ||
| 1490 | + rtx set; | ||
| 1491 | + | ||
| 1492 | + gcc_assert (!occr->deleted_p); | ||
| 1493 | + | ||
| 1494 | + insn = occr->insn; | ||
| 1495 | + set = single_set (insn); | ||
| 1496 | + gcc_assert (set); | ||
| 1497 | + | ||
| 1498 | + /* Create a pseudo-reg to store the result of reaching | ||
| 1499 | + expressions into. Get the mode for the new pseudo | ||
| 1500 | + from the mode of the original destination pseudo. | ||
| 1501 | + | ||
| 1502 | + It is important to use new pseudos whenever we | ||
| 1503 | + emit a set. This will allow reload to use | ||
| 1504 | + rematerialization for such registers. */ | ||
| 1505 | + if (!insn_inserted_p) | ||
| 1506 | + expr->reaching_reg | ||
| 1507 | + = gen_reg_rtx_and_attrs (SET_DEST (set)); | ||
| 1508 | + | ||
| 1509 | + gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), | ||
| 1510 | + insn); | ||
| 1511 | + delete_insn (insn); | ||
| 1512 | + occr->deleted_p = 1; | ||
| 1513 | + changed = 1; | ||
| 1514 | + gcse_subst_count++; | ||
| 1515 | + | ||
| 1516 | + if (!insn_inserted_p) | ||
| 1517 | + { | ||
| 1518 | + insert_insn_end_basic_block (expr, bb); | ||
| 1519 | + insn_inserted_p = 1; | ||
| 1520 | + } | ||
| 1521 | + } | ||
| 1522 | + | ||
| 1523 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
| 1524 | + bitmap_clear (from_bbs); | ||
| 1525 | } | ||
| 1526 | } | ||
| 1527 | VEC_free (basic_block, heap, domby); | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | + VEC_free (basic_block, heap, dom_tree_walk); | ||
| 1531 | + free (bb_size); | ||
| 1532 | + free (to_bb_head); | ||
| 1533 | free (index_map); | ||
| 1534 | |||
| 1535 | return changed; | ||
| 1536 | @@ -4433,6 +4618,8 @@ | ||
| 1537 | || is_too_expensive (_("GCSE disabled"))) | ||
| 1538 | return 0; | ||
| 1539 | |||
| 1540 | + doing_code_hoisting_p = true; | ||
| 1541 | + | ||
| 1542 | /* We need alias. */ | ||
| 1543 | init_alias_analysis (); | ||
| 1544 | |||
| 1545 | @@ -4468,6 +4655,8 @@ | ||
| 1546 | gcse_subst_count, gcse_create_count); | ||
| 1547 | } | ||
| 1548 | |||
| 1549 | + doing_code_hoisting_p = false; | ||
| 1550 | + | ||
| 1551 | return changed; | ||
| 1552 | } | ||
| 1553 | |||
| 1554 | |||
| 1555 | === modified file 'gcc/params.def' | ||
| 1556 | --- old/gcc/params.def 2010-04-02 18:54:46 +0000 | ||
| 1557 | +++ new/gcc/params.def 2010-08-16 09:41:58 +0000 | ||
| 1558 | @@ -219,6 +219,29 @@ | ||
| 1559 | "gcse-after-reload-critical-fraction", | ||
| 1560 | "The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload", | ||
| 1561 | 10, 0, 0) | ||
| 1562 | + | ||
| 1563 | +/* GCSE will use GCSE_COST_DISTANCE_RATIO as a scaling factor | ||
| 1564 | + to calculate maximum distance for which an expression is allowed to move | ||
| 1565 | + from its rtx_cost. */ | ||
| 1566 | +DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO, | ||
| 1567 | + "gcse-cost-distance-ratio", | ||
| 1568 | + "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations", | ||
| 1569 | + 10, 0, 0) | ||
| 1570 | +/* GCSE won't restrict distance for which an expression with rtx_cost greater | ||
| 1571 | + than COSTS_N_INSNS(GCSE_UNRESTRICTED_COST) is allowed to move. */ | ||
| 1572 | +DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST, | ||
| 1573 | + "gcse-unrestricted-cost", | ||
| 1574 | + "Cost at which GCSE optimizations will not constraint the distance an expression can travel", | ||
| 1575 | + 3, 0, 0) | ||
| 1576 | + | ||
| 1577 | +/* How deep from a given basic block the dominator tree should be searched | ||
| 1578 | + for expressions to hoist to the block. The value of 0 will avoid limiting | ||
| 1579 | + the search. */ | ||
| 1580 | +DEFPARAM(PARAM_MAX_HOIST_DEPTH, | ||
| 1581 | + "max-hoist-depth", | ||
| 1582 | + "Maximum depth of search in the dominator tree for expressions to hoist", | ||
| 1583 | + 30, 0, 0) | ||
| 1584 | + | ||
| 1585 | /* This parameter limits the number of insns in a loop that will be unrolled, | ||
| 1586 | and by how much the loop is unrolled. | ||
| 1587 | |||
| 1588 | |||
| 1589 | === modified file 'gcc/params.h' | ||
| 1590 | --- old/gcc/params.h 2009-12-01 19:12:29 +0000 | ||
| 1591 | +++ new/gcc/params.h 2010-08-16 09:41:58 +0000 | ||
| 1592 | @@ -125,6 +125,12 @@ | ||
| 1593 | PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION) | ||
| 1594 | #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \ | ||
| 1595 | PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION) | ||
| 1596 | +#define GCSE_COST_DISTANCE_RATIO \ | ||
| 1597 | + PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO) | ||
| 1598 | +#define GCSE_UNRESTRICTED_COST \ | ||
| 1599 | + PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST) | ||
| 1600 | +#define MAX_HOIST_DEPTH \ | ||
| 1601 | + PARAM_VALUE (PARAM_MAX_HOIST_DEPTH) | ||
| 1602 | #define MAX_UNROLLED_INSNS \ | ||
| 1603 | PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) | ||
| 1604 | #define MAX_SMS_LOOP_NUMBER \ | ||
| 1605 | |||
| 1606 | === added file 'gcc/testsuite/gcc.dg/pr45101.c' | ||
| 1607 | --- old/gcc/testsuite/gcc.dg/pr45101.c 1970-01-01 00:00:00 +0000 | ||
| 1608 | +++ new/gcc/testsuite/gcc.dg/pr45101.c 2010-08-16 09:41:58 +0000 | ||
| 1609 | @@ -0,0 +1,15 @@ | ||
| 1610 | +/* PR rtl-optimization/45101 */ | ||
| 1611 | +/* { dg-do compile } */ | ||
| 1612 | +/* { dg-options "-O2 -fgcse -fgcse-las" } */ | ||
| 1613 | + | ||
| 1614 | +struct | ||
| 1615 | +{ | ||
| 1616 | + int i; | ||
| 1617 | +} *s; | ||
| 1618 | + | ||
| 1619 | +extern void bar (void); | ||
| 1620 | + | ||
| 1621 | +void foo () | ||
| 1622 | +{ | ||
| 1623 | + !s ? s->i++ : bar (); | ||
| 1624 | +} | ||
| 1625 | |||
| 1626 | === added file 'gcc/testsuite/gcc.dg/pr45105.c' | ||
| 1627 | --- old/gcc/testsuite/gcc.dg/pr45105.c 1970-01-01 00:00:00 +0000 | ||
| 1628 | +++ new/gcc/testsuite/gcc.dg/pr45105.c 2010-08-16 09:41:58 +0000 | ||
| 1629 | @@ -0,0 +1,27 @@ | ||
| 1630 | +/* PR debug/45105 */ | ||
| 1631 | +/* { dg-do compile } */ | ||
| 1632 | +/* { dg-options "-Os -fcompare-debug" } */ | ||
| 1633 | + | ||
| 1634 | +extern int *baz (int *, int *); | ||
| 1635 | + | ||
| 1636 | +void | ||
| 1637 | +bar (int *p1, int *p2) | ||
| 1638 | +{ | ||
| 1639 | + int n = *baz (0, 0); | ||
| 1640 | + p1[n] = p2[n]; | ||
| 1641 | +} | ||
| 1642 | + | ||
| 1643 | +void | ||
| 1644 | +foo (int *p, int l) | ||
| 1645 | +{ | ||
| 1646 | + int a1[32]; | ||
| 1647 | + int a2[32]; | ||
| 1648 | + baz (a1, a2); | ||
| 1649 | + while (l) | ||
| 1650 | + { | ||
| 1651 | + if (l & 1) | ||
| 1652 | + p = baz (a2, p); | ||
| 1653 | + l--; | ||
| 1654 | + bar (a1, a2); | ||
| 1655 | + } | ||
| 1656 | +} | ||
| 1657 | |||
| 1658 | === added file 'gcc/testsuite/gcc.dg/pr45107.c' | ||
| 1659 | --- old/gcc/testsuite/gcc.dg/pr45107.c 1970-01-01 00:00:00 +0000 | ||
| 1660 | +++ new/gcc/testsuite/gcc.dg/pr45107.c 2010-08-16 09:41:58 +0000 | ||
| 1661 | @@ -0,0 +1,13 @@ | ||
| 1662 | +/* PR rtl-optimization/45107 */ | ||
| 1663 | +/* { dg-do compile } */ | ||
| 1664 | +/* { dg-options "-Os -fgcse-las" } */ | ||
| 1665 | + | ||
| 1666 | +extern void bar(int *); | ||
| 1667 | + | ||
| 1668 | +int foo (int *p) | ||
| 1669 | +{ | ||
| 1670 | + int i = *p; | ||
| 1671 | + if (i != 1) | ||
| 1672 | + bar(&i); | ||
| 1673 | + *p = i; | ||
| 1674 | +} | ||
| 1675 | |||
| 1676 | === added file 'gcc/testsuite/gcc.target/arm/pr40956.c' | ||
| 1677 | --- old/gcc/testsuite/gcc.target/arm/pr40956.c 1970-01-01 00:00:00 +0000 | ||
| 1678 | +++ new/gcc/testsuite/gcc.target/arm/pr40956.c 2010-08-16 09:41:58 +0000 | ||
| 1679 | @@ -0,0 +1,14 @@ | ||
| 1680 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ | ||
| 1681 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
| 1682 | +/* { dg-require-effective-target fpic } */ | ||
| 1683 | +/* Make sure the constant "0" is loaded into register only once. */ | ||
| 1684 | +/* { dg-final { scan-assembler-times "mov\[\\t \]*r., #0" 1 } } */ | ||
| 1685 | + | ||
| 1686 | +int foo(int p, int* q) | ||
| 1687 | +{ | ||
| 1688 | + if (p!=9) | ||
| 1689 | + *q = 0; | ||
| 1690 | + else | ||
| 1691 | + *(q+1) = 0; | ||
| 1692 | + return 3; | ||
| 1693 | +} | ||
| 1694 | |||
| 1695 | === added file 'gcc/testsuite/gcc.target/arm/pr42495.c' | ||
| 1696 | --- old/gcc/testsuite/gcc.target/arm/pr42495.c 1970-01-01 00:00:00 +0000 | ||
| 1697 | +++ new/gcc/testsuite/gcc.target/arm/pr42495.c 2010-08-16 09:41:58 +0000 | ||
| 1698 | @@ -0,0 +1,31 @@ | ||
| 1699 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te -fdump-rtl-hoist" } */ | ||
| 1700 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
| 1701 | +/* { dg-require-effective-target fpic } */ | ||
| 1702 | +/* Make sure all calculations of gObj's address get hoisted to one location. */ | ||
| 1703 | +/* { dg-final { scan-rtl-dump "PRE/HOIST: end of bb .* copying expression" "hoist" } } */ | ||
| 1704 | + | ||
| 1705 | +struct st_a { | ||
| 1706 | + int data; | ||
| 1707 | +}; | ||
| 1708 | + | ||
| 1709 | +struct st_b { | ||
| 1710 | + struct st_a *p_a; | ||
| 1711 | + struct st_b *next; | ||
| 1712 | +}; | ||
| 1713 | + | ||
| 1714 | +extern struct st_b gObj; | ||
| 1715 | +extern void foo(int, struct st_b*); | ||
| 1716 | + | ||
| 1717 | +int goo(struct st_b * obj) { | ||
| 1718 | + struct st_a *pa; | ||
| 1719 | + if (gObj.p_a->data != 0) { | ||
| 1720 | + foo(gObj.p_a->data, obj); | ||
| 1721 | + } | ||
| 1722 | + pa = obj->p_a; | ||
| 1723 | + if (pa == 0) { | ||
| 1724 | + return 0; | ||
| 1725 | + } else if (pa == gObj.p_a) { | ||
| 1726 | + return 0; | ||
| 1727 | + } | ||
| 1728 | + return pa->data; | ||
| 1729 | +} | ||
| 1730 | |||
| 1731 | === added file 'gcc/testsuite/gcc.target/arm/pr42574.c' | ||
| 1732 | --- old/gcc/testsuite/gcc.target/arm/pr42574.c 1970-01-01 00:00:00 +0000 | ||
| 1733 | +++ new/gcc/testsuite/gcc.target/arm/pr42574.c 2010-08-16 09:41:58 +0000 | ||
| 1734 | @@ -0,0 +1,24 @@ | ||
| 1735 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ | ||
| 1736 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
| 1737 | +/* { dg-require-effective-target fpic } */ | ||
| 1738 | +/* Make sure the address of glob.c is calculated only once and using | ||
| 1739 | + a logical shift for the offset (200<<1). */ | ||
| 1740 | +/* { dg-final { scan-assembler-times "lsl" 1 } } */ | ||
| 1741 | + | ||
| 1742 | +struct A { | ||
| 1743 | + char a[400]; | ||
| 1744 | + float* c; | ||
| 1745 | +}; | ||
| 1746 | +struct A glob; | ||
| 1747 | +void func(); | ||
| 1748 | +void func1(float*); | ||
| 1749 | +int func2(float*, int*); | ||
| 1750 | +void func3(float*); | ||
| 1751 | + | ||
| 1752 | +void test(int *p) { | ||
| 1753 | + func1(glob.c); | ||
| 1754 | + if (func2(glob.c, p)) { | ||
| 1755 | + func(); | ||
| 1756 | + } | ||
| 1757 | + func3(glob.c); | ||
| 1758 | +} | ||
| 1759 | |||
