diff options
Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch')
-rw-r--r-- | meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | 1759 |
1 files changed, 1759 insertions, 0 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch new file mode 100644 index 000000000..a58dd2441 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99360.patch | |||
@@ -0,0 +1,1759 @@ | |||
1 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
2 | |||
3 | Backport code hoisting improvements from mainline: | ||
4 | |||
5 | 2010-07-28 Jakub Jelinek <jakub@redhat.com> | ||
6 | PR debug/45105 | ||
7 | * gcc.dg/pr45105.c: New test. | ||
8 | |||
9 | 2010-07-28 Jakub Jelinek <jakub@redhat.com> | ||
10 | PR debug/45105 | ||
11 | * gcse.c (hoist_code): Use FOR_BB_INSNS macro. | ||
12 | |||
13 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
14 | PR rtl-optimization/45107 | ||
15 | * gcc.dg/pr45107.c: New test. | ||
16 | |||
17 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
18 | PR rtl-optimization/45107 | ||
19 | * gcse.c (hash_scan_set): Use max_distance for gcse-las. | ||
20 | |||
21 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
22 | PR rtl-optimization/45101 | ||
23 | * gcc.dg/pr45101.c: New test. | ||
24 | |||
25 | 2010-07-28 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
26 | PR rtl-optimization/45101 | ||
27 | * gcse.c (hash_scan_set): Fix argument ordering of insert_expr_in_table | ||
28 | for gcse-las. | ||
29 | |||
30 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
31 | PR rtl-optimization/40956 | ||
32 | PR target/42495 | ||
33 | PR middle-end/42574 | ||
34 | * gcc.target/arm/pr40956.c, gcc.target/arm/pr42495.c, | ||
35 | * gcc.target/arm/pr42574.c: Add tests. | ||
36 | |||
37 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
38 | * config/arm/arm.c (params.h): Include. | ||
39 | (arm_override_options): Tune gcse-unrestricted-cost. | ||
40 | * config/arm/t-arm (arm.o): Define dependencies. | ||
41 | |||
42 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
43 | PR target/42495 | ||
44 | PR middle-end/42574 | ||
45 | * basic-block.h (get_dominated_to_depth): Declare. | ||
46 | * dominance.c (get_dominated_to_depth): New function, use | ||
47 | get_all_dominated_blocks as a base. | ||
48 | (get_all_dominated_blocks): Use get_dominated_to_depth. | ||
49 | * gcse.c (occr_t, VEC (occr_t, heap)): Define. | ||
50 | (hoist_exprs): Remove. | ||
51 | (alloc_code_hoist_mem, free_code_hoist_mem): Update. | ||
52 | (compute_code_hoist_vbeinout): Add debug print outs. | ||
53 | (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. | ||
54 | * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid | ||
55 | quadratic behavior. | ||
56 | * params.h (MAX_HOIST_DEPTH): New macro. | ||
57 | * doc/invoke.texi (max-hoist-depth): Document. | ||
58 | |||
59 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
60 | PR rtl-optimization/40956 | ||
61 | * config/arm/arm.c (thumb1_size_rtx_costs): Fix cost of simple | ||
62 | constants. | ||
63 | |||
64 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
65 | PR target/42495 | ||
66 | PR middle-end/42574 | ||
67 | * config/arm/arm.c (legitimize_pic_address): Use | ||
68 | gen_calculate_pic_address pattern to emit calculation of PIC address. | ||
69 | (will_be_in_index_register): New function. | ||
70 | (arm_legitimate_address_outer_p, thumb2_legitimate_address_p,) | ||
71 | (thumb1_legitimate_address_p): Use it provided !strict_p. | ||
72 | * config/arm/arm.md (calculate_pic_address): New expand and split. | ||
73 | |||
74 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
75 | PR target/42495 | ||
76 | PR middle-end/42574 | ||
77 | * config/arm/arm.c (thumb1_size_rtx_costs): Add cost for "J" constants. | ||
78 | * config/arm/arm.md (define_split "J", define_split "K"): Make | ||
79 | IRA/reload friendly. | ||
80 | |||
81 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
82 | * gcse.c (insert_insn_end_basic_block): Update signature, remove | ||
83 | unused checks. | ||
84 | (pre_edge_insert, hoist_code): Update. | ||
85 | |||
86 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
87 | PR target/42495 | ||
88 | PR middle-end/42574 | ||
89 | * gcse.c (hoist_expr_reaches_here_p): Remove excessive check. | ||
90 | |||
91 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
92 | * gcse.c (hoist_code): Generate new pseudo for every new set insn. | ||
93 | |||
94 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
95 | PR rtl-optimization/40956 | ||
96 | PR target/42495 | ||
97 | PR middle-end/42574 | ||
98 | * gcse.c (compute_code_hoist_vbeinout): Consider more expressions | ||
99 | for hoisting. | ||
100 | (hoist_code): Count occurrences in current block too. | ||
101 | |||
102 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
103 | * gcse.c (struct expr:max_distance): New field. | ||
104 | (doing_code_hoisting_p): New static variable. | ||
105 | (want_to_gcse_p): Change signature. Allow constrained hoisting of | ||
106 | simple expressions, don't change behavior for PRE. Set max_distance. | ||
107 | (insert_expr_in_table): Set new max_distance field. | ||
108 | (hash_scan_set): Update. | ||
109 | (hoist_expr_reaches_here_p): Stop search after max_distance | ||
110 | instructions. | ||
111 | (find_occr_in_bb): New static function. Use it in ... | ||
112 | (hoist_code): Calculate sizes of basic block before any changes are | ||
113 | done. Pass max_distance to hoist_expr_reaches_here_p. | ||
114 | (one_code_hoisting_pass): Set doing_code_hoisting_p. | ||
115 | * params.def (PARAM_GCSE_COST_DISTANCE_RATIO,) | ||
116 | (PARAM_GCSE_UNRESTRICTED_COST): New parameters. | ||
117 | * params.h (GCSE_COST_DISTANCE_RATIO, GCSE_UNRESTRICTED_COST): New | ||
118 | macros. | ||
119 | * doc/invoke.texi (gcse-cost-distance-ratio, gcse-unrestricted-cost): | ||
120 | Document. | ||
121 | |||
122 | 2010-07-27 Jeff Law <law@redhat.com> | ||
123 | Maxim Kuvyrkov <maxim@codesourcery.com> | ||
124 | * gcse.c (compute_transpout, transpout): Remove, move logic | ||
125 | to prune_expressions. | ||
126 | (compute_pre_data): Move pruning of trapping expressions ... | ||
127 | (prune_expressions): ... here. New static function. | ||
128 | (compute_code_hoist_data): Use it. | ||
129 | (alloc_code_hoist_mem, free_code_hoist_mem, hoist_code): Update. | ||
130 | |||
131 | 2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com> | ||
132 | * dbgcnt.def (hoist_insn): New debug counter. | ||
133 | * gcse.c (hoist_code): Use it. | ||
134 | |||
135 | 2010-07-28 Julian Brown <julian@codesourcery.com> | ||
136 | |||
137 | Backport from FSF mainline: | ||
138 | |||
139 | === modified file 'gcc/basic-block.h' | ||
140 | --- old/gcc/basic-block.h 2010-04-02 18:54:46 +0000 | ||
141 | +++ new/gcc/basic-block.h 2010-08-16 09:41:58 +0000 | ||
142 | @@ -932,6 +932,8 @@ | ||
143 | extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction, | ||
144 | basic_block *, | ||
145 | unsigned); | ||
146 | +extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction, | ||
147 | + basic_block, int); | ||
148 | extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction, | ||
149 | basic_block); | ||
150 | extern void add_to_dominance_info (enum cdi_direction, basic_block); | ||
151 | |||
152 | === modified file 'gcc/config/arm/arm.c' | ||
153 | --- old/gcc/config/arm/arm.c 2010-08-13 15:37:39 +0000 | ||
154 | +++ new/gcc/config/arm/arm.c 2010-08-16 09:41:58 +0000 | ||
155 | @@ -56,6 +56,7 @@ | ||
156 | #include "df.h" | ||
157 | #include "intl.h" | ||
158 | #include "libfuncs.h" | ||
159 | +#include "params.h" | ||
160 | |||
161 | /* Forward definitions of types. */ | ||
162 | typedef struct minipool_node Mnode; | ||
163 | @@ -1902,6 +1903,14 @@ | ||
164 | flag_reorder_blocks = 1; | ||
165 | } | ||
166 | |||
167 | + if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST) | ||
168 | + && flag_pic) | ||
169 | + /* Hoisting PIC address calculations more aggressively provides a small, | ||
170 | + but measurable, size reduction for PIC code. Therefore, we decrease | ||
171 | + the bar for unrestricted expression hoisting to the cost of PIC address | ||
172 | + calculation, which is 2 instructions. */ | ||
173 | + set_param_value ("gcse-unrestricted-cost", 2); | ||
174 | + | ||
175 | /* Register global variables with the garbage collector. */ | ||
176 | arm_add_gc_roots (); | ||
177 | |||
178 | @@ -5070,17 +5079,13 @@ | ||
179 | if (GET_CODE (orig) == SYMBOL_REF | ||
180 | || GET_CODE (orig) == LABEL_REF) | ||
181 | { | ||
182 | - rtx pic_ref, address; | ||
183 | rtx insn; | ||
184 | |||
185 | if (reg == 0) | ||
186 | { | ||
187 | gcc_assert (can_create_pseudo_p ()); | ||
188 | reg = gen_reg_rtx (Pmode); | ||
189 | - address = gen_reg_rtx (Pmode); | ||
190 | } | ||
191 | - else | ||
192 | - address = reg; | ||
193 | |||
194 | /* VxWorks does not impose a fixed gap between segments; the run-time | ||
195 | gap can be different from the object-file gap. We therefore can't | ||
196 | @@ -5096,18 +5101,21 @@ | ||
197 | insn = arm_pic_static_addr (orig, reg); | ||
198 | else | ||
199 | { | ||
200 | + rtx pat; | ||
201 | + rtx mem; | ||
202 | + | ||
203 | /* If this function doesn't have a pic register, create one now. */ | ||
204 | require_pic_register (); | ||
205 | |||
206 | - if (TARGET_32BIT) | ||
207 | - emit_insn (gen_pic_load_addr_32bit (address, orig)); | ||
208 | - else /* TARGET_THUMB1 */ | ||
209 | - emit_insn (gen_pic_load_addr_thumb1 (address, orig)); | ||
210 | - | ||
211 | - pic_ref = gen_const_mem (Pmode, | ||
212 | - gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, | ||
213 | - address)); | ||
214 | - insn = emit_move_insn (reg, pic_ref); | ||
215 | + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); | ||
216 | + | ||
217 | + /* Make the MEM as close to a constant as possible. */ | ||
218 | + mem = SET_SRC (pat); | ||
219 | + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); | ||
220 | + MEM_READONLY_P (mem) = 1; | ||
221 | + MEM_NOTRAP_P (mem) = 1; | ||
222 | + | ||
223 | + insn = emit_insn (pat); | ||
224 | } | ||
225 | |||
226 | /* Put a REG_EQUAL note on this insn, so that it can be optimized | ||
227 | @@ -5387,6 +5395,15 @@ | ||
228 | return FALSE; | ||
229 | } | ||
230 | |||
231 | +/* Return true if X will surely end up in an index register after next | ||
232 | + splitting pass. */ | ||
233 | +static bool | ||
234 | +will_be_in_index_register (const_rtx x) | ||
235 | +{ | ||
236 | + /* arm.md: calculate_pic_address will split this into a register. */ | ||
237 | + return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; | ||
238 | +} | ||
239 | + | ||
240 | /* Return nonzero if X is a valid ARM state address operand. */ | ||
241 | int | ||
242 | arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, | ||
243 | @@ -5444,8 +5461,9 @@ | ||
244 | rtx xop1 = XEXP (x, 1); | ||
245 | |||
246 | return ((arm_address_register_rtx_p (xop0, strict_p) | ||
247 | - && GET_CODE(xop1) == CONST_INT | ||
248 | - && arm_legitimate_index_p (mode, xop1, outer, strict_p)) | ||
249 | + && ((GET_CODE(xop1) == CONST_INT | ||
250 | + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) | ||
251 | + || (!strict_p && will_be_in_index_register (xop1)))) | ||
252 | || (arm_address_register_rtx_p (xop1, strict_p) | ||
253 | && arm_legitimate_index_p (mode, xop0, outer, strict_p))); | ||
254 | } | ||
255 | @@ -5531,7 +5549,8 @@ | ||
256 | rtx xop1 = XEXP (x, 1); | ||
257 | |||
258 | return ((arm_address_register_rtx_p (xop0, strict_p) | ||
259 | - && thumb2_legitimate_index_p (mode, xop1, strict_p)) | ||
260 | + && (thumb2_legitimate_index_p (mode, xop1, strict_p) | ||
261 | + || (!strict_p && will_be_in_index_register (xop1)))) | ||
262 | || (arm_address_register_rtx_p (xop1, strict_p) | ||
263 | && thumb2_legitimate_index_p (mode, xop0, strict_p))); | ||
264 | } | ||
265 | @@ -5834,7 +5853,8 @@ | ||
266 | && XEXP (x, 0) != frame_pointer_rtx | ||
267 | && XEXP (x, 1) != frame_pointer_rtx | ||
268 | && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) | ||
269 | - && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) | ||
270 | + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) | ||
271 | + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) | ||
272 | return 1; | ||
273 | |||
274 | /* REG+const has 5-7 bit offset for non-SP registers. */ | ||
275 | @@ -6413,12 +6433,16 @@ | ||
276 | |||
277 | case CONST_INT: | ||
278 | if (outer == SET) | ||
279 | - { | ||
280 | - if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | ||
281 | - return 0; | ||
282 | - if (thumb_shiftable_const (INTVAL (x))) | ||
283 | - return COSTS_N_INSNS (2); | ||
284 | - return COSTS_N_INSNS (3); | ||
285 | + { | ||
286 | + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | ||
287 | + return COSTS_N_INSNS (1); | ||
288 | + /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ | ||
289 | + if (INTVAL (x) >= -255 && INTVAL (x) <= -1) | ||
290 | + return COSTS_N_INSNS (2); | ||
291 | + /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ | ||
292 | + if (thumb_shiftable_const (INTVAL (x))) | ||
293 | + return COSTS_N_INSNS (2); | ||
294 | + return COSTS_N_INSNS (3); | ||
295 | } | ||
296 | else if ((outer == PLUS || outer == COMPARE) | ||
297 | && INTVAL (x) < 256 && INTVAL (x) > -256) | ||
298 | @@ -7110,6 +7134,12 @@ | ||
299 | a single register, otherwise it costs one insn per word. */ | ||
300 | if (REG_P (XEXP (x, 0))) | ||
301 | *total = COSTS_N_INSNS (1); | ||
302 | + else if (flag_pic | ||
303 | + && GET_CODE (XEXP (x, 0)) == PLUS | ||
304 | + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) | ||
305 | + /* This will be split into two instructions. | ||
306 | + See arm.md:calculate_pic_address. */ | ||
307 | + *total = COSTS_N_INSNS (2); | ||
308 | else | ||
309 | *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); | ||
310 | return true; | ||
311 | |||
312 | === modified file 'gcc/config/arm/arm.md' | ||
313 | --- old/gcc/config/arm/arm.md 2010-08-13 15:15:12 +0000 | ||
314 | +++ new/gcc/config/arm/arm.md 2010-08-16 09:41:58 +0000 | ||
315 | @@ -5290,17 +5290,21 @@ | ||
316 | [(set (match_operand:SI 0 "register_operand" "") | ||
317 | (match_operand:SI 1 "const_int_operand" ""))] | ||
318 | "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" | ||
319 | - [(set (match_dup 0) (match_dup 1)) | ||
320 | - (set (match_dup 0) (neg:SI (match_dup 0)))] | ||
321 | - "operands[1] = GEN_INT (- INTVAL (operands[1]));" | ||
322 | + [(set (match_dup 2) (match_dup 1)) | ||
323 | + (set (match_dup 0) (neg:SI (match_dup 2)))] | ||
324 | + " | ||
325 | + { | ||
326 | + operands[1] = GEN_INT (- INTVAL (operands[1])); | ||
327 | + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; | ||
328 | + }" | ||
329 | ) | ||
330 | |||
331 | (define_split | ||
332 | [(set (match_operand:SI 0 "register_operand" "") | ||
333 | (match_operand:SI 1 "const_int_operand" ""))] | ||
334 | "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" | ||
335 | - [(set (match_dup 0) (match_dup 1)) | ||
336 | - (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] | ||
337 | + [(set (match_dup 2) (match_dup 1)) | ||
338 | + (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] | ||
339 | " | ||
340 | { | ||
341 | unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; | ||
342 | @@ -5311,12 +5315,13 @@ | ||
343 | if ((val & (mask << i)) == val) | ||
344 | break; | ||
345 | |||
346 | - /* Shouldn't happen, but we don't want to split if the shift is zero. */ | ||
347 | + /* Don't split if the shift is zero. */ | ||
348 | if (i == 0) | ||
349 | FAIL; | ||
350 | |||
351 | operands[1] = GEN_INT (val >> i); | ||
352 | - operands[2] = GEN_INT (i); | ||
353 | + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; | ||
354 | + operands[3] = GEN_INT (i); | ||
355 | }" | ||
356 | ) | ||
357 | |||
358 | @@ -5325,6 +5330,34 @@ | ||
359 | ;; we use an unspec. The offset will be loaded from a constant pool entry, | ||
360 | ;; since that is the only type of relocation we can use. | ||
361 | |||
362 | +;; Wrap calculation of the whole PIC address in a single pattern for the | ||
363 | +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of | ||
364 | +;; a PIC address involves two loads from memory, so we want to CSE it | ||
365 | +;; as often as possible. | ||
366 | +;; This pattern will be split into one of the pic_load_addr_* patterns | ||
367 | +;; and a move after GCSE optimizations. | ||
368 | +;; | ||
369 | +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. | ||
370 | +(define_expand "calculate_pic_address" | ||
371 | + [(set (match_operand:SI 0 "register_operand" "") | ||
372 | + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") | ||
373 | + (unspec:SI [(match_operand:SI 2 "" "")] | ||
374 | + UNSPEC_PIC_SYM))))] | ||
375 | + "flag_pic" | ||
376 | +) | ||
377 | + | ||
378 | +;; Split calculate_pic_address into pic_load_addr_* and a move. | ||
379 | +(define_split | ||
380 | + [(set (match_operand:SI 0 "register_operand" "") | ||
381 | + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") | ||
382 | + (unspec:SI [(match_operand:SI 2 "" "")] | ||
383 | + UNSPEC_PIC_SYM))))] | ||
384 | + "flag_pic" | ||
385 | + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) | ||
386 | + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] | ||
387 | + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" | ||
388 | +) | ||
389 | + | ||
390 | ;; The rather odd constraints on the following are to force reload to leave | ||
391 | ;; the insn alone, and to force the minipool generation pass to then move | ||
392 | ;; the GOT symbol to memory. | ||
393 | |||
394 | === modified file 'gcc/config/arm/t-arm' | ||
395 | --- old/gcc/config/arm/t-arm 2009-06-21 19:48:15 +0000 | ||
396 | +++ new/gcc/config/arm/t-arm 2010-08-16 09:41:58 +0000 | ||
397 | @@ -45,6 +45,15 @@ | ||
398 | $(srcdir)/config/arm/arm-cores.def > \ | ||
399 | $(srcdir)/config/arm/arm-tune.md | ||
400 | |||
401 | +arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ | ||
402 | + $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ | ||
403 | + insn-config.h conditions.h output.h \ | ||
404 | + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ | ||
405 | + $(EXPR_H) $(OPTABS_H) toplev.h $(RECOG_H) $(CGRAPH_H) \ | ||
406 | + $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \ | ||
407 | + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ | ||
408 | + intl.h libfuncs.h $(PARAMS_H) | ||
409 | + | ||
410 | arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ | ||
411 | coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) | ||
412 | $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ | ||
413 | |||
414 | === modified file 'gcc/dbgcnt.def' | ||
415 | --- old/gcc/dbgcnt.def 2009-11-25 10:55:54 +0000 | ||
416 | +++ new/gcc/dbgcnt.def 2010-08-16 09:41:58 +0000 | ||
417 | @@ -158,6 +158,7 @@ | ||
418 | DEBUG_COUNTER (global_alloc_at_func) | ||
419 | DEBUG_COUNTER (global_alloc_at_reg) | ||
420 | DEBUG_COUNTER (hoist) | ||
421 | +DEBUG_COUNTER (hoist_insn) | ||
422 | DEBUG_COUNTER (ia64_sched2) | ||
423 | DEBUG_COUNTER (if_conversion) | ||
424 | DEBUG_COUNTER (if_after_combine) | ||
425 | |||
426 | === modified file 'gcc/doc/invoke.texi' | ||
427 | --- old/gcc/doc/invoke.texi 2010-08-05 15:20:54 +0000 | ||
428 | +++ new/gcc/doc/invoke.texi 2010-08-16 09:41:58 +0000 | ||
429 | @@ -8086,6 +8086,29 @@ | ||
430 | vectorization needs to be greater than the value specified by this option | ||
431 | to allow vectorization. The default value is 0. | ||
432 | |||
433 | +@item gcse-cost-distance-ratio | ||
434 | +Scaling factor in calculation of maximum distance an expression | ||
435 | +can be moved by GCSE optimizations. This is currently supported only in | ||
436 | +code hoisting pass. The bigger the ratio, the more aggressive code hoisting | ||
437 | +will be with simple expressions, i.e., the expressions which have cost | ||
438 | +less than @option{gcse-unrestricted-cost}. Specifying 0 will disable | ||
439 | +hoisting of simple expressions. The default value is 10. | ||
440 | + | ||
441 | +@item gcse-unrestricted-cost | ||
442 | +Cost, roughly measured as the cost of a single typical machine | ||
443 | +instruction, at which GCSE optimizations will not constrain | ||
444 | +the distance an expression can travel. This is currently | ||
445 | +supported only in code hoisting pass. The lesser the cost, | ||
446 | +the more aggressive code hoisting will be. Specifying 0 will | ||
447 | +allow all expressions to travel unrestricted distances. | ||
448 | +The default value is 3. | ||
449 | + | ||
450 | +@item max-hoist-depth | ||
451 | +The depth of search in the dominator tree for expressions to hoist. | ||
452 | +This is used to avoid quadratic behavior in hoisting algorithm. | ||
453 | +The value of 0 will avoid limiting the search, but may slow down compilation | ||
454 | +of huge functions. The default value is 30. | ||
455 | + | ||
456 | @item max-unrolled-insns | ||
457 | The maximum number of instructions that a loop should have if that loop | ||
458 | is unrolled, and if the loop is unrolled, it determines how many times | ||
459 | |||
460 | === modified file 'gcc/dominance.c' | ||
461 | --- old/gcc/dominance.c 2010-04-02 18:54:46 +0000 | ||
462 | +++ new/gcc/dominance.c 2010-08-16 09:41:58 +0000 | ||
463 | @@ -782,16 +782,20 @@ | ||
464 | } | ||
465 | |||
466 | /* Returns the list of basic blocks including BB dominated by BB, in the | ||
467 | - direction DIR. The vector will be sorted in preorder. */ | ||
468 | + direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will | ||
469 | + produce a vector containing all dominated blocks. The vector will be sorted | ||
470 | + in preorder. */ | ||
471 | |||
472 | VEC (basic_block, heap) * | ||
473 | -get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) | ||
474 | +get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth) | ||
475 | { | ||
476 | VEC(basic_block, heap) *bbs = NULL; | ||
477 | unsigned i; | ||
478 | + unsigned next_level_start; | ||
479 | |||
480 | i = 0; | ||
481 | VEC_safe_push (basic_block, heap, bbs, bb); | ||
482 | + next_level_start = 1; /* = VEC_length (basic_block, bbs); */ | ||
483 | |||
484 | do | ||
485 | { | ||
486 | @@ -802,12 +806,24 @@ | ||
487 | son; | ||
488 | son = next_dom_son (dir, son)) | ||
489 | VEC_safe_push (basic_block, heap, bbs, son); | ||
490 | + | ||
491 | + if (i == next_level_start && --depth) | ||
492 | + next_level_start = VEC_length (basic_block, bbs); | ||
493 | } | ||
494 | - while (i < VEC_length (basic_block, bbs)); | ||
495 | + while (i < next_level_start); | ||
496 | |||
497 | return bbs; | ||
498 | } | ||
499 | |||
500 | +/* Returns the list of basic blocks including BB dominated by BB, in the | ||
501 | + direction DIR. The vector will be sorted in preorder. */ | ||
502 | + | ||
503 | +VEC (basic_block, heap) * | ||
504 | +get_all_dominated_blocks (enum cdi_direction dir, basic_block bb) | ||
505 | +{ | ||
506 | + return get_dominated_to_depth (dir, bb, 0); | ||
507 | +} | ||
508 | + | ||
509 | /* Redirect all edges pointing to BB to TO. */ | ||
510 | void | ||
511 | redirect_immediate_dominators (enum cdi_direction dir, basic_block bb, | ||
512 | |||
513 | === modified file 'gcc/gcse.c' | ||
514 | --- old/gcc/gcse.c 2010-03-16 10:50:42 +0000 | ||
515 | +++ new/gcc/gcse.c 2010-08-16 09:41:58 +0000 | ||
516 | @@ -296,6 +296,12 @@ | ||
517 | The value is the newly created pseudo-reg to record a copy of the | ||
518 | expression in all the places that reach the redundant copy. */ | ||
519 | rtx reaching_reg; | ||
520 | + /* Maximum distance in instructions this expression can travel. | ||
521 | + We avoid moving simple expressions for more than a few instructions | ||
522 | + to keep register pressure under control. | ||
523 | + A value of "0" removes restrictions on how far the expression can | ||
524 | + travel. */ | ||
525 | + int max_distance; | ||
526 | }; | ||
527 | |||
528 | /* Occurrence of an expression. | ||
529 | @@ -317,6 +323,10 @@ | ||
530 | char copied_p; | ||
531 | }; | ||
532 | |||
533 | +typedef struct occr *occr_t; | ||
534 | +DEF_VEC_P (occr_t); | ||
535 | +DEF_VEC_ALLOC_P (occr_t, heap); | ||
536 | + | ||
537 | /* Expression and copy propagation hash tables. | ||
538 | Each hash table is an array of buckets. | ||
539 | ??? It is known that if it were an array of entries, structure elements | ||
540 | @@ -419,6 +429,9 @@ | ||
541 | /* Number of global copies propagated. */ | ||
542 | static int global_copy_prop_count; | ||
543 | |||
544 | +/* Doing code hoisting. */ | ||
545 | +static bool doing_code_hoisting_p = false; | ||
546 | + | ||
547 | /* For available exprs */ | ||
548 | static sbitmap *ae_kill; | ||
549 | |||
550 | @@ -432,12 +445,12 @@ | ||
551 | static void hash_scan_set (rtx, rtx, struct hash_table_d *); | ||
552 | static void hash_scan_clobber (rtx, rtx, struct hash_table_d *); | ||
553 | static void hash_scan_call (rtx, rtx, struct hash_table_d *); | ||
554 | -static int want_to_gcse_p (rtx); | ||
555 | +static int want_to_gcse_p (rtx, int *); | ||
556 | static bool gcse_constant_p (const_rtx); | ||
557 | static int oprs_unchanged_p (const_rtx, const_rtx, int); | ||
558 | static int oprs_anticipatable_p (const_rtx, const_rtx); | ||
559 | static int oprs_available_p (const_rtx, const_rtx); | ||
560 | -static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, | ||
561 | +static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int, | ||
562 | struct hash_table_d *); | ||
563 | static void insert_set_in_table (rtx, rtx, struct hash_table_d *); | ||
564 | static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int); | ||
565 | @@ -462,7 +475,6 @@ | ||
566 | static void alloc_cprop_mem (int, int); | ||
567 | static void free_cprop_mem (void); | ||
568 | static void compute_transp (const_rtx, int, sbitmap *, int); | ||
569 | -static void compute_transpout (void); | ||
570 | static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, | ||
571 | struct hash_table_d *); | ||
572 | static void compute_cprop_data (void); | ||
573 | @@ -486,7 +498,7 @@ | ||
574 | static void compute_pre_data (void); | ||
575 | static int pre_expr_reaches_here_p (basic_block, struct expr *, | ||
576 | basic_block); | ||
577 | -static void insert_insn_end_basic_block (struct expr *, basic_block, int); | ||
578 | +static void insert_insn_end_basic_block (struct expr *, basic_block); | ||
579 | static void pre_insert_copy_insn (struct expr *, rtx); | ||
580 | static void pre_insert_copies (void); | ||
581 | static int pre_delete (void); | ||
582 | @@ -497,7 +509,8 @@ | ||
583 | static void free_code_hoist_mem (void); | ||
584 | static void compute_code_hoist_vbeinout (void); | ||
585 | static void compute_code_hoist_data (void); | ||
586 | -static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *); | ||
587 | +static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *, | ||
588 | + int, int *); | ||
589 | static int hoist_code (void); | ||
590 | static int one_code_hoisting_pass (void); | ||
591 | static rtx process_insert_insn (struct expr *); | ||
592 | @@ -755,7 +768,7 @@ | ||
593 | GCSE. */ | ||
594 | |||
595 | static int | ||
596 | -want_to_gcse_p (rtx x) | ||
597 | +want_to_gcse_p (rtx x, int *max_distance_ptr) | ||
598 | { | ||
599 | #ifdef STACK_REGS | ||
600 | /* On register stack architectures, don't GCSE constants from the | ||
601 | @@ -765,18 +778,67 @@ | ||
602 | x = avoid_constant_pool_reference (x); | ||
603 | #endif | ||
604 | |||
605 | + /* GCSE'ing constants: | ||
606 | + | ||
607 | + We do not specifically distinguish between constant and non-constant | ||
608 | + expressions in PRE and Hoist. We use rtx_cost below to limit | ||
609 | + the maximum distance simple expressions can travel. | ||
610 | + | ||
611 | + Nevertheless, constants are much easier to GCSE, and, hence, | ||
612 | + it is easy to overdo the optimizations. Usually, excessive PRE and | ||
613 | + Hoisting of constant leads to increased register pressure. | ||
614 | + | ||
615 | + RA can deal with this by rematerializing some of the constants. | ||
616 | + Therefore, it is important that the back-end generates sets of constants | ||
617 | + in a way that allows reload to rematerialize them under high register | ||
618 | + pressure, i.e., a pseudo register with REG_EQUAL to constant | ||
619 | + is set only once. Failing to do so will result in IRA/reload | ||
620 | + spilling such constants under high register pressure instead of | ||
621 | + rematerializing them. */ | ||
622 | + | ||
623 | switch (GET_CODE (x)) | ||
624 | { | ||
625 | case REG: | ||
626 | case SUBREG: | ||
627 | - case CONST_INT: | ||
628 | - case CONST_DOUBLE: | ||
629 | - case CONST_FIXED: | ||
630 | - case CONST_VECTOR: | ||
631 | case CALL: | ||
632 | return 0; | ||
633 | |||
634 | + case CONST_INT: | ||
635 | + case CONST_DOUBLE: | ||
636 | + case CONST_FIXED: | ||
637 | + case CONST_VECTOR: | ||
638 | + if (!doing_code_hoisting_p) | ||
639 | + /* Do not PRE constants. */ | ||
640 | + return 0; | ||
641 | + | ||
642 | + /* FALLTHRU */ | ||
643 | + | ||
644 | default: | ||
645 | + if (doing_code_hoisting_p) | ||
646 | + /* PRE doesn't implement max_distance restriction. */ | ||
647 | + { | ||
648 | + int cost; | ||
649 | + int max_distance; | ||
650 | + | ||
651 | + gcc_assert (!optimize_function_for_speed_p (cfun) | ||
652 | + && optimize_function_for_size_p (cfun)); | ||
653 | + cost = rtx_cost (x, SET, 0); | ||
654 | + | ||
655 | + if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST)) | ||
656 | + { | ||
657 | + max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10; | ||
658 | + if (max_distance == 0) | ||
659 | + return 0; | ||
660 | + | ||
661 | + gcc_assert (max_distance > 0); | ||
662 | + } | ||
663 | + else | ||
664 | + max_distance = 0; | ||
665 | + | ||
666 | + if (max_distance_ptr) | ||
667 | + *max_distance_ptr = max_distance; | ||
668 | + } | ||
669 | + | ||
670 | return can_assign_to_reg_without_clobbers_p (x); | ||
671 | } | ||
672 | } | ||
673 | @@ -1090,11 +1152,14 @@ | ||
674 | It is only used if X is a CONST_INT. | ||
675 | |||
676 | ANTIC_P is nonzero if X is an anticipatable expression. | ||
677 | - AVAIL_P is nonzero if X is an available expression. */ | ||
678 | + AVAIL_P is nonzero if X is an available expression. | ||
679 | + | ||
680 | + MAX_DISTANCE is the maximum distance in instructions this expression can | ||
681 | + be moved. */ | ||
682 | |||
683 | static void | ||
684 | insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, | ||
685 | - int avail_p, struct hash_table_d *table) | ||
686 | + int avail_p, int max_distance, struct hash_table_d *table) | ||
687 | { | ||
688 | int found, do_not_record_p; | ||
689 | unsigned int hash; | ||
690 | @@ -1137,7 +1202,11 @@ | ||
691 | cur_expr->next_same_hash = NULL; | ||
692 | cur_expr->antic_occr = NULL; | ||
693 | cur_expr->avail_occr = NULL; | ||
694 | + gcc_assert (max_distance >= 0); | ||
695 | + cur_expr->max_distance = max_distance; | ||
696 | } | ||
697 | + else | ||
698 | + gcc_assert (cur_expr->max_distance == max_distance); | ||
699 | |||
700 | /* Now record the occurrence(s). */ | ||
701 | if (antic_p) | ||
702 | @@ -1238,6 +1307,8 @@ | ||
703 | cur_expr->next_same_hash = NULL; | ||
704 | cur_expr->antic_occr = NULL; | ||
705 | cur_expr->avail_occr = NULL; | ||
706 | + /* Not used for set_p tables. */ | ||
707 | + cur_expr->max_distance = 0; | ||
708 | } | ||
709 | |||
710 | /* Now record the occurrence. */ | ||
711 | @@ -1307,6 +1378,7 @@ | ||
712 | { | ||
713 | unsigned int regno = REGNO (dest); | ||
714 | rtx tmp; | ||
715 | + int max_distance = 0; | ||
716 | |||
717 | /* See if a REG_EQUAL note shows this equivalent to a simpler expression. | ||
718 | |||
719 | @@ -1329,7 +1401,7 @@ | ||
720 | && !REG_P (src) | ||
721 | && (table->set_p | ||
722 | ? gcse_constant_p (XEXP (note, 0)) | ||
723 | - : want_to_gcse_p (XEXP (note, 0)))) | ||
724 | + : want_to_gcse_p (XEXP (note, 0), NULL))) | ||
725 | src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); | ||
726 | |||
727 | /* Only record sets of pseudo-regs in the hash table. */ | ||
728 | @@ -1344,7 +1416,7 @@ | ||
729 | can't do the same thing at the rtl level. */ | ||
730 | && !can_throw_internal (insn) | ||
731 | /* Is SET_SRC something we want to gcse? */ | ||
732 | - && want_to_gcse_p (src) | ||
733 | + && want_to_gcse_p (src, &max_distance) | ||
734 | /* Don't CSE a nop. */ | ||
735 | && ! set_noop_p (pat) | ||
736 | /* Don't GCSE if it has attached REG_EQUIV note. | ||
737 | @@ -1368,7 +1440,8 @@ | ||
738 | int avail_p = (oprs_available_p (src, insn) | ||
739 | && ! JUMP_P (insn)); | ||
740 | |||
741 | - insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table); | ||
742 | + insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, | ||
743 | + max_distance, table); | ||
744 | } | ||
745 | |||
746 | /* Record sets for constant/copy propagation. */ | ||
747 | @@ -1394,6 +1467,7 @@ | ||
748 | else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) | ||
749 | { | ||
750 | unsigned int regno = REGNO (src); | ||
751 | + int max_distance = 0; | ||
752 | |||
753 | /* Do not do this for constant/copy propagation. */ | ||
754 | if (! table->set_p | ||
755 | @@ -1405,7 +1479,7 @@ | ||
756 | do that easily for EH edges so disable GCSE on these for now. */ | ||
757 | && !can_throw_internal (insn) | ||
758 | /* Is SET_DEST something we want to gcse? */ | ||
759 | - && want_to_gcse_p (dest) | ||
760 | + && want_to_gcse_p (dest, &max_distance) | ||
761 | /* Don't CSE a nop. */ | ||
762 | && ! set_noop_p (pat) | ||
763 | /* Don't GCSE if it has attached REG_EQUIV note. | ||
764 | @@ -1427,7 +1501,7 @@ | ||
765 | |||
766 | /* Record the memory expression (DEST) in the hash table. */ | ||
767 | insert_expr_in_table (dest, GET_MODE (dest), insn, | ||
768 | - antic_p, avail_p, table); | ||
769 | + antic_p, avail_p, max_distance, table); | ||
770 | } | ||
771 | } | ||
772 | } | ||
773 | @@ -1513,8 +1587,8 @@ | ||
774 | if (flat_table[i] != 0) | ||
775 | { | ||
776 | expr = flat_table[i]; | ||
777 | - fprintf (file, "Index %d (hash value %d)\n ", | ||
778 | - expr->bitmap_index, hash_val[i]); | ||
779 | + fprintf (file, "Index %d (hash value %d; max distance %d)\n ", | ||
780 | + expr->bitmap_index, hash_val[i], expr->max_distance); | ||
781 | print_rtl (file, expr->expr); | ||
782 | fprintf (file, "\n"); | ||
783 | } | ||
784 | @@ -3168,11 +3242,6 @@ | ||
785 | /* Nonzero for expressions that are transparent in the block. */ | ||
786 | static sbitmap *transp; | ||
787 | |||
788 | -/* Nonzero for expressions that are transparent at the end of the block. | ||
789 | - This is only zero for expressions killed by abnormal critical edge | ||
790 | - created by a calls. */ | ||
791 | -static sbitmap *transpout; | ||
792 | - | ||
793 | /* Nonzero for expressions that are computed (available) in the block. */ | ||
794 | static sbitmap *comp; | ||
795 | |||
796 | @@ -3236,28 +3305,105 @@ | ||
797 | pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; | ||
798 | } | ||
799 | |||
800 | -/* Top level routine to do the dataflow analysis needed by PRE. */ | ||
801 | +/* Remove certain expressions from anticipatable and transparent | ||
802 | + sets of basic blocks that have an incoming abnormal edge. | |
803 | + For PRE remove potentially trapping expressions to avoid placing | ||
804 | + them on abnormal edges. For hoisting remove memory references that | ||
805 | + can be clobbered by calls. */ | ||
806 | |||
807 | static void | ||
808 | -compute_pre_data (void) | ||
809 | +prune_expressions (bool pre_p) | ||
810 | { | ||
811 | - sbitmap trapping_expr; | ||
812 | - basic_block bb; | ||
813 | + sbitmap prune_exprs; | ||
814 | unsigned int ui; | ||
815 | - | ||
816 | - compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
817 | - sbitmap_vector_zero (ae_kill, last_basic_block); | ||
818 | - | ||
819 | - /* Collect expressions which might trap. */ | ||
820 | - trapping_expr = sbitmap_alloc (expr_hash_table.n_elems); | ||
821 | - sbitmap_zero (trapping_expr); | ||
822 | + basic_block bb; | ||
823 | + | ||
824 | + prune_exprs = sbitmap_alloc (expr_hash_table.n_elems); | ||
825 | + sbitmap_zero (prune_exprs); | ||
826 | for (ui = 0; ui < expr_hash_table.size; ui++) | ||
827 | { | ||
828 | struct expr *e; | ||
829 | for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash) | ||
830 | - if (may_trap_p (e->expr)) | ||
831 | - SET_BIT (trapping_expr, e->bitmap_index); | ||
832 | - } | ||
833 | + { | ||
834 | + /* Note potentially trapping expressions. */ | ||
835 | + if (may_trap_p (e->expr)) | ||
836 | + { | ||
837 | + SET_BIT (prune_exprs, e->bitmap_index); | ||
838 | + continue; | ||
839 | + } | ||
840 | + | ||
841 | + if (!pre_p && MEM_P (e->expr)) | ||
842 | + /* Note memory references that can be clobbered by a call. | ||
843 | + We do not split abnormal edges in hoisting, so if | |
844 | + a memory reference were hoisted along an abnormal edge, | |
845 | + it would be placed /before/ the call. Therefore, only | ||
846 | + constant memory references can be hoisted along abnormal | ||
847 | + edges. */ | ||
848 | + { | ||
849 | + if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF | ||
850 | + && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0))) | ||
851 | + continue; | ||
852 | + | ||
853 | + if (MEM_READONLY_P (e->expr) | ||
854 | + && !MEM_VOLATILE_P (e->expr) | ||
855 | + && MEM_NOTRAP_P (e->expr)) | ||
856 | + /* Constant memory reference, e.g., a PIC address. */ | ||
857 | + continue; | ||
858 | + | ||
859 | + /* ??? Optimally, we would use interprocedural alias | ||
860 | + analysis to determine if this mem is actually killed | ||
861 | + by this call. */ | ||
862 | + | ||
863 | + SET_BIT (prune_exprs, e->bitmap_index); | ||
864 | + } | ||
865 | + } | ||
866 | + } | ||
867 | + | ||
868 | + FOR_EACH_BB (bb) | ||
869 | + { | ||
870 | + edge e; | ||
871 | + edge_iterator ei; | ||
872 | + | ||
873 | + /* If the current block is the destination of an abnormal edge, we | ||
874 | + kill all trapping (for PRE) and memory (for hoist) expressions | ||
875 | + because we won't be able to properly place the instruction on | ||
876 | + the edge. So make them neither anticipatable nor transparent. | ||
877 | + This is fairly conservative. | ||
878 | + | ||
879 | + ??? For hoisting it may be necessary to check for set-and-jump | ||
880 | + instructions here, not just for abnormal edges. The general problem | ||
881 | + is that when an expression cannot be placed right at the end of | |
882 | + a basic block we should account for any side-effects of subsequent | |
883 | + jump instructions that could clobber the expression. It would | |
884 | + be best to implement this check along the lines of | ||
885 | + hoist_expr_reaches_here_p where the target block is already known | ||
886 | + and, hence, there's no need to conservatively prune expressions on | ||
887 | + "intermediate" set-and-jump instructions. */ | ||
888 | + FOR_EACH_EDGE (e, ei, bb->preds) | ||
889 | + if ((e->flags & EDGE_ABNORMAL) | ||
890 | + && (pre_p || CALL_P (BB_END (e->src)))) | ||
891 | + { | ||
892 | + sbitmap_difference (antloc[bb->index], | ||
893 | + antloc[bb->index], prune_exprs); | ||
894 | + sbitmap_difference (transp[bb->index], | ||
895 | + transp[bb->index], prune_exprs); | ||
896 | + break; | ||
897 | + } | ||
898 | + } | ||
899 | + | ||
900 | + sbitmap_free (prune_exprs); | ||
901 | +} | ||
902 | + | ||
903 | +/* Top level routine to do the dataflow analysis needed by PRE. */ | ||
904 | + | ||
905 | +static void | ||
906 | +compute_pre_data (void) | ||
907 | +{ | ||
908 | + basic_block bb; | ||
909 | + | ||
910 | + compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
911 | + prune_expressions (true); | ||
912 | + sbitmap_vector_zero (ae_kill, last_basic_block); | ||
913 | |||
914 | /* Compute ae_kill for each basic block using: | ||
915 | |||
916 | @@ -3266,21 +3412,6 @@ | ||
917 | |||
918 | FOR_EACH_BB (bb) | ||
919 | { | ||
920 | - edge e; | ||
921 | - edge_iterator ei; | ||
922 | - | ||
923 | - /* If the current block is the destination of an abnormal edge, we | ||
924 | - kill all trapping expressions because we won't be able to properly | ||
925 | - place the instruction on the edge. So make them neither | ||
926 | - anticipatable nor transparent. This is fairly conservative. */ | ||
927 | - FOR_EACH_EDGE (e, ei, bb->preds) | ||
928 | - if (e->flags & EDGE_ABNORMAL) | ||
929 | - { | ||
930 | - sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr); | ||
931 | - sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr); | ||
932 | - break; | ||
933 | - } | ||
934 | - | ||
935 | sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]); | ||
936 | sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]); | ||
937 | } | ||
938 | @@ -3291,7 +3422,6 @@ | ||
939 | antloc = NULL; | ||
940 | sbitmap_vector_free (ae_kill); | ||
941 | ae_kill = NULL; | ||
942 | - sbitmap_free (trapping_expr); | ||
943 | } | ||
944 | |||
945 | /* PRE utilities */ | ||
946 | @@ -3406,14 +3536,10 @@ | ||
947 | |||
948 | /* Add EXPR to the end of basic block BB. | ||
949 | |||
950 | - This is used by both the PRE and code hoisting. | ||
951 | - | ||
952 | - For PRE, we want to verify that the expr is either transparent | ||
953 | - or locally anticipatable in the target block. This check makes | ||
954 | - no sense for code hoisting. */ | ||
955 | + This is used by both the PRE and code hoisting. */ | ||
956 | |||
957 | static void | ||
958 | -insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre) | ||
959 | +insert_insn_end_basic_block (struct expr *expr, basic_block bb) | ||
960 | { | ||
961 | rtx insn = BB_END (bb); | ||
962 | rtx new_insn; | ||
963 | @@ -3440,12 +3566,6 @@ | ||
964 | #ifdef HAVE_cc0 | ||
965 | rtx note; | ||
966 | #endif | ||
967 | - /* It should always be the case that we can put these instructions | ||
968 | - anywhere in the basic block with performing PRE optimizations. | ||
969 | - Check this. */ | ||
970 | - gcc_assert (!NONJUMP_INSN_P (insn) || !pre | ||
971 | - || TEST_BIT (antloc[bb->index], expr->bitmap_index) | ||
972 | - || TEST_BIT (transp[bb->index], expr->bitmap_index)); | ||
973 | |||
974 | /* If this is a jump table, then we can't insert stuff here. Since | ||
975 | we know the previous real insn must be the tablejump, we insert | ||
976 | @@ -3482,15 +3602,7 @@ | ||
977 | /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, | ||
978 | we search backward and place the instructions before the first | ||
979 | parameter is loaded. Do this for everyone for consistency and a | ||
980 | - presumption that we'll get better code elsewhere as well. | ||
981 | - | ||
982 | - It should always be the case that we can put these instructions | ||
983 | - anywhere in the basic block with performing PRE optimizations. | ||
984 | - Check this. */ | ||
985 | - | ||
986 | - gcc_assert (!pre | ||
987 | - || TEST_BIT (antloc[bb->index], expr->bitmap_index) | ||
988 | - || TEST_BIT (transp[bb->index], expr->bitmap_index)); | ||
989 | + presumption that we'll get better code elsewhere as well. */ | ||
990 | |||
991 | /* Since different machines initialize their parameter registers | ||
992 | in different orders, assume nothing. Collect the set of all | ||
993 | @@ -3587,7 +3699,7 @@ | ||
994 | now. */ | ||
995 | |||
996 | if (eg->flags & EDGE_ABNORMAL) | ||
997 | - insert_insn_end_basic_block (index_map[j], bb, 0); | ||
998 | + insert_insn_end_basic_block (index_map[j], bb); | ||
999 | else | ||
1000 | { | ||
1001 | insn = process_insert_insn (index_map[j]); | ||
1002 | @@ -4046,61 +4158,12 @@ | ||
1003 | } | ||
1004 | } | ||
1005 | |||
1006 | -/* Compute transparent outgoing information for each block. | ||
1007 | - | ||
1008 | - An expression is transparent to an edge unless it is killed by | ||
1009 | - the edge itself. This can only happen with abnormal control flow, | ||
1010 | - when the edge is traversed through a call. This happens with | ||
1011 | - non-local labels and exceptions. | ||
1012 | - | ||
1013 | - This would not be necessary if we split the edge. While this is | ||
1014 | - normally impossible for abnormal critical edges, with some effort | ||
1015 | - it should be possible with exception handling, since we still have | ||
1016 | - control over which handler should be invoked. But due to increased | ||
1017 | - EH table sizes, this may not be worthwhile. */ | ||
1018 | - | ||
1019 | -static void | ||
1020 | -compute_transpout (void) | ||
1021 | -{ | ||
1022 | - basic_block bb; | ||
1023 | - unsigned int i; | ||
1024 | - struct expr *expr; | ||
1025 | - | ||
1026 | - sbitmap_vector_ones (transpout, last_basic_block); | ||
1027 | - | ||
1028 | - FOR_EACH_BB (bb) | ||
1029 | - { | ||
1030 | - /* Note that flow inserted a nop at the end of basic blocks that | ||
1031 | - end in call instructions for reasons other than abnormal | ||
1032 | - control flow. */ | ||
1033 | - if (! CALL_P (BB_END (bb))) | ||
1034 | - continue; | ||
1035 | - | ||
1036 | - for (i = 0; i < expr_hash_table.size; i++) | ||
1037 | - for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash) | ||
1038 | - if (MEM_P (expr->expr)) | ||
1039 | - { | ||
1040 | - if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF | ||
1041 | - && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0))) | ||
1042 | - continue; | ||
1043 | - | ||
1044 | - /* ??? Optimally, we would use interprocedural alias | ||
1045 | - analysis to determine if this mem is actually killed | ||
1046 | - by this call. */ | ||
1047 | - RESET_BIT (transpout[bb->index], expr->bitmap_index); | ||
1048 | - } | ||
1049 | - } | ||
1050 | -} | ||
1051 | - | ||
1052 | /* Code Hoisting variables and subroutines. */ | ||
1053 | |||
1054 | /* Very busy expressions. */ | ||
1055 | static sbitmap *hoist_vbein; | ||
1056 | static sbitmap *hoist_vbeout; | ||
1057 | |||
1058 | -/* Hoistable expressions. */ | ||
1059 | -static sbitmap *hoist_exprs; | ||
1060 | - | ||
1061 | /* ??? We could compute post dominators and run this algorithm in | ||
1062 | reverse to perform tail merging, doing so would probably be | ||
1063 | more effective than the tail merging code in jump.c. | ||
1064 | @@ -4119,8 +4182,6 @@ | ||
1065 | |||
1066 | hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
1067 | hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
1068 | - hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
1069 | - transpout = sbitmap_vector_alloc (n_blocks, n_exprs); | ||
1070 | } | ||
1071 | |||
1072 | /* Free vars used for code hoisting analysis. */ | ||
1073 | @@ -4134,8 +4195,6 @@ | ||
1074 | |||
1075 | sbitmap_vector_free (hoist_vbein); | ||
1076 | sbitmap_vector_free (hoist_vbeout); | ||
1077 | - sbitmap_vector_free (hoist_exprs); | ||
1078 | - sbitmap_vector_free (transpout); | ||
1079 | |||
1080 | free_dominance_info (CDI_DOMINATORS); | ||
1081 | } | ||
1082 | @@ -4166,8 +4225,15 @@ | ||
1083 | FOR_EACH_BB_REVERSE (bb) | ||
1084 | { | ||
1085 | if (bb->next_bb != EXIT_BLOCK_PTR) | ||
1086 | - sbitmap_intersection_of_succs (hoist_vbeout[bb->index], | ||
1087 | - hoist_vbein, bb->index); | ||
1088 | + { | ||
1089 | + sbitmap_intersection_of_succs (hoist_vbeout[bb->index], | ||
1090 | + hoist_vbein, bb->index); | ||
1091 | + | ||
1092 | + /* Include expressions in VBEout that are calculated | ||
1093 | + in BB and available at its end. */ | ||
1094 | + sbitmap_a_or_b (hoist_vbeout[bb->index], | ||
1095 | + hoist_vbeout[bb->index], comp[bb->index]); | ||
1096 | + } | ||
1097 | |||
1098 | changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index], | ||
1099 | antloc[bb->index], | ||
1100 | @@ -4179,7 +4245,17 @@ | ||
1101 | } | ||
1102 | |||
1103 | if (dump_file) | ||
1104 | - fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | ||
1105 | + { | ||
1106 | + fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); | ||
1107 | + | ||
1108 | + FOR_EACH_BB (bb) | ||
1109 | + { | ||
1110 | + fprintf (dump_file, "vbein (%d): ", bb->index); | ||
1111 | + dump_sbitmap_file (dump_file, hoist_vbein[bb->index]); | ||
1112 | + fprintf (dump_file, "vbeout(%d): ", bb->index); | ||
1113 | + dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]); | ||
1114 | + } | ||
1115 | + } | ||
1116 | } | ||
1117 | |||
1118 | /* Top level routine to do the dataflow analysis needed by code hoisting. */ | ||
1119 | @@ -4188,7 +4264,7 @@ | ||
1120 | compute_code_hoist_data (void) | ||
1121 | { | ||
1122 | compute_local_properties (transp, comp, antloc, &expr_hash_table); | ||
1123 | - compute_transpout (); | ||
1124 | + prune_expressions (false); | ||
1125 | compute_code_hoist_vbeinout (); | ||
1126 | calculate_dominance_info (CDI_DOMINATORS); | ||
1127 | if (dump_file) | ||
1128 | @@ -4197,6 +4273,8 @@ | ||
1129 | |||
1130 | /* Determine if the expression identified by EXPR_INDEX would | ||
1131 | reach BB unimpared if it was placed at the end of EXPR_BB. | ||
1132 | + Stop the search if the expression would need to be moved more | ||
1133 | + than DISTANCE instructions. | ||
1134 | |||
1135 | It's unclear exactly what Muchnick meant by "unimpared". It seems | ||
1136 | to me that the expression must either be computed or transparent in | ||
1137 | @@ -4209,12 +4287,24 @@ | ||
1138 | paths. */ | ||
1139 | |||
1140 | static int | ||
1141 | -hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited) | ||
1142 | +hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, | ||
1143 | + char *visited, int distance, int *bb_size) | ||
1144 | { | ||
1145 | edge pred; | ||
1146 | edge_iterator ei; | ||
1147 | int visited_allocated_locally = 0; | ||
1148 | |||
1149 | + /* Terminate the search if distance, for which EXPR is allowed to move, | ||
1150 | + is exhausted. */ | ||
1151 | + if (distance > 0) | ||
1152 | + { | ||
1153 | + distance -= bb_size[bb->index]; | ||
1154 | + | ||
1155 | + if (distance <= 0) | ||
1156 | + return 0; | ||
1157 | + } | ||
1158 | + else | ||
1159 | + gcc_assert (distance == 0); | ||
1160 | |||
1161 | if (visited == NULL) | ||
1162 | { | ||
1163 | @@ -4233,9 +4323,6 @@ | ||
1164 | else if (visited[pred_bb->index]) | ||
1165 | continue; | ||
1166 | |||
1167 | - /* Does this predecessor generate this expression? */ | ||
1168 | - else if (TEST_BIT (comp[pred_bb->index], expr_index)) | ||
1169 | - break; | ||
1170 | else if (! TEST_BIT (transp[pred_bb->index], expr_index)) | ||
1171 | break; | ||
1172 | |||
1173 | @@ -4243,8 +4330,8 @@ | ||
1174 | else | ||
1175 | { | ||
1176 | visited[pred_bb->index] = 1; | ||
1177 | - if (! hoist_expr_reaches_here_p (expr_bb, expr_index, | ||
1178 | - pred_bb, visited)) | ||
1179 | + if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb, | ||
1180 | + visited, distance, bb_size)) | ||
1181 | break; | ||
1182 | } | ||
1183 | } | ||
1184 | @@ -4254,20 +4341,33 @@ | ||
1185 | return (pred == NULL); | ||
1186 | } | ||
1187 | |||
1188 | +/* Find occurrence in BB. */ | |
1189 | +static struct occr * | ||
1190 | +find_occr_in_bb (struct occr *occr, basic_block bb) | ||
1191 | +{ | ||
1192 | + /* Find the right occurrence of this expression. */ | ||
1193 | + while (occr && BLOCK_FOR_INSN (occr->insn) != bb) | ||
1194 | + occr = occr->next; | ||
1195 | + | ||
1196 | + return occr; | ||
1197 | +} | ||
1198 | + | ||
1199 | /* Actually perform code hoisting. */ | ||
1200 | |||
1201 | static int | ||
1202 | hoist_code (void) | ||
1203 | { | ||
1204 | basic_block bb, dominated; | ||
1205 | + VEC (basic_block, heap) *dom_tree_walk; | ||
1206 | + unsigned int dom_tree_walk_index; | ||
1207 | VEC (basic_block, heap) *domby; | ||
1208 | unsigned int i,j; | ||
1209 | struct expr **index_map; | ||
1210 | struct expr *expr; | ||
1211 | + int *to_bb_head; | ||
1212 | + int *bb_size; | ||
1213 | int changed = 0; | ||
1214 | |||
1215 | - sbitmap_vector_zero (hoist_exprs, last_basic_block); | ||
1216 | - | ||
1217 | /* Compute a mapping from expression number (`bitmap_index') to | ||
1218 | hash table entry. */ | ||
1219 | |||
1220 | @@ -4276,28 +4376,98 @@ | ||
1221 | for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash) | ||
1222 | index_map[expr->bitmap_index] = expr; | ||
1223 | |||
1224 | + /* Calculate sizes of basic blocks and note how far | ||
1225 | + each instruction is from the start of its block. We then use this | ||
1226 | + data to restrict the distance an expression can travel. */ | |
1227 | + | ||
1228 | + to_bb_head = XCNEWVEC (int, get_max_uid ()); | ||
1229 | + bb_size = XCNEWVEC (int, last_basic_block); | ||
1230 | + | ||
1231 | + FOR_EACH_BB (bb) | ||
1232 | + { | ||
1233 | + rtx insn; | ||
1234 | + int to_head; | ||
1235 | + | ||
1236 | + to_head = 0; | ||
1237 | + FOR_BB_INSNS (bb, insn) | ||
1238 | + { | ||
1239 | + /* Don't count debug instructions to avoid them affecting | ||
1240 | + decision choices. */ | ||
1241 | + if (NONDEBUG_INSN_P (insn)) | ||
1242 | + to_bb_head[INSN_UID (insn)] = to_head++; | ||
1243 | + } | ||
1244 | + | ||
1245 | + bb_size[bb->index] = to_head; | ||
1246 | + } | ||
1247 | + | ||
1248 | + gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1 | ||
1249 | + && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest | ||
1250 | + == ENTRY_BLOCK_PTR->next_bb)); | ||
1251 | + | ||
1252 | + dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS, | ||
1253 | + ENTRY_BLOCK_PTR->next_bb); | ||
1254 | + | ||
1255 | /* Walk over each basic block looking for potentially hoistable | ||
1256 | expressions, nothing gets hoisted from the entry block. */ | ||
1257 | - FOR_EACH_BB (bb) | ||
1258 | + for (dom_tree_walk_index = 0; | ||
1259 | + VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb); | ||
1260 | + dom_tree_walk_index++) | ||
1261 | { | ||
1262 | - int found = 0; | ||
1263 | - int insn_inserted_p; | ||
1264 | - | ||
1265 | - domby = get_dominated_by (CDI_DOMINATORS, bb); | ||
1266 | + domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH); | ||
1267 | + | ||
1268 | + if (VEC_length (basic_block, domby) == 0) | ||
1269 | + continue; | ||
1270 | + | ||
1271 | /* Examine each expression that is very busy at the exit of this | ||
1272 | block. These are the potentially hoistable expressions. */ | ||
1273 | for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++) | ||
1274 | { | ||
1275 | - int hoistable = 0; | ||
1276 | - | ||
1277 | - if (TEST_BIT (hoist_vbeout[bb->index], i) | ||
1278 | - && TEST_BIT (transpout[bb->index], i)) | ||
1279 | + if (TEST_BIT (hoist_vbeout[bb->index], i)) | ||
1280 | { | ||
1281 | + /* Current expression. */ | ||
1282 | + struct expr *expr = index_map[i]; | ||
1283 | + /* Number of occurrences of EXPR that can be hoisted to BB. */ | |
1284 | + int hoistable = 0; | ||
1285 | + /* Basic blocks that have occurrences reachable from BB. */ | |
1286 | + bitmap_head _from_bbs, *from_bbs = &_from_bbs; | ||
1287 | + /* Occurrences reachable from BB. */ | |
1288 | + VEC (occr_t, heap) *occrs_to_hoist = NULL; | ||
1289 | + /* We want to insert the expression into BB only once, so | ||
1290 | + note when we've inserted it. */ | ||
1291 | + int insn_inserted_p; | ||
1292 | + occr_t occr; | ||
1293 | + | ||
1294 | + bitmap_initialize (from_bbs, 0); | ||
1295 | + | ||
1296 | + /* If an expression is computed in BB and is available at end of | ||
1297 | + BB, hoist all occurrences dominated by BB to BB. */ | |
1298 | + if (TEST_BIT (comp[bb->index], i)) | ||
1299 | + { | ||
1300 | + occr = find_occr_in_bb (expr->antic_occr, bb); | ||
1301 | + | ||
1302 | + if (occr) | ||
1303 | + { | ||
1304 | + /* An occurrence might've been already deleted | |
1305 | + while processing a dominator of BB. */ | ||
1306 | + if (occr->deleted_p) | ||
1307 | + gcc_assert (MAX_HOIST_DEPTH > 1); | ||
1308 | + else | ||
1309 | + { | ||
1310 | + gcc_assert (NONDEBUG_INSN_P (occr->insn)); | ||
1311 | + hoistable++; | ||
1312 | + } | ||
1313 | + } | ||
1314 | + else | ||
1315 | + hoistable++; | ||
1316 | + } | ||
1317 | + | ||
1318 | /* We've found a potentially hoistable expression, now | ||
1319 | we look at every block BB dominates to see if it | ||
1320 | computes the expression. */ | ||
1321 | for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | ||
1322 | { | ||
1323 | + int max_distance; | ||
1324 | + | ||
1325 | /* Ignore self dominance. */ | ||
1326 | if (bb == dominated) | ||
1327 | continue; | ||
1328 | @@ -4307,17 +4477,43 @@ | ||
1329 | if (!TEST_BIT (antloc[dominated->index], i)) | ||
1330 | continue; | ||
1331 | |||
1332 | + occr = find_occr_in_bb (expr->antic_occr, dominated); | ||
1333 | + gcc_assert (occr); | ||
1334 | + | ||
1335 | + /* An occurrence might've been already deleted | |
1336 | + while processing a dominator of BB. */ | ||
1337 | + if (occr->deleted_p) | ||
1338 | + { | ||
1339 | + gcc_assert (MAX_HOIST_DEPTH > 1); | ||
1340 | + continue; | ||
1341 | + } | ||
1342 | + gcc_assert (NONDEBUG_INSN_P (occr->insn)); | ||
1343 | + | ||
1344 | + max_distance = expr->max_distance; | ||
1345 | + if (max_distance > 0) | ||
1346 | + /* Adjust MAX_DISTANCE to account for the fact that | ||
1347 | + OCCR won't have to travel all of DOMINATED, but | ||
1348 | + only part of it. */ | ||
1349 | + max_distance += (bb_size[dominated->index] | ||
1350 | + - to_bb_head[INSN_UID (occr->insn)]); | ||
1351 | + | ||
1352 | /* Note if the expression would reach the dominated block | ||
1353 | unimpared if it was placed at the end of BB. | ||
1354 | |||
1355 | Keep track of how many times this expression is hoistable | ||
1356 | from a dominated block into BB. */ | ||
1357 | - if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | ||
1358 | - hoistable++; | ||
1359 | + if (hoist_expr_reaches_here_p (bb, i, dominated, NULL, | ||
1360 | + max_distance, bb_size)) | ||
1361 | + { | ||
1362 | + hoistable++; | ||
1363 | + VEC_safe_push (occr_t, heap, | ||
1364 | + occrs_to_hoist, occr); | ||
1365 | + bitmap_set_bit (from_bbs, dominated->index); | ||
1366 | + } | ||
1367 | } | ||
1368 | |||
1369 | /* If we found more than one hoistable occurrence of this | ||
1370 | - expression, then note it in the bitmap of expressions to | ||
1371 | + expression, then note it in the vector of expressions to | ||
1372 | hoist. It makes no sense to hoist things which are computed | ||
1373 | in only one BB, and doing so tends to pessimize register | ||
1374 | allocation. One could increase this value to try harder | ||
1375 | @@ -4326,91 +4522,80 @@ | ||
1376 | the vast majority of hoistable expressions are only movable | ||
1377 | from two successors, so raising this threshold is likely | ||
1378 | to nullify any benefit we get from code hoisting. */ | ||
1379 | - if (hoistable > 1) | ||
1380 | - { | ||
1381 | - SET_BIT (hoist_exprs[bb->index], i); | ||
1382 | - found = 1; | ||
1383 | - } | ||
1384 | - } | ||
1385 | - } | ||
1386 | - /* If we found nothing to hoist, then quit now. */ | ||
1387 | - if (! found) | ||
1388 | - { | ||
1389 | - VEC_free (basic_block, heap, domby); | ||
1390 | - continue; | ||
1391 | - } | ||
1392 | - | ||
1393 | - /* Loop over all the hoistable expressions. */ | ||
1394 | - for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++) | ||
1395 | - { | ||
1396 | - /* We want to insert the expression into BB only once, so | ||
1397 | - note when we've inserted it. */ | ||
1398 | - insn_inserted_p = 0; | ||
1399 | - | ||
1400 | - /* These tests should be the same as the tests above. */ | ||
1401 | - if (TEST_BIT (hoist_exprs[bb->index], i)) | ||
1402 | - { | ||
1403 | - /* We've found a potentially hoistable expression, now | ||
1404 | - we look at every block BB dominates to see if it | ||
1405 | - computes the expression. */ | ||
1406 | - for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++) | ||
1407 | - { | ||
1408 | - /* Ignore self dominance. */ | ||
1409 | - if (bb == dominated) | ||
1410 | - continue; | ||
1411 | - | ||
1412 | - /* We've found a dominated block, now see if it computes | ||
1413 | - the busy expression and whether or not moving that | ||
1414 | - expression to the "beginning" of that block is safe. */ | ||
1415 | - if (!TEST_BIT (antloc[dominated->index], i)) | ||
1416 | - continue; | ||
1417 | - | ||
1418 | - /* The expression is computed in the dominated block and | ||
1419 | - it would be safe to compute it at the start of the | ||
1420 | - dominated block. Now we have to determine if the | ||
1421 | - expression would reach the dominated block if it was | ||
1422 | - placed at the end of BB. */ | ||
1423 | - if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) | ||
1424 | - { | ||
1425 | - struct expr *expr = index_map[i]; | ||
1426 | - struct occr *occr = expr->antic_occr; | ||
1427 | - rtx insn; | ||
1428 | - rtx set; | ||
1429 | - | ||
1430 | - /* Find the right occurrence of this expression. */ | ||
1431 | - while (BLOCK_FOR_INSN (occr->insn) != dominated && occr) | ||
1432 | - occr = occr->next; | ||
1433 | - | ||
1434 | - gcc_assert (occr); | ||
1435 | - insn = occr->insn; | ||
1436 | - set = single_set (insn); | ||
1437 | - gcc_assert (set); | ||
1438 | - | ||
1439 | - /* Create a pseudo-reg to store the result of reaching | ||
1440 | - expressions into. Get the mode for the new pseudo | ||
1441 | - from the mode of the original destination pseudo. */ | ||
1442 | - if (expr->reaching_reg == NULL) | ||
1443 | - expr->reaching_reg | ||
1444 | - = gen_reg_rtx_and_attrs (SET_DEST (set)); | ||
1445 | - | ||
1446 | - gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn); | ||
1447 | - delete_insn (insn); | ||
1448 | - occr->deleted_p = 1; | ||
1449 | - changed = 1; | ||
1450 | - gcse_subst_count++; | ||
1451 | - | ||
1452 | - if (!insn_inserted_p) | ||
1453 | - { | ||
1454 | - insert_insn_end_basic_block (index_map[i], bb, 0); | ||
1455 | - insn_inserted_p = 1; | ||
1456 | - } | ||
1457 | - } | ||
1458 | - } | ||
1459 | + if (hoistable > 1 && dbg_cnt (hoist_insn)) | ||
1460 | + { | ||
1461 | + /* If (hoistable != VEC_length), then there is | ||
1462 | + an occurrence of EXPR in BB itself. Don't waste | |
1463 | + time looking for LCA in this case. */ | ||
1464 | + if ((unsigned) hoistable | ||
1465 | + == VEC_length (occr_t, occrs_to_hoist)) | ||
1466 | + { | ||
1467 | + basic_block lca; | ||
1468 | + | ||
1469 | + lca = nearest_common_dominator_for_set (CDI_DOMINATORS, | ||
1470 | + from_bbs); | ||
1471 | + if (lca != bb) | ||
1472 | + /* Punt, it's better to hoist these occurrences to | |
1473 | + LCA. */ | ||
1474 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
1475 | + } | ||
1476 | + } | ||
1477 | + else | ||
1478 | + /* Punt, no point hoisting a single occurrence. */ | |
1479 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
1480 | + | ||
1481 | + insn_inserted_p = 0; | ||
1482 | + | ||
1483 | + /* Walk through occurrences of I'th expressions we want | |
1484 | + to hoist to BB and make the transformations. */ | ||
1485 | + for (j = 0; | ||
1486 | + VEC_iterate (occr_t, occrs_to_hoist, j, occr); | ||
1487 | + j++) | ||
1488 | + { | ||
1489 | + rtx insn; | ||
1490 | + rtx set; | ||
1491 | + | ||
1492 | + gcc_assert (!occr->deleted_p); | ||
1493 | + | ||
1494 | + insn = occr->insn; | ||
1495 | + set = single_set (insn); | ||
1496 | + gcc_assert (set); | ||
1497 | + | ||
1498 | + /* Create a pseudo-reg to store the result of reaching | ||
1499 | + expressions into. Get the mode for the new pseudo | ||
1500 | + from the mode of the original destination pseudo. | ||
1501 | + | ||
1502 | + It is important to use new pseudos whenever we | ||
1503 | + emit a set. This will allow reload to use | ||
1504 | + rematerialization for such registers. */ | ||
1505 | + if (!insn_inserted_p) | ||
1506 | + expr->reaching_reg | ||
1507 | + = gen_reg_rtx_and_attrs (SET_DEST (set)); | ||
1508 | + | ||
1509 | + gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), | ||
1510 | + insn); | ||
1511 | + delete_insn (insn); | ||
1512 | + occr->deleted_p = 1; | ||
1513 | + changed = 1; | ||
1514 | + gcse_subst_count++; | ||
1515 | + | ||
1516 | + if (!insn_inserted_p) | ||
1517 | + { | ||
1518 | + insert_insn_end_basic_block (expr, bb); | ||
1519 | + insn_inserted_p = 1; | ||
1520 | + } | ||
1521 | + } | ||
1522 | + | ||
1523 | + VEC_free (occr_t, heap, occrs_to_hoist); | ||
1524 | + bitmap_clear (from_bbs); | ||
1525 | } | ||
1526 | } | ||
1527 | VEC_free (basic_block, heap, domby); | ||
1528 | } | ||
1529 | |||
1530 | + VEC_free (basic_block, heap, dom_tree_walk); | ||
1531 | + free (bb_size); | ||
1532 | + free (to_bb_head); | ||
1533 | free (index_map); | ||
1534 | |||
1535 | return changed; | ||
1536 | @@ -4433,6 +4618,8 @@ | ||
1537 | || is_too_expensive (_("GCSE disabled"))) | ||
1538 | return 0; | ||
1539 | |||
1540 | + doing_code_hoisting_p = true; | ||
1541 | + | ||
1542 | /* We need alias. */ | ||
1543 | init_alias_analysis (); | ||
1544 | |||
1545 | @@ -4468,6 +4655,8 @@ | ||
1546 | gcse_subst_count, gcse_create_count); | ||
1547 | } | ||
1548 | |||
1549 | + doing_code_hoisting_p = false; | ||
1550 | + | ||
1551 | return changed; | ||
1552 | } | ||
1553 | |||
1554 | |||
1555 | === modified file 'gcc/params.def' | ||
1556 | --- old/gcc/params.def 2010-04-02 18:54:46 +0000 | ||
1557 | +++ new/gcc/params.def 2010-08-16 09:41:58 +0000 | ||
1558 | @@ -219,6 +219,29 @@ | ||
1559 | "gcse-after-reload-critical-fraction", | ||
1560 | "The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload", | ||
1561 | 10, 0, 0) | ||
1562 | + | ||
1563 | +/* GCSE will use GCSE_COST_DISTANCE_RATIO as a scaling factor | ||
1564 | + to calculate maximum distance for which an expression is allowed to move | ||
1565 | + from its rtx_cost. */ | ||
1566 | +DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO, | ||
1567 | + "gcse-cost-distance-ratio", | ||
1568 | + "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations", | ||
1569 | + 10, 0, 0) | ||
1570 | +/* GCSE won't restrict distance for which an expression with rtx_cost greater | ||
1571 | + than COSTS_N_INSNS(GCSE_UNRESTRICTED_COST) is allowed to move. */ | ||
1572 | +DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST, | ||
1573 | + "gcse-unrestricted-cost", | ||
1574 | + "Cost at which GCSE optimizations will not constraint the distance an expression can travel", | ||
1575 | + 3, 0, 0) | ||
1576 | + | ||
1577 | +/* How deep from a given basic block the dominator tree should be searched | ||
1578 | + for expressions to hoist to the block. The value of 0 will avoid limiting | ||
1579 | + the search. */ | ||
1580 | +DEFPARAM(PARAM_MAX_HOIST_DEPTH, | ||
1581 | + "max-hoist-depth", | ||
1582 | + "Maximum depth of search in the dominator tree for expressions to hoist", | ||
1583 | + 30, 0, 0) | ||
1584 | + | ||
1585 | /* This parameter limits the number of insns in a loop that will be unrolled, | ||
1586 | and by how much the loop is unrolled. | ||
1587 | |||
1588 | |||
1589 | === modified file 'gcc/params.h' | ||
1590 | --- old/gcc/params.h 2009-12-01 19:12:29 +0000 | ||
1591 | +++ new/gcc/params.h 2010-08-16 09:41:58 +0000 | ||
1592 | @@ -125,6 +125,12 @@ | ||
1593 | PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION) | ||
1594 | #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \ | ||
1595 | PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION) | ||
1596 | +#define GCSE_COST_DISTANCE_RATIO \ | ||
1597 | + PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO) | ||
1598 | +#define GCSE_UNRESTRICTED_COST \ | ||
1599 | + PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST) | ||
1600 | +#define MAX_HOIST_DEPTH \ | ||
1601 | + PARAM_VALUE (PARAM_MAX_HOIST_DEPTH) | ||
1602 | #define MAX_UNROLLED_INSNS \ | ||
1603 | PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) | ||
1604 | #define MAX_SMS_LOOP_NUMBER \ | ||
1605 | |||
1606 | === added file 'gcc/testsuite/gcc.dg/pr45101.c' | ||
1607 | --- old/gcc/testsuite/gcc.dg/pr45101.c 1970-01-01 00:00:00 +0000 | ||
1608 | +++ new/gcc/testsuite/gcc.dg/pr45101.c 2010-08-16 09:41:58 +0000 | ||
1609 | @@ -0,0 +1,15 @@ | ||
1610 | +/* PR rtl-optimization/45101 */ | ||
1611 | +/* { dg-do compile } */ | ||
1612 | +/* { dg-options "-O2 -fgcse -fgcse-las" } */ | ||
1613 | + | ||
1614 | +struct | ||
1615 | +{ | ||
1616 | + int i; | ||
1617 | +} *s; | ||
1618 | + | ||
1619 | +extern void bar (void); | ||
1620 | + | ||
1621 | +void foo () | ||
1622 | +{ | ||
1623 | + !s ? s->i++ : bar (); | ||
1624 | +} | ||
1625 | |||
1626 | === added file 'gcc/testsuite/gcc.dg/pr45105.c' | ||
1627 | --- old/gcc/testsuite/gcc.dg/pr45105.c 1970-01-01 00:00:00 +0000 | ||
1628 | +++ new/gcc/testsuite/gcc.dg/pr45105.c 2010-08-16 09:41:58 +0000 | ||
1629 | @@ -0,0 +1,27 @@ | ||
1630 | +/* PR debug/45105 */ | ||
1631 | +/* { dg-do compile } */ | ||
1632 | +/* { dg-options "-Os -fcompare-debug" } */ | ||
1633 | + | ||
1634 | +extern int *baz (int *, int *); | ||
1635 | + | ||
1636 | +void | ||
1637 | +bar (int *p1, int *p2) | ||
1638 | +{ | ||
1639 | + int n = *baz (0, 0); | ||
1640 | + p1[n] = p2[n]; | ||
1641 | +} | ||
1642 | + | ||
1643 | +void | ||
1644 | +foo (int *p, int l) | ||
1645 | +{ | ||
1646 | + int a1[32]; | ||
1647 | + int a2[32]; | ||
1648 | + baz (a1, a2); | ||
1649 | + while (l) | ||
1650 | + { | ||
1651 | + if (l & 1) | ||
1652 | + p = baz (a2, p); | ||
1653 | + l--; | ||
1654 | + bar (a1, a2); | ||
1655 | + } | ||
1656 | +} | ||
1657 | |||
1658 | === added file 'gcc/testsuite/gcc.dg/pr45107.c' | ||
1659 | --- old/gcc/testsuite/gcc.dg/pr45107.c 1970-01-01 00:00:00 +0000 | ||
1660 | +++ new/gcc/testsuite/gcc.dg/pr45107.c 2010-08-16 09:41:58 +0000 | ||
1661 | @@ -0,0 +1,13 @@ | ||
1662 | +/* PR rtl-optimization/45107 */ | ||
1663 | +/* { dg-do compile } */ | ||
1664 | +/* { dg-options "-Os -fgcse-las" } */ | ||
1665 | + | ||
1666 | +extern void bar(int *); | ||
1667 | + | ||
1668 | +int foo (int *p) | ||
1669 | +{ | ||
1670 | + int i = *p; | ||
1671 | + if (i != 1) | ||
1672 | + bar(&i); | ||
1673 | + *p = i; | ||
1674 | +} | ||
1675 | |||
1676 | === added file 'gcc/testsuite/gcc.target/arm/pr40956.c' | ||
1677 | --- old/gcc/testsuite/gcc.target/arm/pr40956.c 1970-01-01 00:00:00 +0000 | ||
1678 | +++ new/gcc/testsuite/gcc.target/arm/pr40956.c 2010-08-16 09:41:58 +0000 | ||
1679 | @@ -0,0 +1,14 @@ | ||
1680 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ | ||
1681 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
1682 | +/* { dg-require-effective-target fpic } */ | ||
1683 | +/* Make sure the constant "0" is loaded into register only once. */ | ||
1684 | +/* { dg-final { scan-assembler-times "mov\[\\t \]*r., #0" 1 } } */ | ||
1685 | + | ||
1686 | +int foo(int p, int* q) | ||
1687 | +{ | ||
1688 | + if (p!=9) | ||
1689 | + *q = 0; | ||
1690 | + else | ||
1691 | + *(q+1) = 0; | ||
1692 | + return 3; | ||
1693 | +} | ||
1694 | |||
1695 | === added file 'gcc/testsuite/gcc.target/arm/pr42495.c' | ||
1696 | --- old/gcc/testsuite/gcc.target/arm/pr42495.c 1970-01-01 00:00:00 +0000 | ||
1697 | +++ new/gcc/testsuite/gcc.target/arm/pr42495.c 2010-08-16 09:41:58 +0000 | ||
1698 | @@ -0,0 +1,31 @@ | ||
1699 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te -fdump-rtl-hoist" } */ | ||
1700 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
1701 | +/* { dg-require-effective-target fpic } */ | ||
1702 | +/* Make sure all calculations of gObj's address get hoisted to one location. */ | ||
1703 | +/* { dg-final { scan-rtl-dump "PRE/HOIST: end of bb .* copying expression" "hoist" } } */ | ||
1704 | + | ||
1705 | +struct st_a { | ||
1706 | + int data; | ||
1707 | +}; | ||
1708 | + | ||
1709 | +struct st_b { | ||
1710 | + struct st_a *p_a; | ||
1711 | + struct st_b *next; | ||
1712 | +}; | ||
1713 | + | ||
1714 | +extern struct st_b gObj; | ||
1715 | +extern void foo(int, struct st_b*); | ||
1716 | + | ||
1717 | +int goo(struct st_b * obj) { | ||
1718 | + struct st_a *pa; | ||
1719 | + if (gObj.p_a->data != 0) { | ||
1720 | + foo(gObj.p_a->data, obj); | ||
1721 | + } | ||
1722 | + pa = obj->p_a; | ||
1723 | + if (pa == 0) { | ||
1724 | + return 0; | ||
1725 | + } else if (pa == gObj.p_a) { | ||
1726 | + return 0; | ||
1727 | + } | ||
1728 | + return pa->data; | ||
1729 | +} | ||
1730 | |||
1731 | === added file 'gcc/testsuite/gcc.target/arm/pr42574.c' | ||
1732 | --- old/gcc/testsuite/gcc.target/arm/pr42574.c 1970-01-01 00:00:00 +0000 | ||
1733 | +++ new/gcc/testsuite/gcc.target/arm/pr42574.c 2010-08-16 09:41:58 +0000 | ||
1734 | @@ -0,0 +1,24 @@ | ||
1735 | +/* { dg-options "-mthumb -Os -fpic -march=armv5te" } */ | ||
1736 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
1737 | +/* { dg-require-effective-target fpic } */ | ||
1738 | +/* Make sure the address of glob.c is calculated only once and using | ||
1739 | + a logical shift for the offset (200<<1). */ | ||
1740 | +/* { dg-final { scan-assembler-times "lsl" 1 } } */ | ||
1741 | + | ||
1742 | +struct A { | ||
1743 | + char a[400]; | ||
1744 | + float* c; | ||
1745 | +}; | ||
1746 | +struct A glob; | ||
1747 | +void func(); | ||
1748 | +void func1(float*); | ||
1749 | +int func2(float*, int*); | ||
1750 | +void func3(float*); | ||
1751 | + | ||
1752 | +void test(int *p) { | ||
1753 | + func1(glob.c); | ||
1754 | + if (func2(glob.c, p)) { | ||
1755 | + func(); | ||
1756 | + } | ||
1757 | + func3(glob.c); | ||
1758 | +} | ||
1759 | |||