diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch | 714 |
1 files changed, 0 insertions, 714 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch deleted file mode 100644 index 9f0c98e9c9..0000000000 --- a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch +++ /dev/null | |||
@@ -1,714 +0,0 @@ | |||
1 | 2010-07-15 Sandra Loosemore <sandra@codesourcery.com> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2010-06-09 Sandra Loosemore <sandra@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-ssa-loop-ivopts.c (adjust_setup_cost): New function. | ||
9 | (get_computation_cost_at): Use it. | ||
10 | (determine_use_iv_cost_condition): Likewise. | ||
11 | (determine_iv_cost): Likewise. | ||
12 | |||
13 | 2010-07-05 Sandra Loosemore <sandra@codesourcery.com> | ||
14 | |||
15 | PR middle-end/42505 | ||
16 | |||
17 | gcc/ | ||
18 | * tree-ssa-loop-ivopts.c (determine_set_costs): Delete obsolete | ||
19 | comments about cost model. | ||
20 | (try_add_cand_for): Add second strategy for choosing initial set | ||
21 | based on original IVs, controlled by ORIGINALP argument. | ||
22 | (get_initial_solution): Add ORIGINALP argument. | ||
23 | (find_optimal_iv_set_1): New function, split from find_optimal_iv_set. | ||
24 | (find_optimal_iv_set): Try two different strategies for choosing | ||
25 | the IV set, and return the one with lower cost. | ||
26 | |||
27 | gcc/testsuite/ | ||
28 | * gcc.target/arm/pr42505.c: New test case. | ||
29 | |||
30 | 2010-07-10 Sandra Loosemore <sandra@codesourcery.com> | ||
31 | |||
32 | PR middle-end/42505 | ||
33 | |||
34 | gcc/ | ||
35 | * tree-inline.c (estimate_num_insns): Refactor builtin complexity | ||
36 | lookup code into.... | ||
37 | * builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these | ||
38 | new functions. | ||
39 | * tree.h (is_simple_builtin, is_inexpensive_builtin): Declare. | ||
40 | * cfgloopanal.c (target_clobbered_regs): Define. | ||
41 | (init_set_costs): Initialize target_clobbered_regs. | ||
42 | (estimate_reg_pressure_cost): Add call_p argument. When true, | ||
43 | adjust the number of available registers to exclude the | ||
44 | call-clobbered registers. | ||
45 | * cfgloop.h (target_clobbered_regs): Declare. | ||
46 | (estimate_reg_pressure_cost): Adjust declaration. | ||
47 | * tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call. | ||
48 | (ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost. | ||
49 | (determine_set_costs): Dump target_clobbered_regs. | ||
50 | (loop_body_includes_call): New function. | ||
51 | (tree_ssa_iv_optimize_loop): Use it to initialize new field. | ||
52 | * loop-invariant.c (gain_for_invariant): Adjust arguments to pass | ||
53 | call_p flag through. | ||
54 | (best_gain_for_invariant): Likewise. | ||
55 | (find_invariants_to_move): Likewise. | ||
56 | (move_single_loop_invariants): Likewise, using already-computed | ||
57 | has_call field. | ||
58 | |||
59 | 2010-07-15 Jie Zhang <jie@codesourcery.com> | ||
60 | |||
61 | Issue #8497, #8893 | ||
62 | |||
63 | === modified file 'gcc/builtins.c' | ||
64 | --- old/gcc/builtins.c 2010-04-13 12:47:11 +0000 | ||
65 | +++ new/gcc/builtins.c 2010-08-02 13:51:23 +0000 | ||
66 | @@ -13624,3 +13624,123 @@ | ||
67 | break; | ||
68 | } | ||
69 | } | ||
70 | + | ||
71 | +/* Return true if DECL is a builtin that expands to a constant or similarly | ||
72 | + simple code. */ | ||
73 | +bool | ||
74 | +is_simple_builtin (tree decl) | ||
75 | +{ | ||
76 | + if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
77 | + switch (DECL_FUNCTION_CODE (decl)) | ||
78 | + { | ||
79 | + /* Builtins that expand to constants. */ | ||
80 | + case BUILT_IN_CONSTANT_P: | ||
81 | + case BUILT_IN_EXPECT: | ||
82 | + case BUILT_IN_OBJECT_SIZE: | ||
83 | + case BUILT_IN_UNREACHABLE: | ||
84 | + /* Simple register moves or loads from stack. */ | ||
85 | + case BUILT_IN_RETURN_ADDRESS: | ||
86 | + case BUILT_IN_EXTRACT_RETURN_ADDR: | ||
87 | + case BUILT_IN_FROB_RETURN_ADDR: | ||
88 | + case BUILT_IN_RETURN: | ||
89 | + case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: | ||
90 | + case BUILT_IN_FRAME_ADDRESS: | ||
91 | + case BUILT_IN_VA_END: | ||
92 | + case BUILT_IN_STACK_SAVE: | ||
93 | + case BUILT_IN_STACK_RESTORE: | ||
94 | + /* Exception state returns or moves registers around. */ | ||
95 | + case BUILT_IN_EH_FILTER: | ||
96 | + case BUILT_IN_EH_POINTER: | ||
97 | + case BUILT_IN_EH_COPY_VALUES: | ||
98 | + return true; | ||
99 | + | ||
100 | + default: | ||
101 | + return false; | ||
102 | + } | ||
103 | + | ||
104 | + return false; | ||
105 | +} | ||
106 | + | ||
107 | +/* Return true if DECL is a builtin that is not expensive, i.e., it is | ||
108 | + most probably expanded inline into reasonably simple code. This is a | ||
109 | + superset of is_simple_builtin. */ | ||
110 | +bool | ||
111 | +is_inexpensive_builtin (tree decl) | ||
112 | +{ | ||
113 | + if (!decl) | ||
114 | + return false; | ||
115 | + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) | ||
116 | + return true; | ||
117 | + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
118 | + switch (DECL_FUNCTION_CODE (decl)) | ||
119 | + { | ||
120 | + case BUILT_IN_ABS: | ||
121 | + case BUILT_IN_ALLOCA: | ||
122 | + case BUILT_IN_BSWAP32: | ||
123 | + case BUILT_IN_BSWAP64: | ||
124 | + case BUILT_IN_CLZ: | ||
125 | + case BUILT_IN_CLZIMAX: | ||
126 | + case BUILT_IN_CLZL: | ||
127 | + case BUILT_IN_CLZLL: | ||
128 | + case BUILT_IN_CTZ: | ||
129 | + case BUILT_IN_CTZIMAX: | ||
130 | + case BUILT_IN_CTZL: | ||
131 | + case BUILT_IN_CTZLL: | ||
132 | + case BUILT_IN_FFS: | ||
133 | + case BUILT_IN_FFSIMAX: | ||
134 | + case BUILT_IN_FFSL: | ||
135 | + case BUILT_IN_FFSLL: | ||
136 | + case BUILT_IN_IMAXABS: | ||
137 | + case BUILT_IN_FINITE: | ||
138 | + case BUILT_IN_FINITEF: | ||
139 | + case BUILT_IN_FINITEL: | ||
140 | + case BUILT_IN_FINITED32: | ||
141 | + case BUILT_IN_FINITED64: | ||
142 | + case BUILT_IN_FINITED128: | ||
143 | + case BUILT_IN_FPCLASSIFY: | ||
144 | + case BUILT_IN_ISFINITE: | ||
145 | + case BUILT_IN_ISINF_SIGN: | ||
146 | + case BUILT_IN_ISINF: | ||
147 | + case BUILT_IN_ISINFF: | ||
148 | + case BUILT_IN_ISINFL: | ||
149 | + case BUILT_IN_ISINFD32: | ||
150 | + case BUILT_IN_ISINFD64: | ||
151 | + case BUILT_IN_ISINFD128: | ||
152 | + case BUILT_IN_ISNAN: | ||
153 | + case BUILT_IN_ISNANF: | ||
154 | + case BUILT_IN_ISNANL: | ||
155 | + case BUILT_IN_ISNAND32: | ||
156 | + case BUILT_IN_ISNAND64: | ||
157 | + case BUILT_IN_ISNAND128: | ||
158 | + case BUILT_IN_ISNORMAL: | ||
159 | + case BUILT_IN_ISGREATER: | ||
160 | + case BUILT_IN_ISGREATEREQUAL: | ||
161 | + case BUILT_IN_ISLESS: | ||
162 | + case BUILT_IN_ISLESSEQUAL: | ||
163 | + case BUILT_IN_ISLESSGREATER: | ||
164 | + case BUILT_IN_ISUNORDERED: | ||
165 | + case BUILT_IN_VA_ARG_PACK: | ||
166 | + case BUILT_IN_VA_ARG_PACK_LEN: | ||
167 | + case BUILT_IN_VA_COPY: | ||
168 | + case BUILT_IN_TRAP: | ||
169 | + case BUILT_IN_SAVEREGS: | ||
170 | + case BUILT_IN_POPCOUNTL: | ||
171 | + case BUILT_IN_POPCOUNTLL: | ||
172 | + case BUILT_IN_POPCOUNTIMAX: | ||
173 | + case BUILT_IN_POPCOUNT: | ||
174 | + case BUILT_IN_PARITYL: | ||
175 | + case BUILT_IN_PARITYLL: | ||
176 | + case BUILT_IN_PARITYIMAX: | ||
177 | + case BUILT_IN_PARITY: | ||
178 | + case BUILT_IN_LABS: | ||
179 | + case BUILT_IN_LLABS: | ||
180 | + case BUILT_IN_PREFETCH: | ||
181 | + return true; | ||
182 | + | ||
183 | + default: | ||
184 | + return is_simple_builtin (decl); | ||
185 | + } | ||
186 | + | ||
187 | + return false; | ||
188 | +} | ||
189 | + | ||
190 | |||
191 | === modified file 'gcc/cfgloop.h' | ||
192 | --- old/gcc/cfgloop.h 2009-11-25 10:55:54 +0000 | ||
193 | +++ new/gcc/cfgloop.h 2010-08-02 13:51:23 +0000 | ||
194 | @@ -622,13 +622,14 @@ | ||
195 | /* The properties of the target. */ | ||
196 | |||
197 | extern unsigned target_avail_regs; | ||
198 | +extern unsigned target_clobbered_regs; | ||
199 | extern unsigned target_res_regs; | ||
200 | extern unsigned target_reg_cost [2]; | ||
201 | extern unsigned target_spill_cost [2]; | ||
202 | |||
203 | /* Register pressure estimation for induction variable optimizations & loop | ||
204 | invariant motion. */ | ||
205 | -extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool); | ||
206 | +extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool); | ||
207 | extern void init_set_costs (void); | ||
208 | |||
209 | /* Loop optimizer initialization. */ | ||
210 | |||
211 | === modified file 'gcc/cfgloopanal.c' | ||
212 | --- old/gcc/cfgloopanal.c 2009-09-30 08:57:56 +0000 | ||
213 | +++ new/gcc/cfgloopanal.c 2010-08-02 13:51:23 +0000 | ||
214 | @@ -320,6 +320,8 @@ | ||
215 | /* The properties of the target. */ | ||
216 | |||
217 | unsigned target_avail_regs; /* Number of available registers. */ | ||
218 | +unsigned target_clobbered_regs; /* Number of available registers that are | ||
219 | + call-clobbered. */ | ||
220 | unsigned target_res_regs; /* Number of registers reserved for temporary | ||
221 | expressions. */ | ||
222 | unsigned target_reg_cost[2]; /* The cost for register when there still | ||
223 | @@ -342,10 +344,15 @@ | ||
224 | unsigned i; | ||
225 | |||
226 | target_avail_regs = 0; | ||
227 | + target_clobbered_regs = 0; | ||
228 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | ||
229 | if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) | ||
230 | && !fixed_regs[i]) | ||
231 | - target_avail_regs++; | ||
232 | + { | ||
233 | + target_avail_regs++; | ||
234 | + if (call_used_regs[i]) | ||
235 | + target_clobbered_regs++; | ||
236 | + } | ||
237 | |||
238 | target_res_regs = 3; | ||
239 | |||
240 | @@ -379,20 +386,29 @@ | ||
241 | |||
242 | /* Estimates cost of increased register pressure caused by making N_NEW new | ||
243 | registers live around the loop. N_OLD is the number of registers live | ||
244 | - around the loop. */ | ||
245 | + around the loop. If CALL_P is true, also take into account that | ||
246 | + call-used registers may be clobbered in the loop body, reducing the | ||
247 | + number of available registers before we spill. */ | ||
248 | |||
249 | unsigned | ||
250 | -estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) | ||
251 | +estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, | ||
252 | + bool call_p) | ||
253 | { | ||
254 | unsigned cost; | ||
255 | unsigned regs_needed = n_new + n_old; | ||
256 | + unsigned available_regs = target_avail_regs; | ||
257 | + | ||
258 | + /* If there is a call in the loop body, the call-clobbered registers | ||
259 | + are not available for loop invariants. */ | ||
260 | + if (call_p) | ||
261 | + available_regs = available_regs - target_clobbered_regs; | ||
262 | |||
263 | /* If we have enough registers, we should use them and not restrict | ||
264 | the transformations unnecessarily. */ | ||
265 | - if (regs_needed + target_res_regs <= target_avail_regs) | ||
266 | + if (regs_needed + target_res_regs <= available_regs) | ||
267 | return 0; | ||
268 | |||
269 | - if (regs_needed <= target_avail_regs) | ||
270 | + if (regs_needed <= available_regs) | ||
271 | /* If we are close to running out of registers, try to preserve | ||
272 | them. */ | ||
273 | cost = target_reg_cost [speed] * n_new; | ||
274 | |||
275 | === modified file 'gcc/loop-invariant.c' | ||
276 | --- old/gcc/loop-invariant.c 2010-04-02 18:54:46 +0000 | ||
277 | +++ new/gcc/loop-invariant.c 2010-08-02 13:51:23 +0000 | ||
278 | @@ -1173,11 +1173,13 @@ | ||
279 | /* Calculates gain for eliminating invariant INV. REGS_USED is the number | ||
280 | of registers used in the loop, NEW_REGS is the number of new variables | ||
281 | already added due to the invariant motion. The number of registers needed | ||
282 | - for it is stored in *REGS_NEEDED. */ | ||
283 | + for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed | ||
284 | + through to estimate_reg_pressure_cost. */ | ||
285 | |||
286 | static int | ||
287 | gain_for_invariant (struct invariant *inv, unsigned *regs_needed, | ||
288 | - unsigned *new_regs, unsigned regs_used, bool speed) | ||
289 | + unsigned *new_regs, unsigned regs_used, | ||
290 | + bool speed, bool call_p) | ||
291 | { | ||
292 | int comp_cost, size_cost; | ||
293 | |||
294 | @@ -1188,9 +1190,9 @@ | ||
295 | if (! flag_ira_loop_pressure) | ||
296 | { | ||
297 | size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0], | ||
298 | - regs_used, speed) | ||
299 | + regs_used, speed, call_p) | ||
300 | - estimate_reg_pressure_cost (new_regs[0], | ||
301 | - regs_used, speed)); | ||
302 | + regs_used, speed, call_p)); | ||
303 | } | ||
304 | else | ||
305 | { | ||
306 | @@ -1245,7 +1247,8 @@ | ||
307 | |||
308 | static int | ||
309 | best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, | ||
310 | - unsigned *new_regs, unsigned regs_used, bool speed) | ||
311 | + unsigned *new_regs, unsigned regs_used, | ||
312 | + bool speed, bool call_p) | ||
313 | { | ||
314 | struct invariant *inv; | ||
315 | int i, gain = 0, again; | ||
316 | @@ -1261,7 +1264,7 @@ | ||
317 | continue; | ||
318 | |||
319 | again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used, | ||
320 | - speed); | ||
321 | + speed, call_p); | ||
322 | if (again > gain) | ||
323 | { | ||
324 | gain = again; | ||
325 | @@ -1314,7 +1317,7 @@ | ||
326 | /* Determines which invariants to move. */ | ||
327 | |||
328 | static void | ||
329 | -find_invariants_to_move (bool speed) | ||
330 | +find_invariants_to_move (bool speed, bool call_p) | ||
331 | { | ||
332 | int gain; | ||
333 | unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES]; | ||
334 | @@ -1353,7 +1356,8 @@ | ||
335 | new_regs[ira_reg_class_cover[i]] = 0; | ||
336 | } | ||
337 | while ((gain = best_gain_for_invariant (&inv, regs_needed, | ||
338 | - new_regs, regs_used, speed)) > 0) | ||
339 | + new_regs, regs_used, | ||
340 | + speed, call_p)) > 0) | ||
341 | { | ||
342 | set_move_mark (inv->invno, gain); | ||
343 | if (! flag_ira_loop_pressure) | ||
344 | @@ -1554,7 +1558,8 @@ | ||
345 | init_inv_motion_data (); | ||
346 | |||
347 | find_invariants (loop); | ||
348 | - find_invariants_to_move (optimize_loop_for_speed_p (loop)); | ||
349 | + find_invariants_to_move (optimize_loop_for_speed_p (loop), | ||
350 | + LOOP_DATA (loop)->has_call); | ||
351 | move_invariants (loop); | ||
352 | |||
353 | free_inv_motion_data (); | ||
354 | |||
355 | === added file 'gcc/testsuite/gcc.target/arm/pr42505.c' | ||
356 | --- old/gcc/testsuite/gcc.target/arm/pr42505.c 1970-01-01 00:00:00 +0000 | ||
357 | +++ new/gcc/testsuite/gcc.target/arm/pr42505.c 2010-08-02 13:51:23 +0000 | ||
358 | @@ -0,0 +1,23 @@ | ||
359 | +/* { dg-options "-mthumb -Os -march=armv5te" } */ | ||
360 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
361 | +/* { dg-final { scan-assembler-not "str\[\\t \]*r.,\[\\t \]*.sp," } } */ | ||
362 | + | ||
363 | +struct A { | ||
364 | + int f1; | ||
365 | + int f2; | ||
366 | +}; | ||
367 | + | ||
368 | +int func(int c); | ||
369 | + | ||
370 | +/* This function should not need to spill anything to the stack. */ | ||
371 | +int test(struct A* src, struct A* dst, int count) | ||
372 | +{ | ||
373 | + while (count--) { | ||
374 | + if (!func(src->f2)) { | ||
375 | + return 0; | ||
376 | + } | ||
377 | + *dst++ = *src++; | ||
378 | + } | ||
379 | + | ||
380 | + return 1; | ||
381 | +} | ||
382 | |||
383 | === modified file 'gcc/tree-inline.c' | ||
384 | --- old/gcc/tree-inline.c 2010-03-18 20:07:13 +0000 | ||
385 | +++ new/gcc/tree-inline.c 2010-08-02 13:51:23 +0000 | ||
386 | @@ -3246,34 +3246,13 @@ | ||
387 | if (POINTER_TYPE_P (funtype)) | ||
388 | funtype = TREE_TYPE (funtype); | ||
389 | |||
390 | - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) | ||
391 | + if (is_simple_builtin (decl)) | ||
392 | + return 0; | ||
393 | + else if (is_inexpensive_builtin (decl)) | ||
394 | cost = weights->target_builtin_call_cost; | ||
395 | else | ||
396 | cost = weights->call_cost; | ||
397 | |||
398 | - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
399 | - switch (DECL_FUNCTION_CODE (decl)) | ||
400 | - { | ||
401 | - case BUILT_IN_CONSTANT_P: | ||
402 | - return 0; | ||
403 | - case BUILT_IN_EXPECT: | ||
404 | - return 0; | ||
405 | - | ||
406 | - /* Prefetch instruction is not expensive. */ | ||
407 | - case BUILT_IN_PREFETCH: | ||
408 | - cost = weights->target_builtin_call_cost; | ||
409 | - break; | ||
410 | - | ||
411 | - /* Exception state returns or moves registers around. */ | ||
412 | - case BUILT_IN_EH_FILTER: | ||
413 | - case BUILT_IN_EH_POINTER: | ||
414 | - case BUILT_IN_EH_COPY_VALUES: | ||
415 | - return 0; | ||
416 | - | ||
417 | - default: | ||
418 | - break; | ||
419 | - } | ||
420 | - | ||
421 | if (decl) | ||
422 | funtype = TREE_TYPE (decl); | ||
423 | |||
424 | |||
425 | === modified file 'gcc/tree-ssa-loop-ivopts.c' | ||
426 | --- old/gcc/tree-ssa-loop-ivopts.c 2010-04-01 15:18:07 +0000 | ||
427 | +++ new/gcc/tree-ssa-loop-ivopts.c 2010-08-02 13:51:23 +0000 | ||
428 | @@ -257,6 +257,9 @@ | ||
429 | |||
430 | /* Are we optimizing for speed? */ | ||
431 | bool speed; | ||
432 | + | ||
433 | + /* Whether the loop body includes any function calls. */ | ||
434 | + bool body_includes_call; | ||
435 | }; | ||
436 | |||
437 | /* An assignment of iv candidates to uses. */ | ||
438 | @@ -2926,6 +2929,20 @@ | ||
439 | return get_computation_at (loop, use, cand, use->stmt); | ||
440 | } | ||
441 | |||
442 | +/* Adjust the cost COST for being in loop setup rather than loop body. | ||
443 | + If we're optimizing for space, the loop setup overhead is constant; | ||
444 | + if we're optimizing for speed, amortize it over the per-iteration cost. */ | ||
445 | +static unsigned | ||
446 | +adjust_setup_cost (struct ivopts_data *data, unsigned cost) | ||
447 | +{ | ||
448 | + if (cost == INFTY) | ||
449 | + return cost; | ||
450 | + else if (optimize_loop_for_speed_p (data->current_loop)) | ||
451 | + return cost / AVG_LOOP_NITER (data->current_loop); | ||
452 | + else | ||
453 | + return cost; | ||
454 | +} | ||
455 | + | ||
456 | /* Returns cost of addition in MODE. */ | ||
457 | |||
458 | static unsigned | ||
459 | @@ -3838,8 +3855,8 @@ | ||
460 | /* Symbol + offset should be compile-time computable so consider that they | ||
461 | are added once to the variable, if present. */ | ||
462 | if (var_present && (symbol_present || offset)) | ||
463 | - cost.cost += add_cost (TYPE_MODE (ctype), speed) | ||
464 | - / AVG_LOOP_NITER (data->current_loop); | ||
465 | + cost.cost += adjust_setup_cost (data, | ||
466 | + add_cost (TYPE_MODE (ctype), speed)); | ||
467 | |||
468 | /* Having offset does not affect runtime cost in case it is added to | ||
469 | symbol, but it increases complexity. */ | ||
470 | @@ -4104,7 +4121,7 @@ | ||
471 | elim_cost = force_var_cost (data, bound, &depends_on_elim); | ||
472 | /* The bound is a loop invariant, so it will be only computed | ||
473 | once. */ | ||
474 | - elim_cost.cost /= AVG_LOOP_NITER (data->current_loop); | ||
475 | + elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); | ||
476 | } | ||
477 | else | ||
478 | elim_cost = infinite_cost; | ||
479 | @@ -4351,7 +4368,7 @@ | ||
480 | cost_base = force_var_cost (data, base, NULL); | ||
481 | cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); | ||
482 | |||
483 | - cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop); | ||
484 | + cost = cost_step + adjust_setup_cost (data, cost_base.cost); | ||
485 | |||
486 | /* Prefer the original ivs unless we may gain something by replacing it. | ||
487 | The reason is to make debugging simpler; so this is not relevant for | ||
488 | @@ -4404,7 +4421,8 @@ | ||
489 | { | ||
490 | /* We add size to the cost, so that we prefer eliminating ivs | ||
491 | if possible. */ | ||
492 | - return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed); | ||
493 | + return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, | ||
494 | + data->body_includes_call); | ||
495 | } | ||
496 | |||
497 | /* For each size of the induction variable set determine the penalty. */ | ||
498 | @@ -4419,30 +4437,11 @@ | ||
499 | struct loop *loop = data->current_loop; | ||
500 | bitmap_iterator bi; | ||
501 | |||
502 | - /* We use the following model (definitely improvable, especially the | ||
503 | - cost function -- TODO): | ||
504 | - | ||
505 | - We estimate the number of registers available (using MD data), name it A. | ||
506 | - | ||
507 | - We estimate the number of registers used by the loop, name it U. This | ||
508 | - number is obtained as the number of loop phi nodes (not counting virtual | ||
509 | - registers and bivs) + the number of variables from outside of the loop. | ||
510 | - | ||
511 | - We set a reserve R (free regs that are used for temporary computations, | ||
512 | - etc.). For now the reserve is a constant 3. | ||
513 | - | ||
514 | - Let I be the number of induction variables. | ||
515 | - | ||
516 | - -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage | ||
517 | - make a lot of ivs without a reason). | ||
518 | - -- if A - R < U + I <= A, the cost is I * PRES_COST | ||
519 | - -- if U + I > A, the cost is I * PRES_COST and | ||
520 | - number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */ | ||
521 | - | ||
522 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
523 | { | ||
524 | fprintf (dump_file, "Global costs:\n"); | ||
525 | fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); | ||
526 | + fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); | ||
527 | fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); | ||
528 | fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); | ||
529 | } | ||
530 | @@ -5062,11 +5061,13 @@ | ||
531 | } | ||
532 | |||
533 | /* Tries to extend the sets IVS in the best possible way in order | ||
534 | - to express the USE. */ | ||
535 | + to express the USE. If ORIGINALP is true, prefer candidates from | ||
536 | + the original set of IVs, otherwise favor important candidates not | ||
537 | + based on any memory object. */ | ||
538 | |||
539 | static bool | ||
540 | try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, | ||
541 | - struct iv_use *use) | ||
542 | + struct iv_use *use, bool originalp) | ||
543 | { | ||
544 | comp_cost best_cost, act_cost; | ||
545 | unsigned i; | ||
546 | @@ -5085,7 +5086,8 @@ | ||
547 | iv_ca_set_no_cp (data, ivs, use); | ||
548 | } | ||
549 | |||
550 | - /* First try important candidates not based on any memory object. Only if | ||
551 | + /* If ORIGINALP is true, try to find the original IV for the use. Otherwise | ||
552 | + first try important candidates not based on any memory object. Only if | ||
553 | this fails, try the specific ones. Rationale -- in loops with many | ||
554 | variables the best choice often is to use just one generic biv. If we | ||
555 | added here many ivs specific to the uses, the optimization algorithm later | ||
556 | @@ -5097,7 +5099,10 @@ | ||
557 | { | ||
558 | cand = iv_cand (data, i); | ||
559 | |||
560 | - if (cand->iv->base_object != NULL_TREE) | ||
561 | + if (originalp && cand->pos !=IP_ORIGINAL) | ||
562 | + continue; | ||
563 | + | ||
564 | + if (!originalp && cand->iv->base_object != NULL_TREE) | ||
565 | continue; | ||
566 | |||
567 | if (iv_ca_cand_used_p (ivs, cand)) | ||
568 | @@ -5133,8 +5138,13 @@ | ||
569 | continue; | ||
570 | |||
571 | /* Already tried this. */ | ||
572 | - if (cand->important && cand->iv->base_object == NULL_TREE) | ||
573 | - continue; | ||
574 | + if (cand->important) | ||
575 | + { | ||
576 | + if (originalp && cand->pos == IP_ORIGINAL) | ||
577 | + continue; | ||
578 | + if (!originalp && cand->iv->base_object == NULL_TREE) | ||
579 | + continue; | ||
580 | + } | ||
581 | |||
582 | if (iv_ca_cand_used_p (ivs, cand)) | ||
583 | continue; | ||
584 | @@ -5168,13 +5178,13 @@ | ||
585 | /* Finds an initial assignment of candidates to uses. */ | ||
586 | |||
587 | static struct iv_ca * | ||
588 | -get_initial_solution (struct ivopts_data *data) | ||
589 | +get_initial_solution (struct ivopts_data *data, bool originalp) | ||
590 | { | ||
591 | struct iv_ca *ivs = iv_ca_new (data); | ||
592 | unsigned i; | ||
593 | |||
594 | for (i = 0; i < n_iv_uses (data); i++) | ||
595 | - if (!try_add_cand_for (data, ivs, iv_use (data, i))) | ||
596 | + if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) | ||
597 | { | ||
598 | iv_ca_free (&ivs); | ||
599 | return NULL; | ||
600 | @@ -5246,14 +5256,12 @@ | ||
601 | solution and remove the unused ivs while this improves the cost. */ | ||
602 | |||
603 | static struct iv_ca * | ||
604 | -find_optimal_iv_set (struct ivopts_data *data) | ||
605 | +find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) | ||
606 | { | ||
607 | - unsigned i; | ||
608 | struct iv_ca *set; | ||
609 | - struct iv_use *use; | ||
610 | |||
611 | /* Get the initial solution. */ | ||
612 | - set = get_initial_solution (data); | ||
613 | + set = get_initial_solution (data, originalp); | ||
614 | if (!set) | ||
615 | { | ||
616 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
617 | @@ -5276,11 +5284,46 @@ | ||
618 | } | ||
619 | } | ||
620 | |||
621 | + return set; | ||
622 | +} | ||
623 | + | ||
624 | +static struct iv_ca * | ||
625 | +find_optimal_iv_set (struct ivopts_data *data) | ||
626 | +{ | ||
627 | + unsigned i; | ||
628 | + struct iv_ca *set, *origset; | ||
629 | + struct iv_use *use; | ||
630 | + comp_cost cost, origcost; | ||
631 | + | ||
632 | + /* Determine the cost based on a strategy that starts with original IVs, | ||
633 | + and try again using a strategy that prefers candidates not based | ||
634 | + on any memory object. */ | ||
635 | + origset = find_optimal_iv_set_1 (data, true); | ||
636 | + set = find_optimal_iv_set_1 (data, false); | ||
637 | + | ||
638 | + if (!origset && !set) | ||
639 | + return NULL; | ||
640 | + | ||
641 | + origcost = origset ? iv_ca_cost (origset) : infinite_cost; | ||
642 | + cost = set ? iv_ca_cost (set) : infinite_cost; | ||
643 | + | ||
644 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
645 | { | ||
646 | - comp_cost cost = iv_ca_cost (set); | ||
647 | - fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity); | ||
648 | - } | ||
649 | + fprintf (dump_file, "Original cost %d (complexity %d)\n\n", | ||
650 | + origcost.cost, origcost.complexity); | ||
651 | + fprintf (dump_file, "Final cost %d (complexity %d)\n\n", | ||
652 | + cost.cost, cost.complexity); | ||
653 | + } | ||
654 | + | ||
655 | + /* Choose the one with the best cost. */ | ||
656 | + if (compare_costs (origcost, cost) <= 0) | ||
657 | + { | ||
658 | + if (set) | ||
659 | + iv_ca_free (&set); | ||
660 | + set = origset; | ||
661 | + } | ||
662 | + else if (origset) | ||
663 | + iv_ca_free (&origset); | ||
664 | |||
665 | for (i = 0; i < n_iv_uses (data); i++) | ||
666 | { | ||
667 | @@ -5768,6 +5811,25 @@ | ||
668 | VEC_free (iv_cand_p, heap, data->iv_candidates); | ||
669 | } | ||
670 | |||
671 | +/* Returns true if the loop body BODY includes any function calls other than inexpensive builtins. */ | ||
672 | + | ||
673 | +static bool | ||
674 | +loop_body_includes_call (basic_block *body, unsigned num_nodes) | ||
675 | +{ | ||
676 | + gimple_stmt_iterator gsi; | ||
677 | + unsigned i; | ||
678 | + | ||
679 | + for (i = 0; i < num_nodes; i++) | ||
680 | + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | ||
681 | + { | ||
682 | + gimple stmt = gsi_stmt (gsi); | ||
683 | + if (is_gimple_call (stmt) | ||
684 | + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) | ||
685 | + return true; | ||
686 | + } | ||
687 | + return false; | ||
688 | +} | ||
689 | + | ||
690 | /* Optimizes the LOOP. Returns true if anything changed. */ | ||
691 | |||
692 | static bool | ||
693 | @@ -5799,6 +5861,7 @@ | ||
694 | } | ||
695 | |||
696 | body = get_loop_body (loop); | ||
697 | + data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); | ||
698 | renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); | ||
699 | free (body); | ||
700 | |||
701 | |||
702 | === modified file 'gcc/tree.h' | ||
703 | --- old/gcc/tree.h 2010-04-02 18:54:46 +0000 | ||
704 | +++ new/gcc/tree.h 2010-08-02 13:51:23 +0000 | ||
705 | @@ -4962,6 +4962,8 @@ | ||
706 | extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, | ||
707 | tree, tree); | ||
708 | extern void set_builtin_user_assembler_name (tree decl, const char *asmspec); | ||
709 | +extern bool is_simple_builtin (tree); | ||
710 | +extern bool is_inexpensive_builtin (tree); | ||
711 | |||
712 | /* In convert.c */ | ||
713 | extern tree strip_float_extensions (tree); | ||
714 | |||