summary refs log tree commit diff stats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch')
-rw-r--r-- toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch 714
1 files changed, 0 insertions, 714 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch
deleted file mode 100644
index 9f0c98e9c9..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch
+++ /dev/null
@@ -1,714 +0,0 @@
12010-07-15 Sandra Loosemore <sandra@codesourcery.com>
2
3 Backport from mainline:
4
5 2010-06-09 Sandra Loosemore <sandra@codesourcery.com>
6
7 gcc/
8 * tree-ssa-loop-ivopts.c (adjust_setup_cost): New function.
9 (get_computation_cost_at): Use it.
10 (determine_use_iv_cost_condition): Likewise.
11 (determine_iv_cost): Likewise.
12
13 2010-07-05 Sandra Loosemore <sandra@codesourcery.com>
14
15 PR middle-end/42505
16
17 gcc/
18 * tree-ssa-loop-ivopts.c (determine_set_costs): Delete obsolete
19 comments about cost model.
20 (try_add_cand_for): Add second strategy for choosing initial set
21 based on original IVs, controlled by ORIGINALP argument.
22 (get_initial_solution): Add ORIGINALP argument.
23 (find_optimal_iv_set_1): New function, split from find_optimal_iv_set.
24 (find_optimal_iv_set): Try two different strategies for choosing
25 the IV set, and return the one with lower cost.
26
27 gcc/testsuite/
28 * gcc.target/arm/pr42505.c: New test case.
29
30 2010-07-10 Sandra Loosemore <sandra@codesourcery.com>
31
32 PR middle-end/42505
33
34 gcc/
35 * tree-inline.c (estimate_num_insns): Refactor builtin complexity
36 lookup code into....
37 * builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
38 new functions.
39 * tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
40 * cfgloopanal.c (target_clobbered_regs): Define.
41 (init_set_costs): Initialize target_clobbered_regs.
42 (estimate_reg_pressure_cost): Add call_p argument. When true,
43 adjust the number of available registers to exclude the
44 call-clobbered registers.
45 * cfgloop.h (target_clobbered_regs): Declare.
46 (estimate_reg_pressure_cost): Adjust declaration.
47 * tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
48 (ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
49 (determine_set_costs): Dump target_clobbered_regs.
50 (loop_body_includes_call): New function.
51 (tree_ssa_iv_optimize_loop): Use it to initialize new field.
52 * loop-invariant.c (gain_for_invariant): Adjust arguments to pass
53 call_p flag through.
54 (best_gain_for_invariant): Likewise.
55 (find_invariants_to_move): Likewise.
56 (move_single_loop_invariants): Likewise, using already-computed
57 has_call field.
58
59 2010-07-15 Jie Zhang <jie@codesourcery.com>
60
61 Issue #8497, #8893
62
63=== modified file 'gcc/builtins.c'
64--- old/gcc/builtins.c 2010-04-13 12:47:11 +0000
65+++ new/gcc/builtins.c 2010-08-02 13:51:23 +0000
66@@ -13624,3 +13624,123 @@
67 break;
68 }
69 }
70+
71+/* Return true if DECL is a builtin that expands to a constant or similarly
72+ simple code. */
73+bool
74+is_simple_builtin (tree decl)
75+{
76+ if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
77+ switch (DECL_FUNCTION_CODE (decl))
78+ {
79+ /* Builtins that expand to constants. */
80+ case BUILT_IN_CONSTANT_P:
81+ case BUILT_IN_EXPECT:
82+ case BUILT_IN_OBJECT_SIZE:
83+ case BUILT_IN_UNREACHABLE:
84+ /* Simple register moves or loads from stack. */
85+ case BUILT_IN_RETURN_ADDRESS:
86+ case BUILT_IN_EXTRACT_RETURN_ADDR:
87+ case BUILT_IN_FROB_RETURN_ADDR:
88+ case BUILT_IN_RETURN:
89+ case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
90+ case BUILT_IN_FRAME_ADDRESS:
91+ case BUILT_IN_VA_END:
92+ case BUILT_IN_STACK_SAVE:
93+ case BUILT_IN_STACK_RESTORE:
94+ /* Exception state returns or moves registers around. */
95+ case BUILT_IN_EH_FILTER:
96+ case BUILT_IN_EH_POINTER:
97+ case BUILT_IN_EH_COPY_VALUES:
98+ return true;
99+
100+ default:
101+ return false;
102+ }
103+
104+ return false;
105+}
106+
107+/* Return true if DECL is a builtin that is not expensive, i.e., one that is
108+ most probably expanded inline into reasonably simple code. This is a
109+ superset of is_simple_builtin. */
110+bool
111+is_inexpensive_builtin (tree decl)
112+{
113+ if (!decl)
114+ return false;
115+ else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
116+ return true;
117+ else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
118+ switch (DECL_FUNCTION_CODE (decl))
119+ {
120+ case BUILT_IN_ABS:
121+ case BUILT_IN_ALLOCA:
122+ case BUILT_IN_BSWAP32:
123+ case BUILT_IN_BSWAP64:
124+ case BUILT_IN_CLZ:
125+ case BUILT_IN_CLZIMAX:
126+ case BUILT_IN_CLZL:
127+ case BUILT_IN_CLZLL:
128+ case BUILT_IN_CTZ:
129+ case BUILT_IN_CTZIMAX:
130+ case BUILT_IN_CTZL:
131+ case BUILT_IN_CTZLL:
132+ case BUILT_IN_FFS:
133+ case BUILT_IN_FFSIMAX:
134+ case BUILT_IN_FFSL:
135+ case BUILT_IN_FFSLL:
136+ case BUILT_IN_IMAXABS:
137+ case BUILT_IN_FINITE:
138+ case BUILT_IN_FINITEF:
139+ case BUILT_IN_FINITEL:
140+ case BUILT_IN_FINITED32:
141+ case BUILT_IN_FINITED64:
142+ case BUILT_IN_FINITED128:
143+ case BUILT_IN_FPCLASSIFY:
144+ case BUILT_IN_ISFINITE:
145+ case BUILT_IN_ISINF_SIGN:
146+ case BUILT_IN_ISINF:
147+ case BUILT_IN_ISINFF:
148+ case BUILT_IN_ISINFL:
149+ case BUILT_IN_ISINFD32:
150+ case BUILT_IN_ISINFD64:
151+ case BUILT_IN_ISINFD128:
152+ case BUILT_IN_ISNAN:
153+ case BUILT_IN_ISNANF:
154+ case BUILT_IN_ISNANL:
155+ case BUILT_IN_ISNAND32:
156+ case BUILT_IN_ISNAND64:
157+ case BUILT_IN_ISNAND128:
158+ case BUILT_IN_ISNORMAL:
159+ case BUILT_IN_ISGREATER:
160+ case BUILT_IN_ISGREATEREQUAL:
161+ case BUILT_IN_ISLESS:
162+ case BUILT_IN_ISLESSEQUAL:
163+ case BUILT_IN_ISLESSGREATER:
164+ case BUILT_IN_ISUNORDERED:
165+ case BUILT_IN_VA_ARG_PACK:
166+ case BUILT_IN_VA_ARG_PACK_LEN:
167+ case BUILT_IN_VA_COPY:
168+ case BUILT_IN_TRAP:
169+ case BUILT_IN_SAVEREGS:
170+ case BUILT_IN_POPCOUNTL:
171+ case BUILT_IN_POPCOUNTLL:
172+ case BUILT_IN_POPCOUNTIMAX:
173+ case BUILT_IN_POPCOUNT:
174+ case BUILT_IN_PARITYL:
175+ case BUILT_IN_PARITYLL:
176+ case BUILT_IN_PARITYIMAX:
177+ case BUILT_IN_PARITY:
178+ case BUILT_IN_LABS:
179+ case BUILT_IN_LLABS:
180+ case BUILT_IN_PREFETCH:
181+ return true;
182+
183+ default:
184+ return is_simple_builtin (decl);
185+ }
186+
187+ return false;
188+}
189+
190
191=== modified file 'gcc/cfgloop.h'
192--- old/gcc/cfgloop.h 2009-11-25 10:55:54 +0000
193+++ new/gcc/cfgloop.h 2010-08-02 13:51:23 +0000
194@@ -622,13 +622,14 @@
195 /* The properties of the target. */
196
197 extern unsigned target_avail_regs;
198+extern unsigned target_clobbered_regs;
199 extern unsigned target_res_regs;
200 extern unsigned target_reg_cost [2];
201 extern unsigned target_spill_cost [2];
202
203 /* Register pressure estimation for induction variable optimizations & loop
204 invariant motion. */
205-extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
206+extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool);
207 extern void init_set_costs (void);
208
209 /* Loop optimizer initialization. */
210
211=== modified file 'gcc/cfgloopanal.c'
212--- old/gcc/cfgloopanal.c 2009-09-30 08:57:56 +0000
213+++ new/gcc/cfgloopanal.c 2010-08-02 13:51:23 +0000
214@@ -320,6 +320,8 @@
215 /* The properties of the target. */
216
217 unsigned target_avail_regs; /* Number of available registers. */
218+unsigned target_clobbered_regs; /* Number of available registers that are
219+ call-clobbered. */
220 unsigned target_res_regs; /* Number of registers reserved for temporary
221 expressions. */
222 unsigned target_reg_cost[2]; /* The cost for register when there still
223@@ -342,10 +344,15 @@
224 unsigned i;
225
226 target_avail_regs = 0;
227+ target_clobbered_regs = 0;
228 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
229 if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
230 && !fixed_regs[i])
231- target_avail_regs++;
232+ {
233+ target_avail_regs++;
234+ if (call_used_regs[i])
235+ target_clobbered_regs++;
236+ }
237
238 target_res_regs = 3;
239
240@@ -379,20 +386,29 @@
241
242 /* Estimates cost of increased register pressure caused by making N_NEW new
243 registers live around the loop. N_OLD is the number of registers live
244- around the loop. */
245+ around the loop. If CALL_P is true, also take into account that
246+ call-used registers may be clobbered in the loop body, reducing the
247+ number of available registers before we spill. */
248
249 unsigned
250-estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
251+estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
252+ bool call_p)
253 {
254 unsigned cost;
255 unsigned regs_needed = n_new + n_old;
256+ unsigned available_regs = target_avail_regs;
257+
258+ /* If there is a call in the loop body, the call-clobbered registers
259+ are not available for loop invariants. */
260+ if (call_p)
261+ available_regs = available_regs - target_clobbered_regs;
262
263 /* If we have enough registers, we should use them and not restrict
264 the transformations unnecessarily. */
265- if (regs_needed + target_res_regs <= target_avail_regs)
266+ if (regs_needed + target_res_regs <= available_regs)
267 return 0;
268
269- if (regs_needed <= target_avail_regs)
270+ if (regs_needed <= available_regs)
271 /* If we are close to running out of registers, try to preserve
272 them. */
273 cost = target_reg_cost [speed] * n_new;
274
275=== modified file 'gcc/loop-invariant.c'
276--- old/gcc/loop-invariant.c 2010-04-02 18:54:46 +0000
277+++ new/gcc/loop-invariant.c 2010-08-02 13:51:23 +0000
278@@ -1173,11 +1173,13 @@
279 /* Calculates gain for eliminating invariant INV. REGS_USED is the number
280 of registers used in the loop, NEW_REGS is the number of new variables
281 already added due to the invariant motion. The number of registers needed
282- for it is stored in *REGS_NEEDED. */
283+ for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed
284+ through to estimate_reg_pressure_cost. */
285
286 static int
287 gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
288- unsigned *new_regs, unsigned regs_used, bool speed)
289+ unsigned *new_regs, unsigned regs_used,
290+ bool speed, bool call_p)
291 {
292 int comp_cost, size_cost;
293
294@@ -1188,9 +1190,9 @@
295 if (! flag_ira_loop_pressure)
296 {
297 size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
298- regs_used, speed)
299+ regs_used, speed, call_p)
300 - estimate_reg_pressure_cost (new_regs[0],
301- regs_used, speed));
302+ regs_used, speed, call_p));
303 }
304 else
305 {
306@@ -1245,7 +1247,8 @@
307
308 static int
309 best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
310- unsigned *new_regs, unsigned regs_used, bool speed)
311+ unsigned *new_regs, unsigned regs_used,
312+ bool speed, bool call_p)
313 {
314 struct invariant *inv;
315 int i, gain = 0, again;
316@@ -1261,7 +1264,7 @@
317 continue;
318
319 again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
320- speed);
321+ speed, call_p);
322 if (again > gain)
323 {
324 gain = again;
325@@ -1314,7 +1317,7 @@
326 /* Determines which invariants to move. */
327
328 static void
329-find_invariants_to_move (bool speed)
330+find_invariants_to_move (bool speed, bool call_p)
331 {
332 int gain;
333 unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
334@@ -1353,7 +1356,8 @@
335 new_regs[ira_reg_class_cover[i]] = 0;
336 }
337 while ((gain = best_gain_for_invariant (&inv, regs_needed,
338- new_regs, regs_used, speed)) > 0)
339+ new_regs, regs_used,
340+ speed, call_p)) > 0)
341 {
342 set_move_mark (inv->invno, gain);
343 if (! flag_ira_loop_pressure)
344@@ -1554,7 +1558,8 @@
345 init_inv_motion_data ();
346
347 find_invariants (loop);
348- find_invariants_to_move (optimize_loop_for_speed_p (loop));
349+ find_invariants_to_move (optimize_loop_for_speed_p (loop),
350+ LOOP_DATA (loop)->has_call);
351 move_invariants (loop);
352
353 free_inv_motion_data ();
354
355=== added file 'gcc/testsuite/gcc.target/arm/pr42505.c'
356--- old/gcc/testsuite/gcc.target/arm/pr42505.c 1970-01-01 00:00:00 +0000
357+++ new/gcc/testsuite/gcc.target/arm/pr42505.c 2010-08-02 13:51:23 +0000
358@@ -0,0 +1,23 @@
359+/* { dg-options "-mthumb -Os -march=armv5te" } */
360+/* { dg-require-effective-target arm_thumb1_ok } */
361+/* { dg-final { scan-assembler-not "str\[\\t \]*r.,\[\\t \]*.sp," } } */
362+
363+struct A {
364+ int f1;
365+ int f2;
366+};
367+
368+int func(int c);
369+
370+/* This function should not need to spill anything to the stack. */
371+int test(struct A* src, struct A* dst, int count)
372+{
373+ while (count--) {
374+ if (!func(src->f2)) {
375+ return 0;
376+ }
377+ *dst++ = *src++;
378+ }
379+
380+ return 1;
381+}
382
383=== modified file 'gcc/tree-inline.c'
384--- old/gcc/tree-inline.c 2010-03-18 20:07:13 +0000
385+++ new/gcc/tree-inline.c 2010-08-02 13:51:23 +0000
386@@ -3246,34 +3246,13 @@
387 if (POINTER_TYPE_P (funtype))
388 funtype = TREE_TYPE (funtype);
389
390- if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
391+ if (is_simple_builtin (decl))
392+ return 0;
393+ else if (is_inexpensive_builtin (decl))
394 cost = weights->target_builtin_call_cost;
395 else
396 cost = weights->call_cost;
397
398- if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
399- switch (DECL_FUNCTION_CODE (decl))
400- {
401- case BUILT_IN_CONSTANT_P:
402- return 0;
403- case BUILT_IN_EXPECT:
404- return 0;
405-
406- /* Prefetch instruction is not expensive. */
407- case BUILT_IN_PREFETCH:
408- cost = weights->target_builtin_call_cost;
409- break;
410-
411- /* Exception state returns or moves registers around. */
412- case BUILT_IN_EH_FILTER:
413- case BUILT_IN_EH_POINTER:
414- case BUILT_IN_EH_COPY_VALUES:
415- return 0;
416-
417- default:
418- break;
419- }
420-
421 if (decl)
422 funtype = TREE_TYPE (decl);
423
424
425=== modified file 'gcc/tree-ssa-loop-ivopts.c'
426--- old/gcc/tree-ssa-loop-ivopts.c 2010-04-01 15:18:07 +0000
427+++ new/gcc/tree-ssa-loop-ivopts.c 2010-08-02 13:51:23 +0000
428@@ -257,6 +257,9 @@
429
430 /* Are we optimizing for speed? */
431 bool speed;
432+
433+ /* Whether the loop body includes any function calls. */
434+ bool body_includes_call;
435 };
436
437 /* An assignment of iv candidates to uses. */
438@@ -2926,6 +2929,20 @@
439 return get_computation_at (loop, use, cand, use->stmt);
440 }
441
442+/* Adjust the cost COST for being in loop setup rather than loop body.
443+ If we're optimizing for space, the loop setup overhead is constant;
444+ if we're optimizing for speed, amortize it over the per-iteration cost. */
445+static unsigned
446+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
447+{
448+ if (cost == INFTY)
449+ return cost;
450+ else if (optimize_loop_for_speed_p (data->current_loop))
451+ return cost / AVG_LOOP_NITER (data->current_loop);
452+ else
453+ return cost;
454+}
455+
456 /* Returns cost of addition in MODE. */
457
458 static unsigned
459@@ -3838,8 +3855,8 @@
460 /* Symbol + offset should be compile-time computable so consider that they
461 are added once to the variable, if present. */
462 if (var_present && (symbol_present || offset))
463- cost.cost += add_cost (TYPE_MODE (ctype), speed)
464- / AVG_LOOP_NITER (data->current_loop);
465+ cost.cost += adjust_setup_cost (data,
466+ add_cost (TYPE_MODE (ctype), speed));
467
468 /* Having offset does not affect runtime cost in case it is added to
469 symbol, but it increases complexity. */
470@@ -4104,7 +4121,7 @@
471 elim_cost = force_var_cost (data, bound, &depends_on_elim);
472 /* The bound is a loop invariant, so it will be only computed
473 once. */
474- elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
475+ elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
476 }
477 else
478 elim_cost = infinite_cost;
479@@ -4351,7 +4368,7 @@
480 cost_base = force_var_cost (data, base, NULL);
481 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
482
483- cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
484+ cost = cost_step + adjust_setup_cost (data, cost_base.cost);
485
486 /* Prefer the original ivs unless we may gain something by replacing it.
487 The reason is to make debugging simpler; so this is not relevant for
488@@ -4404,7 +4421,8 @@
489 {
490 /* We add size to the cost, so that we prefer eliminating ivs
491 if possible. */
492- return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
493+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
494+ data->body_includes_call);
495 }
496
497 /* For each size of the induction variable set determine the penalty. */
498@@ -4419,30 +4437,11 @@
499 struct loop *loop = data->current_loop;
500 bitmap_iterator bi;
501
502- /* We use the following model (definitely improvable, especially the
503- cost function -- TODO):
504-
505- We estimate the number of registers available (using MD data), name it A.
506-
507- We estimate the number of registers used by the loop, name it U. This
508- number is obtained as the number of loop phi nodes (not counting virtual
509- registers and bivs) + the number of variables from outside of the loop.
510-
511- We set a reserve R (free regs that are used for temporary computations,
512- etc.). For now the reserve is a constant 3.
513-
514- Let I be the number of induction variables.
515-
516- -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
517- make a lot of ivs without a reason).
518- -- if A - R < U + I <= A, the cost is I * PRES_COST
519- -- if U + I > A, the cost is I * PRES_COST and
520- number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
521-
522 if (dump_file && (dump_flags & TDF_DETAILS))
523 {
524 fprintf (dump_file, "Global costs:\n");
525 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
526+ fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
527 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
528 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
529 }
530@@ -5062,11 +5061,13 @@
531 }
532
533 /* Tries to extend the sets IVS in the best possible way in order
534- to express the USE. */
535+ to express the USE. If ORIGINALP is true, prefer candidates from
536+ the original set of IVs, otherwise favor important candidates not
537+ based on any memory object. */
538
539 static bool
540 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
541- struct iv_use *use)
542+ struct iv_use *use, bool originalp)
543 {
544 comp_cost best_cost, act_cost;
545 unsigned i;
546@@ -5085,7 +5086,8 @@
547 iv_ca_set_no_cp (data, ivs, use);
548 }
549
550- /* First try important candidates not based on any memory object. Only if
551+ /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
552+ first try important candidates not based on any memory object. Only if
553 this fails, try the specific ones. Rationale -- in loops with many
554 variables the best choice often is to use just one generic biv. If we
555 added here many ivs specific to the uses, the optimization algorithm later
556@@ -5097,7 +5099,10 @@
557 {
558 cand = iv_cand (data, i);
559
560- if (cand->iv->base_object != NULL_TREE)
561+ if (originalp && cand->pos !=IP_ORIGINAL)
562+ continue;
563+
564+ if (!originalp && cand->iv->base_object != NULL_TREE)
565 continue;
566
567 if (iv_ca_cand_used_p (ivs, cand))
568@@ -5133,8 +5138,13 @@
569 continue;
570
571 /* Already tried this. */
572- if (cand->important && cand->iv->base_object == NULL_TREE)
573- continue;
574+ if (cand->important)
575+ {
576+ if (originalp && cand->pos == IP_ORIGINAL)
577+ continue;
578+ if (!originalp && cand->iv->base_object == NULL_TREE)
579+ continue;
580+ }
581
582 if (iv_ca_cand_used_p (ivs, cand))
583 continue;
584@@ -5168,13 +5178,13 @@
585 /* Finds an initial assignment of candidates to uses. */
586
587 static struct iv_ca *
588-get_initial_solution (struct ivopts_data *data)
589+get_initial_solution (struct ivopts_data *data, bool originalp)
590 {
591 struct iv_ca *ivs = iv_ca_new (data);
592 unsigned i;
593
594 for (i = 0; i < n_iv_uses (data); i++)
595- if (!try_add_cand_for (data, ivs, iv_use (data, i)))
596+ if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
597 {
598 iv_ca_free (&ivs);
599 return NULL;
600@@ -5246,14 +5256,12 @@
601 solution and remove the unused ivs while this improves the cost. */
602
603 static struct iv_ca *
604-find_optimal_iv_set (struct ivopts_data *data)
605+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
606 {
607- unsigned i;
608 struct iv_ca *set;
609- struct iv_use *use;
610
611 /* Get the initial solution. */
612- set = get_initial_solution (data);
613+ set = get_initial_solution (data, originalp);
614 if (!set)
615 {
616 if (dump_file && (dump_flags & TDF_DETAILS))
617@@ -5276,11 +5284,46 @@
618 }
619 }
620
621+ return set;
622+}
623+
624+static struct iv_ca *
625+find_optimal_iv_set (struct ivopts_data *data)
626+{
627+ unsigned i;
628+ struct iv_ca *set, *origset;
629+ struct iv_use *use;
630+ comp_cost cost, origcost;
631+
632+ /* Determine the cost based on a strategy that starts with original IVs,
633+ and try again using a strategy that prefers candidates not based
634+ on any memory object. */
635+ origset = find_optimal_iv_set_1 (data, true);
636+ set = find_optimal_iv_set_1 (data, false);
637+
638+ if (!origset && !set)
639+ return NULL;
640+
641+ origcost = origset ? iv_ca_cost (origset) : infinite_cost;
642+ cost = set ? iv_ca_cost (set) : infinite_cost;
643+
644 if (dump_file && (dump_flags & TDF_DETAILS))
645 {
646- comp_cost cost = iv_ca_cost (set);
647- fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
648- }
649+ fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
650+ origcost.cost, origcost.complexity);
651+ fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
652+ cost.cost, cost.complexity);
653+ }
654+
655+ /* Choose the one with the best cost. */
656+ if (compare_costs (origcost, cost) <= 0)
657+ {
658+ if (set)
659+ iv_ca_free (&set);
660+ set = origset;
661+ }
662+ else if (origset)
663+ iv_ca_free (&origset);
664
665 for (i = 0; i < n_iv_uses (data); i++)
666 {
667@@ -5768,6 +5811,25 @@
668 VEC_free (iv_cand_p, heap, data->iv_candidates);
669 }
670
671+/* Returns true if loop body BODY calls anything but inexpensive builtins.  */
672+
673+static bool
674+loop_body_includes_call (basic_block *body, unsigned num_nodes)
675+{
676+ gimple_stmt_iterator gsi;
677+ unsigned i;
678+
679+ for (i = 0; i < num_nodes; i++)
680+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
681+ {
682+ gimple stmt = gsi_stmt (gsi);
683+ if (is_gimple_call (stmt)
684+ && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
685+ return true;
686+ }
687+ return false;
688+}
689+
690 /* Optimizes the LOOP. Returns true if anything changed. */
691
692 static bool
693@@ -5799,6 +5861,7 @@
694 }
695
696 body = get_loop_body (loop);
697+ data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
698 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
699 free (body);
700
701
702=== modified file 'gcc/tree.h'
703--- old/gcc/tree.h 2010-04-02 18:54:46 +0000
704+++ new/gcc/tree.h 2010-08-02 13:51:23 +0000
705@@ -4962,6 +4962,8 @@
706 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
707 tree, tree);
708 extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
709+extern bool is_simple_builtin (tree);
710+extern bool is_inexpensive_builtin (tree);
711
712 /* In convert.c */
713 extern tree strip_float_extensions (tree);
714