diff options
Diffstat (limited to 'recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch')
| -rw-r--r-- | recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch | 714 |
1 files changed, 714 insertions, 0 deletions
diff --git a/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch new file mode 100644 index 0000000000..9f0c98e9c9 --- /dev/null +++ b/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99312.patch | |||
| @@ -0,0 +1,714 @@ | |||
| 1 | 2010-07-15 Sandra Loosemore <sandra@codesourcery.com> | ||
| 2 | |||
| 3 | Backport from mainline: | ||
| 4 | |||
| 5 | 2010-06-09 Sandra Loosemore <sandra@codesourcery.com> | ||
| 6 | |||
| 7 | gcc/ | ||
| 8 | * tree-ssa-loop-ivopts.c (adjust_setup_cost): New function. | ||
| 9 | (get_computation_cost_at): Use it. | ||
| 10 | (determine_use_iv_cost_condition): Likewise. | ||
| 11 | (determine_iv_cost): Likewise. | ||
| 12 | |||
| 13 | 2010-07-05 Sandra Loosemore <sandra@codesourcery.com> | ||
| 14 | |||
| 15 | PR middle-end/42505 | ||
| 16 | |||
| 17 | gcc/ | ||
| 18 | * tree-ssa-loop-ivopts.c (determine_set_costs): Delete obsolete | ||
| 19 | comments about cost model. | ||
| 20 | (try_add_cand_for): Add second strategy for choosing initial set | ||
| 21 | based on original IVs, controlled by ORIGINALP argument. | ||
| 22 | (get_initial_solution): Add ORIGINALP argument. | ||
| 23 | (find_optimal_iv_set_1): New function, split from find_optimal_iv_set. | ||
| 24 | (find_optimal_iv_set): Try two different strategies for choosing | ||
| 25 | the IV set, and return the one with lower cost. | ||
| 26 | |||
| 27 | gcc/testsuite/ | ||
| 28 | * gcc.target/arm/pr42505.c: New test case. | ||
| 29 | |||
| 30 | 2010-07-10 Sandra Loosemore <sandra@codesourcery.com> | ||
| 31 | |||
| 32 | PR middle-end/42505 | ||
| 33 | |||
| 34 | gcc/ | ||
| 35 | * tree-inline.c (estimate_num_insns): Refactor builtin complexity | ||
| 36 | lookup code into.... | ||
| 37 | * builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these | ||
| 38 | new functions. | ||
| 39 | * tree.h (is_simple_builtin, is_inexpensive_builtin): Declare. | ||
| 40 | * cfgloopanal.c (target_clobbered_regs): Define. | ||
| 41 | (init_set_costs): Initialize target_clobbered_regs. | ||
| 42 | (estimate_reg_pressure_cost): Add call_p argument. When true, | ||
| 43 | adjust the number of available registers to exclude the | ||
| 44 | call-clobbered registers. | ||
| 45 | * cfgloop.h (target_clobbered_regs): Declare. | ||
| 46 | (estimate_reg_pressure_cost): Adjust declaration. | ||
| 47 | * tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call. | ||
| 48 | (ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost. | ||
| 49 | (determine_set_costs): Dump target_clobbered_regs. | ||
| 50 | (loop_body_includes_call): New function. | ||
| 51 | (tree_ssa_iv_optimize_loop): Use it to initialize new field. | ||
| 52 | * loop-invariant.c (gain_for_invariant): Adjust arguments to pass | ||
| 53 | call_p flag through. | ||
| 54 | (best_gain_for_invariant): Likewise. | ||
| 55 | (find_invariants_to_move): Likewise. | ||
| 56 | (move_single_loop_invariants): Likewise, using already-computed | ||
| 57 | has_call field. | ||
| 58 | |||
| 59 | 2010-07-15 Jie Zhang <jie@codesourcery.com> | ||
| 60 | |||
| 61 | Issue #8497, #8893 | ||
| 62 | |||
| 63 | === modified file 'gcc/builtins.c' | ||
| 64 | --- old/gcc/builtins.c 2010-04-13 12:47:11 +0000 | ||
| 65 | +++ new/gcc/builtins.c 2010-08-02 13:51:23 +0000 | ||
| 66 | @@ -13624,3 +13624,123 @@ | ||
| 67 | break; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | + | ||
| 71 | +/* Return true if DECL is a normal builtin that expands to a constant or | ||
| 72 | + similarly simple code. Return false for a null DECL or any other decl. */ | ||
| 73 | +bool | ||
| 74 | +is_simple_builtin (tree decl) | ||
| 75 | +{ | ||
| 76 | + if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
| 77 | + switch (DECL_FUNCTION_CODE (decl)) | ||
| 78 | + { | ||
| 79 | + /* Builtins that expand to constants. */ | ||
| 80 | + case BUILT_IN_CONSTANT_P: | ||
| 81 | + case BUILT_IN_EXPECT: | ||
| 82 | + case BUILT_IN_OBJECT_SIZE: | ||
| 83 | + case BUILT_IN_UNREACHABLE: | ||
| 84 | + /* Simple register moves or loads from stack. */ | ||
| 85 | + case BUILT_IN_RETURN_ADDRESS: | ||
| 86 | + case BUILT_IN_EXTRACT_RETURN_ADDR: | ||
| 87 | + case BUILT_IN_FROB_RETURN_ADDR: | ||
| 88 | + case BUILT_IN_RETURN: | ||
| 89 | + case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: | ||
| 90 | + case BUILT_IN_FRAME_ADDRESS: | ||
| 91 | + case BUILT_IN_VA_END: | ||
| 92 | + case BUILT_IN_STACK_SAVE: | ||
| 93 | + case BUILT_IN_STACK_RESTORE: | ||
| 94 | + /* Exception state returns or moves registers around. */ | ||
| 95 | + case BUILT_IN_EH_FILTER: | ||
| 96 | + case BUILT_IN_EH_POINTER: | ||
| 97 | + case BUILT_IN_EH_COPY_VALUES: | ||
| 98 | + return true; | ||
| 99 | + | ||
| 100 | + default: | ||
| 101 | + return false; | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + return false; /* Null DECL or not a BUILT_IN_NORMAL builtin. */ | ||
| 105 | +} | ||
| 106 | + | ||
| 107 | +/* Return true if DECL is a builtin that is not expensive, i.e., one that is | ||
| 108 | + most probably expanded inline into reasonably simple code. This is a | ||
| 109 | + superset of is_simple_builtin. */ | ||
| 110 | +bool | ||
| 111 | +is_inexpensive_builtin (tree decl) | ||
| 112 | +{ | ||
| 113 | + if (!decl) | ||
| 114 | + return false; | ||
| 115 | + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) | ||
| 116 | + return true; /* Every BUILT_IN_MD builtin is treated as inexpensive. */ | ||
| 117 | + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
| 118 | + switch (DECL_FUNCTION_CODE (decl)) | ||
| 119 | + { | ||
| 120 | + case BUILT_IN_ABS: | ||
| 121 | + case BUILT_IN_ALLOCA: | ||
| 122 | + case BUILT_IN_BSWAP32: | ||
| 123 | + case BUILT_IN_BSWAP64: | ||
| 124 | + case BUILT_IN_CLZ: | ||
| 125 | + case BUILT_IN_CLZIMAX: | ||
| 126 | + case BUILT_IN_CLZL: | ||
| 127 | + case BUILT_IN_CLZLL: | ||
| 128 | + case BUILT_IN_CTZ: | ||
| 129 | + case BUILT_IN_CTZIMAX: | ||
| 130 | + case BUILT_IN_CTZL: | ||
| 131 | + case BUILT_IN_CTZLL: | ||
| 132 | + case BUILT_IN_FFS: | ||
| 133 | + case BUILT_IN_FFSIMAX: | ||
| 134 | + case BUILT_IN_FFSL: | ||
| 135 | + case BUILT_IN_FFSLL: | ||
| 136 | + case BUILT_IN_IMAXABS: | ||
| 137 | + case BUILT_IN_FINITE: | ||
| 138 | + case BUILT_IN_FINITEF: | ||
| 139 | + case BUILT_IN_FINITEL: | ||
| 140 | + case BUILT_IN_FINITED32: | ||
| 141 | + case BUILT_IN_FINITED64: | ||
| 142 | + case BUILT_IN_FINITED128: | ||
| 143 | + case BUILT_IN_FPCLASSIFY: | ||
| 144 | + case BUILT_IN_ISFINITE: | ||
| 145 | + case BUILT_IN_ISINF_SIGN: | ||
| 146 | + case BUILT_IN_ISINF: | ||
| 147 | + case BUILT_IN_ISINFF: | ||
| 148 | + case BUILT_IN_ISINFL: | ||
| 149 | + case BUILT_IN_ISINFD32: | ||
| 150 | + case BUILT_IN_ISINFD64: | ||
| 151 | + case BUILT_IN_ISINFD128: | ||
| 152 | + case BUILT_IN_ISNAN: | ||
| 153 | + case BUILT_IN_ISNANF: | ||
| 154 | + case BUILT_IN_ISNANL: | ||
| 155 | + case BUILT_IN_ISNAND32: | ||
| 156 | + case BUILT_IN_ISNAND64: | ||
| 157 | + case BUILT_IN_ISNAND128: | ||
| 158 | + case BUILT_IN_ISNORMAL: | ||
| 159 | + case BUILT_IN_ISGREATER: | ||
| 160 | + case BUILT_IN_ISGREATEREQUAL: | ||
| 161 | + case BUILT_IN_ISLESS: | ||
| 162 | + case BUILT_IN_ISLESSEQUAL: | ||
| 163 | + case BUILT_IN_ISLESSGREATER: | ||
| 164 | + case BUILT_IN_ISUNORDERED: | ||
| 165 | + case BUILT_IN_VA_ARG_PACK: | ||
| 166 | + case BUILT_IN_VA_ARG_PACK_LEN: | ||
| 167 | + case BUILT_IN_VA_COPY: | ||
| 168 | + case BUILT_IN_TRAP: | ||
| 169 | + case BUILT_IN_SAVEREGS: | ||
| 170 | + case BUILT_IN_POPCOUNTL: | ||
| 171 | + case BUILT_IN_POPCOUNTLL: | ||
| 172 | + case BUILT_IN_POPCOUNTIMAX: | ||
| 173 | + case BUILT_IN_POPCOUNT: | ||
| 174 | + case BUILT_IN_PARITYL: | ||
| 175 | + case BUILT_IN_PARITYLL: | ||
| 176 | + case BUILT_IN_PARITYIMAX: | ||
| 177 | + case BUILT_IN_PARITY: | ||
| 178 | + case BUILT_IN_LABS: | ||
| 179 | + case BUILT_IN_LLABS: | ||
| 180 | + case BUILT_IN_PREFETCH: | ||
| 181 | + return true; | ||
| 182 | + | ||
| 183 | + default: | ||
| 184 | + return is_simple_builtin (decl); /* Keeps this a superset. */ | ||
| 185 | + } | ||
| 186 | + | ||
| 187 | + return false; /* Neither BUILT_IN_MD nor BUILT_IN_NORMAL. */ | ||
| 188 | +} | ||
| 189 | + | ||
| 190 | |||
| 191 | === modified file 'gcc/cfgloop.h' | ||
| 192 | --- old/gcc/cfgloop.h 2009-11-25 10:55:54 +0000 | ||
| 193 | +++ new/gcc/cfgloop.h 2010-08-02 13:51:23 +0000 | ||
| 194 | @@ -622,13 +622,14 @@ | ||
| 195 | /* The properties of the target. */ | ||
| 196 | |||
| 197 | extern unsigned target_avail_regs; | ||
| 198 | +extern unsigned target_clobbered_regs; | ||
| 199 | extern unsigned target_res_regs; | ||
| 200 | extern unsigned target_reg_cost [2]; | ||
| 201 | extern unsigned target_spill_cost [2]; | ||
| 202 | |||
| 203 | /* Register pressure estimation for induction variable optimizations & loop | ||
| 204 | invariant motion. */ | ||
| 205 | -extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool); | ||
| 206 | +extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool); | ||
| 207 | extern void init_set_costs (void); | ||
| 208 | |||
| 209 | /* Loop optimizer initialization. */ | ||
| 210 | |||
| 211 | === modified file 'gcc/cfgloopanal.c' | ||
| 212 | --- old/gcc/cfgloopanal.c 2009-09-30 08:57:56 +0000 | ||
| 213 | +++ new/gcc/cfgloopanal.c 2010-08-02 13:51:23 +0000 | ||
| 214 | @@ -320,6 +320,8 @@ | ||
| 215 | /* The properties of the target. */ | ||
| 216 | |||
| 217 | unsigned target_avail_regs; /* Number of available registers. */ | ||
| 218 | +unsigned target_clobbered_regs; /* Number of available registers that are | ||
| 219 | + call-clobbered. */ | ||
| 220 | unsigned target_res_regs; /* Number of registers reserved for temporary | ||
| 221 | expressions. */ | ||
| 222 | unsigned target_reg_cost[2]; /* The cost for register when there still | ||
| 223 | @@ -342,10 +344,15 @@ | ||
| 224 | unsigned i; | ||
| 225 | |||
| 226 | target_avail_regs = 0; | ||
| 227 | + target_clobbered_regs = 0; | ||
| 228 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | ||
| 229 | if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) | ||
| 230 | && !fixed_regs[i]) | ||
| 231 | - target_avail_regs++; | ||
| 232 | + { | ||
| 233 | + target_avail_regs++; | ||
| 234 | + if (call_used_regs[i]) | ||
| 235 | + target_clobbered_regs++; | ||
| 236 | + } | ||
| 237 | |||
| 238 | target_res_regs = 3; | ||
| 239 | |||
| 240 | @@ -379,20 +386,29 @@ | ||
| 241 | |||
| 242 | /* Estimates cost of increased register pressure caused by making N_NEW new | ||
| 243 | registers live around the loop. N_OLD is the number of registers live | ||
| 244 | - around the loop. */ | ||
| 245 | + around the loop. If CALL_P is true, also take into account that | ||
| 246 | + call-used registers may be clobbered in the loop body, reducing the | ||
| 247 | + number of available registers before we spill. */ | ||
| 248 | |||
| 249 | unsigned | ||
| 250 | -estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) | ||
| 251 | +estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, | ||
| 252 | + bool call_p) | ||
| 253 | { | ||
| 254 | unsigned cost; | ||
| 255 | unsigned regs_needed = n_new + n_old; | ||
| 256 | + unsigned available_regs = target_avail_regs; | ||
| 257 | + | ||
| 258 | + /* If there is a call in the loop body, the call-clobbered registers | ||
| 259 | + are not available for loop invariants. */ | ||
| 260 | + if (call_p) | ||
| 261 | + available_regs = available_regs - target_clobbered_regs; | ||
| 262 | |||
| 263 | /* If we have enough registers, we should use them and not restrict | ||
| 264 | the transformations unnecessarily. */ | ||
| 265 | - if (regs_needed + target_res_regs <= target_avail_regs) | ||
| 266 | + if (regs_needed + target_res_regs <= available_regs) | ||
| 267 | return 0; | ||
| 268 | |||
| 269 | - if (regs_needed <= target_avail_regs) | ||
| 270 | + if (regs_needed <= available_regs) | ||
| 271 | /* If we are close to running out of registers, try to preserve | ||
| 272 | them. */ | ||
| 273 | cost = target_reg_cost [speed] * n_new; | ||
| 274 | |||
| 275 | === modified file 'gcc/loop-invariant.c' | ||
| 276 | --- old/gcc/loop-invariant.c 2010-04-02 18:54:46 +0000 | ||
| 277 | +++ new/gcc/loop-invariant.c 2010-08-02 13:51:23 +0000 | ||
| 278 | @@ -1173,11 +1173,13 @@ | ||
| 279 | /* Calculates gain for eliminating invariant INV. REGS_USED is the number | ||
| 280 | of registers used in the loop, NEW_REGS is the number of new variables | ||
| 281 | already added due to the invariant motion. The number of registers needed | ||
| 282 | - for it is stored in *REGS_NEEDED. */ | ||
| 283 | + for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed | ||
| 284 | + through to estimate_reg_pressure_cost. */ | ||
| 285 | |||
| 286 | static int | ||
| 287 | gain_for_invariant (struct invariant *inv, unsigned *regs_needed, | ||
| 288 | - unsigned *new_regs, unsigned regs_used, bool speed) | ||
| 289 | + unsigned *new_regs, unsigned regs_used, | ||
| 290 | + bool speed, bool call_p) | ||
| 291 | { | ||
| 292 | int comp_cost, size_cost; | ||
| 293 | |||
| 294 | @@ -1188,9 +1190,9 @@ | ||
| 295 | if (! flag_ira_loop_pressure) | ||
| 296 | { | ||
| 297 | size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0], | ||
| 298 | - regs_used, speed) | ||
| 299 | + regs_used, speed, call_p) | ||
| 300 | - estimate_reg_pressure_cost (new_regs[0], | ||
| 301 | - regs_used, speed)); | ||
| 302 | + regs_used, speed, call_p)); | ||
| 303 | } | ||
| 304 | else | ||
| 305 | { | ||
| 306 | @@ -1245,7 +1247,8 @@ | ||
| 307 | |||
| 308 | static int | ||
| 309 | best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, | ||
| 310 | - unsigned *new_regs, unsigned regs_used, bool speed) | ||
| 311 | + unsigned *new_regs, unsigned regs_used, | ||
| 312 | + bool speed, bool call_p) | ||
| 313 | { | ||
| 314 | struct invariant *inv; | ||
| 315 | int i, gain = 0, again; | ||
| 316 | @@ -1261,7 +1264,7 @@ | ||
| 317 | continue; | ||
| 318 | |||
| 319 | again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used, | ||
| 320 | - speed); | ||
| 321 | + speed, call_p); | ||
| 322 | if (again > gain) | ||
| 323 | { | ||
| 324 | gain = again; | ||
| 325 | @@ -1314,7 +1317,7 @@ | ||
| 326 | /* Determines which invariants to move. */ | ||
| 327 | |||
| 328 | static void | ||
| 329 | -find_invariants_to_move (bool speed) | ||
| 330 | +find_invariants_to_move (bool speed, bool call_p) | ||
| 331 | { | ||
| 332 | int gain; | ||
| 333 | unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES]; | ||
| 334 | @@ -1353,7 +1356,8 @@ | ||
| 335 | new_regs[ira_reg_class_cover[i]] = 0; | ||
| 336 | } | ||
| 337 | while ((gain = best_gain_for_invariant (&inv, regs_needed, | ||
| 338 | - new_regs, regs_used, speed)) > 0) | ||
| 339 | + new_regs, regs_used, | ||
| 340 | + speed, call_p)) > 0) | ||
| 341 | { | ||
| 342 | set_move_mark (inv->invno, gain); | ||
| 343 | if (! flag_ira_loop_pressure) | ||
| 344 | @@ -1554,7 +1558,8 @@ | ||
| 345 | init_inv_motion_data (); | ||
| 346 | |||
| 347 | find_invariants (loop); | ||
| 348 | - find_invariants_to_move (optimize_loop_for_speed_p (loop)); | ||
| 349 | + find_invariants_to_move (optimize_loop_for_speed_p (loop), | ||
| 350 | + LOOP_DATA (loop)->has_call); | ||
| 351 | move_invariants (loop); | ||
| 352 | |||
| 353 | free_inv_motion_data (); | ||
| 354 | |||
| 355 | === added file 'gcc/testsuite/gcc.target/arm/pr42505.c' | ||
| 356 | --- old/gcc/testsuite/gcc.target/arm/pr42505.c 1970-01-01 00:00:00 +0000 | ||
| 357 | +++ new/gcc/testsuite/gcc.target/arm/pr42505.c 2010-08-02 13:51:23 +0000 | ||
| 358 | @@ -0,0 +1,23 @@ | ||
| 359 | +/* { dg-options "-mthumb -Os -march=armv5te" } */ | ||
| 360 | +/* { dg-require-effective-target arm_thumb1_ok } */ | ||
| 361 | +/* { dg-final { scan-assembler-not "str\[\\t \]*r.,\[\\t \]*.sp," } } */ | ||
| 362 | + | ||
| 363 | +struct A { | ||
| 364 | + int f1; | ||
| 365 | + int f2; | ||
| 366 | +}; | ||
| 367 | + | ||
| 368 | +int func(int c); /* Declared only; called from the loop in test. */ | ||
| 369 | + | ||
| 370 | +/* This function should not need to spill anything to the stack (see dg-final). */ | ||
| 371 | +int test(struct A* src, struct A* dst, int count) | ||
| 372 | +{ | ||
| 373 | + while (count--) { | ||
| 374 | + if (!func(src->f2)) { | ||
| 375 | + return 0; | ||
| 376 | + } | ||
| 377 | + *dst++ = *src++; | ||
| 378 | + } | ||
| 379 | + | ||
| 380 | + return 1; | ||
| 381 | +} | ||
| 382 | |||
| 383 | === modified file 'gcc/tree-inline.c' | ||
| 384 | --- old/gcc/tree-inline.c 2010-03-18 20:07:13 +0000 | ||
| 385 | +++ new/gcc/tree-inline.c 2010-08-02 13:51:23 +0000 | ||
| 386 | @@ -3246,34 +3246,13 @@ | ||
| 387 | if (POINTER_TYPE_P (funtype)) | ||
| 388 | funtype = TREE_TYPE (funtype); | ||
| 389 | |||
| 390 | - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) | ||
| 391 | + if (is_simple_builtin (decl)) | ||
| 392 | + return 0; | ||
| 393 | + else if (is_inexpensive_builtin (decl)) | ||
| 394 | cost = weights->target_builtin_call_cost; | ||
| 395 | else | ||
| 396 | cost = weights->call_cost; | ||
| 397 | |||
| 398 | - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | ||
| 399 | - switch (DECL_FUNCTION_CODE (decl)) | ||
| 400 | - { | ||
| 401 | - case BUILT_IN_CONSTANT_P: | ||
| 402 | - return 0; | ||
| 403 | - case BUILT_IN_EXPECT: | ||
| 404 | - return 0; | ||
| 405 | - | ||
| 406 | - /* Prefetch instruction is not expensive. */ | ||
| 407 | - case BUILT_IN_PREFETCH: | ||
| 408 | - cost = weights->target_builtin_call_cost; | ||
| 409 | - break; | ||
| 410 | - | ||
| 411 | - /* Exception state returns or moves registers around. */ | ||
| 412 | - case BUILT_IN_EH_FILTER: | ||
| 413 | - case BUILT_IN_EH_POINTER: | ||
| 414 | - case BUILT_IN_EH_COPY_VALUES: | ||
| 415 | - return 0; | ||
| 416 | - | ||
| 417 | - default: | ||
| 418 | - break; | ||
| 419 | - } | ||
| 420 | - | ||
| 421 | if (decl) | ||
| 422 | funtype = TREE_TYPE (decl); | ||
| 423 | |||
| 424 | |||
| 425 | === modified file 'gcc/tree-ssa-loop-ivopts.c' | ||
| 426 | --- old/gcc/tree-ssa-loop-ivopts.c 2010-04-01 15:18:07 +0000 | ||
| 427 | +++ new/gcc/tree-ssa-loop-ivopts.c 2010-08-02 13:51:23 +0000 | ||
| 428 | @@ -257,6 +257,9 @@ | ||
| 429 | |||
| 430 | /* Are we optimizing for speed? */ | ||
| 431 | bool speed; | ||
| 432 | + | ||
| 433 | + /* Whether the loop body includes any function calls. */ | ||
| 434 | + bool body_includes_call; | ||
| 435 | }; | ||
| 436 | |||
| 437 | /* An assignment of iv candidates to uses. */ | ||
| 438 | @@ -2926,6 +2929,20 @@ | ||
| 439 | return get_computation_at (loop, use, cand, use->stmt); | ||
| 440 | } | ||
| 441 | |||
| 442 | +/* Adjust the cost COST for being in loop setup rather than loop body. | ||
| 443 | + Return COST unchanged if it is INFTY or the loop is not optimized for | ||
| 444 | + speed; otherwise amortize it by dividing by AVG_LOOP_NITER. */ | ||
| 445 | +static unsigned | ||
| 446 | +adjust_setup_cost (struct ivopts_data *data, unsigned cost) | ||
| 447 | +{ | ||
| 448 | + if (cost == INFTY) | ||
| 449 | + return cost; | ||
| 450 | + else if (optimize_loop_for_speed_p (data->current_loop)) | ||
| 451 | + return cost / AVG_LOOP_NITER (data->current_loop); | ||
| 452 | + else | ||
| 453 | + return cost; | ||
| 454 | +} | ||
| 455 | + | ||
| 456 | /* Returns cost of addition in MODE. */ | ||
| 457 | |||
| 458 | static unsigned | ||
| 459 | @@ -3838,8 +3855,8 @@ | ||
| 460 | /* Symbol + offset should be compile-time computable so consider that they | ||
| 461 | are added once to the variable, if present. */ | ||
| 462 | if (var_present && (symbol_present || offset)) | ||
| 463 | - cost.cost += add_cost (TYPE_MODE (ctype), speed) | ||
| 464 | - / AVG_LOOP_NITER (data->current_loop); | ||
| 465 | + cost.cost += adjust_setup_cost (data, | ||
| 466 | + add_cost (TYPE_MODE (ctype), speed)); | ||
| 467 | |||
| 468 | /* Having offset does not affect runtime cost in case it is added to | ||
| 469 | symbol, but it increases complexity. */ | ||
| 470 | @@ -4104,7 +4121,7 @@ | ||
| 471 | elim_cost = force_var_cost (data, bound, &depends_on_elim); | ||
| 472 | /* The bound is a loop invariant, so it will be only computed | ||
| 473 | once. */ | ||
| 474 | - elim_cost.cost /= AVG_LOOP_NITER (data->current_loop); | ||
| 475 | + elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); | ||
| 476 | } | ||
| 477 | else | ||
| 478 | elim_cost = infinite_cost; | ||
| 479 | @@ -4351,7 +4368,7 @@ | ||
| 480 | cost_base = force_var_cost (data, base, NULL); | ||
| 481 | cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed); | ||
| 482 | |||
| 483 | - cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop); | ||
| 484 | + cost = cost_step + adjust_setup_cost (data, cost_base.cost); | ||
| 485 | |||
| 486 | /* Prefer the original ivs unless we may gain something by replacing it. | ||
| 487 | The reason is to make debugging simpler; so this is not relevant for | ||
| 488 | @@ -4404,7 +4421,8 @@ | ||
| 489 | { | ||
| 490 | /* We add size to the cost, so that we prefer eliminating ivs | ||
| 491 | if possible. */ | ||
| 492 | - return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed); | ||
| 493 | + return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, | ||
| 494 | + data->body_includes_call); | ||
| 495 | } | ||
| 496 | |||
| 497 | /* For each size of the induction variable set determine the penalty. */ | ||
| 498 | @@ -4419,30 +4437,11 @@ | ||
| 499 | struct loop *loop = data->current_loop; | ||
| 500 | bitmap_iterator bi; | ||
| 501 | |||
| 502 | - /* We use the following model (definitely improvable, especially the | ||
| 503 | - cost function -- TODO): | ||
| 504 | - | ||
| 505 | - We estimate the number of registers available (using MD data), name it A. | ||
| 506 | - | ||
| 507 | - We estimate the number of registers used by the loop, name it U. This | ||
| 508 | - number is obtained as the number of loop phi nodes (not counting virtual | ||
| 509 | - registers and bivs) + the number of variables from outside of the loop. | ||
| 510 | - | ||
| 511 | - We set a reserve R (free regs that are used for temporary computations, | ||
| 512 | - etc.). For now the reserve is a constant 3. | ||
| 513 | - | ||
| 514 | - Let I be the number of induction variables. | ||
| 515 | - | ||
| 516 | - -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage | ||
| 517 | - make a lot of ivs without a reason). | ||
| 518 | - -- if A - R < U + I <= A, the cost is I * PRES_COST | ||
| 519 | - -- if U + I > A, the cost is I * PRES_COST and | ||
| 520 | - number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */ | ||
| 521 | - | ||
| 522 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
| 523 | { | ||
| 524 | fprintf (dump_file, "Global costs:\n"); | ||
| 525 | fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); | ||
| 526 | + fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); | ||
| 527 | fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); | ||
| 528 | fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); | ||
| 529 | } | ||
| 530 | @@ -5062,11 +5061,13 @@ | ||
| 531 | } | ||
| 532 | |||
| 533 | /* Tries to extend the sets IVS in the best possible way in order | ||
| 534 | - to express the USE. */ | ||
| 535 | + to express the USE. If ORIGINALP is true, prefer candidates from | ||
| 536 | + the original set of IVs, otherwise favor important candidates not | ||
| 537 | + based on any memory object. */ | ||
| 538 | |||
| 539 | static bool | ||
| 540 | try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, | ||
| 541 | - struct iv_use *use) | ||
| 542 | + struct iv_use *use, bool originalp) | ||
| 543 | { | ||
| 544 | comp_cost best_cost, act_cost; | ||
| 545 | unsigned i; | ||
| 546 | @@ -5085,7 +5086,8 @@ | ||
| 547 | iv_ca_set_no_cp (data, ivs, use); | ||
| 548 | } | ||
| 549 | |||
| 550 | - /* First try important candidates not based on any memory object. Only if | ||
| 551 | + /* If ORIGINALP is true, try to find the original IV for the use. Otherwise | ||
| 552 | + first try important candidates not based on any memory object. Only if | ||
| 553 | this fails, try the specific ones. Rationale -- in loops with many | ||
| 554 | variables the best choice often is to use just one generic biv. If we | ||
| 555 | added here many ivs specific to the uses, the optimization algorithm later | ||
| 556 | @@ -5097,7 +5099,10 @@ | ||
| 557 | { | ||
| 558 | cand = iv_cand (data, i); | ||
| 559 | |||
| 560 | - if (cand->iv->base_object != NULL_TREE) | ||
| 561 | + if (originalp && cand->pos !=IP_ORIGINAL) | ||
| 562 | + continue; | ||
| 563 | + | ||
| 564 | + if (!originalp && cand->iv->base_object != NULL_TREE) | ||
| 565 | continue; | ||
| 566 | |||
| 567 | if (iv_ca_cand_used_p (ivs, cand)) | ||
| 568 | @@ -5133,8 +5138,13 @@ | ||
| 569 | continue; | ||
| 570 | |||
| 571 | /* Already tried this. */ | ||
| 572 | - if (cand->important && cand->iv->base_object == NULL_TREE) | ||
| 573 | - continue; | ||
| 574 | + if (cand->important) | ||
| 575 | + { | ||
| 576 | + if (originalp && cand->pos == IP_ORIGINAL) | ||
| 577 | + continue; | ||
| 578 | + if (!originalp && cand->iv->base_object == NULL_TREE) | ||
| 579 | + continue; | ||
| 580 | + } | ||
| 581 | |||
| 582 | if (iv_ca_cand_used_p (ivs, cand)) | ||
| 583 | continue; | ||
| 584 | @@ -5168,13 +5178,13 @@ | ||
| 585 | /* Finds an initial assignment of candidates to uses. */ | ||
| 586 | |||
| 587 | static struct iv_ca * | ||
| 588 | -get_initial_solution (struct ivopts_data *data) | ||
| 589 | +get_initial_solution (struct ivopts_data *data, bool originalp) | ||
| 590 | { | ||
| 591 | struct iv_ca *ivs = iv_ca_new (data); | ||
| 592 | unsigned i; | ||
| 593 | |||
| 594 | for (i = 0; i < n_iv_uses (data); i++) | ||
| 595 | - if (!try_add_cand_for (data, ivs, iv_use (data, i))) | ||
| 596 | + if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) | ||
| 597 | { | ||
| 598 | iv_ca_free (&ivs); | ||
| 599 | return NULL; | ||
| 600 | @@ -5246,14 +5256,12 @@ | ||
| 601 | solution and remove the unused ivs while this improves the cost. */ | ||
| 602 | |||
| 603 | static struct iv_ca * | ||
| 604 | -find_optimal_iv_set (struct ivopts_data *data) | ||
| 605 | +find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) | ||
| 606 | { | ||
| 607 | - unsigned i; | ||
| 608 | struct iv_ca *set; | ||
| 609 | - struct iv_use *use; | ||
| 610 | |||
| 611 | /* Get the initial solution. */ | ||
| 612 | - set = get_initial_solution (data); | ||
| 613 | + set = get_initial_solution (data, originalp); | ||
| 614 | if (!set) | ||
| 615 | { | ||
| 616 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
| 617 | @@ -5276,11 +5284,46 @@ | ||
| 618 | } | ||
| 619 | } | ||
| 620 | |||
| 621 | + return set; | ||
| 622 | +} | ||
| 623 | + | ||
| 624 | +static struct iv_ca * | ||
| 625 | +find_optimal_iv_set (struct ivopts_data *data) | ||
| 626 | +{ | ||
| 627 | + unsigned i; | ||
| 628 | + struct iv_ca *set, *origset; | ||
| 629 | + struct iv_use *use; | ||
| 630 | + comp_cost cost, origcost; | ||
| 631 | + | ||
| 632 | + /* Determine the cost based on a strategy that starts with original IVs, | ||
| 633 | + and try again using a strategy that prefers candidates not based | ||
| 634 | + on any memory object. */ | ||
| 635 | + origset = find_optimal_iv_set_1 (data, true); | ||
| 636 | + set = find_optimal_iv_set_1 (data, false); | ||
| 637 | + | ||
| 638 | + if (!origset && !set) | ||
| 639 | + return NULL; | ||
| 640 | + | ||
| 641 | + origcost = origset ? iv_ca_cost (origset) : infinite_cost; | ||
| 642 | + cost = set ? iv_ca_cost (set) : infinite_cost; | ||
| 643 | + | ||
| 644 | if (dump_file && (dump_flags & TDF_DETAILS)) | ||
| 645 | { | ||
| 646 | - comp_cost cost = iv_ca_cost (set); | ||
| 647 | - fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity); | ||
| 648 | - } | ||
| 649 | + fprintf (dump_file, "Original cost %d (complexity %d)\n\n", | ||
| 650 | + origcost.cost, origcost.complexity); | ||
| 651 | + fprintf (dump_file, "Final cost %d (complexity %d)\n\n", | ||
| 652 | + cost.cost, cost.complexity); | ||
| 653 | + } | ||
| 654 | + | ||
| 655 | + /* Choose the one with the best cost. */ | ||
| 656 | + if (compare_costs (origcost, cost) <= 0) | ||
| 657 | + { | ||
| 658 | + if (set) | ||
| 659 | + iv_ca_free (&set); | ||
| 660 | + set = origset; | ||
| 661 | + } | ||
| 662 | + else if (origset) | ||
| 663 | + iv_ca_free (&origset); | ||
| 664 | |||
| 665 | for (i = 0; i < n_iv_uses (data); i++) | ||
| 666 | { | ||
| 667 | @@ -5768,6 +5811,25 @@ | ||
| 668 | VEC_free (iv_cand_p, heap, data->iv_candidates); | ||
| 669 | } | ||
| 670 | |||
| 671 | +/* Returns true if the loop body BODY includes any call that is not an inexpensive builtin. */ | ||
| 672 | + | ||
| 673 | +static bool | ||
| 674 | +loop_body_includes_call (basic_block *body, unsigned num_nodes) | ||
| 675 | +{ | ||
| 676 | + gimple_stmt_iterator gsi; | ||
| 677 | + unsigned i; | ||
| 678 | + | ||
| 679 | + for (i = 0; i < num_nodes; i++) /* Scan the first NUM_NODES blocks of BODY. */ | ||
| 680 | + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | ||
| 681 | + { | ||
| 682 | + gimple stmt = gsi_stmt (gsi); | ||
| 683 | + if (is_gimple_call (stmt) | ||
| 684 | + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) | ||
| 685 | + return true; | ||
| 686 | + } | ||
| 687 | + return false; | ||
| 688 | +} | ||
| 689 | + | ||
| 690 | /* Optimizes the LOOP. Returns true if anything changed. */ | ||
| 691 | |||
| 692 | static bool | ||
| 693 | @@ -5799,6 +5861,7 @@ | ||
| 694 | } | ||
| 695 | |||
| 696 | body = get_loop_body (loop); | ||
| 697 | + data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); | ||
| 698 | renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); | ||
| 699 | free (body); | ||
| 700 | |||
| 701 | |||
| 702 | === modified file 'gcc/tree.h' | ||
| 703 | --- old/gcc/tree.h 2010-04-02 18:54:46 +0000 | ||
| 704 | +++ new/gcc/tree.h 2010-08-02 13:51:23 +0000 | ||
| 705 | @@ -4962,6 +4962,8 @@ | ||
| 706 | extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, | ||
| 707 | tree, tree); | ||
| 708 | extern void set_builtin_user_assembler_name (tree decl, const char *asmspec); | ||
| 709 | +extern bool is_simple_builtin (tree); | ||
| 710 | +extern bool is_inexpensive_builtin (tree); | ||
| 711 | |||
| 712 | /* In convert.c */ | ||
| 713 | extern tree strip_float_extensions (tree); | ||
| 714 | |||
