diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch | 948 |
1 files changed, 0 insertions, 948 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch deleted file mode 100644 index ade96fdd11..0000000000 --- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106802.patch +++ /dev/null | |||
@@ -1,948 +0,0 @@ | |||
1 | 2011-09-12 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF mainline: | ||
4 | |||
5 | 2011-08-30 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2, | ||
9 | b3 and b4 unsigned. | ||
10 | |||
11 | 2011-08-30 Andrew Stubbs <ams@codesourcery.com> | ||
12 | |||
13 | gcc/ | ||
14 | * config/arm/arm.c (arm_gen_constant): Set can_negate correctly | ||
15 | when code is SET. | ||
16 | |||
17 | 2011-08-26 Andrew Stubbs <ams@codesourcery.com> | ||
18 | |||
19 | gcc/ | ||
20 | * config/arm/arm.c (struct four_ints): New type. | ||
21 | (count_insns_for_constant): Delete function. | ||
22 | (find_best_start): Delete function. | ||
23 | (optimal_immediate_sequence): New function. | ||
24 | (optimal_immediate_sequence_1): New function. | ||
25 | (arm_gen_constant): Move constant splitting code to | ||
26 | optimal_immediate_sequence. | ||
27 | Rewrite constant negation/inversion code. | ||
28 | |||
29 | gcc/testsuite/ | ||
30 | * gcc.target/arm/thumb2-replicated-constant1.c: New file. | ||
31 | * gcc.target/arm/thumb2-replicated-constant2.c: New file. | ||
32 | * gcc.target/arm/thumb2-replicated-constant3.c: New file. | ||
33 | * gcc.target/arm/thumb2-replicated-constant4.c: New file. | ||
34 | |||
35 | 2011-08-26 Andrew Stubbs <ams@codesourcery.com> | ||
36 | |||
37 | gcc/ | ||
38 | * config/arm/arm-protos.h (const_ok_for_op): Add prototype. | ||
39 | * config/arm/arm.c (const_ok_for_op): Add support for addw/subw. | ||
40 | Remove prototype. Remove static function type. | ||
41 | * config/arm/arm.md (*arm_addsi3): Add addw/subw support. | ||
42 | Add arch attribute. | ||
43 | * config/arm/constraints.md (Pj, PJ): New constraints. | ||
44 | |||
45 | 2011-04-20 Andrew Stubbs <ams@codesourcery.com> | ||
46 | |||
47 | gcc/ | ||
48 | * config/arm/arm.c (arm_gen_constant): Move movw support .... | ||
49 | (const_ok_for_op): ... to here. | ||
50 | |||
51 | 2011-04-20 Andrew Stubbs <ams@codesourcery.com> | ||
52 | |||
53 | gcc/ | ||
54 | * config/arm/arm.c (arm_gen_constant): Remove redundant can_invert. | ||
55 | |||
56 | |||
57 | === modified file 'gcc/config/arm/arm-protos.h' | ||
58 | --- old/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 | ||
59 | +++ new/gcc/config/arm/arm-protos.h 2011-08-25 13:26:58 +0000 | ||
60 | @@ -46,6 +46,7 @@ | ||
61 | extern bool arm_small_register_classes_for_mode_p (enum machine_mode); | ||
62 | extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); | ||
63 | extern int const_ok_for_arm (HOST_WIDE_INT); | ||
64 | +extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); | ||
65 | extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, | ||
66 | HOST_WIDE_INT, rtx, rtx, int); | ||
67 | extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); | ||
68 | |||
69 | === modified file 'gcc/config/arm/arm.c' | ||
70 | --- old/gcc/config/arm/arm.c 2011-08-24 17:35:16 +0000 | ||
71 | +++ new/gcc/config/arm/arm.c 2011-09-06 12:57:56 +0000 | ||
72 | @@ -63,6 +63,11 @@ | ||
73 | |||
74 | void (*arm_lang_output_object_attributes_hook)(void); | ||
75 | |||
76 | +struct four_ints | ||
77 | +{ | ||
78 | + int i[4]; | ||
79 | +}; | ||
80 | + | ||
81 | /* Forward function declarations. */ | ||
82 | static bool arm_needs_doubleword_align (enum machine_mode, const_tree); | ||
83 | static int arm_compute_static_chain_stack_bytes (void); | ||
84 | @@ -81,7 +86,6 @@ | ||
85 | static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); | ||
86 | static int thumb_far_jump_used_p (void); | ||
87 | static bool thumb_force_lr_save (void); | ||
88 | -static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); | ||
89 | static rtx emit_sfm (int, int); | ||
90 | static unsigned arm_size_return_regs (void); | ||
91 | static bool arm_assemble_integer (rtx, unsigned int, int); | ||
92 | @@ -129,7 +133,13 @@ | ||
93 | static int arm_comp_type_attributes (const_tree, const_tree); | ||
94 | static void arm_set_default_type_attributes (tree); | ||
95 | static int arm_adjust_cost (rtx, rtx, rtx, int); | ||
96 | -static int count_insns_for_constant (HOST_WIDE_INT, int); | ||
97 | +static int optimal_immediate_sequence (enum rtx_code code, | ||
98 | + unsigned HOST_WIDE_INT val, | ||
99 | + struct four_ints *return_sequence); | ||
100 | +static int optimal_immediate_sequence_1 (enum rtx_code code, | ||
101 | + unsigned HOST_WIDE_INT val, | ||
102 | + struct four_ints *return_sequence, | ||
103 | + int i); | ||
104 | static int arm_get_strip_length (int); | ||
105 | static bool arm_function_ok_for_sibcall (tree, tree); | ||
106 | static enum machine_mode arm_promote_function_mode (const_tree, | ||
107 | @@ -2525,7 +2535,7 @@ | ||
108 | } | ||
109 | |||
110 | /* Return true if I is a valid constant for the operation CODE. */ | ||
111 | -static int | ||
112 | +int | ||
113 | const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) | ||
114 | { | ||
115 | if (const_ok_for_arm (i)) | ||
116 | @@ -2533,7 +2543,21 @@ | ||
117 | |||
118 | switch (code) | ||
119 | { | ||
120 | + case SET: | ||
121 | + /* See if we can use movw. */ | ||
122 | + if (arm_arch_thumb2 && (i & 0xffff0000) == 0) | ||
123 | + return 1; | ||
124 | + else | ||
125 | + return 0; | ||
126 | + | ||
127 | case PLUS: | ||
128 | + /* See if we can use addw or subw. */ | ||
129 | + if (TARGET_THUMB2 | ||
130 | + && ((i & 0xfffff000) == 0 | ||
131 | + || ((-i) & 0xfffff000) == 0)) | ||
132 | + return 1; | ||
133 | + /* else fall through. */ | ||
134 | + | ||
135 | case COMPARE: | ||
136 | case EQ: | ||
137 | case NE: | ||
138 | @@ -2649,68 +2673,41 @@ | ||
139 | 1); | ||
140 | } | ||
141 | |||
142 | -/* Return the number of instructions required to synthesize the given | ||
143 | - constant, if we start emitting them from bit-position I. */ | ||
144 | -static int | ||
145 | -count_insns_for_constant (HOST_WIDE_INT remainder, int i) | ||
146 | -{ | ||
147 | - HOST_WIDE_INT temp1; | ||
148 | - int step_size = TARGET_ARM ? 2 : 1; | ||
149 | - int num_insns = 0; | ||
150 | - | ||
151 | - gcc_assert (TARGET_ARM || i == 0); | ||
152 | - | ||
153 | - do | ||
154 | - { | ||
155 | - int end; | ||
156 | - | ||
157 | - if (i <= 0) | ||
158 | - i += 32; | ||
159 | - if (remainder & (((1 << step_size) - 1) << (i - step_size))) | ||
160 | - { | ||
161 | - end = i - 8; | ||
162 | - if (end < 0) | ||
163 | - end += 32; | ||
164 | - temp1 = remainder & ((0x0ff << end) | ||
165 | - | ((i < end) ? (0xff >> (32 - end)) : 0)); | ||
166 | - remainder &= ~temp1; | ||
167 | - num_insns++; | ||
168 | - i -= 8 - step_size; | ||
169 | - } | ||
170 | - i -= step_size; | ||
171 | - } while (remainder); | ||
172 | - return num_insns; | ||
173 | -} | ||
174 | - | ||
175 | -static int | ||
176 | -find_best_start (unsigned HOST_WIDE_INT remainder) | ||
177 | +/* Return a sequence of integers, in RETURN_SEQUENCE that fit into | ||
178 | + ARM/THUMB2 immediates, and add up to VAL. | ||
179 | + The function return value gives the number of insns required. */ | ||
180 | +static int | ||
181 | +optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, | ||
182 | + struct four_ints *return_sequence) | ||
183 | { | ||
184 | int best_consecutive_zeros = 0; | ||
185 | int i; | ||
186 | int best_start = 0; | ||
187 | + int insns1, insns2; | ||
188 | + struct four_ints tmp_sequence; | ||
189 | |||
190 | /* If we aren't targetting ARM, the best place to start is always at | ||
191 | - the bottom. */ | ||
192 | - if (! TARGET_ARM) | ||
193 | - return 0; | ||
194 | - | ||
195 | - for (i = 0; i < 32; i += 2) | ||
196 | + the bottom, otherwise look more closely. */ | ||
197 | + if (TARGET_ARM) | ||
198 | { | ||
199 | - int consecutive_zeros = 0; | ||
200 | - | ||
201 | - if (!(remainder & (3 << i))) | ||
202 | + for (i = 0; i < 32; i += 2) | ||
203 | { | ||
204 | - while ((i < 32) && !(remainder & (3 << i))) | ||
205 | - { | ||
206 | - consecutive_zeros += 2; | ||
207 | - i += 2; | ||
208 | - } | ||
209 | - if (consecutive_zeros > best_consecutive_zeros) | ||
210 | - { | ||
211 | - best_consecutive_zeros = consecutive_zeros; | ||
212 | - best_start = i - consecutive_zeros; | ||
213 | - } | ||
214 | - i -= 2; | ||
215 | + int consecutive_zeros = 0; | ||
216 | + | ||
217 | + if (!(val & (3 << i))) | ||
218 | + { | ||
219 | + while ((i < 32) && !(val & (3 << i))) | ||
220 | + { | ||
221 | + consecutive_zeros += 2; | ||
222 | + i += 2; | ||
223 | + } | ||
224 | + if (consecutive_zeros > best_consecutive_zeros) | ||
225 | + { | ||
226 | + best_consecutive_zeros = consecutive_zeros; | ||
227 | + best_start = i - consecutive_zeros; | ||
228 | + } | ||
229 | + i -= 2; | ||
230 | + } | ||
231 | } | ||
232 | } | ||
233 | |||
234 | @@ -2737,13 +2734,161 @@ | ||
235 | the constant starting from `best_start', and also starting from | ||
236 | zero (i.e. with bit 31 first to be output). If `best_start' doesn't | ||
237 | yield a shorter sequence, we may as well use zero. */ | ||
238 | + insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); | ||
239 | if (best_start != 0 | ||
240 | - && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) | ||
241 | - && (count_insns_for_constant (remainder, 0) <= | ||
242 | - count_insns_for_constant (remainder, best_start))) | ||
243 | - best_start = 0; | ||
244 | - | ||
245 | - return best_start; | ||
246 | + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) | ||
247 | + { | ||
248 | + insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); | ||
249 | + if (insns2 <= insns1) | ||
250 | + { | ||
251 | + *return_sequence = tmp_sequence; | ||
252 | + insns1 = insns2; | ||
253 | + } | ||
254 | + } | ||
255 | + | ||
256 | + return insns1; | ||
257 | +} | ||
258 | + | ||
259 | +/* As for optimal_immediate_sequence, but starting at bit-position I. */ | ||
260 | +static int | ||
261 | +optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, | ||
262 | + struct four_ints *return_sequence, int i) | ||
263 | +{ | ||
264 | + int remainder = val & 0xffffffff; | ||
265 | + int insns = 0; | ||
266 | + | ||
267 | + /* Try and find a way of doing the job in either two or three | ||
268 | + instructions. | ||
269 | + | ||
270 | + In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned | ||
271 | + location. We start at position I. This may be the MSB, or | ||
272 | + optimal_immediate_sequence may have positioned it at the largest block | ||
273 | + of zeros that are aligned on a 2-bit boundary. We then fill up the temps, | ||
274 | + wrapping around to the top of the word when we drop off the bottom. | ||
275 | + In the worst case this code should produce no more than four insns. | ||
276 | + | ||
277 | + In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit | ||
278 | + constants, shifted to any arbitrary location. We should always start | ||
279 | + at the MSB. */ | ||
280 | + do | ||
281 | + { | ||
282 | + int end; | ||
283 | + unsigned int b1, b2, b3, b4; | ||
284 | + unsigned HOST_WIDE_INT result; | ||
285 | + int loc; | ||
286 | + | ||
287 | + gcc_assert (insns < 4); | ||
288 | + | ||
289 | + if (i <= 0) | ||
290 | + i += 32; | ||
291 | + | ||
292 | + /* First, find the next normal 12/8-bit shifted/rotated immediate. */ | ||
293 | + if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) | ||
294 | + { | ||
295 | + loc = i; | ||
296 | + if (i <= 12 && TARGET_THUMB2 && code == PLUS) | ||
297 | + /* We can use addw/subw for the last 12 bits. */ | ||
298 | + result = remainder; | ||
299 | + else | ||
300 | + { | ||
301 | + /* Use an 8-bit shifted/rotated immediate. */ | ||
302 | + end = i - 8; | ||
303 | + if (end < 0) | ||
304 | + end += 32; | ||
305 | + result = remainder & ((0x0ff << end) | ||
306 | + | ((i < end) ? (0xff >> (32 - end)) | ||
307 | + : 0)); | ||
308 | + i -= 8; | ||
309 | + } | ||
310 | + } | ||
311 | + else | ||
312 | + { | ||
313 | + /* Arm allows rotates by a multiple of two. Thumb-2 allows | ||
314 | + arbitrary shifts. */ | ||
315 | + i -= TARGET_ARM ? 2 : 1; | ||
316 | + continue; | ||
317 | + } | ||
318 | + | ||
319 | + /* Next, see if we can do a better job with a thumb2 replicated | ||
320 | + constant. | ||
321 | + | ||
322 | + We do it this way around to catch the cases like 0x01F001E0 where | ||
323 | + two 8-bit immediates would work, but a replicated constant would | ||
324 | + make it worse. | ||
325 | + | ||
326 | + TODO: 16-bit constants that don't clear all the bits, but still win. | ||
327 | + TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ | ||
328 | + if (TARGET_THUMB2) | ||
329 | + { | ||
330 | + b1 = (remainder & 0xff000000) >> 24; | ||
331 | + b2 = (remainder & 0x00ff0000) >> 16; | ||
332 | + b3 = (remainder & 0x0000ff00) >> 8; | ||
333 | + b4 = remainder & 0xff; | ||
334 | + | ||
335 | + if (loc > 24) | ||
336 | + { | ||
337 | + /* The 8-bit immediate already found clears b1 (and maybe b2), | ||
338 | + but must leave b3 and b4 alone. */ | ||
339 | + | ||
340 | + /* First try to find a 32-bit replicated constant that clears | ||
341 | + almost everything. We can assume that we can't do it in one, | ||
342 | + or else we wouldn't be here. */ | ||
343 | + unsigned int tmp = b1 & b2 & b3 & b4; | ||
344 | + unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) | ||
345 | + + (tmp << 24); | ||
346 | + unsigned int matching_bytes = (tmp == b1) + (tmp == b2) | ||
347 | + + (tmp == b3) + (tmp == b4); | ||
348 | + if (tmp | ||
349 | + && (matching_bytes >= 3 | ||
350 | + || (matching_bytes == 2 | ||
351 | + && const_ok_for_op (remainder & ~tmp2, code)))) | ||
352 | + { | ||
353 | + /* At least 3 of the bytes match, and the fourth has at | ||
354 | + least as many bits set, or two of the bytes match | ||
355 | + and it will only require one more insn to finish. */ | ||
356 | + result = tmp2; | ||
357 | + i = tmp != b1 ? 32 | ||
358 | + : tmp != b2 ? 24 | ||
359 | + : tmp != b3 ? 16 | ||
360 | + : 8; | ||
361 | + } | ||
362 | + | ||
363 | + /* Second, try to find a 16-bit replicated constant that can | ||
364 | + leave three of the bytes clear. If b2 or b4 is already | ||
365 | + zero, then we can. If the 8-bit from above would not | ||
366 | + clear b2 anyway, then we still win. */ | ||
367 | + else if (b1 == b3 && (!b2 || !b4 | ||
368 | + || (remainder & 0x00ff0000 & ~result))) | ||
369 | + { | ||
370 | + result = remainder & 0xff00ff00; | ||
371 | + i = 24; | ||
372 | + } | ||
373 | + } | ||
374 | + else if (loc > 16) | ||
375 | + { | ||
376 | + /* The 8-bit immediate already found clears b2 (and maybe b3) | ||
377 | + and we don't get here unless b1 is already clear, but it will | ||
378 | + leave b4 unchanged. */ | ||
379 | + | ||
380 | + /* If we can clear b2 and b4 at once, then we win, since the | ||
381 | + 8-bits couldn't possibly reach that far. */ | ||
382 | + if (b2 == b4) | ||
383 | + { | ||
384 | + result = remainder & 0x00ff00ff; | ||
385 | + i = 16; | ||
386 | + } | ||
387 | + } | ||
388 | + } | ||
389 | + | ||
390 | + return_sequence->i[insns++] = result; | ||
391 | + remainder &= ~result; | ||
392 | + | ||
393 | + if (code == SET || code == MINUS) | ||
394 | + code = PLUS; | ||
395 | + } | ||
396 | + while (remainder); | ||
397 | + | ||
398 | + return insns; | ||
399 | } | ||
400 | |||
401 | /* Emit an instruction with the indicated PATTERN. If COND is | ||
402 | @@ -2760,7 +2905,6 @@ | ||
403 | |||
404 | /* As above, but extra parameter GENERATE which, if clear, suppresses | ||
405 | RTL generation. */ | ||
406 | -/* ??? This needs more work for thumb2. */ | ||
407 | |||
408 | static int | ||
409 | arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, | ||
410 | @@ -2772,15 +2916,15 @@ | ||
411 | int final_invert = 0; | ||
412 | int can_negate_initial = 0; | ||
413 | int i; | ||
414 | - int num_bits_set = 0; | ||
415 | int set_sign_bit_copies = 0; | ||
416 | int clear_sign_bit_copies = 0; | ||
417 | int clear_zero_bit_copies = 0; | ||
418 | int set_zero_bit_copies = 0; | ||
419 | - int insns = 0; | ||
420 | + int insns = 0, neg_insns, inv_insns; | ||
421 | unsigned HOST_WIDE_INT temp1, temp2; | ||
422 | unsigned HOST_WIDE_INT remainder = val & 0xffffffff; | ||
423 | - int step_size = TARGET_ARM ? 2 : 1; | ||
424 | + struct four_ints *immediates; | ||
425 | + struct four_ints pos_immediates, neg_immediates, inv_immediates; | ||
426 | |||
427 | /* Find out which operations are safe for a given CODE. Also do a quick | ||
428 | check for degenerate cases; these can occur when DImode operations | ||
429 | @@ -2789,7 +2933,6 @@ | ||
430 | { | ||
431 | case SET: | ||
432 | can_invert = 1; | ||
433 | - can_negate = 1; | ||
434 | break; | ||
435 | |||
436 | case PLUS: | ||
437 | @@ -2817,9 +2960,6 @@ | ||
438 | gen_rtx_SET (VOIDmode, target, source)); | ||
439 | return 1; | ||
440 | } | ||
441 | - | ||
442 | - if (TARGET_THUMB2) | ||
443 | - can_invert = 1; | ||
444 | break; | ||
445 | |||
446 | case AND: | ||
447 | @@ -2861,6 +3001,7 @@ | ||
448 | gen_rtx_NOT (mode, source))); | ||
449 | return 1; | ||
450 | } | ||
451 | + final_invert = 1; | ||
452 | break; | ||
453 | |||
454 | case MINUS: | ||
455 | @@ -2883,7 +3024,6 @@ | ||
456 | source))); | ||
457 | return 1; | ||
458 | } | ||
459 | - can_negate = 1; | ||
460 | |||
461 | break; | ||
462 | |||
463 | @@ -2892,9 +3032,7 @@ | ||
464 | } | ||
465 | |||
466 | /* If we can do it in one insn get out quickly. */ | ||
467 | - if (const_ok_for_arm (val) | ||
468 | - || (can_negate_initial && const_ok_for_arm (-val)) | ||
469 | - || (can_invert && const_ok_for_arm (~val))) | ||
470 | + if (const_ok_for_op (val, code)) | ||
471 | { | ||
472 | if (generate) | ||
473 | emit_constant_insn (cond, | ||
474 | @@ -2947,15 +3085,6 @@ | ||
475 | switch (code) | ||
476 | { | ||
477 | case SET: | ||
478 | - /* See if we can use movw. */ | ||
479 | - if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0) | ||
480 | - { | ||
481 | - if (generate) | ||
482 | - emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, | ||
483 | - GEN_INT (val))); | ||
484 | - return 1; | ||
485 | - } | ||
486 | - | ||
487 | /* See if we can do this by sign_extending a constant that is known | ||
488 | to be negative. This is a good, way of doing it, since the shift | ||
489 | may well merge into a subsequent insn. */ | ||
490 | @@ -3306,121 +3435,97 @@ | ||
491 | break; | ||
492 | } | ||
493 | |||
494 | - for (i = 0; i < 32; i++) | ||
495 | - if (remainder & (1 << i)) | ||
496 | - num_bits_set++; | ||
497 | - | ||
498 | - if ((code == AND) | ||
499 | - || (code != IOR && can_invert && num_bits_set > 16)) | ||
500 | - remainder ^= 0xffffffff; | ||
501 | - else if (code == PLUS && num_bits_set > 16) | ||
502 | - remainder = (-remainder) & 0xffffffff; | ||
503 | - | ||
504 | - /* For XOR, if more than half the bits are set and there's a sequence | ||
505 | - of more than 8 consecutive ones in the pattern then we can XOR by the | ||
506 | - inverted constant and then invert the final result; this may save an | ||
507 | - instruction and might also lead to the final mvn being merged with | ||
508 | - some other operation. */ | ||
509 | - else if (code == XOR && num_bits_set > 16 | ||
510 | - && (count_insns_for_constant (remainder ^ 0xffffffff, | ||
511 | - find_best_start | ||
512 | - (remainder ^ 0xffffffff)) | ||
513 | - < count_insns_for_constant (remainder, | ||
514 | - find_best_start (remainder)))) | ||
515 | - { | ||
516 | - remainder ^= 0xffffffff; | ||
517 | - final_invert = 1; | ||
518 | + /* Calculate what the instruction sequences would be if we generated it | ||
519 | + normally, negated, or inverted. */ | ||
520 | + if (code == AND) | ||
521 | + /* AND cannot be split into multiple insns, so invert and use BIC. */ | ||
522 | + insns = 99; | ||
523 | + else | ||
524 | + insns = optimal_immediate_sequence (code, remainder, &pos_immediates); | ||
525 | + | ||
526 | + if (can_negate) | ||
527 | + neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, | ||
528 | + &neg_immediates); | ||
529 | + else | ||
530 | + neg_insns = 99; | ||
531 | + | ||
532 | + if (can_invert || final_invert) | ||
533 | + inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, | ||
534 | + &inv_immediates); | ||
535 | + else | ||
536 | + inv_insns = 99; | ||
537 | + | ||
538 | + immediates = &pos_immediates; | ||
539 | + | ||
540 | + /* Is the negated immediate sequence more efficient? */ | ||
541 | + if (neg_insns < insns && neg_insns <= inv_insns) | ||
542 | + { | ||
543 | + insns = neg_insns; | ||
544 | + immediates = &neg_immediates; | ||
545 | + } | ||
546 | + else | ||
547 | + can_negate = 0; | ||
548 | + | ||
549 | + /* Is the inverted immediate sequence more efficient? | ||
550 | + We must allow for an extra NOT instruction for XOR operations, although | ||
551 | + there is some chance that the final 'mvn' will get optimized later. */ | ||
552 | + if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) | ||
553 | + { | ||
554 | + insns = inv_insns; | ||
555 | + immediates = &inv_immediates; | ||
556 | } | ||
557 | else | ||
558 | { | ||
559 | can_invert = 0; | ||
560 | - can_negate = 0; | ||
561 | + final_invert = 0; | ||
562 | } | ||
563 | |||
564 | - /* Now try and find a way of doing the job in either two or three | ||
565 | - instructions. | ||
566 | - We start by looking for the largest block of zeros that are aligned on | ||
567 | - a 2-bit boundary, we then fill up the temps, wrapping around to the | ||
568 | - top of the word when we drop off the bottom. | ||
569 | - In the worst case this code should produce no more than four insns. | ||
570 | - Thumb-2 constants are shifted, not rotated, so the MSB is always the | ||
571 | - best place to start. */ | ||
572 | - | ||
573 | - /* ??? Use thumb2 replicated constants when the high and low halfwords are | ||
574 | - the same. */ | ||
575 | - { | ||
576 | - /* Now start emitting the insns. */ | ||
577 | - i = find_best_start (remainder); | ||
578 | - do | ||
579 | - { | ||
580 | - int end; | ||
581 | - | ||
582 | - if (i <= 0) | ||
583 | - i += 32; | ||
584 | - if (remainder & (3 << (i - 2))) | ||
585 | - { | ||
586 | - end = i - 8; | ||
587 | - if (end < 0) | ||
588 | - end += 32; | ||
589 | - temp1 = remainder & ((0x0ff << end) | ||
590 | - | ((i < end) ? (0xff >> (32 - end)) : 0)); | ||
591 | - remainder &= ~temp1; | ||
592 | - | ||
593 | - if (generate) | ||
594 | - { | ||
595 | - rtx new_src, temp1_rtx; | ||
596 | - | ||
597 | - if (code == SET || code == MINUS) | ||
598 | - { | ||
599 | - new_src = (subtargets ? gen_reg_rtx (mode) : target); | ||
600 | - if (can_invert && code != MINUS) | ||
601 | - temp1 = ~temp1; | ||
602 | - } | ||
603 | - else | ||
604 | - { | ||
605 | - if ((final_invert || remainder) && subtargets) | ||
606 | - new_src = gen_reg_rtx (mode); | ||
607 | - else | ||
608 | - new_src = target; | ||
609 | - if (can_invert) | ||
610 | - temp1 = ~temp1; | ||
611 | - else if (can_negate) | ||
612 | - temp1 = -temp1; | ||
613 | - } | ||
614 | - | ||
615 | - temp1 = trunc_int_for_mode (temp1, mode); | ||
616 | - temp1_rtx = GEN_INT (temp1); | ||
617 | - | ||
618 | - if (code == SET) | ||
619 | - ; | ||
620 | - else if (code == MINUS) | ||
621 | - temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); | ||
622 | - else | ||
623 | - temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); | ||
624 | - | ||
625 | - emit_constant_insn (cond, | ||
626 | - gen_rtx_SET (VOIDmode, new_src, | ||
627 | - temp1_rtx)); | ||
628 | - source = new_src; | ||
629 | - } | ||
630 | - | ||
631 | - if (code == SET) | ||
632 | - { | ||
633 | - can_invert = 0; | ||
634 | - code = PLUS; | ||
635 | - } | ||
636 | - else if (code == MINUS) | ||
637 | + /* Now output the chosen sequence as instructions. */ | ||
638 | + if (generate) | ||
639 | + { | ||
640 | + for (i = 0; i < insns; i++) | ||
641 | + { | ||
642 | + rtx new_src, temp1_rtx; | ||
643 | + | ||
644 | + temp1 = immediates->i[i]; | ||
645 | + | ||
646 | + if (code == SET || code == MINUS) | ||
647 | + new_src = (subtargets ? gen_reg_rtx (mode) : target); | ||
648 | + else if ((final_invert || i < (insns - 1)) && subtargets) | ||
649 | + new_src = gen_reg_rtx (mode); | ||
650 | + else | ||
651 | + new_src = target; | ||
652 | + | ||
653 | + if (can_invert) | ||
654 | + temp1 = ~temp1; | ||
655 | + else if (can_negate) | ||
656 | + temp1 = -temp1; | ||
657 | + | ||
658 | + temp1 = trunc_int_for_mode (temp1, mode); | ||
659 | + temp1_rtx = GEN_INT (temp1); | ||
660 | + | ||
661 | + if (code == SET) | ||
662 | + ; | ||
663 | + else if (code == MINUS) | ||
664 | + temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); | ||
665 | + else | ||
666 | + temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); | ||
667 | + | ||
668 | + emit_constant_insn (cond, | ||
669 | + gen_rtx_SET (VOIDmode, new_src, | ||
670 | + temp1_rtx)); | ||
671 | + source = new_src; | ||
672 | + | ||
673 | + if (code == SET) | ||
674 | + { | ||
675 | + can_negate = can_invert; | ||
676 | + can_invert = 0; | ||
677 | code = PLUS; | ||
678 | - | ||
679 | - insns++; | ||
680 | - i -= 8 - step_size; | ||
681 | - } | ||
682 | - /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary | ||
683 | - shifts. */ | ||
684 | - i -= step_size; | ||
685 | - } | ||
686 | - while (remainder); | ||
687 | - } | ||
688 | + } | ||
689 | + else if (code == MINUS) | ||
690 | + code = PLUS; | ||
691 | + } | ||
692 | + } | ||
693 | |||
694 | if (final_invert) | ||
695 | { | ||
696 | |||
697 | === modified file 'gcc/config/arm/arm.md' | ||
698 | --- old/gcc/config/arm/arm.md 2011-08-25 11:42:09 +0000 | ||
699 | +++ new/gcc/config/arm/arm.md 2011-08-25 13:26:58 +0000 | ||
700 | @@ -701,21 +701,24 @@ | ||
701 | ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will | ||
702 | ;; put the duplicated register first, and not try the commutative version. | ||
703 | (define_insn_and_split "*arm_addsi3" | ||
704 | - [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r") | ||
705 | - (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk") | ||
706 | - (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))] | ||
707 | + [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r") | ||
708 | + (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk") | ||
709 | + (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))] | ||
710 | "TARGET_32BIT" | ||
711 | "@ | ||
712 | add%?\\t%0, %1, %2 | ||
713 | add%?\\t%0, %1, %2 | ||
714 | add%?\\t%0, %2, %1 | ||
715 | - sub%?\\t%0, %1, #%n2 | ||
716 | - sub%?\\t%0, %1, #%n2 | ||
717 | + addw%?\\t%0, %1, %2 | ||
718 | + addw%?\\t%0, %1, %2 | ||
719 | + sub%?\\t%0, %1, #%n2 | ||
720 | + sub%?\\t%0, %1, #%n2 | ||
721 | + subw%?\\t%0, %1, #%n2 | ||
722 | + subw%?\\t%0, %1, #%n2 | ||
723 | #" | ||
724 | "TARGET_32BIT | ||
725 | && GET_CODE (operands[2]) == CONST_INT | ||
726 | - && !(const_ok_for_arm (INTVAL (operands[2])) | ||
727 | - || const_ok_for_arm (-INTVAL (operands[2]))) | ||
728 | + && !const_ok_for_op (INTVAL (operands[2]), PLUS) | ||
729 | && (reload_completed || !arm_eliminable_register (operands[1]))" | ||
730 | [(clobber (const_int 0))] | ||
731 | " | ||
732 | @@ -724,8 +727,9 @@ | ||
733 | operands[1], 0); | ||
734 | DONE; | ||
735 | " | ||
736 | - [(set_attr "length" "4,4,4,4,4,16") | ||
737 | - (set_attr "predicable" "yes")] | ||
738 | + [(set_attr "length" "4,4,4,4,4,4,4,4,4,16") | ||
739 | + (set_attr "predicable" "yes") | ||
740 | + (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")] | ||
741 | ) | ||
742 | |||
743 | (define_insn_and_split "*thumb1_addsi3" | ||
744 | |||
745 | === modified file 'gcc/config/arm/constraints.md' | ||
746 | --- old/gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000 | ||
747 | +++ new/gcc/config/arm/constraints.md 2011-08-25 13:26:58 +0000 | ||
748 | @@ -31,7 +31,7 @@ | ||
749 | ;; The following multi-letter normal constraints have been used: | ||
750 | ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz | ||
751 | ;; in Thumb-1 state: Pa, Pb, Pc, Pd | ||
752 | -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px | ||
753 | +;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px | ||
754 | |||
755 | ;; The following memory constraints have been used: | ||
756 | ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us | ||
757 | @@ -74,6 +74,18 @@ | ||
758 | (and (match_code "const_int") | ||
759 | (match_test "(ival & 0xffff0000) == 0"))))) | ||
760 | |||
761 | +(define_constraint "Pj" | ||
762 | + "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)" | ||
763 | + (and (match_code "const_int") | ||
764 | + (and (match_test "TARGET_THUMB2") | ||
765 | + (match_test "(ival & 0xfffff000) == 0")))) | ||
766 | + | ||
767 | +(define_constraint "PJ" | ||
768 | + "@internal A constant that satisfies the Pj constraint if negated." | ||
769 | + (and (match_code "const_int") | ||
770 | + (and (match_test "TARGET_THUMB2") | ||
771 | + (match_test "((-ival) & 0xfffff000) == 0")))) | ||
772 | + | ||
773 | (define_register_constraint "k" "STACK_REG" | ||
774 | "@internal The stack register.") | ||
775 | |||
776 | |||
777 | === added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c' | ||
778 | --- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000 | ||
779 | +++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-08-25 13:31:00 +0000 | ||
780 | @@ -0,0 +1,27 @@ | ||
781 | +/* Ensure simple replicated constant immediates work. */ | ||
782 | +/* { dg-options "-mthumb -O2" } */ | ||
783 | +/* { dg-require-effective-target arm_thumb2_ok } */ | ||
784 | + | ||
785 | +int | ||
786 | +foo1 (int a) | ||
787 | +{ | ||
788 | + return a + 0xfefefefe; | ||
789 | +} | ||
790 | + | ||
791 | +/* { dg-final { scan-assembler "add.*#-16843010" } } */ | ||
792 | + | ||
793 | +int | ||
794 | +foo2 (int a) | ||
795 | +{ | ||
796 | + return a - 0xab00ab00; | ||
797 | +} | ||
798 | + | ||
799 | +/* { dg-final { scan-assembler "sub.*#-1426019584" } } */ | ||
800 | + | ||
801 | +int | ||
802 | +foo3 (int a) | ||
803 | +{ | ||
804 | + return a & 0x00cd00cd; | ||
805 | +} | ||
806 | + | ||
807 | +/* { dg-final { scan-assembler "and.*#13435085" } } */ | ||
808 | |||
809 | === added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c' | ||
810 | --- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000 | ||
811 | +++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-08-25 13:31:00 +0000 | ||
812 | @@ -0,0 +1,75 @@ | ||
813 | +/* Ensure split constants can use replicated patterns. */ | ||
814 | +/* { dg-options "-mthumb -O2" } */ | ||
815 | +/* { dg-require-effective-target arm_thumb2_ok } */ | ||
816 | + | ||
817 | +int | ||
818 | +foo1 (int a) | ||
819 | +{ | ||
820 | + return a + 0xfe00fe01; | ||
821 | +} | ||
822 | + | ||
823 | +/* { dg-final { scan-assembler "add.*#-33489408" } } */ | ||
824 | +/* { dg-final { scan-assembler "add.*#1" } } */ | ||
825 | + | ||
826 | +int | ||
827 | +foo2 (int a) | ||
828 | +{ | ||
829 | + return a + 0xdd01dd00; | ||
830 | +} | ||
831 | + | ||
832 | +/* { dg-final { scan-assembler "add.*#-587145984" } } */ | ||
833 | +/* { dg-final { scan-assembler "add.*#65536" } } */ | ||
834 | + | ||
835 | +int | ||
836 | +foo3 (int a) | ||
837 | +{ | ||
838 | + return a + 0x00443344; | ||
839 | +} | ||
840 | + | ||
841 | +/* { dg-final { scan-assembler "add.*#4456516" } } */ | ||
842 | +/* { dg-final { scan-assembler "add.*#13056" } } */ | ||
843 | + | ||
844 | +int | ||
845 | +foo4 (int a) | ||
846 | +{ | ||
847 | + return a + 0x77330033; | ||
848 | +} | ||
849 | + | ||
850 | +/* { dg-final { scan-assembler "add.*#1996488704" } } */ | ||
851 | +/* { dg-final { scan-assembler "add.*#3342387" } } */ | ||
852 | + | ||
853 | +int | ||
854 | +foo5 (int a) | ||
855 | +{ | ||
856 | + return a + 0x11221122; | ||
857 | +} | ||
858 | + | ||
859 | +/* { dg-final { scan-assembler "add.*#285217024" } } */ | ||
860 | +/* { dg-final { scan-assembler "add.*#2228258" } } */ | ||
861 | + | ||
862 | +int | ||
863 | +foo6 (int a) | ||
864 | +{ | ||
865 | + return a + 0x66666677; | ||
866 | +} | ||
867 | + | ||
868 | +/* { dg-final { scan-assembler "add.*#1717986918" } } */ | ||
869 | +/* { dg-final { scan-assembler "add.*#17" } } */ | ||
870 | + | ||
871 | +int | ||
872 | +foo7 (int a) | ||
873 | +{ | ||
874 | + return a + 0x99888888; | ||
875 | +} | ||
876 | + | ||
877 | +/* { dg-final { scan-assembler "add.*#-2004318072" } } */ | ||
878 | +/* { dg-final { scan-assembler "add.*#285212672" } } */ | ||
879 | + | ||
880 | +int | ||
881 | +foo8 (int a) | ||
882 | +{ | ||
883 | + return a + 0xdddddfff; | ||
884 | +} | ||
885 | + | ||
886 | +/* { dg-final { scan-assembler "add.*#-572662307" } } */ | ||
887 | +/* { dg-final { scan-assembler "addw.*#546" } } */ | ||
888 | |||
889 | === added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c' | ||
890 | --- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000 | ||
891 | +++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-08-25 13:31:00 +0000 | ||
892 | @@ -0,0 +1,28 @@ | ||
893 | +/* Ensure negated/inverted replicated constant immediates work. */ | ||
894 | +/* { dg-options "-mthumb -O2" } */ | ||
895 | +/* { dg-require-effective-target arm_thumb2_ok } */ | ||
896 | + | ||
897 | +int | ||
898 | +foo1 (int a) | ||
899 | +{ | ||
900 | + return a | 0xffffff00; | ||
901 | +} | ||
902 | + | ||
903 | +/* { dg-final { scan-assembler "orn.*#255" } } */ | ||
904 | + | ||
905 | +int | ||
906 | +foo2 (int a) | ||
907 | +{ | ||
908 | + return a & 0xffeeffee; | ||
909 | +} | ||
910 | + | ||
911 | +/* { dg-final { scan-assembler "bic.*#1114129" } } */ | ||
912 | + | ||
913 | +int | ||
914 | +foo3 (int a) | ||
915 | +{ | ||
916 | + return a & 0xaaaaaa00; | ||
917 | +} | ||
918 | + | ||
919 | +/* { dg-final { scan-assembler "and.*#-1431655766" } } */ | ||
920 | +/* { dg-final { scan-assembler "bic.*#170" } } */ | ||
921 | |||
922 | === added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c' | ||
923 | --- old/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000 | ||
924 | +++ new/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-08-25 13:31:00 +0000 | ||
925 | @@ -0,0 +1,22 @@ | ||
926 | +/* Ensure replicated constants don't make things worse. */ | ||
927 | +/* { dg-options "-mthumb -O2" } */ | ||
928 | +/* { dg-require-effective-target arm_thumb2_ok } */ | ||
929 | + | ||
930 | +int | ||
931 | +foo1 (int a) | ||
932 | +{ | ||
933 | + /* It might be tempting to use 0x01000100, but it wouldn't help. */ | ||
934 | + return a + 0x01f001e0; | ||
935 | +} | ||
936 | + | ||
937 | +/* { dg-final { scan-assembler "add.*#32505856" } } */ | ||
938 | +/* { dg-final { scan-assembler "add.*#480" } } */ | ||
939 | + | ||
940 | +int | ||
941 | +foo2 (int a) | ||
942 | +{ | ||
943 | + return a + 0x0f100e10; | ||
944 | +} | ||
945 | + | ||
946 | +/* { dg-final { scan-assembler "add.*#252706816" } } */ | ||
947 | +/* { dg-final { scan-assembler "add.*#3600" } } */ | ||
948 | |||