diff options
Diffstat (limited to 'meta-oe/recipes-devtools')
20 files changed, 5845 insertions, 1 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch new file mode 100644 index 000000000..c51576794 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106759.patch | |||
@@ -0,0 +1,545 @@ | |||
1 | 2011-06-20 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
2 | |||
3 | Backport from mainline. | ||
4 | 2011-06-03 Julian Brown <julian@codesourcery.com> | ||
5 | |||
6 | * config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100) | ||
7 | (strongarm1110): Use strongarm tuning. | ||
8 | * config/arm/arm-protos.h (tune_params): Add max_insns_skipped | ||
9 | field. | ||
10 | * config/arm/arm.c (arm_strongarm_tune): New. | ||
11 | (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) | ||
12 | (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune) | ||
13 | (arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field | ||
14 | setting, using previous defaults or 1 for Cortex-A5. | ||
15 | (arm_option_override): Set max_insns_skipped from current tuning. | ||
16 | |||
17 | 2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
18 | |||
19 | Backport from mainline. | ||
20 | 2011-06-02 Julian Brown <julian@codesourcery.com> | ||
21 | |||
22 | * config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning. | ||
23 | * config/arm/arm.c (arm_cortex_a5_branch_cost): New. | ||
24 | (arm_cortex_a5_tune): New. | ||
25 | |||
26 | 2011-06-02 Julian Brown <julian@codesourcery.com> | ||
27 | |||
28 | * config/arm/arm-protos.h (tune_params): Add branch_cost hook. | ||
29 | * config/arm/arm.c (arm_default_branch_cost): New. | ||
30 | (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune) | ||
31 | (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune) | ||
32 | (arm_fa726te_tune): Set branch_cost field using | ||
33 | arm_default_branch_cost. | ||
34 | * config/arm/arm.h (BRANCH_COST): Use branch_cost hook from | ||
35 | current_tune structure. | ||
36 | * dojump.c (tm_p.h): Include file. | ||
37 | |||
38 | 2011-06-02 Julian Brown <julian@codesourcery.com> | ||
39 | |||
40 | * config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2 | ||
41 | tuning. | ||
42 | (cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4) | ||
43 | (cortex-m3, cortex-m1, cortex-m0): Use cortex tuning. | ||
44 | * config/arm/arm-protos.h (tune_params): Add prefer_constant_pool | ||
45 | field. | ||
46 | * config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune) | ||
47 | (arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune) | ||
48 | (arm_fa726te_tune): Add prefer_constant_pool setting. | ||
49 | (arm_v6t2_tune, arm_cortex_tune): New. | ||
50 | * config/arm/arm.h (TARGET_USE_MOVT): Make dependent on | ||
51 | prefer_constant_pool setting. | ||
52 | |||
53 | 2011-06-14 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
54 | |||
55 | Backport from mainline | ||
56 | 2011-06-01 Paul Brook <paul@codesourcery.com> | ||
57 | |||
58 | * config/arm/arm-cores.def: Add cortex-r5. Add DIV flags to | ||
59 | Cortex-A15. | ||
60 | * config/arm/arm-tune.md: Regenerate. | ||
61 | * config/arm/arm.c (FL_DIV): Rename... | ||
62 | (FL_THUMB_DIV): ... to this. | ||
63 | (FL_ARM_DIV): Define. | ||
64 | (FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV. | ||
65 | (arm_arch_hwdiv): Remove. | ||
66 | (arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables. | ||
67 | (arm_issue_rate): Add cortexr5. | ||
68 | * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set | ||
69 | __ARM_ARCH_EXT_IDIV__. | ||
70 | (TARGET_IDIV): Define. | ||
71 | (arm_arch_hwdiv): Remove. | ||
72 | (arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes. | ||
73 | * config/arm/arm.md (tune_cortexr4): Add cortexr5. | ||
74 | (divsi3, udivsi3): New patterns. | ||
75 | * config/arm/thumb2.md (divsi3, udivsi3): Remove. | ||
76 | * doc/invoke.texi: Document ARM -mcpu=cortex-r5 | ||
77 | |||
78 | === modified file 'gcc/config/arm/arm-cores.def' | ||
79 | --- old/gcc/config/arm/arm-cores.def 2011-01-03 20:52:22 +0000 | ||
80 | +++ new/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 | ||
81 | @@ -70,10 +70,10 @@ | ||
82 | /* V4 Architecture Processors */ | ||
83 | ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) | ||
84 | ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) | ||
85 | -ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) | ||
86 | -ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) | ||
87 | -ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) | ||
88 | -ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) | ||
89 | +ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) | ||
90 | +ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) | ||
91 | +ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) | ||
92 | +ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) | ||
93 | ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) | ||
94 | ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) | ||
95 | |||
96 | @@ -122,15 +122,16 @@ | ||
97 | ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) | ||
98 | ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) | ||
99 | ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) | ||
100 | -ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) | ||
101 | -ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) | ||
102 | -ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) | ||
103 | -ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) | ||
104 | +ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) | ||
105 | +ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) | ||
106 | +ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) | ||
107 | +ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) | ||
108 | ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) | ||
109 | -ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) | ||
110 | -ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) | ||
111 | -ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) | ||
112 | -ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) | ||
113 | -ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) | ||
114 | -ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) | ||
115 | -ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) | ||
116 | +ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) | ||
117 | +ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) | ||
118 | +ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) | ||
119 | +ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) | ||
120 | +ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) | ||
121 | +ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) | ||
122 | +ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) | ||
123 | +ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) | ||
124 | |||
125 | === modified file 'gcc/config/arm/arm-protos.h' | ||
126 | --- old/gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000 | ||
127 | +++ new/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 | ||
128 | @@ -219,9 +219,14 @@ | ||
129 | bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); | ||
130 | bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); | ||
131 | int constant_limit; | ||
132 | + /* Maximum number of instructions to conditionalise in | ||
133 | + arm_final_prescan_insn. */ | ||
134 | + int max_insns_skipped; | ||
135 | int num_prefetch_slots; | ||
136 | int l1_cache_size; | ||
137 | int l1_cache_line_size; | ||
138 | + bool prefer_constant_pool; | ||
139 | + int (*branch_cost) (bool, bool); | ||
140 | }; | ||
141 | |||
142 | extern const struct tune_params *current_tune; | ||
143 | |||
144 | === modified file 'gcc/config/arm/arm-tune.md' | ||
145 | --- old/gcc/config/arm/arm-tune.md 2010-12-20 17:48:51 +0000 | ||
146 | +++ new/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 | ||
147 | @@ -1,5 +1,5 @@ | ||
148 | ;; -*- buffer-read-only: t -*- | ||
149 | ;; Generated automatically by gentune.sh from arm-cores.def | ||
150 | (define_attr "tune" | ||
151 | - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" | ||
152 | + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" | ||
153 | (const (symbol_ref "((enum attr_tune) arm_tune)"))) | ||
154 | |||
155 | === modified file 'gcc/config/arm/arm.c' | ||
156 | --- old/gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000 | ||
157 | +++ new/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000 | ||
158 | @@ -255,6 +255,8 @@ | ||
159 | static void arm_conditional_register_usage (void); | ||
160 | static reg_class_t arm_preferred_rename_class (reg_class_t rclass); | ||
161 | static unsigned int arm_autovectorize_vector_sizes (void); | ||
162 | +static int arm_default_branch_cost (bool, bool); | ||
163 | +static int arm_cortex_a5_branch_cost (bool, bool); | ||
164 | |||
165 | |||
166 | /* Table of machine attributes. */ | ||
167 | @@ -672,12 +674,13 @@ | ||
168 | #define FL_THUMB2 (1 << 16) /* Thumb-2. */ | ||
169 | #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' | ||
170 | profile. */ | ||
171 | -#define FL_DIV (1 << 18) /* Hardware divide. */ | ||
172 | +#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ | ||
173 | #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ | ||
174 | #define FL_NEON (1 << 20) /* Neon instructions. */ | ||
175 | #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M | ||
176 | architecture. */ | ||
177 | #define FL_ARCH7 (1 << 22) /* Architecture 7. */ | ||
178 | +#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ | ||
179 | |||
180 | #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ | ||
181 | |||
182 | @@ -704,8 +707,8 @@ | ||
183 | #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) | ||
184 | #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) | ||
185 | #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) | ||
186 | -#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) | ||
187 | -#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) | ||
188 | +#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) | ||
189 | +#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) | ||
190 | #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) | ||
191 | |||
192 | /* The bits in this mask specify which | ||
193 | @@ -791,7 +794,8 @@ | ||
194 | int arm_arch_thumb2; | ||
195 | |||
196 | /* Nonzero if chip supports integer division instruction. */ | ||
197 | -int arm_arch_hwdiv; | ||
198 | +int arm_arch_arm_hwdiv; | ||
199 | +int arm_arch_thumb_hwdiv; | ||
200 | |||
201 | /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, | ||
202 | we must report the mode of the memory reference from | ||
203 | @@ -864,48 +868,117 @@ | ||
204 | { | ||
205 | arm_slowmul_rtx_costs, | ||
206 | NULL, | ||
207 | - 3, | ||
208 | - ARM_PREFETCH_NOT_BENEFICIAL | ||
209 | + 3, /* Constant limit. */ | ||
210 | + 5, /* Max cond insns. */ | ||
211 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
212 | + true, /* Prefer constant pool. */ | ||
213 | + arm_default_branch_cost | ||
214 | }; | ||
215 | |||
216 | const struct tune_params arm_fastmul_tune = | ||
217 | { | ||
218 | arm_fastmul_rtx_costs, | ||
219 | NULL, | ||
220 | - 1, | ||
221 | - ARM_PREFETCH_NOT_BENEFICIAL | ||
222 | + 1, /* Constant limit. */ | ||
223 | + 5, /* Max cond insns. */ | ||
224 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
225 | + true, /* Prefer constant pool. */ | ||
226 | + arm_default_branch_cost | ||
227 | +}; | ||
228 | + | ||
229 | +/* StrongARM has early execution of branches, so a sequence that is worth | ||
230 | + skipping is shorter. Set max_insns_skipped to a lower value. */ | ||
231 | + | ||
232 | +const struct tune_params arm_strongarm_tune = | ||
233 | +{ | ||
234 | + arm_fastmul_rtx_costs, | ||
235 | + NULL, | ||
236 | + 1, /* Constant limit. */ | ||
237 | + 3, /* Max cond insns. */ | ||
238 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
239 | + true, /* Prefer constant pool. */ | ||
240 | + arm_default_branch_cost | ||
241 | }; | ||
242 | |||
243 | const struct tune_params arm_xscale_tune = | ||
244 | { | ||
245 | arm_xscale_rtx_costs, | ||
246 | xscale_sched_adjust_cost, | ||
247 | - 2, | ||
248 | - ARM_PREFETCH_NOT_BENEFICIAL | ||
249 | + 2, /* Constant limit. */ | ||
250 | + 3, /* Max cond insns. */ | ||
251 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
252 | + true, /* Prefer constant pool. */ | ||
253 | + arm_default_branch_cost | ||
254 | }; | ||
255 | |||
256 | const struct tune_params arm_9e_tune = | ||
257 | { | ||
258 | arm_9e_rtx_costs, | ||
259 | NULL, | ||
260 | - 1, | ||
261 | - ARM_PREFETCH_NOT_BENEFICIAL | ||
262 | + 1, /* Constant limit. */ | ||
263 | + 5, /* Max cond insns. */ | ||
264 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
265 | + true, /* Prefer constant pool. */ | ||
266 | + arm_default_branch_cost | ||
267 | +}; | ||
268 | + | ||
269 | +const struct tune_params arm_v6t2_tune = | ||
270 | +{ | ||
271 | + arm_9e_rtx_costs, | ||
272 | + NULL, | ||
273 | + 1, /* Constant limit. */ | ||
274 | + 5, /* Max cond insns. */ | ||
275 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
276 | + false, /* Prefer constant pool. */ | ||
277 | + arm_default_branch_cost | ||
278 | +}; | ||
279 | + | ||
280 | +/* Generic Cortex tuning. Use more specific tunings if appropriate. */ | ||
281 | +const struct tune_params arm_cortex_tune = | ||
282 | +{ | ||
283 | + arm_9e_rtx_costs, | ||
284 | + NULL, | ||
285 | + 1, /* Constant limit. */ | ||
286 | + 5, /* Max cond insns. */ | ||
287 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
288 | + false, /* Prefer constant pool. */ | ||
289 | + arm_default_branch_cost | ||
290 | +}; | ||
291 | + | ||
292 | +/* Branches can be dual-issued on Cortex-A5, so conditional execution is | ||
293 | + less appealing. Set max_insns_skipped to a low value. */ | ||
294 | + | ||
295 | +const struct tune_params arm_cortex_a5_tune = | ||
296 | +{ | ||
297 | + arm_9e_rtx_costs, | ||
298 | + NULL, | ||
299 | + 1, /* Constant limit. */ | ||
300 | + 1, /* Max cond insns. */ | ||
301 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
302 | + false, /* Prefer constant pool. */ | ||
303 | + arm_cortex_a5_branch_cost | ||
304 | }; | ||
305 | |||
306 | const struct tune_params arm_cortex_a9_tune = | ||
307 | { | ||
308 | arm_9e_rtx_costs, | ||
309 | cortex_a9_sched_adjust_cost, | ||
310 | - 1, | ||
311 | - ARM_PREFETCH_BENEFICIAL(4,32,32) | ||
312 | + 1, /* Constant limit. */ | ||
313 | + 5, /* Max cond insns. */ | ||
314 | + ARM_PREFETCH_BENEFICIAL(4,32,32), | ||
315 | + false, /* Prefer constant pool. */ | ||
316 | + arm_default_branch_cost | ||
317 | }; | ||
318 | |||
319 | const struct tune_params arm_fa726te_tune = | ||
320 | { | ||
321 | arm_9e_rtx_costs, | ||
322 | fa726te_sched_adjust_cost, | ||
323 | - 1, | ||
324 | - ARM_PREFETCH_NOT_BENEFICIAL | ||
325 | + 1, /* Constant limit. */ | ||
326 | + 5, /* Max cond insns. */ | ||
327 | + ARM_PREFETCH_NOT_BENEFICIAL, | ||
328 | + true, /* Prefer constant pool. */ | ||
329 | + arm_default_branch_cost | ||
330 | }; | ||
331 | |||
332 | |||
333 | @@ -1711,7 +1784,8 @@ | ||
334 | arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; | ||
335 | arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; | ||
336 | arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; | ||
337 | - arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; | ||
338 | + arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; | ||
339 | + arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; | ||
340 | arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; | ||
341 | |||
342 | /* If we are not using the default (ARM mode) section anchor offset | ||
343 | @@ -1991,12 +2065,7 @@ | ||
344 | max_insns_skipped = 6; | ||
345 | } | ||
346 | else | ||
347 | - { | ||
348 | - /* StrongARM has early execution of branches, so a sequence | ||
349 | - that is worth skipping is shorter. */ | ||
350 | - if (arm_tune_strongarm) | ||
351 | - max_insns_skipped = 3; | ||
352 | - } | ||
353 | + max_insns_skipped = current_tune->max_insns_skipped; | ||
354 | |||
355 | /* Hot/Cold partitioning is not currently supported, since we can't | ||
356 | handle literal pool placement in that case. */ | ||
357 | @@ -8211,6 +8280,21 @@ | ||
358 | return cost; | ||
359 | } | ||
360 | |||
361 | +static int | ||
362 | +arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) | ||
363 | +{ | ||
364 | + if (TARGET_32BIT) | ||
365 | + return (TARGET_THUMB2 && !speed_p) ? 1 : 4; | ||
366 | + else | ||
367 | + return (optimize > 0) ? 2 : 0; | ||
368 | +} | ||
369 | + | ||
370 | +static int | ||
371 | +arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) | ||
372 | +{ | ||
373 | + return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); | ||
374 | +} | ||
375 | + | ||
376 | static int fp_consts_inited = 0; | ||
377 | |||
378 | /* Only zero is valid for VFP. Other values are also valid for FPA. */ | ||
379 | @@ -23123,6 +23207,7 @@ | ||
380 | { | ||
381 | case cortexr4: | ||
382 | case cortexr4f: | ||
383 | + case cortexr5: | ||
384 | case cortexa5: | ||
385 | case cortexa8: | ||
386 | case cortexa9: | ||
387 | |||
388 | === modified file 'gcc/config/arm/arm.h' | ||
389 | --- old/gcc/config/arm/arm.h 2011-06-02 12:12:00 +0000 | ||
390 | +++ new/gcc/config/arm/arm.h 2011-06-14 14:53:07 +0000 | ||
391 | @@ -101,6 +101,8 @@ | ||
392 | builtin_define ("__ARM_PCS"); \ | ||
393 | builtin_define ("__ARM_EABI__"); \ | ||
394 | } \ | ||
395 | + if (TARGET_IDIV) \ | ||
396 | + builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ | ||
397 | } while (0) | ||
398 | |||
399 | /* The various ARM cores. */ | ||
400 | @@ -282,7 +284,8 @@ | ||
401 | (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) | ||
402 | |||
403 | /* Should MOVW/MOVT be used in preference to a constant pool. */ | ||
404 | -#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) | ||
405 | +#define TARGET_USE_MOVT \ | ||
406 | + (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool) | ||
407 | |||
408 | /* We could use unified syntax for arm mode, but for now we just use it | ||
409 | for Thumb-2. */ | ||
410 | @@ -303,6 +306,10 @@ | ||
411 | /* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ | ||
412 | #define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) | ||
413 | |||
414 | +/* Nonzero if integer division instructions supported. */ | ||
415 | +#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ | ||
416 | + || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) | ||
417 | + | ||
418 | /* True iff the full BPABI is being used. If TARGET_BPABI is true, | ||
419 | then TARGET_AAPCS_BASED must be true -- but the converse does not | ||
420 | hold. TARGET_BPABI implies the use of the BPABI runtime library, | ||
421 | @@ -487,8 +494,11 @@ | ||
422 | /* Nonzero if chip supports Thumb 2. */ | ||
423 | extern int arm_arch_thumb2; | ||
424 | |||
425 | -/* Nonzero if chip supports integer division instruction. */ | ||
426 | -extern int arm_arch_hwdiv; | ||
427 | +/* Nonzero if chip supports integer division instruction in ARM mode. */ | ||
428 | +extern int arm_arch_arm_hwdiv; | ||
429 | + | ||
430 | +/* Nonzero if chip supports integer division instruction in Thumb mode. */ | ||
431 | +extern int arm_arch_thumb_hwdiv; | ||
432 | |||
433 | #ifndef TARGET_DEFAULT | ||
434 | #define TARGET_DEFAULT (MASK_APCS_FRAME) | ||
435 | @@ -2018,8 +2028,8 @@ | ||
436 | /* Try to generate sequences that don't involve branches, we can then use | ||
437 | conditional instructions */ | ||
438 | #define BRANCH_COST(speed_p, predictable_p) \ | ||
439 | - (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \ | ||
440 | - : (optimize > 0 ? 2 : 0)) | ||
441 | + (current_tune->branch_cost (speed_p, predictable_p)) | ||
442 | + | ||
443 | |||
444 | /* Position Independent Code. */ | ||
445 | /* We decide which register to use based on the compilation options and | ||
446 | |||
447 | === modified file 'gcc/config/arm/arm.md' | ||
448 | --- old/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000 | ||
449 | +++ new/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000 | ||
450 | @@ -490,7 +490,7 @@ | ||
451 | |||
452 | (define_attr "tune_cortexr4" "yes,no" | ||
453 | (const (if_then_else | ||
454 | - (eq_attr "tune" "cortexr4,cortexr4f") | ||
455 | + (eq_attr "tune" "cortexr4,cortexr4f,cortexr5") | ||
456 | (const_string "yes") | ||
457 | (const_string "no")))) | ||
458 | |||
459 | @@ -3738,6 +3738,28 @@ | ||
460 | (set_attr "predicable" "yes")] | ||
461 | ) | ||
462 | |||
463 | + | ||
464 | +;; Division instructions | ||
465 | +(define_insn "divsi3" | ||
466 | + [(set (match_operand:SI 0 "s_register_operand" "=r") | ||
467 | + (div:SI (match_operand:SI 1 "s_register_operand" "r") | ||
468 | + (match_operand:SI 2 "s_register_operand" "r")))] | ||
469 | + "TARGET_IDIV" | ||
470 | + "sdiv%?\t%0, %1, %2" | ||
471 | + [(set_attr "predicable" "yes") | ||
472 | + (set_attr "insn" "sdiv")] | ||
473 | +) | ||
474 | + | ||
475 | +(define_insn "udivsi3" | ||
476 | + [(set (match_operand:SI 0 "s_register_operand" "=r") | ||
477 | + (udiv:SI (match_operand:SI 1 "s_register_operand" "r") | ||
478 | + (match_operand:SI 2 "s_register_operand" "r")))] | ||
479 | + "TARGET_IDIV" | ||
480 | + "udiv%?\t%0, %1, %2" | ||
481 | + [(set_attr "predicable" "yes") | ||
482 | + (set_attr "insn" "udiv")] | ||
483 | +) | ||
484 | + | ||
485 | |||
486 | ;; Unary arithmetic insns | ||
487 | |||
488 | |||
489 | === modified file 'gcc/config/arm/thumb2.md' | ||
490 | --- old/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000 | ||
491 | +++ new/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000 | ||
492 | @@ -779,26 +779,6 @@ | ||
493 | (set_attr "length" "2")] | ||
494 | ) | ||
495 | |||
496 | -(define_insn "divsi3" | ||
497 | - [(set (match_operand:SI 0 "s_register_operand" "=r") | ||
498 | - (div:SI (match_operand:SI 1 "s_register_operand" "r") | ||
499 | - (match_operand:SI 2 "s_register_operand" "r")))] | ||
500 | - "TARGET_THUMB2 && arm_arch_hwdiv" | ||
501 | - "sdiv%?\t%0, %1, %2" | ||
502 | - [(set_attr "predicable" "yes") | ||
503 | - (set_attr "insn" "sdiv")] | ||
504 | -) | ||
505 | - | ||
506 | -(define_insn "udivsi3" | ||
507 | - [(set (match_operand:SI 0 "s_register_operand" "=r") | ||
508 | - (udiv:SI (match_operand:SI 1 "s_register_operand" "r") | ||
509 | - (match_operand:SI 2 "s_register_operand" "r")))] | ||
510 | - "TARGET_THUMB2 && arm_arch_hwdiv" | ||
511 | - "udiv%?\t%0, %1, %2" | ||
512 | - [(set_attr "predicable" "yes") | ||
513 | - (set_attr "insn" "udiv")] | ||
514 | -) | ||
515 | - | ||
516 | (define_insn "*thumb2_subsi_short" | ||
517 | [(set (match_operand:SI 0 "low_register_operand" "=l") | ||
518 | (minus:SI (match_operand:SI 1 "low_register_operand" "l") | ||
519 | |||
520 | === modified file 'gcc/doc/invoke.texi' | ||
521 | --- old/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000 | ||
522 | +++ new/gcc/doc/invoke.texi 2011-06-14 14:37:30 +0000 | ||
523 | @@ -10208,7 +10208,8 @@ | ||
524 | @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, | ||
525 | @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, | ||
526 | @samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, | ||
527 | -@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3}, | ||
528 | +@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, | ||
529 | +@samp{cortex-m4}, @samp{cortex-m3}, | ||
530 | @samp{cortex-m1}, | ||
531 | @samp{cortex-m0}, | ||
532 | @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. | ||
533 | |||
534 | === modified file 'gcc/dojump.c' | ||
535 | --- old/gcc/dojump.c 2010-05-19 19:09:57 +0000 | ||
536 | +++ new/gcc/dojump.c 2011-06-14 14:53:07 +0000 | ||
537 | @@ -36,6 +36,7 @@ | ||
538 | #include "ggc.h" | ||
539 | #include "basic-block.h" | ||
540 | #include "output.h" | ||
541 | +#include "tm_p.h" | ||
542 | |||
543 | static bool prefer_and_bit_test (enum machine_mode, int); | ||
544 | static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int); | ||
545 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106761.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106761.patch new file mode 100644 index 000000000..4374e7ed6 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106761.patch | |||
@@ -0,0 +1,188 @@ | |||
1 | gcc/ | ||
2 | Backport from mainline: | ||
3 | |||
4 | Chung-Lin Tang <cltang@codesourcery.com> | ||
5 | Richard Earnshaw <rearnsha@arm.com> | ||
6 | |||
7 | PR target/48250 | ||
8 | * config/arm/arm.c (arm_legitimize_reload_address): Update cases | ||
9 | to use sign-magnitude offsets. Reject unsupported unaligned | ||
10 | cases. Add detailed description in comments. | ||
11 | * config/arm/arm.md (reload_outdf): Disable for ARM mode; change | ||
12 | condition from TARGET_32BIT to TARGET_THUMB2. | ||
13 | |||
14 | Chung-Lin Tang <cltang@codesourcery.com> | ||
15 | |||
16 | * config/arm/arm.c (arm_legitimize_reload_address): For NEON | ||
17 | quad-word modes, reduce to 9-bit index range when above 1016 | ||
18 | limit. | ||
19 | |||
20 | === modified file 'gcc/config/arm/arm.c' | ||
21 | --- old/gcc/config/arm/arm.c 2011-06-14 16:00:30 +0000 | ||
22 | +++ new/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000 | ||
23 | @@ -6488,23 +6488,134 @@ | ||
24 | HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); | ||
25 | HOST_WIDE_INT low, high; | ||
26 | |||
27 | - if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT)) | ||
28 | - low = ((val & 0xf) ^ 0x8) - 0x8; | ||
29 | - else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) | ||
30 | - /* Need to be careful, -256 is not a valid offset. */ | ||
31 | - low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); | ||
32 | - else if (mode == SImode | ||
33 | - || (mode == SFmode && TARGET_SOFT_FLOAT) | ||
34 | - || ((mode == HImode || mode == QImode) && ! arm_arch4)) | ||
35 | - /* Need to be careful, -4096 is not a valid offset. */ | ||
36 | - low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff); | ||
37 | - else if ((mode == HImode || mode == QImode) && arm_arch4) | ||
38 | - /* Need to be careful, -256 is not a valid offset. */ | ||
39 | - low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); | ||
40 | - else if (GET_MODE_CLASS (mode) == MODE_FLOAT | ||
41 | - && TARGET_HARD_FLOAT && TARGET_FPA) | ||
42 | - /* Need to be careful, -1024 is not a valid offset. */ | ||
43 | - low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); | ||
44 | + /* Detect coprocessor load/stores. */ | ||
45 | + bool coproc_p = ((TARGET_HARD_FLOAT | ||
46 | + && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK) | ||
47 | + && (mode == SFmode || mode == DFmode | ||
48 | + || (mode == DImode && TARGET_MAVERICK))) | ||
49 | + || (TARGET_REALLY_IWMMXT | ||
50 | + && VALID_IWMMXT_REG_MODE (mode)) | ||
51 | + || (TARGET_NEON | ||
52 | + && (VALID_NEON_DREG_MODE (mode) | ||
53 | + || VALID_NEON_QREG_MODE (mode)))); | ||
54 | + | ||
55 | + /* For some conditions, bail out when lower two bits are unaligned. */ | ||
56 | + if ((val & 0x3) != 0 | ||
57 | + /* Coprocessor load/store indexes are 8-bits + '00' appended. */ | ||
58 | + && (coproc_p | ||
59 | + /* For DI, and DF under soft-float: */ | ||
60 | + || ((mode == DImode || mode == DFmode) | ||
61 | + /* Without ldrd, we use stm/ldm, which does not | ||
62 | + fare well with unaligned bits. */ | ||
63 | + && (! TARGET_LDRD | ||
64 | + /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ | ||
65 | + || TARGET_THUMB2)))) | ||
66 | + return false; | ||
67 | + | ||
68 | + /* When breaking down a [reg+index] reload address into [(reg+high)+low], | ||
69 | + of which the (reg+high) gets turned into a reload add insn, | ||
70 | + we try to decompose the index into high/low values that can often | ||
71 | + also lead to better reload CSE. | ||
72 | + For example: | ||
73 | + ldr r0, [r2, #4100] // Offset too large | ||
74 | + ldr r1, [r2, #4104] // Offset too large | ||
75 | + | ||
76 | + is best reloaded as: | ||
77 | + add t1, r2, #4096 | ||
78 | + ldr r0, [t1, #4] | ||
79 | + add t2, r2, #4096 | ||
80 | + ldr r1, [t2, #8] | ||
81 | + | ||
82 | + which post-reload CSE can simplify in most cases to eliminate the | ||
83 | + second add instruction: | ||
84 | + add t1, r2, #4096 | ||
85 | + ldr r0, [t1, #4] | ||
86 | + ldr r1, [t1, #8] | ||
87 | + | ||
88 | + The idea here is that we want to split out the bits of the constant | ||
89 | + as a mask, rather than as subtracting the maximum offset that the | ||
90 | + respective type of load/store used can handle. | ||
91 | + | ||
92 | + When encountering negative offsets, we can still utilize it even if | ||
93 | + the overall offset is positive; sometimes this may lead to an immediate | ||
94 | + that can be constructed with fewer instructions. | ||
95 | + For example: | ||
96 | + ldr r0, [r2, #0x3FFFFC] | ||
97 | + | ||
98 | + This is best reloaded as: | ||
99 | + add t1, r2, #0x400000 | ||
100 | + ldr r0, [t1, #-4] | ||
101 | + | ||
102 | + The trick for spotting this for a load insn with N bits of offset | ||
103 | + (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a | ||
104 | + negative offset that is going to make bit N and all the bits below | ||
105 | + it become zero in the remainder part. | ||
106 | + | ||
107 | + The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect | ||
108 | + to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), | ||
109 | + used in most cases of ARM load/store instructions. */ | ||
110 | + | ||
111 | +#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ | ||
112 | + (((VAL) & ((1 << (N)) - 1)) \ | ||
113 | + ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ | ||
114 | + : 0) | ||
115 | + | ||
116 | + if (coproc_p) | ||
117 | + { | ||
118 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 10); | ||
119 | + | ||
120 | + /* NEON quad-word load/stores are made of two double-word accesses, | ||
121 | + so the valid index range is reduced by 8. Treat as 9-bit range if | ||
122 | + we go over it. */ | ||
123 | + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) | ||
124 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 9); | ||
125 | + } | ||
126 | + else if (GET_MODE_SIZE (mode) == 8) | ||
127 | + { | ||
128 | + if (TARGET_LDRD) | ||
129 | + low = (TARGET_THUMB2 | ||
130 | + ? SIGN_MAG_LOW_ADDR_BITS (val, 10) | ||
131 | + : SIGN_MAG_LOW_ADDR_BITS (val, 8)); | ||
132 | + else | ||
133 | + /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) | ||
134 | + to access doublewords. The supported load/store offsets are | ||
135 | + -8, -4, and 4, which we try to produce here. */ | ||
136 | + low = ((val & 0xf) ^ 0x8) - 0x8; | ||
137 | + } | ||
138 | + else if (GET_MODE_SIZE (mode) < 8) | ||
139 | + { | ||
140 | + /* NEON element load/stores do not have an offset. */ | ||
141 | + if (TARGET_NEON_FP16 && mode == HFmode) | ||
142 | + return false; | ||
143 | + | ||
144 | + if (TARGET_THUMB2) | ||
145 | + { | ||
146 | + /* Thumb-2 has an asymmetrical index range of (-256,4096). | ||
147 | + Try the wider 12-bit range first, and re-try if the result | ||
148 | + is out of range. */ | ||
149 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); | ||
150 | + if (low < -255) | ||
151 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); | ||
152 | + } | ||
153 | + else | ||
154 | + { | ||
155 | + if (mode == HImode || mode == HFmode) | ||
156 | + { | ||
157 | + if (arm_arch4) | ||
158 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); | ||
159 | + else | ||
160 | + { | ||
161 | + /* The storehi/movhi_bytes fallbacks can use only | ||
162 | + [-4094,+4094] of the full ldrb/strb index range. */ | ||
163 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); | ||
164 | + if (low == 4095 || low == -4095) | ||
165 | + return false; | ||
166 | + } | ||
167 | + } | ||
168 | + else | ||
169 | + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); | ||
170 | + } | ||
171 | + } | ||
172 | else | ||
173 | return false; | ||
174 | |||
175 | |||
176 | === modified file 'gcc/config/arm/arm.md' | ||
177 | --- old/gcc/config/arm/arm.md 2011-06-14 14:37:30 +0000 | ||
178 | +++ new/gcc/config/arm/arm.md 2011-06-27 22:14:07 +0000 | ||
179 | @@ -6267,7 +6267,7 @@ | ||
180 | [(match_operand:DF 0 "arm_reload_memory_operand" "=o") | ||
181 | (match_operand:DF 1 "s_register_operand" "r") | ||
182 | (match_operand:SI 2 "s_register_operand" "=&r")] | ||
183 | - "TARGET_32BIT" | ||
184 | + "TARGET_THUMB2" | ||
185 | " | ||
186 | { | ||
187 | enum rtx_code code = GET_CODE (XEXP (operands[0], 0)); | ||
188 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch new file mode 100644 index 000000000..bbf9819ec --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch | |||
@@ -0,0 +1,1355 @@ | |||
1 | 2011-06-28 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from FSF: | ||
4 | |||
5 | 2011-06-07 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-vectorizer.h (vect_recog_func_ptr): Make the first argument | ||
9 | (the statement) a pointer. | ||
10 | * tree-vect-patterns.c (vect_recog_widen_sum_pattern, | ||
11 | vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern, | ||
12 | vect_recog_pow_pattern): Likewise. | ||
13 | (vect_pattern_recog_1): Remove declaration. | ||
14 | (widened_name_p): Remove declaration. Add new argument to specify | ||
15 | whether to check that both types are either signed or unsigned. | ||
16 | (vect_recog_widen_mult_pattern): Update documentation. Handle | ||
17 | unsigned patterns and multiplication by constants. | ||
18 | (vect_pattern_recog_1): Update vect_recog_func references. Use | ||
19 | statement information from the statement returned from pattern | ||
20 | detection functions. | ||
21 | (vect_pattern_recog): Update vect_recog_func reference. | ||
22 | * tree-vect-stmts.c (vectorizable_type_promotion): For widening | ||
23 | multiplication by a constant use the type of the other operand. | ||
24 | |||
25 | gcc/testsuite | ||
26 | * lib/target-supports.exp | ||
27 | (check_effective_target_vect_widen_mult_qi_to_hi): | ||
28 | Add NEON as supporting target. | ||
29 | (check_effective_target_vect_widen_mult_hi_to_si): Likewise. | ||
30 | (check_effective_target_vect_widen_mult_qi_to_hi_pattern): New. | ||
31 | (check_effective_target_vect_widen_mult_hi_to_si_pattern): New. | ||
32 | * gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized | ||
33 | using widening multiplication on targets that support it. | ||
34 | * gcc.dg/vect/vect-widen-mult-u16.c: Likewise. | ||
35 | * gcc.dg/vect/vect-widen-mult-const-s16.c: New test. | ||
36 | * gcc.dg/vect/vect-widen-mult-const-u16.c: New test. | ||
37 | |||
38 | and | ||
39 | |||
40 | 2011-06-15 Ira Rosen <ira.rosen@linaro.org> | ||
41 | |||
42 | gcc/ | ||
43 | * tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove. | ||
44 | (slpeel_tree_peel_loop_to_edge): Don't call | ||
45 | remove_dead_stmts_from_loop. | ||
46 | * tree-vect-loop.c (vect_determine_vectorization_factor): Don't | ||
47 | remove irrelevant pattern statements. For irrelevant statements | ||
48 | check if it is the last statement of a detected pattern, use | ||
49 | corresponding pattern statement instead. | ||
50 | (destroy_loop_vec_info): No need to remove pattern statements, | ||
51 | only free stmt_vec_info. | ||
52 | (vect_transform_loop): For irrelevant statements check if it is | ||
53 | the last statement of a detected pattern, use corresponding | ||
54 | pattern statement instead. | ||
55 | * tree-vect-patterns.c (vect_pattern_recog_1): Don't insert | ||
56 | pattern statements. Set basic block for the new statement. | ||
57 | (vect_pattern_recog): Update documentation. | ||
58 | * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan | ||
59 | operands of pattern statements. | ||
60 | (vectorizable_call): Fix printing. In case of a pattern statement | ||
61 | use the lhs of the original statement when creating a dummy | ||
62 | statement to replace the original call. | ||
63 | (vect_analyze_stmt): For irrelevant statements check if it is | ||
64 | the last statement of a detected pattern, use corresponding | ||
65 | pattern statement instead. | ||
66 | * tree-vect-slp.c (vect_schedule_slp_instance): For pattern | ||
67 | statements use gsi of the original statement. | ||
68 | |||
69 | and | ||
70 | 2011-06-21 Ira Rosen <ira.rosen@linaro.org> | ||
71 | |||
72 | PR tree-optimization/49478 | ||
73 | gcc/ | ||
74 | |||
75 | * tree-vect-loop.c (vectorizable_reduction): Handle DOT_PROD_EXPR | ||
76 | with constant operand. | ||
77 | |||
78 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c' | ||
79 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 1970-01-01 00:00:00 +0000 | ||
80 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 2011-06-19 10:59:13 +0000 | ||
81 | @@ -0,0 +1,60 @@ | ||
82 | +/* { dg-require-effective-target vect_int } */ | ||
83 | + | ||
84 | +#include "tree-vect.h" | ||
85 | +#include <stdlib.h> | ||
86 | + | ||
87 | +#define N 32 | ||
88 | + | ||
89 | +__attribute__ ((noinline)) void | ||
90 | +foo (int *__restrict a, | ||
91 | + short *__restrict b, | ||
92 | + int n) | ||
93 | +{ | ||
94 | + int i; | ||
95 | + | ||
96 | + for (i = 0; i < n; i++) | ||
97 | + a[i] = b[i] * 2333; | ||
98 | + | ||
99 | + for (i = 0; i < n; i++) | ||
100 | + if (a[i] != b[i] * 2333) | ||
101 | + abort (); | ||
102 | +} | ||
103 | + | ||
104 | +__attribute__ ((noinline)) void | ||
105 | +bar (int *__restrict a, | ||
106 | + short *__restrict b, | ||
107 | + int n) | ||
108 | +{ | ||
109 | + int i; | ||
110 | + | ||
111 | + for (i = 0; i < n; i++) | ||
112 | + a[i] = b[i] * (short) 2333; | ||
113 | + | ||
114 | + for (i = 0; i < n; i++) | ||
115 | + if (a[i] != b[i] * (short) 2333) | ||
116 | + abort (); | ||
117 | +} | ||
118 | + | ||
119 | +int main (void) | ||
120 | +{ | ||
121 | + int i; | ||
122 | + int a[N]; | ||
123 | + short b[N]; | ||
124 | + | ||
125 | + for (i = 0; i < N; i++) | ||
126 | + { | ||
127 | + a[i] = 0; | ||
128 | + b[i] = i; | ||
129 | + __asm__ volatile (""); | ||
130 | + } | ||
131 | + | ||
132 | + foo (a, b, N); | ||
133 | + bar (a, b, N); | ||
134 | + return 0; | ||
135 | +} | ||
136 | + | ||
137 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
138 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
139 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
140 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
141 | + | ||
142 | |||
143 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c' | ||
144 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 1970-01-01 00:00:00 +0000 | ||
145 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 2011-06-19 10:59:13 +0000 | ||
146 | @@ -0,0 +1,77 @@ | ||
147 | +/* { dg-require-effective-target vect_int } */ | ||
148 | + | ||
149 | +#include "tree-vect.h" | ||
150 | +#include <stdlib.h> | ||
151 | + | ||
152 | +#define N 32 | ||
153 | + | ||
154 | +__attribute__ ((noinline)) void | ||
155 | +foo (unsigned int *__restrict a, | ||
156 | + unsigned short *__restrict b, | ||
157 | + int n) | ||
158 | +{ | ||
159 | + int i; | ||
160 | + | ||
161 | + for (i = 0; i < n; i++) | ||
162 | + a[i] = b[i] * 2333; | ||
163 | + | ||
164 | + for (i = 0; i < n; i++) | ||
165 | + if (a[i] != b[i] * 2333) | ||
166 | + abort (); | ||
167 | +} | ||
168 | + | ||
169 | +__attribute__ ((noinline)) void | ||
170 | +bar (unsigned int *__restrict a, | ||
171 | + unsigned short *__restrict b, | ||
172 | + int n) | ||
173 | +{ | ||
174 | + int i; | ||
175 | + | ||
176 | + for (i = 0; i < n; i++) | ||
177 | + a[i] = (unsigned short) 2333 * b[i]; | ||
178 | + | ||
179 | + for (i = 0; i < n; i++) | ||
180 | + if (a[i] != b[i] * (unsigned short) 2333) | ||
181 | + abort (); | ||
182 | +} | ||
183 | + | ||
184 | +__attribute__ ((noinline)) void | ||
185 | +baz (unsigned int *__restrict a, | ||
186 | + unsigned short *__restrict b, | ||
187 | + int n) | ||
188 | +{ | ||
189 | + int i; | ||
190 | + | ||
191 | + for (i = 0; i < n; i++) | ||
192 | + a[i] = b[i] * 233333333; | ||
193 | + | ||
194 | + for (i = 0; i < n; i++) | ||
195 | + if (a[i] != b[i] * 233333333) | ||
196 | + abort (); | ||
197 | +} | ||
198 | + | ||
199 | + | ||
200 | +int main (void) | ||
201 | +{ | ||
202 | + int i; | ||
203 | + unsigned int a[N]; | ||
204 | + unsigned short b[N]; | ||
205 | + | ||
206 | + for (i = 0; i < N; i++) | ||
207 | + { | ||
208 | + a[i] = 0; | ||
209 | + b[i] = i; | ||
210 | + __asm__ volatile (""); | ||
211 | + } | ||
212 | + | ||
213 | + foo (a, b, N); | ||
214 | + bar (a, b, N); | ||
215 | + baz (a, b, N); | ||
216 | + return 0; | ||
217 | +} | ||
218 | + | ||
219 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
220 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
221 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
222 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
223 | + | ||
224 | |||
225 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c' | ||
226 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2010-05-27 12:23:45 +0000 | ||
227 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2011-06-19 10:59:13 +0000 | ||
228 | @@ -9,13 +9,11 @@ | ||
229 | unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); | ||
230 | unsigned int result[N]; | ||
231 | |||
232 | -/* short->int widening-mult */ | ||
233 | +/* unsigned short->unsigned int widening-mult. */ | ||
234 | __attribute__ ((noinline)) int | ||
235 | foo1(int len) { | ||
236 | int i; | ||
237 | |||
238 | - /* Not vectorized because X[i] and Y[i] are casted to 'int' | ||
239 | - so the widening multiplication pattern is not recognized. */ | ||
240 | for (i=0; i<len; i++) { | ||
241 | result[i] = (unsigned int)(X[i] * Y[i]); | ||
242 | } | ||
243 | @@ -43,8 +41,8 @@ | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | -/*The induction loop is vectorized */ | ||
248 | -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ | ||
249 | -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ | ||
250 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ | ||
251 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
252 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
253 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
254 | |||
255 | |||
256 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c' | ||
257 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2009-05-08 12:39:01 +0000 | ||
258 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2011-06-19 10:59:13 +0000 | ||
259 | @@ -9,7 +9,7 @@ | ||
260 | unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); | ||
261 | unsigned short result[N]; | ||
262 | |||
263 | -/* char->short widening-mult */ | ||
264 | +/* unsigned char->unsigned short widening-mult. */ | ||
265 | __attribute__ ((noinline)) int | ||
266 | foo1(int len) { | ||
267 | int i; | ||
268 | @@ -28,8 +28,7 @@ | ||
269 | for (i=0; i<N; i++) { | ||
270 | X[i] = i; | ||
271 | Y[i] = 64-i; | ||
272 | - if (i%4 == 0) | ||
273 | - X[i] = 5; | ||
274 | + __asm__ volatile (""); | ||
275 | } | ||
276 | |||
277 | foo1 (N); | ||
278 | @@ -43,5 +42,7 @@ | ||
279 | } | ||
280 | |||
281 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */ | ||
282 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ | ||
283 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */ | ||
284 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
285 | |||
286 | |||
287 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
288 | --- old/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000 | ||
289 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-06-19 10:59:13 +0000 | ||
290 | @@ -2663,7 +2663,8 @@ | ||
291 | } else { | ||
292 | set et_vect_widen_mult_qi_to_hi_saved 0 | ||
293 | } | ||
294 | - if { [istarget powerpc*-*-*] } { | ||
295 | + if { [istarget powerpc*-*-*] | ||
296 | + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { | ||
297 | set et_vect_widen_mult_qi_to_hi_saved 1 | ||
298 | } | ||
299 | } | ||
300 | @@ -2696,7 +2697,8 @@ | ||
301 | || [istarget spu-*-*] | ||
302 | || [istarget ia64-*-*] | ||
303 | || [istarget i?86-*-*] | ||
304 | - || [istarget x86_64-*-*] } { | ||
305 | + || [istarget x86_64-*-*] | ||
306 | + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { | ||
307 | set et_vect_widen_mult_hi_to_si_saved 1 | ||
308 | } | ||
309 | } | ||
310 | @@ -2705,6 +2707,52 @@ | ||
311 | } | ||
312 | |||
313 | # Return 1 if the target plus current options supports a vector | ||
314 | +# widening multiplication of *char* args into *short* result, 0 otherwise. | ||
315 | +# | ||
316 | +# This won't change for different subtargets so cache the result. | ||
317 | + | ||
318 | +proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { | ||
319 | + global et_vect_widen_mult_qi_to_hi_pattern_saved | ||
320 | + # The global must be the _saved variable, else the cache stays proc-local. | ||
321 | + if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { | ||
322 | + verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 | ||
323 | + } else { | ||
324 | + set et_vect_widen_mult_qi_to_hi_pattern_saved 0 | ||
325 | + if { [istarget powerpc*-*-*] | ||
326 | + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { | ||
327 | + set et_vect_widen_mult_qi_to_hi_pattern_saved 1 | ||
328 | + } | ||
329 | + } | ||
330 | + verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 | ||
331 | + return $et_vect_widen_mult_qi_to_hi_pattern_saved | ||
332 | +} | ||
333 | + | ||
334 | +# Return 1 if the target plus current options supports a vector | ||
335 | +# widening multiplication of *short* args into *int* result, 0 otherwise. | ||
336 | +# | ||
337 | +# This won't change for different subtargets so cache the result. | ||
338 | + | ||
339 | +proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { | ||
340 | + global et_vect_widen_mult_hi_to_si_pattern_saved | ||
341 | + # The global must be the _saved variable, else the cache stays proc-local. | ||
342 | + if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { | ||
343 | + verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 | ||
344 | + } else { | ||
345 | + set et_vect_widen_mult_hi_to_si_pattern_saved 0 | ||
346 | + if { [istarget powerpc*-*-*] | ||
347 | + || [istarget spu-*-*] | ||
348 | + || [istarget ia64-*-*] | ||
349 | + || [istarget i?86-*-*] | ||
350 | + || [istarget x86_64-*-*] | ||
351 | + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { | ||
352 | + set et_vect_widen_mult_hi_to_si_pattern_saved 1 | ||
353 | + } | ||
354 | + } | ||
355 | + verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 | ||
356 | + return $et_vect_widen_mult_hi_to_si_pattern_saved | ||
357 | +} | ||
358 | + | ||
359 | +# Return 1 if the target plus current options supports a vector | ||
360 | # dot-product of signed chars, 0 otherwise. | ||
361 | # | ||
362 | # This won't change for different subtargets so cache the result. | ||
363 | |||
364 | === modified file 'gcc/tree-vect-loop-manip.c' | ||
365 | --- old/gcc/tree-vect-loop-manip.c 2011-05-18 13:24:05 +0000 | ||
366 | +++ new/gcc/tree-vect-loop-manip.c 2011-06-19 10:59:13 +0000 | ||
367 | @@ -1105,35 +1105,6 @@ | ||
368 | first_niters = PHI_RESULT (newphi); | ||
369 | } | ||
370 | |||
371 | - | ||
372 | -/* Remove dead assignments from loop NEW_LOOP. */ | ||
373 | - | ||
374 | -static void | ||
375 | -remove_dead_stmts_from_loop (struct loop *new_loop) | ||
376 | -{ | ||
377 | - basic_block *bbs = get_loop_body (new_loop); | ||
378 | - unsigned i; | ||
379 | - for (i = 0; i < new_loop->num_nodes; ++i) | ||
380 | - { | ||
381 | - gimple_stmt_iterator gsi; | ||
382 | - for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) | ||
383 | - { | ||
384 | - gimple stmt = gsi_stmt (gsi); | ||
385 | - if (is_gimple_assign (stmt) | ||
386 | - && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME | ||
387 | - && has_zero_uses (gimple_assign_lhs (stmt))) | ||
388 | - { | ||
389 | - gsi_remove (&gsi, true); | ||
390 | - release_defs (stmt); | ||
391 | - } | ||
392 | - else | ||
393 | - gsi_next (&gsi); | ||
394 | - } | ||
395 | - } | ||
396 | - free (bbs); | ||
397 | -} | ||
398 | - | ||
399 | - | ||
400 | /* Function slpeel_tree_peel_loop_to_edge. | ||
401 | |||
402 | Peel the first (last) iterations of LOOP into a new prolog (epilog) loop | ||
403 | @@ -1445,13 +1416,6 @@ | ||
404 | BITMAP_FREE (definitions); | ||
405 | delete_update_ssa (); | ||
406 | |||
407 | - /* Remove all pattern statements from the loop copy. They will confuse | ||
408 | - the expander if DCE is disabled. | ||
409 | - ??? The pattern recognizer should be split into an analysis and | ||
410 | - a transformation phase that is then run only on the loop that is | ||
411 | - going to be transformed. */ | ||
412 | - remove_dead_stmts_from_loop (new_loop); | ||
413 | - | ||
414 | adjust_vec_debug_stmts (); | ||
415 | |||
416 | return new_loop; | ||
417 | |||
418 | === modified file 'gcc/tree-vect-loop.c' | ||
419 | --- old/gcc/tree-vect-loop.c 2011-03-01 13:18:25 +0000 | ||
420 | +++ new/gcc/tree-vect-loop.c 2011-06-22 06:21:13 +0000 | ||
421 | @@ -244,7 +244,7 @@ | ||
422 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
423 | { | ||
424 | tree vf_vectype; | ||
425 | - gimple stmt = gsi_stmt (si); | ||
426 | + gimple stmt = gsi_stmt (si), pattern_stmt; | ||
427 | stmt_info = vinfo_for_stmt (stmt); | ||
428 | |||
429 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
430 | @@ -259,9 +259,25 @@ | ||
431 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | ||
432 | && !STMT_VINFO_LIVE_P (stmt_info)) | ||
433 | { | ||
434 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
435 | - fprintf (vect_dump, "skip."); | ||
436 | - continue; | ||
437 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
438 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
439 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
440 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
441 | + { | ||
442 | + stmt = pattern_stmt; | ||
443 | + stmt_info = vinfo_for_stmt (pattern_stmt); | ||
444 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
445 | + { | ||
446 | + fprintf (vect_dump, "==> examining pattern statement: "); | ||
447 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
448 | + } | ||
449 | + } | ||
450 | + else | ||
451 | + { | ||
452 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
453 | + fprintf (vect_dump, "skip."); | ||
454 | + continue; | ||
455 | + } | ||
456 | } | ||
457 | |||
458 | if (gimple_get_lhs (stmt) == NULL_TREE) | ||
459 | @@ -816,25 +832,17 @@ | ||
460 | |||
461 | if (stmt_info) | ||
462 | { | ||
463 | - /* Check if this is a "pattern stmt" (introduced by the | ||
464 | - vectorizer during the pattern recognition pass). */ | ||
465 | - bool remove_stmt_p = false; | ||
466 | - gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
467 | - if (orig_stmt) | ||
468 | - { | ||
469 | - stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); | ||
470 | - if (orig_stmt_info | ||
471 | - && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) | ||
472 | - remove_stmt_p = true; | ||
473 | - } | ||
474 | + /* Check if this statement has a related "pattern stmt" | ||
475 | + (introduced by the vectorizer during the pattern recognition | ||
476 | + pass). Free pattern's stmt_vec_info. */ | ||
477 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
478 | + && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info))) | ||
479 | + free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
480 | |||
481 | /* Free stmt_vec_info. */ | ||
482 | free_stmt_vec_info (stmt); | ||
483 | + } | ||
484 | |||
485 | - /* Remove dead "pattern stmts". */ | ||
486 | - if (remove_stmt_p) | ||
487 | - gsi_remove (&si, true); | ||
488 | - } | ||
489 | gsi_next (&si); | ||
490 | } | ||
491 | } | ||
492 | @@ -4262,6 +4270,25 @@ | ||
493 | return false; | ||
494 | } | ||
495 | |||
496 | + /* In case of widening multiplication by a constant, we update the type | ||
497 | + of the constant to be the type of the other operand. We check that the | ||
498 | + constant fits the type in the pattern recognition pass. */ | ||
499 | + if (code == DOT_PROD_EXPR | ||
500 | + && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) | ||
501 | + { | ||
502 | + if (TREE_CODE (ops[0]) == INTEGER_CST) | ||
503 | + ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); | ||
504 | + else if (TREE_CODE (ops[1]) == INTEGER_CST) | ||
505 | + ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); | ||
506 | + else | ||
507 | + { | ||
508 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
509 | + fprintf (vect_dump, "invalid types in dot-prod"); | ||
510 | + | ||
511 | + return false; | ||
512 | + } | ||
513 | + } | ||
514 | + | ||
515 | if (!vec_stmt) /* transformation not required. */ | ||
516 | { | ||
517 | STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; | ||
518 | @@ -4796,7 +4823,7 @@ | ||
519 | |||
520 | for (si = gsi_start_bb (bb); !gsi_end_p (si);) | ||
521 | { | ||
522 | - gimple stmt = gsi_stmt (si); | ||
523 | + gimple stmt = gsi_stmt (si), pattern_stmt; | ||
524 | bool is_store; | ||
525 | |||
526 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
527 | @@ -4821,14 +4848,25 @@ | ||
528 | |||
529 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | ||
530 | && !STMT_VINFO_LIVE_P (stmt_info)) | ||
531 | - { | ||
532 | - gsi_next (&si); | ||
533 | - continue; | ||
534 | + { | ||
535 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
536 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
537 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
538 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
539 | + { | ||
540 | + stmt = pattern_stmt; | ||
541 | + stmt_info = vinfo_for_stmt (stmt); | ||
542 | + } | ||
543 | + else | ||
544 | + { | ||
545 | + gsi_next (&si); | ||
546 | + continue; | ||
547 | + } | ||
548 | } | ||
549 | |||
550 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); | ||
551 | - nunits = | ||
552 | - (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | ||
553 | + nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( | ||
554 | + STMT_VINFO_VECTYPE (stmt_info)); | ||
555 | if (!STMT_SLP_TYPE (stmt_info) | ||
556 | && nunits != (unsigned int) vectorization_factor | ||
557 | && vect_print_dump_info (REPORT_DETAILS)) | ||
558 | |||
559 | === modified file 'gcc/tree-vect-patterns.c' | ||
560 | --- old/gcc/tree-vect-patterns.c 2010-12-02 11:47:12 +0000 | ||
561 | +++ new/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000 | ||
562 | @@ -38,16 +38,11 @@ | ||
563 | #include "recog.h" | ||
564 | #include "diagnostic-core.h" | ||
565 | |||
566 | -/* Function prototypes */ | ||
567 | -static void vect_pattern_recog_1 | ||
568 | - (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); | ||
569 | -static bool widened_name_p (tree, gimple, tree *, gimple *); | ||
570 | - | ||
571 | /* Pattern recognition functions */ | ||
572 | -static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); | ||
573 | -static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); | ||
574 | -static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); | ||
575 | -static gimple vect_recog_pow_pattern (gimple, tree *, tree *); | ||
576 | +static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); | ||
577 | +static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); | ||
578 | +static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); | ||
579 | +static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); | ||
580 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
581 | vect_recog_widen_mult_pattern, | ||
582 | vect_recog_widen_sum_pattern, | ||
583 | @@ -61,10 +56,12 @@ | ||
584 | is a result of a type-promotion, such that: | ||
585 | DEF_STMT: NAME = NOP (name0) | ||
586 | where the type of name0 (HALF_TYPE) is smaller than the type of NAME. | ||
587 | -*/ | ||
588 | + If CHECK_SIGN is TRUE, check that either both types are signed or both are | ||
589 | + unsigned. */ | ||
590 | |||
591 | static bool | ||
592 | -widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) | ||
593 | +widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, | ||
594 | + bool check_sign) | ||
595 | { | ||
596 | tree dummy; | ||
597 | gimple dummy_gimple; | ||
598 | @@ -98,7 +95,7 @@ | ||
599 | |||
600 | *half_type = TREE_TYPE (oprnd0); | ||
601 | if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) | ||
602 | - || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) | ||
603 | + || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) | ||
604 | || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) | ||
605 | return false; | ||
606 | |||
607 | @@ -168,12 +165,12 @@ | ||
608 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
609 | |||
610 | static gimple | ||
611 | -vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) | ||
612 | +vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
613 | { | ||
614 | gimple stmt; | ||
615 | tree oprnd0, oprnd1; | ||
616 | tree oprnd00, oprnd01; | ||
617 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
618 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
619 | tree type, half_type; | ||
620 | gimple pattern_stmt; | ||
621 | tree prod_type; | ||
622 | @@ -181,10 +178,10 @@ | ||
623 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
624 | tree var, rhs; | ||
625 | |||
626 | - if (!is_gimple_assign (last_stmt)) | ||
627 | + if (!is_gimple_assign (*last_stmt)) | ||
628 | return NULL; | ||
629 | |||
630 | - type = gimple_expr_type (last_stmt); | ||
631 | + type = gimple_expr_type (*last_stmt); | ||
632 | |||
633 | /* Look for the following pattern | ||
634 | DX = (TYPE1) X; | ||
635 | @@ -210,7 +207,7 @@ | ||
636 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
637 | of the above pattern. */ | ||
638 | |||
639 | - if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
640 | + if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
641 | return NULL; | ||
642 | |||
643 | if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | ||
644 | @@ -231,14 +228,14 @@ | ||
645 | |||
646 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
647 | return NULL; | ||
648 | - oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
649 | - oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
650 | + oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
651 | + oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
652 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
653 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
654 | return NULL; | ||
655 | - stmt = last_stmt; | ||
656 | + stmt = *last_stmt; | ||
657 | |||
658 | - if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) | ||
659 | + if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) | ||
660 | { | ||
661 | stmt = def_stmt; | ||
662 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
663 | @@ -293,10 +290,10 @@ | ||
664 | if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) | ||
665 | || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) | ||
666 | return NULL; | ||
667 | - if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) | ||
668 | + if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) | ||
669 | return NULL; | ||
670 | oprnd00 = gimple_assign_rhs1 (def_stmt); | ||
671 | - if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) | ||
672 | + if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) | ||
673 | return NULL; | ||
674 | oprnd01 = gimple_assign_rhs1 (def_stmt); | ||
675 | if (!types_compatible_p (half_type0, half_type1)) | ||
676 | @@ -322,7 +319,7 @@ | ||
677 | |||
678 | /* We don't allow changing the order of the computation in the inner-loop | ||
679 | when doing outer-loop vectorization. */ | ||
680 | - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
681 | + gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
682 | |||
683 | return pattern_stmt; | ||
684 | } | ||
685 | @@ -342,24 +339,47 @@ | ||
686 | |||
687 | where type 'TYPE' is at least double the size of type 'type'. | ||
688 | |||
689 | - Input: | ||
690 | - | ||
691 | - * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
692 | - when this function is called with S5, the pattern {S3,S4,S5} is be detected. | ||
693 | - | ||
694 | - Output: | ||
695 | - | ||
696 | - * TYPE_IN: The type of the input arguments to the pattern. | ||
697 | - | ||
698 | - * TYPE_OUT: The type of the output of this pattern. | ||
699 | - | ||
700 | - * Return value: A new stmt that will be used to replace the sequence of | ||
701 | - stmts that constitute the pattern. In this case it will be: | ||
702 | - WIDEN_MULT <a_t, b_t> | ||
703 | -*/ | ||
704 | + Also detect unsigned cases: | ||
705 | + | ||
706 | + unsigned type a_t, b_t; | ||
707 | + unsigned TYPE u_prod_T; | ||
708 | + TYPE a_T, b_T, prod_T; | ||
709 | + | ||
710 | + S1 a_t = ; | ||
711 | + S2 b_t = ; | ||
712 | + S3 a_T = (TYPE) a_t; | ||
713 | + S4 b_T = (TYPE) b_t; | ||
714 | + S5 prod_T = a_T * b_T; | ||
715 | + S6 u_prod_T = (unsigned TYPE) prod_T; | ||
716 | + | ||
717 | + and multiplication by constants: | ||
718 | + | ||
719 | + type a_t; | ||
720 | + TYPE a_T, prod_T; | ||
721 | + | ||
722 | + S1 a_t = ; | ||
723 | + S3 a_T = (TYPE) a_t; | ||
724 | + S5 prod_T = a_T * CONST; | ||
725 | + | ||
726 | + Input: | ||
727 | + | ||
728 | + * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
729 | + when this function is called with S5, the pattern {S3,S4,S5,(S6)} is | ||
730 | + detected. | ||
731 | + | ||
732 | + Output: | ||
733 | + | ||
734 | + * TYPE_IN: The type of the input arguments to the pattern. | ||
735 | + | ||
736 | + * TYPE_OUT: The type of the output of this pattern. | ||
737 | + | ||
738 | + * Return value: A new stmt that will be used to replace the sequence of | ||
739 | + stmts that constitute the pattern. In this case it will be: | ||
740 | + WIDEN_MULT <a_t, b_t> | ||
741 | + */ | ||
742 | |||
743 | static gimple | ||
744 | -vect_recog_widen_mult_pattern (gimple last_stmt, | ||
745 | +vect_recog_widen_mult_pattern (gimple *last_stmt, | ||
746 | tree *type_in, | ||
747 | tree *type_out) | ||
748 | { | ||
749 | @@ -367,39 +387,112 @@ | ||
750 | tree oprnd0, oprnd1; | ||
751 | tree type, half_type0, half_type1; | ||
752 | gimple pattern_stmt; | ||
753 | - tree vectype, vectype_out; | ||
754 | + tree vectype, vectype_out = NULL_TREE; | ||
755 | tree dummy; | ||
756 | tree var; | ||
757 | enum tree_code dummy_code; | ||
758 | int dummy_int; | ||
759 | VEC (tree, heap) *dummy_vec; | ||
760 | + bool op0_ok, op1_ok; | ||
761 | |||
762 | - if (!is_gimple_assign (last_stmt)) | ||
763 | + if (!is_gimple_assign (*last_stmt)) | ||
764 | return NULL; | ||
765 | |||
766 | - type = gimple_expr_type (last_stmt); | ||
767 | + type = gimple_expr_type (*last_stmt); | ||
768 | |||
769 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
770 | of the above pattern. */ | ||
771 | |||
772 | - if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) | ||
773 | + if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) | ||
774 | return NULL; | ||
775 | |||
776 | - oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
777 | - oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
778 | + oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
779 | + oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
780 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
781 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
782 | return NULL; | ||
783 | |||
784 | - /* Check argument 0 */ | ||
785 | - if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) | ||
786 | - return NULL; | ||
787 | - oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
788 | - | ||
789 | - /* Check argument 1 */ | ||
790 | - if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) | ||
791 | - return NULL; | ||
792 | - oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
793 | + /* Check argument 0. */ | ||
794 | + op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); | ||
795 | + /* Check argument 1. */ | ||
796 | + op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); | ||
797 | + | ||
798 | + /* In case of multiplication by a constant one of the operands may not match | ||
799 | + the pattern, but not both. */ | ||
800 | + if (!op0_ok && !op1_ok) | ||
801 | + return NULL; | ||
802 | + | ||
803 | + if (op0_ok && op1_ok) | ||
804 | + { | ||
805 | + oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
806 | + oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
807 | + } | ||
808 | + else if (!op0_ok) | ||
809 | + { | ||
810 | + if (CONSTANT_CLASS_P (oprnd0) | ||
811 | + && TREE_CODE (half_type1) == INTEGER_TYPE | ||
812 | + && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) | ||
813 | + && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) | ||
814 | + { | ||
815 | + /* OPRND0 is a constant of HALF_TYPE1. */ | ||
816 | + half_type0 = half_type1; | ||
817 | + oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
818 | + } | ||
819 | + else | ||
820 | + return NULL; | ||
821 | + } | ||
822 | + else if (!op1_ok) | ||
823 | + { | ||
824 | + if (CONSTANT_CLASS_P (oprnd1) | ||
825 | + && TREE_CODE (half_type0) == INTEGER_TYPE | ||
826 | + && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) | ||
827 | + && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) | ||
828 | + { | ||
829 | + /* OPRND1 is a constant of HALF_TYPE0. */ | ||
830 | + half_type1 = half_type0; | ||
831 | + oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
832 | + } | ||
833 | + else | ||
834 | + return NULL; | ||
835 | + } | ||
836 | + | ||
837 | + /* Handle unsigned case. Look for | ||
838 | + S6 u_prod_T = (unsigned TYPE) prod_T; | ||
839 | + Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ | ||
840 | + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) | ||
841 | + { | ||
842 | + tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; | ||
843 | + imm_use_iterator imm_iter; | ||
844 | + use_operand_p use_p; | ||
845 | + int nuses = 0; | ||
846 | + gimple use_stmt = NULL; | ||
847 | + tree use_type; | ||
848 | + | ||
849 | + if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) | ||
850 | + return NULL; | ||
851 | + | ||
852 | + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | ||
853 | + { | ||
854 | + if (is_gimple_debug (USE_STMT (use_p))) | ||
855 | + continue; | ||
856 | + use_stmt = USE_STMT (use_p); | ||
857 | + nuses++; | ||
858 | + } | ||
859 | + | ||
860 | + if (nuses != 1 || !is_gimple_assign (use_stmt) | ||
861 | + || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) | ||
862 | + return NULL; | ||
863 | + | ||
864 | + use_lhs = gimple_assign_lhs (use_stmt); | ||
865 | + use_type = TREE_TYPE (use_lhs); | ||
866 | + if (!INTEGRAL_TYPE_P (use_type) | ||
867 | + || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) | ||
868 | + || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) | ||
869 | + return NULL; | ||
870 | + | ||
871 | + type = use_type; | ||
872 | + *last_stmt = use_stmt; | ||
873 | + } | ||
874 | |||
875 | if (!types_compatible_p (half_type0, half_type1)) | ||
876 | return NULL; | ||
877 | @@ -413,7 +506,7 @@ | ||
878 | vectype_out = get_vectype_for_scalar_type (type); | ||
879 | if (!vectype | ||
880 | || !vectype_out | ||
881 | - || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, | ||
882 | + || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, | ||
883 | vectype_out, vectype, | ||
884 | &dummy, &dummy, &dummy_code, | ||
885 | &dummy_code, &dummy_int, &dummy_vec)) | ||
886 | @@ -462,16 +555,16 @@ | ||
887 | */ | ||
888 | |||
889 | static gimple | ||
890 | -vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) | ||
891 | +vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
892 | { | ||
893 | tree fn, base, exp = NULL; | ||
894 | gimple stmt; | ||
895 | tree var; | ||
896 | |||
897 | - if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) | ||
898 | + if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) | ||
899 | return NULL; | ||
900 | |||
901 | - fn = gimple_call_fndecl (last_stmt); | ||
902 | + fn = gimple_call_fndecl (*last_stmt); | ||
903 | if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) | ||
904 | return NULL; | ||
905 | |||
906 | @@ -481,8 +574,8 @@ | ||
907 | case BUILT_IN_POWI: | ||
908 | case BUILT_IN_POWF: | ||
909 | case BUILT_IN_POW: | ||
910 | - base = gimple_call_arg (last_stmt, 0); | ||
911 | - exp = gimple_call_arg (last_stmt, 1); | ||
912 | + base = gimple_call_arg (*last_stmt, 0); | ||
913 | + exp = gimple_call_arg (*last_stmt, 1); | ||
914 | if (TREE_CODE (exp) != REAL_CST | ||
915 | && TREE_CODE (exp) != INTEGER_CST) | ||
916 | return NULL; | ||
917 | @@ -574,21 +667,21 @@ | ||
918 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
919 | |||
920 | static gimple | ||
921 | -vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) | ||
922 | +vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
923 | { | ||
924 | gimple stmt; | ||
925 | tree oprnd0, oprnd1; | ||
926 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
927 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
928 | tree type, half_type; | ||
929 | gimple pattern_stmt; | ||
930 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
931 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
932 | tree var; | ||
933 | |||
934 | - if (!is_gimple_assign (last_stmt)) | ||
935 | + if (!is_gimple_assign (*last_stmt)) | ||
936 | return NULL; | ||
937 | |||
938 | - type = gimple_expr_type (last_stmt); | ||
939 | + type = gimple_expr_type (*last_stmt); | ||
940 | |||
941 | /* Look for the following pattern | ||
942 | DX = (TYPE) X; | ||
943 | @@ -600,25 +693,25 @@ | ||
944 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
945 | of the above pattern. */ | ||
946 | |||
947 | - if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
948 | + if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
949 | return NULL; | ||
950 | |||
951 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
952 | return NULL; | ||
953 | |||
954 | - oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
955 | - oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
956 | + oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
957 | + oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
958 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
959 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
960 | return NULL; | ||
961 | |||
962 | - /* So far so good. Since last_stmt was detected as a (summation) reduction, | ||
963 | + /* So far so good. Since *last_stmt was detected as a (summation) reduction, | ||
964 | we know that oprnd1 is the reduction variable (defined by a loop-header | ||
965 | phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | ||
966 | Left to check that oprnd0 is defined by a cast from type 'type' to type | ||
967 | 'TYPE'. */ | ||
968 | |||
969 | - if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) | ||
970 | + if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) | ||
971 | return NULL; | ||
972 | |||
973 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
974 | @@ -639,7 +732,7 @@ | ||
975 | |||
976 | /* We don't allow changing the order of the computation in the inner-loop | ||
977 | when doing outer-loop vectorization. */ | ||
978 | - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
979 | + gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
980 | |||
981 | return pattern_stmt; | ||
982 | } | ||
983 | @@ -669,23 +762,27 @@ | ||
984 | |||
985 | static void | ||
986 | vect_pattern_recog_1 ( | ||
987 | - gimple (* vect_recog_func) (gimple, tree *, tree *), | ||
988 | + gimple (* vect_recog_func) (gimple *, tree *, tree *), | ||
989 | gimple_stmt_iterator si) | ||
990 | { | ||
991 | gimple stmt = gsi_stmt (si), pattern_stmt; | ||
992 | - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
993 | + stmt_vec_info stmt_info; | ||
994 | stmt_vec_info pattern_stmt_info; | ||
995 | - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
996 | + loop_vec_info loop_vinfo; | ||
997 | tree pattern_vectype; | ||
998 | tree type_in, type_out; | ||
999 | enum tree_code code; | ||
1000 | int i; | ||
1001 | gimple next; | ||
1002 | |||
1003 | - pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); | ||
1004 | + pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); | ||
1005 | if (!pattern_stmt) | ||
1006 | return; | ||
1007 | |||
1008 | + si = gsi_for_stmt (stmt); | ||
1009 | + stmt_info = vinfo_for_stmt (stmt); | ||
1010 | + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
1011 | + | ||
1012 | if (VECTOR_MODE_P (TYPE_MODE (type_in))) | ||
1013 | { | ||
1014 | /* No need to check target support (already checked by the pattern | ||
1015 | @@ -736,9 +833,9 @@ | ||
1016 | } | ||
1017 | |||
1018 | /* Mark the stmts that are involved in the pattern. */ | ||
1019 | - gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT); | ||
1020 | set_vinfo_for_stmt (pattern_stmt, | ||
1021 | new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
1022 | + gimple_set_bb (pattern_stmt, gimple_bb (stmt)); | ||
1023 | pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1024 | |||
1025 | STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | ||
1026 | @@ -761,8 +858,8 @@ | ||
1027 | LOOP_VINFO - a struct_loop_info of a loop in which we want to look for | ||
1028 | computation idioms. | ||
1029 | |||
1030 | - Output - for each computation idiom that is detected we insert a new stmt | ||
1031 | - that provides the same functionality and that can be vectorized. We | ||
1032 | + Output - for each computation idiom that is detected we create a new stmt | ||
1033 | + that provides the same functionality and that can be vectorized. We | ||
1034 | also record some information in the struct_stmt_info of the relevant | ||
1035 | stmts, as explained below: | ||
1036 | |||
1037 | @@ -777,52 +874,48 @@ | ||
1038 | S5: ... = ..use(a_0).. - - - | ||
1039 | |||
1040 | Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be | ||
1041 | - represented by a single stmt. We then: | ||
1042 | - - create a new stmt S6 that will replace the pattern. | ||
1043 | - - insert the new stmt S6 before the last stmt in the pattern | ||
1044 | + represented by a single stmt. We then: | ||
1045 | + - create a new stmt S6 equivalent to the pattern (the stmt is not | ||
1046 | + inserted into the code) | ||
1047 | - fill in the STMT_VINFO fields as follows: | ||
1048 | |||
1049 | in_pattern_p related_stmt vec_stmt | ||
1050 | S1: a_i = .... - - - | ||
1051 | S2: a_2 = ..use(a_i).. - - - | ||
1052 | S3: a_1 = ..use(a_2).. - - - | ||
1053 | - > S6: a_new = .... - S4 - | ||
1054 | S4: a_0 = ..use(a_1).. true S6 - | ||
1055 | + '---> S6: a_new = .... - S4 - | ||
1056 | S5: ... = ..use(a_0).. - - - | ||
1057 | |||
1058 | (the last stmt in the pattern (S4) and the new pattern stmt (S6) point | ||
1059 | - to each other through the RELATED_STMT field). | ||
1060 | + to each other through the RELATED_STMT field). | ||
1061 | |||
1062 | S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead | ||
1063 | of S4 because it will replace all its uses. Stmts {S1,S2,S3} will | ||
1064 | remain irrelevant unless used by stmts other than S4. | ||
1065 | |||
1066 | If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} | ||
1067 | - (because they are marked as irrelevant). It will vectorize S6, and record | ||
1068 | + (because they are marked as irrelevant). It will vectorize S6, and record | ||
1069 | a pointer to the new vector stmt VS6 both from S6 (as usual), and also | ||
1070 | - from S4. We do that so that when we get to vectorizing stmts that use the | ||
1071 | + from S4. We do that so that when we get to vectorizing stmts that use the | ||
1072 | def of S4 (like S5 that uses a_0), we'll know where to take the relevant | ||
1073 | - vector-def from. S4 will be skipped, and S5 will be vectorized as usual: | ||
1074 | + vector-def from. S4 will be skipped, and S5 will be vectorized as usual: | ||
1075 | |||
1076 | in_pattern_p related_stmt vec_stmt | ||
1077 | S1: a_i = .... - - - | ||
1078 | S2: a_2 = ..use(a_i).. - - - | ||
1079 | S3: a_1 = ..use(a_2).. - - - | ||
1080 | > VS6: va_new = .... - - - | ||
1081 | - S6: a_new = .... - S4 VS6 | ||
1082 | S4: a_0 = ..use(a_1).. true S6 VS6 | ||
1083 | + '---> S6: a_new = .... - S4 VS6 | ||
1084 | > VS5: ... = ..vuse(va_new).. - - - | ||
1085 | S5: ... = ..use(a_0).. - - - | ||
1086 | |||
1087 | - DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used | ||
1088 | + DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used | ||
1089 | elsewhere), and we'll end up with: | ||
1090 | |||
1091 | VS6: va_new = .... | ||
1092 | - VS5: ... = ..vuse(va_new).. | ||
1093 | - | ||
1094 | - If vectorization does not succeed, DCE will clean S6 away (its def is | ||
1095 | - not used), and we'll end up with the original sequence. | ||
1096 | -*/ | ||
1097 | + VS5: ... = ..vuse(va_new).. */ | ||
1098 | |||
1099 | void | ||
1100 | vect_pattern_recog (loop_vec_info loop_vinfo) | ||
1101 | @@ -832,7 +925,7 @@ | ||
1102 | unsigned int nbbs = loop->num_nodes; | ||
1103 | gimple_stmt_iterator si; | ||
1104 | unsigned int i, j; | ||
1105 | - gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); | ||
1106 | + gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
1107 | |||
1108 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1109 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | ||
1110 | |||
1111 | === modified file 'gcc/tree-vect-slp.c' | ||
1112 | --- old/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000 | ||
1113 | +++ new/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000 | ||
1114 | @@ -2510,6 +2510,8 @@ | ||
1115 | && STMT_VINFO_STRIDED_ACCESS (stmt_info) | ||
1116 | && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) | ||
1117 | si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); | ||
1118 | + else if (is_pattern_stmt_p (stmt_info)) | ||
1119 | + si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
1120 | else | ||
1121 | si = gsi_for_stmt (stmt); | ||
1122 | |||
1123 | |||
1124 | === modified file 'gcc/tree-vect-stmts.c' | ||
1125 | --- old/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000 | ||
1126 | +++ new/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000 | ||
1127 | @@ -605,15 +605,76 @@ | ||
1128 | break; | ||
1129 | } | ||
1130 | |||
1131 | - FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) | ||
1132 | - { | ||
1133 | - tree op = USE_FROM_PTR (use_p); | ||
1134 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) | ||
1135 | - { | ||
1136 | - VEC_free (gimple, heap, worklist); | ||
1137 | - return false; | ||
1138 | - } | ||
1139 | - } | ||
1140 | + if (is_pattern_stmt_p (vinfo_for_stmt (stmt))) | ||
1141 | + { | ||
1142 | + /* Pattern statements are not inserted into the code, so | ||
1143 | + FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we | ||
1144 | + have to scan the RHS or function arguments instead. */ | ||
1145 | + if (is_gimple_assign (stmt)) | ||
1146 | + { | ||
1147 | + tree rhs = gimple_assign_rhs1 (stmt); | ||
1148 | + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1149 | + == GIMPLE_SINGLE_RHS) | ||
1150 | + { | ||
1151 | + unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 | ||
1152 | + (stmt)); | ||
1153 | + for (i = 0; i < op_num; i++) | ||
1154 | + { | ||
1155 | + tree op = TREE_OPERAND (rhs, i); | ||
1156 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1157 | + &worklist)) | ||
1158 | + { | ||
1159 | + VEC_free (gimple, heap, worklist); | ||
1160 | + return false; | ||
1161 | + } | ||
1162 | + } | ||
1163 | + } | ||
1164 | + else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1165 | + == GIMPLE_BINARY_RHS) | ||
1166 | + { | ||
1167 | + tree op = gimple_assign_rhs1 (stmt); | ||
1168 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1169 | + &worklist)) | ||
1170 | + { | ||
1171 | + VEC_free (gimple, heap, worklist); | ||
1172 | + return false; | ||
1173 | + } | ||
1174 | + op = gimple_assign_rhs2 (stmt); | ||
1175 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1176 | + &worklist)) | ||
1177 | + { | ||
1178 | + VEC_free (gimple, heap, worklist); | ||
1179 | + return false; | ||
1180 | + } | ||
1181 | + } | ||
1182 | + else | ||
1183 | + return false; | ||
1184 | + } | ||
1185 | + else if (is_gimple_call (stmt)) | ||
1186 | + { | ||
1187 | + for (i = 0; i < gimple_call_num_args (stmt); i++) | ||
1188 | + { | ||
1189 | + tree arg = gimple_call_arg (stmt, i); | ||
1190 | + if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, | ||
1191 | + &worklist)) | ||
1192 | + { | ||
1193 | + VEC_free (gimple, heap, worklist); | ||
1194 | + return false; | ||
1195 | + } | ||
1196 | + } | ||
1197 | + } | ||
1198 | + } | ||
1199 | + else | ||
1200 | + FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) | ||
1201 | + { | ||
1202 | + tree op = USE_FROM_PTR (use_p); | ||
1203 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1204 | + &worklist)) | ||
1205 | + { | ||
1206 | + VEC_free (gimple, heap, worklist); | ||
1207 | + return false; | ||
1208 | + } | ||
1209 | + } | ||
1210 | } /* while worklist */ | ||
1211 | |||
1212 | VEC_free (gimple, heap, worklist); | ||
1213 | @@ -1405,6 +1466,7 @@ | ||
1214 | VEC(tree, heap) *vargs = NULL; | ||
1215 | enum { NARROW, NONE, WIDEN } modifier; | ||
1216 | size_t i, nargs; | ||
1217 | + tree lhs; | ||
1218 | |||
1219 | /* FORNOW: unsupported in basic block SLP. */ | ||
1220 | gcc_assert (loop_vinfo); | ||
1221 | @@ -1542,7 +1604,7 @@ | ||
1222 | /** Transform. **/ | ||
1223 | |||
1224 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1225 | - fprintf (vect_dump, "transform operation."); | ||
1226 | + fprintf (vect_dump, "transform call."); | ||
1227 | |||
1228 | /* Handle def. */ | ||
1229 | scalar_dest = gimple_call_lhs (stmt); | ||
1230 | @@ -1661,8 +1723,11 @@ | ||
1231 | rhs of the statement with something harmless. */ | ||
1232 | |||
1233 | type = TREE_TYPE (scalar_dest); | ||
1234 | - new_stmt = gimple_build_assign (gimple_call_lhs (stmt), | ||
1235 | - build_zero_cst (type)); | ||
1236 | + if (is_pattern_stmt_p (stmt_info)) | ||
1237 | + lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
1238 | + else | ||
1239 | + lhs = gimple_call_lhs (stmt); | ||
1240 | + new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); | ||
1241 | set_vinfo_for_stmt (new_stmt, stmt_info); | ||
1242 | set_vinfo_for_stmt (stmt, NULL); | ||
1243 | STMT_VINFO_STMT (stmt_info) = new_stmt; | ||
1244 | @@ -3231,6 +3296,33 @@ | ||
1245 | fprintf (vect_dump, "use not simple."); | ||
1246 | return false; | ||
1247 | } | ||
1248 | + | ||
1249 | + op_type = TREE_CODE_LENGTH (code); | ||
1250 | + if (op_type == binary_op) | ||
1251 | + { | ||
1252 | + bool ok; | ||
1253 | + | ||
1254 | + op1 = gimple_assign_rhs2 (stmt); | ||
1255 | + if (code == WIDEN_MULT_EXPR) | ||
1256 | + { | ||
1257 | + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of | ||
1258 | + OP1. */ | ||
1259 | + if (CONSTANT_CLASS_P (op0)) | ||
1260 | + ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, | ||
1261 | + &def_stmt, &def, &dt[1], &vectype_in); | ||
1262 | + else | ||
1263 | + ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, | ||
1264 | + &dt[1]); | ||
1265 | + | ||
1266 | + if (!ok) | ||
1267 | + { | ||
1268 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1269 | + fprintf (vect_dump, "use not simple."); | ||
1270 | + return false; | ||
1271 | + } | ||
1272 | + } | ||
1273 | + } | ||
1274 | + | ||
1275 | /* If op0 is an external or constant def use a vector type with | ||
1276 | the same size as the output vector type. */ | ||
1277 | if (!vectype_in) | ||
1278 | @@ -3263,18 +3355,6 @@ | ||
1279 | |||
1280 | gcc_assert (ncopies >= 1); | ||
1281 | |||
1282 | - op_type = TREE_CODE_LENGTH (code); | ||
1283 | - if (op_type == binary_op) | ||
1284 | - { | ||
1285 | - op1 = gimple_assign_rhs2 (stmt); | ||
1286 | - if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) | ||
1287 | - { | ||
1288 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
1289 | - fprintf (vect_dump, "use not simple."); | ||
1290 | - return false; | ||
1291 | - } | ||
1292 | - } | ||
1293 | - | ||
1294 | /* Supportable by target? */ | ||
1295 | if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, | ||
1296 | &decl1, &decl2, &code1, &code2, | ||
1297 | @@ -3300,6 +3380,14 @@ | ||
1298 | fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", | ||
1299 | ncopies); | ||
1300 | |||
1301 | + if (code == WIDEN_MULT_EXPR) | ||
1302 | + { | ||
1303 | + if (CONSTANT_CLASS_P (op0)) | ||
1304 | + op0 = fold_convert (TREE_TYPE (op1), op0); | ||
1305 | + else if (CONSTANT_CLASS_P (op1)) | ||
1306 | + op1 = fold_convert (TREE_TYPE (op0), op1); | ||
1307 | + } | ||
1308 | + | ||
1309 | /* Handle def. */ | ||
1310 | /* In case of multi-step promotion, we first generate promotion operations | ||
1311 | to the intermediate types, and then from that types to the final one. | ||
1312 | @@ -4824,10 +4912,26 @@ | ||
1313 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | ||
1314 | && !STMT_VINFO_LIVE_P (stmt_info)) | ||
1315 | { | ||
1316 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
1317 | - fprintf (vect_dump, "irrelevant."); | ||
1318 | + gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1319 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1320 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
1321 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
1322 | + { | ||
1323 | + stmt = pattern_stmt; | ||
1324 | + stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1325 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1326 | + { | ||
1327 | + fprintf (vect_dump, "==> examining pattern statement: "); | ||
1328 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
1329 | + } | ||
1330 | + } | ||
1331 | + else | ||
1332 | + { | ||
1333 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1334 | + fprintf (vect_dump, "irrelevant."); | ||
1335 | |||
1336 | - return true; | ||
1337 | + return true; | ||
1338 | + } | ||
1339 | } | ||
1340 | |||
1341 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) | ||
1342 | |||
1343 | === modified file 'gcc/tree-vectorizer.h' | ||
1344 | --- old/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000 | ||
1345 | +++ new/gcc/tree-vectorizer.h 2011-06-19 10:59:13 +0000 | ||
1346 | @@ -884,7 +884,7 @@ | ||
1347 | /* Pattern recognition functions. | ||
1348 | Additional pattern recognition functions can (and will) be added | ||
1349 | in the future. */ | ||
1350 | -typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); | ||
1351 | +typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
1352 | #define NUM_PATTERNS 4 | ||
1353 | void vect_pattern_recog (loop_vec_info); | ||
1354 | |||
1355 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch new file mode 100644 index 000000000..8d2ce2176 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106763.patch | |||
@@ -0,0 +1,96 @@ | |||
1 | 2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
2 | |||
3 | Backport from mainline. | ||
4 | LP 791327 | ||
5 | gcc/ | ||
6 | 2011-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
7 | |||
8 | PR target/49335 | ||
9 | * config/arm/predicates.md (add_operator): New. | ||
10 | * config/arm/arm.md ("*arith_shiftsi"): Fix for SP reg usage | ||
11 | in Thumb2. | ||
12 | |||
13 | 2011-06-28 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
14 | |||
15 | Backport from mainline. | ||
16 | gcc/ | ||
17 | 2011-06-24 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
18 | |||
19 | PR target/49385 | ||
20 | * config/arm/thumb2.md (*thumb2_movhi_insn): Make sure at least | ||
21 | one of the operands is a register. | ||
22 | |||
23 | === modified file 'gcc/config/arm/arm.md' | ||
24 | --- old/gcc/config/arm/arm.md 2011-06-27 22:14:07 +0000 | ||
25 | +++ new/gcc/config/arm/arm.md 2011-06-28 12:02:27 +0000 | ||
26 | @@ -8584,18 +8584,22 @@ | ||
27 | ;; Patterns to allow combination of arithmetic, cond code and shifts | ||
28 | |||
29 | (define_insn "*arith_shiftsi" | ||
30 | - [(set (match_operand:SI 0 "s_register_operand" "=r,r") | ||
31 | + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") | ||
32 | (match_operator:SI 1 "shiftable_operator" | ||
33 | [(match_operator:SI 3 "shift_operator" | ||
34 | - [(match_operand:SI 4 "s_register_operand" "r,r") | ||
35 | - (match_operand:SI 5 "shift_amount_operand" "M,r")]) | ||
36 | - (match_operand:SI 2 "s_register_operand" "rk,rk")]))] | ||
37 | + [(match_operand:SI 4 "s_register_operand" "r,r,r,r") | ||
38 | + (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) | ||
39 | + (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] | ||
40 | "TARGET_32BIT" | ||
41 | "%i1%?\\t%0, %2, %4%S3" | ||
42 | [(set_attr "predicable" "yes") | ||
43 | (set_attr "shift" "4") | ||
44 | - (set_attr "arch" "32,a") | ||
45 | - ;; We have to make sure to disable the second alternative if | ||
46 | + (set_attr "arch" "a,t2,t2,a") | ||
47 | + ;; Thumb2 doesn't allow the stack pointer to be used for | ||
48 | + ;; operand1 for all operations other than add and sub. In this case | ||
49 | + ;; the minus operation is a candidate for an rsub and hence needs | ||
50 | + ;; to be disabled. | ||
51 | + ;; We have to make sure to disable the fourth alternative if | ||
52 | ;; the shift_operator is MULT, since otherwise the insn will | ||
53 | ;; also match a multiply_accumulate pattern and validate_change | ||
54 | ;; will allow a replacement of the constant with a register | ||
55 | @@ -8603,9 +8607,13 @@ | ||
56 | (set_attr_alternative "insn_enabled" | ||
57 | [(const_string "yes") | ||
58 | (if_then_else | ||
59 | + (match_operand:SI 1 "add_operator" "") | ||
60 | + (const_string "yes") (const_string "no")) | ||
61 | + (const_string "yes") | ||
62 | + (if_then_else | ||
63 | (match_operand:SI 3 "mult_operator" "") | ||
64 | (const_string "no") (const_string "yes"))]) | ||
65 | - (set_attr "type" "alu_shift,alu_shift_reg")]) | ||
66 | + (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")]) | ||
67 | |||
68 | (define_split | ||
69 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
70 | |||
71 | === modified file 'gcc/config/arm/predicates.md' | ||
72 | --- old/gcc/config/arm/predicates.md 2011-05-03 15:14:56 +0000 | ||
73 | +++ new/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000 | ||
74 | @@ -687,3 +687,6 @@ | ||
75 | (define_special_predicate "neon_struct_operand" | ||
76 | (and (match_code "mem") | ||
77 | (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) | ||
78 | + | ||
79 | +(define_special_predicate "add_operator" | ||
80 | + (match_code "plus")) | ||
81 | |||
82 | === modified file 'gcc/config/arm/thumb2.md' | ||
83 | --- old/gcc/config/arm/thumb2.md 2011-06-14 14:37:30 +0000 | ||
84 | +++ new/gcc/config/arm/thumb2.md 2011-06-20 12:18:27 +0000 | ||
85 | @@ -207,7 +207,9 @@ | ||
86 | (define_insn "*thumb2_movhi_insn" | ||
87 | [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") | ||
88 | (match_operand:HI 1 "general_operand" "rI,n,r,m"))] | ||
89 | - "TARGET_THUMB2" | ||
90 | + "TARGET_THUMB2 | ||
91 | + && (register_operand (operands[0], HImode) | ||
92 | + || register_operand (operands[1], HImode))" | ||
93 | "@ | ||
94 | mov%?\\t%0, %1\\t%@ movhi | ||
95 | movw%?\\t%0, %L1\\t%@ movhi | ||
96 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch new file mode 100644 index 000000000..a548b1b68 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106764.patch | |||
@@ -0,0 +1,25 @@ | |||
1 | 2011-06-30 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
2 | |||
3 | Backport from mainline. | ||
4 | LP 744754 | ||
5 | 2011-04-17 Chung-Lin Tang <cltang@codesourcery.com> | ||
6 | |||
7 | * config/arm/arm.c (neon_struct_mem_operand): | ||
8 | Support POST_INC/PRE_DEC memory operands. | ||
9 | |||
10 | === modified file 'gcc/config/arm/arm.c' | ||
11 | --- old/gcc/config/arm/arm.c 2011-06-27 22:14:07 +0000 | ||
12 | +++ new/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 | ||
13 | @@ -9357,6 +9357,11 @@ | ||
14 | if (GET_CODE (ind) == REG) | ||
15 | return arm_address_register_rtx_p (ind, 0); | ||
16 | |||
17 | + /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ | ||
18 | + if (GET_CODE (ind) == POST_INC | ||
19 | + || GET_CODE (ind) == PRE_DEC) | ||
20 | + return arm_address_register_rtx_p (XEXP (ind, 0), 0); | ||
21 | + | ||
22 | return FALSE; | ||
23 | } | ||
24 | |||
25 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch new file mode 100644 index 000000000..15046a766 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106766.patch | |||
@@ -0,0 +1,25 @@ | |||
1 | 2011-07-03 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from FSF: | ||
4 | 2011-06-12 Ira Rosen <ira.rosen@linaro.org> | ||
5 | |||
6 | gcc/ | ||
7 | * tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent): | ||
8 | Take number of iterations to peel into account for equally frequent | ||
9 | misalignment values. | ||
10 | |||
11 | === modified file 'gcc/tree-vect-data-refs.c' | ||
12 | --- old/gcc/tree-vect-data-refs.c 2011-06-02 12:12:00 +0000 | ||
13 | +++ new/gcc/tree-vect-data-refs.c 2011-06-29 11:20:24 +0000 | ||
14 | @@ -1256,7 +1256,9 @@ | ||
15 | vect_peel_info elem = (vect_peel_info) *slot; | ||
16 | vect_peel_extended_info max = (vect_peel_extended_info) data; | ||
17 | |||
18 | - if (elem->count > max->peel_info.count) | ||
19 | + if (elem->count > max->peel_info.count | ||
20 | + || (elem->count == max->peel_info.count | ||
21 | + && max->peel_info.npeel > elem->npeel)) | ||
22 | { | ||
23 | max->peel_info.npeel = elem->npeel; | ||
24 | max->peel_info.count = elem->count; | ||
25 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch new file mode 100644 index 000000000..f1f7718eb --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106768.patch | |||
@@ -0,0 +1,182 @@ | |||
1 | 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | * builtins.c (get_object_alignment): Fix comment. | ||
5 | * fold-const.c (get_pointer_modulus_and_residue): Remove | ||
6 | allow_func_align. Use get_object_alignment. | ||
7 | (fold_binary_loc): Update caller. | ||
8 | |||
9 | 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
10 | |||
11 | gcc/ | ||
12 | Backport from mainline: | ||
13 | |||
14 | 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> | ||
15 | |||
16 | PR tree-optimization/49545 | ||
17 | * builtins.c (get_object_alignment_1): Update function comment. | ||
18 | Do not use DECL_ALIGN for functions, but test | ||
19 | TARGET_PTRMEMFUNC_VBIT_LOCATION instead. | ||
20 | * fold-const.c (get_pointer_modulus_and_residue): Don't check | ||
21 | for functions here. | ||
22 | * tree-ssa-ccp.c (get_value_from_alignment): Likewise. | ||
23 | |||
24 | gcc/testsuite/ | ||
25 | Backport from mainline: | ||
26 | |||
27 | 2011-06-29 Richard Sandiford <richard.sandiford@linaro.org> | ||
28 | |||
29 | * gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets. | ||
30 | |||
31 | 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
32 | |||
33 | gcc/ | ||
34 | Backport from mainline: | ||
35 | |||
36 | 2011-07-27 Richard Guenther <rguenther@suse.de> | ||
37 | |||
38 | PR tree-optimization/49169 | ||
39 | * fold-const.c (get_pointer_modulus_and_residue): Don't rely on | ||
40 | the alignment of function decls. | ||
41 | |||
42 | gcc/testsuite/ | ||
43 | Backport from mainline: | ||
44 | |||
45 | 2011-07-27 Michael Hope <michael.hope@linaro.org> | ||
46 | Richard Sandiford <richard.sandiford@linaro.org> | ||
47 | |||
48 | PR tree-optimization/49169 | ||
49 | * gcc.dg/torture/pr49169.c: New test. | ||
50 | |||
51 | === modified file 'gcc/builtins.c' | ||
52 | --- old/gcc/builtins.c 2011-03-03 21:56:58 +0000 | ||
53 | +++ new/gcc/builtins.c 2011-07-04 09:52:27 +0000 | ||
54 | @@ -264,7 +264,14 @@ | ||
55 | } | ||
56 | |||
57 | /* Return the alignment in bits of EXP, an object. | ||
58 | - Don't return more than MAX_ALIGN no matter what. */ | ||
59 | + Don't return more than MAX_ALIGN no matter what. | ||
60 | + | ||
61 | + Note that the address (and thus the alignment) computed here is based | ||
62 | + on the address to which a symbol resolves, whereas DECL_ALIGN is based | ||
63 | + on the address at which an object is actually located. These two | ||
64 | + addresses are not always the same. For example, on ARM targets, | ||
65 | + the address &foo of a Thumb function foo() has the lowest bit set, | ||
66 | + whereas foo() itself starts on an even address. */ | ||
67 | |||
68 | unsigned int | ||
69 | get_object_alignment (tree exp, unsigned int max_align) | ||
70 | @@ -286,7 +293,21 @@ | ||
71 | exp = DECL_INITIAL (exp); | ||
72 | if (DECL_P (exp) | ||
73 | && TREE_CODE (exp) != LABEL_DECL) | ||
74 | - align = DECL_ALIGN (exp); | ||
75 | + { | ||
76 | + if (TREE_CODE (exp) == FUNCTION_DECL) | ||
77 | + { | ||
78 | + /* Function addresses can encode extra information besides their | ||
79 | + alignment. However, if TARGET_PTRMEMFUNC_VBIT_LOCATION | ||
80 | + allows the low bit to be used as a virtual bit, we know | ||
81 | + that the address itself must be 2-byte aligned. */ | ||
82 | + if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) | ||
83 | + align = 2 * BITS_PER_UNIT; | ||
84 | + else | ||
85 | + align = BITS_PER_UNIT; | ||
86 | + } | ||
87 | + else | ||
88 | + align = DECL_ALIGN (exp); | ||
89 | + } | ||
90 | else if (CONSTANT_CLASS_P (exp)) | ||
91 | { | ||
92 | align = TYPE_ALIGN (TREE_TYPE (exp)); | ||
93 | |||
94 | === modified file 'gcc/fold-const.c' | ||
95 | --- old/gcc/fold-const.c 2011-05-23 20:37:18 +0000 | ||
96 | +++ new/gcc/fold-const.c 2011-07-04 09:52:27 +0000 | ||
97 | @@ -9232,15 +9232,10 @@ | ||
98 | 0 <= N < M as is common. In general, the precise value of P is unknown. | ||
99 | M is chosen as large as possible such that constant N can be determined. | ||
100 | |||
101 | - Returns M and sets *RESIDUE to N. | ||
102 | - | ||
103 | - If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into | ||
104 | - account. This is not always possible due to PR 35705. | ||
105 | - */ | ||
106 | + Returns M and sets *RESIDUE to N. */ | ||
107 | |||
108 | static unsigned HOST_WIDE_INT | ||
109 | -get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue, | ||
110 | - bool allow_func_align) | ||
111 | +get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue) | ||
112 | { | ||
113 | enum tree_code code; | ||
114 | |||
115 | @@ -9270,9 +9265,8 @@ | ||
116 | } | ||
117 | } | ||
118 | |||
119 | - if (DECL_P (expr) | ||
120 | - && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL)) | ||
121 | - return DECL_ALIGN_UNIT (expr); | ||
122 | + if (DECL_P (expr)) | ||
123 | + return get_object_alignment (expr, ~0U) / BITS_PER_UNIT; | ||
124 | } | ||
125 | else if (code == POINTER_PLUS_EXPR) | ||
126 | { | ||
127 | @@ -9282,8 +9276,7 @@ | ||
128 | |||
129 | op0 = TREE_OPERAND (expr, 0); | ||
130 | STRIP_NOPS (op0); | ||
131 | - modulus = get_pointer_modulus_and_residue (op0, residue, | ||
132 | - allow_func_align); | ||
133 | + modulus = get_pointer_modulus_and_residue (op0, residue); | ||
134 | |||
135 | op1 = TREE_OPERAND (expr, 1); | ||
136 | STRIP_NOPS (op1); | ||
137 | @@ -11163,8 +11156,7 @@ | ||
138 | unsigned HOST_WIDE_INT modulus, residue; | ||
139 | unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1); | ||
140 | |||
141 | - modulus = get_pointer_modulus_and_residue (arg0, &residue, | ||
142 | - integer_onep (arg1)); | ||
143 | + modulus = get_pointer_modulus_and_residue (arg0, &residue); | ||
144 | |||
145 | /* This works because modulus is a power of 2. If this weren't the | ||
146 | case, we'd have to replace it by its greatest power-of-2 | ||
147 | |||
148 | === added file 'gcc/testsuite/gcc.dg/torture/pr49169.c' | ||
149 | --- old/gcc/testsuite/gcc.dg/torture/pr49169.c 1970-01-01 00:00:00 +0000 | ||
150 | +++ new/gcc/testsuite/gcc.dg/torture/pr49169.c 2011-06-29 09:46:06 +0000 | ||
151 | @@ -0,0 +1,15 @@ | ||
152 | +/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */ | ||
153 | + | ||
154 | +#include <stdlib.h> | ||
155 | +#include <stdint.h> | ||
156 | + | ||
157 | +int | ||
158 | +main (void) | ||
159 | +{ | ||
160 | + void *p = main; | ||
161 | + if ((intptr_t) p & 1) | ||
162 | + abort (); | ||
163 | + return 0; | ||
164 | +} | ||
165 | + | ||
166 | +/* { dg-final { scan-assembler "abort" } } */ | ||
167 | |||
168 | === modified file 'gcc/tree-ssa-ccp.c' | ||
169 | --- old/gcc/tree-ssa-ccp.c 2011-05-05 15:42:22 +0000 | ||
170 | +++ new/gcc/tree-ssa-ccp.c 2011-06-29 09:46:06 +0000 | ||
171 | @@ -522,10 +522,6 @@ | ||
172 | val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr), | ||
173 | TREE_OPERAND (base, 0), TREE_OPERAND (base, 1)); | ||
174 | else if (base | ||
175 | - /* ??? While function decls have DECL_ALIGN their addresses | ||
176 | - may encode extra information in the lower bits on some | ||
177 | - targets (PR47239). Simply punt for function decls for now. */ | ||
178 | - && TREE_CODE (base) != FUNCTION_DECL | ||
179 | && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT)) | ||
180 | > BITS_PER_UNIT)) | ||
181 | { | ||
182 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch new file mode 100644 index 000000000..37e3036b2 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch | |||
@@ -0,0 +1,1294 @@ | |||
1 | 2011-07-11 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from FSF: | ||
4 | 2011-06-16 Ira Rosen <ira.rosen@linaro.org> | ||
5 | |||
6 | gcc/ | ||
7 | * tree-vectorizer.h (vect_recog_func_ptr): Change the first | ||
8 | argument to be a VEC of statements. | ||
9 | * tree-vect-loop.c (vect_determine_vectorization_factor): Remove the | ||
10 | assert that pattern statements have to have their vector type set. | ||
11 | * tree-vect-patterns.c (vect_recog_widen_sum_pattern): | ||
12 | Change the first argument to be a VEC of statements. Update | ||
13 | documentation. | ||
14 | (vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. | ||
15 | (vect_handle_widen_mult_by_const): New function. | ||
16 | (vect_recog_widen_mult_pattern): Change the first argument to be a | ||
17 | VEC of statements. Update documentation. Check that the constant is | ||
18 | INTEGER_CST. Support multiplication by a constant that fits an | ||
19 | intermediate type - call vect_handle_widen_mult_by_const. | ||
20 | (vect_pattern_recog_1): Update vect_recog_func_ptr and its | ||
21 | call. Handle additional pattern statements if necessary. | ||
22 | |||
23 | gcc/testsuite/ | ||
24 | * gcc.dg/vect/vect-widen-mult-half-u8.c: New test. | ||
25 | |||
26 | and | ||
27 | 2011-06-30 Ira Rosen <ira.rosen@linaro.org> | ||
28 | |||
29 | gcc/ | ||
30 | * tree-vect-loop.c (vect_determine_vectorization_factor): Handle | ||
31 | both pattern and original statements if necessary. | ||
32 | (vect_transform_loop): Likewise. | ||
33 | * tree-vect-patterns.c (vect_pattern_recog): Update documentation. | ||
34 | * tree-vect-stmts.c (vect_mark_relevant): Add new argument. | ||
35 | Mark the pattern statement only if the original statement doesn't | ||
36 | have its own uses. | ||
37 | (process_use): Call vect_mark_relevant with additional parameter. | ||
38 | (vect_mark_stmts_to_be_vectorized): Likewise. | ||
39 | (vect_get_vec_def_for_operand): Use vectorized pattern statement. | ||
40 | (vect_analyze_stmt): Handle both pattern and original statements | ||
41 | if necessary. | ||
42 | (vect_transform_stmt): Don't store vectorized pattern statement | ||
43 | in the original statement. | ||
44 | (vect_is_simple_use_1): Use related pattern statement only if the | ||
45 | original statement is irrelevant. | ||
46 | * tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise. | ||
47 | |||
48 | gcc/testsuite/ | ||
49 | * gcc.dg/vect/slp-widen-mult-half.c: New test. | ||
50 | * gcc.dg/vect/vect-widen-mult-half.c: New test. | ||
51 | |||
52 | === added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c' | ||
53 | --- old/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 1970-01-01 00:00:00 +0000 | ||
54 | +++ new/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2011-07-06 12:04:10 +0000 | ||
55 | @@ -0,0 +1,52 @@ | ||
56 | +/* { dg-require-effective-target vect_int } */ | ||
57 | + | ||
58 | +#include "tree-vect.h" | ||
59 | +#include <stdlib.h> | ||
60 | + | ||
61 | +#define N 32 | ||
62 | +#define COEF 32470 | ||
63 | +#define COEF2 324700 | ||
64 | + | ||
65 | +unsigned char in[N]; | ||
66 | +int out[N]; | ||
67 | +int out2[N]; | ||
68 | + | ||
69 | +__attribute__ ((noinline)) void | ||
70 | +foo () | ||
71 | +{ | ||
72 | + int i; | ||
73 | + | ||
74 | + for (i = 0; i < N/2; i++) | ||
75 | + { | ||
76 | + out[2*i] = in[2*i] * COEF; | ||
77 | + out2[2*i] = in[2*i] + COEF2; | ||
78 | + out[2*i+1] = in[2*i+1] * COEF; | ||
79 | + out2[2*i+1] = in[2*i+1] + COEF2; | ||
80 | + } | ||
81 | +} | ||
82 | + | ||
83 | +int main (void) | ||
84 | +{ | ||
85 | + int i; | ||
86 | + | ||
87 | + for (i = 0; i < N; i++) | ||
88 | + { | ||
89 | + in[i] = i; | ||
90 | + __asm__ volatile (""); | ||
91 | + } | ||
92 | + | ||
93 | + foo (); | ||
94 | + | ||
95 | + for (i = 0; i < N; i++) | ||
96 | + if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) | ||
97 | + abort (); | ||
98 | + | ||
99 | + return 0; | ||
100 | +} | ||
101 | + | ||
102 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
103 | +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
104 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
105 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
106 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
107 | + | ||
108 | |||
109 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c' | ||
110 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 1970-01-01 00:00:00 +0000 | ||
111 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2011-07-06 12:04:10 +0000 | ||
112 | @@ -0,0 +1,59 @@ | ||
113 | +/* { dg-require-effective-target vect_int } */ | ||
114 | + | ||
115 | +#include "tree-vect.h" | ||
116 | +#include <stdlib.h> | ||
117 | + | ||
118 | +#define N 32 | ||
119 | +#define COEF 32470 | ||
120 | + | ||
121 | +unsigned char in[N]; | ||
122 | +int out[N]; | ||
123 | + | ||
124 | +__attribute__ ((noinline)) void | ||
125 | +foo () | ||
126 | +{ | ||
127 | + int i; | ||
128 | + | ||
129 | + for (i = 0; i < N; i++) | ||
130 | + out[i] = in[i] * COEF; | ||
131 | +} | ||
132 | + | ||
133 | +__attribute__ ((noinline)) void | ||
134 | +bar () | ||
135 | +{ | ||
136 | + int i; | ||
137 | + | ||
138 | + for (i = 0; i < N; i++) | ||
139 | + out[i] = COEF * in[i]; | ||
140 | +} | ||
141 | + | ||
142 | +int main (void) | ||
143 | +{ | ||
144 | + int i; | ||
145 | + | ||
146 | + for (i = 0; i < N; i++) | ||
147 | + { | ||
148 | + in[i] = i; | ||
149 | + __asm__ volatile (""); | ||
150 | + } | ||
151 | + | ||
152 | + foo (); | ||
153 | + | ||
154 | + for (i = 0; i < N; i++) | ||
155 | + if (out[i] != in[i] * COEF) | ||
156 | + abort (); | ||
157 | + | ||
158 | + bar (); | ||
159 | + | ||
160 | + for (i = 0; i < N; i++) | ||
161 | + if (out[i] != in[i] * COEF) | ||
162 | + abort (); | ||
163 | + | ||
164 | + return 0; | ||
165 | +} | ||
166 | + | ||
167 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
168 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
169 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
170 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
171 | + | ||
172 | |||
173 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c' | ||
174 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 1970-01-01 00:00:00 +0000 | ||
175 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2011-07-06 12:04:10 +0000 | ||
176 | @@ -0,0 +1,49 @@ | ||
177 | +/* { dg-require-effective-target vect_int } */ | ||
178 | + | ||
179 | +#include "tree-vect.h" | ||
180 | +#include <stdlib.h> | ||
181 | + | ||
182 | +#define N 32 | ||
183 | +#define COEF 32470 | ||
184 | +#define COEF2 324700 | ||
185 | + | ||
186 | +unsigned char in[N]; | ||
187 | +int out[N]; | ||
188 | +int out2[N]; | ||
189 | + | ||
190 | +__attribute__ ((noinline)) void | ||
191 | +foo (int a) | ||
192 | +{ | ||
193 | + int i; | ||
194 | + | ||
195 | + for (i = 0; i < N; i++) | ||
196 | + { | ||
197 | + out[i] = in[i] * COEF; | ||
198 | + out2[i] = in[i] + a; | ||
199 | + } | ||
200 | +} | ||
201 | + | ||
202 | +int main (void) | ||
203 | +{ | ||
204 | + int i; | ||
205 | + | ||
206 | + for (i = 0; i < N; i++) | ||
207 | + { | ||
208 | + in[i] = i; | ||
209 | + __asm__ volatile (""); | ||
210 | + } | ||
211 | + | ||
212 | + foo (COEF2); | ||
213 | + | ||
214 | + for (i = 0; i < N; i++) | ||
215 | + if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) | ||
216 | + abort (); | ||
217 | + | ||
218 | + return 0; | ||
219 | +} | ||
220 | + | ||
221 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
222 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
223 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
224 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
225 | + | ||
226 | |||
227 | === modified file 'gcc/tree-vect-loop.c' | ||
228 | --- old/gcc/tree-vect-loop.c 2011-07-04 11:13:51 +0000 | ||
229 | +++ new/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000 | ||
230 | @@ -181,6 +181,8 @@ | ||
231 | stmt_vec_info stmt_info; | ||
232 | int i; | ||
233 | HOST_WIDE_INT dummy; | ||
234 | + gimple stmt, pattern_stmt = NULL; | ||
235 | + bool analyze_pattern_stmt = false; | ||
236 | |||
237 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
238 | fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); | ||
239 | @@ -241,12 +243,20 @@ | ||
240 | } | ||
241 | } | ||
242 | |||
243 | - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
244 | + for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) | ||
245 | { | ||
246 | - tree vf_vectype; | ||
247 | - gimple stmt = gsi_stmt (si), pattern_stmt; | ||
248 | - stmt_info = vinfo_for_stmt (stmt); | ||
249 | - | ||
250 | + tree vf_vectype; | ||
251 | + | ||
252 | + if (analyze_pattern_stmt) | ||
253 | + { | ||
254 | + stmt = pattern_stmt; | ||
255 | + analyze_pattern_stmt = false; | ||
256 | + } | ||
257 | + else | ||
258 | + stmt = gsi_stmt (si); | ||
259 | + | ||
260 | + stmt_info = vinfo_for_stmt (stmt); | ||
261 | + | ||
262 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
263 | { | ||
264 | fprintf (vect_dump, "==> examining statement: "); | ||
265 | @@ -276,10 +286,17 @@ | ||
266 | { | ||
267 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
268 | fprintf (vect_dump, "skip."); | ||
269 | + gsi_next (&si); | ||
270 | continue; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
275 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
276 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
277 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
278 | + analyze_pattern_stmt = true; | ||
279 | + | ||
280 | if (gimple_get_lhs (stmt) == NULL_TREE) | ||
281 | { | ||
282 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | ||
283 | @@ -311,9 +328,7 @@ | ||
284 | } | ||
285 | else | ||
286 | { | ||
287 | - gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) | ||
288 | - && !is_pattern_stmt_p (stmt_info)); | ||
289 | - | ||
290 | + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); | ||
291 | scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); | ||
292 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
293 | { | ||
294 | @@ -385,6 +400,9 @@ | ||
295 | if (!vectorization_factor | ||
296 | || (nunits > vectorization_factor)) | ||
297 | vectorization_factor = nunits; | ||
298 | + | ||
299 | + if (!analyze_pattern_stmt) | ||
300 | + gsi_next (&si); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | @@ -4740,6 +4758,8 @@ | ||
305 | tree cond_expr = NULL_TREE; | ||
306 | gimple_seq cond_expr_stmt_list = NULL; | ||
307 | bool do_peeling_for_loop_bound; | ||
308 | + gimple stmt, pattern_stmt; | ||
309 | + bool transform_pattern_stmt = false; | ||
310 | |||
311 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
312 | fprintf (vect_dump, "=== vec_transform_loop ==="); | ||
313 | @@ -4827,11 +4847,19 @@ | ||
314 | } | ||
315 | } | ||
316 | |||
317 | - for (si = gsi_start_bb (bb); !gsi_end_p (si);) | ||
318 | + pattern_stmt = NULL; | ||
319 | + for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) | ||
320 | { | ||
321 | - gimple stmt = gsi_stmt (si), pattern_stmt; | ||
322 | bool is_store; | ||
323 | |||
324 | + if (transform_pattern_stmt) | ||
325 | + { | ||
326 | + stmt = pattern_stmt; | ||
327 | + transform_pattern_stmt = false; | ||
328 | + } | ||
329 | + else | ||
330 | + stmt = gsi_stmt (si); | ||
331 | + | ||
332 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
333 | { | ||
334 | fprintf (vect_dump, "------>vectorizing statement: "); | ||
335 | @@ -4869,6 +4897,11 @@ | ||
336 | continue; | ||
337 | } | ||
338 | } | ||
339 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
340 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
341 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
342 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
343 | + transform_pattern_stmt = true; | ||
344 | |||
345 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); | ||
346 | nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( | ||
347 | @@ -4897,8 +4930,9 @@ | ||
348 | /* Hybrid SLP stmts must be vectorized in addition to SLP. */ | ||
349 | if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) | ||
350 | { | ||
351 | - gsi_next (&si); | ||
352 | - continue; | ||
353 | + if (!transform_pattern_stmt) | ||
354 | + gsi_next (&si); | ||
355 | + continue; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | @@ -4917,7 +4951,7 @@ | ||
360 | the chain. */ | ||
361 | vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); | ||
362 | gsi_remove (&si, true); | ||
363 | - continue; | ||
364 | + continue; | ||
365 | } | ||
366 | else | ||
367 | { | ||
368 | @@ -4927,7 +4961,9 @@ | ||
369 | continue; | ||
370 | } | ||
371 | } | ||
372 | - gsi_next (&si); | ||
373 | + | ||
374 | + if (!transform_pattern_stmt) | ||
375 | + gsi_next (&si); | ||
376 | } /* stmts in BB */ | ||
377 | } /* BBs in loop */ | ||
378 | |||
379 | |||
380 | === modified file 'gcc/tree-vect-patterns.c' | ||
381 | --- old/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000 | ||
382 | +++ new/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000 | ||
383 | @@ -39,10 +39,13 @@ | ||
384 | #include "diagnostic-core.h" | ||
385 | |||
386 | /* Pattern recognition functions */ | ||
387 | -static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); | ||
388 | -static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); | ||
389 | -static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); | ||
390 | -static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); | ||
391 | +static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, | ||
392 | + tree *); | ||
393 | +static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, | ||
394 | + tree *); | ||
395 | +static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, | ||
396 | + tree *); | ||
397 | +static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); | ||
398 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
399 | vect_recog_widen_mult_pattern, | ||
400 | vect_recog_widen_sum_pattern, | ||
401 | @@ -142,9 +145,9 @@ | ||
402 | |||
403 | Input: | ||
404 | |||
405 | - * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
406 | - when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be | ||
407 | - detected. | ||
408 | + * STMTS: Contains a stmt from which the pattern search begins. In the | ||
409 | + example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} | ||
410 | + will be detected. | ||
411 | |||
412 | Output: | ||
413 | |||
414 | @@ -165,12 +168,13 @@ | ||
415 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
416 | |||
417 | static gimple | ||
418 | -vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
419 | +vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, | ||
420 | + tree *type_out) | ||
421 | { | ||
422 | - gimple stmt; | ||
423 | + gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); | ||
424 | tree oprnd0, oprnd1; | ||
425 | tree oprnd00, oprnd01; | ||
426 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
427 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
428 | tree type, half_type; | ||
429 | gimple pattern_stmt; | ||
430 | tree prod_type; | ||
431 | @@ -178,10 +182,10 @@ | ||
432 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
433 | tree var, rhs; | ||
434 | |||
435 | - if (!is_gimple_assign (*last_stmt)) | ||
436 | + if (!is_gimple_assign (last_stmt)) | ||
437 | return NULL; | ||
438 | |||
439 | - type = gimple_expr_type (*last_stmt); | ||
440 | + type = gimple_expr_type (last_stmt); | ||
441 | |||
442 | /* Look for the following pattern | ||
443 | DX = (TYPE1) X; | ||
444 | @@ -207,7 +211,7 @@ | ||
445 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
446 | of the above pattern. */ | ||
447 | |||
448 | - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
449 | + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
450 | return NULL; | ||
451 | |||
452 | if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | ||
453 | @@ -228,12 +232,12 @@ | ||
454 | |||
455 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
456 | return NULL; | ||
457 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
458 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
459 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
460 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
461 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
462 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
463 | return NULL; | ||
464 | - stmt = *last_stmt; | ||
465 | + stmt = last_stmt; | ||
466 | |||
467 | if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) | ||
468 | { | ||
469 | @@ -319,11 +323,79 @@ | ||
470 | |||
471 | /* We don't allow changing the order of the computation in the inner-loop | ||
472 | when doing outer-loop vectorization. */ | ||
473 | - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
474 | + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
475 | |||
476 | return pattern_stmt; | ||
477 | } | ||
478 | |||
479 | +/* Handle two cases of multiplication by a constant. The first one is when | ||
480 | + the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second | ||
481 | + operand (OPRND). In that case, we can perform widen-mult from HALF_TYPE to | ||
482 | + TYPE. | ||
483 | + | ||
484 | + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than | ||
485 | + HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than | ||
486 | + TYPE), we can perform widen-mult from the intermediate type to TYPE and | ||
487 | + replace a_T = (TYPE) a_t; with a_it = (interm_type) a_t; */ | ||
488 | + | ||
489 | +static bool | ||
490 | +vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, | ||
491 | + VEC (gimple, heap) **stmts, tree type, | ||
492 | + tree *half_type, gimple def_stmt) | ||
493 | +{ | ||
494 | + tree new_type, new_oprnd, tmp; | ||
495 | + gimple new_stmt; | ||
496 | + | ||
497 | + if (int_fits_type_p (const_oprnd, *half_type)) | ||
498 | + { | ||
499 | + /* CONST_OPRND is a constant of HALF_TYPE. */ | ||
500 | + *oprnd = gimple_assign_rhs1 (def_stmt); | ||
501 | + return true; | ||
502 | + } | ||
503 | + | ||
504 | + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) | ||
505 | + || !vinfo_for_stmt (def_stmt)) | ||
506 | + return false; | ||
507 | + | ||
508 | + /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for | ||
509 | + a type 2 times bigger than HALF_TYPE. */ | ||
510 | + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, | ||
511 | + TYPE_UNSIGNED (type)); | ||
512 | + if (!int_fits_type_p (const_oprnd, new_type)) | ||
513 | + return false; | ||
514 | + | ||
515 | + /* Use NEW_TYPE for widen_mult. */ | ||
516 | + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) | ||
517 | + { | ||
518 | + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
519 | + /* Check if the already created pattern stmt is what we need. */ | ||
520 | + if (!is_gimple_assign (new_stmt) | ||
521 | + || gimple_assign_rhs_code (new_stmt) != NOP_EXPR | ||
522 | + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) | ||
523 | + return false; | ||
524 | + | ||
525 | + *oprnd = gimple_assign_lhs (new_stmt); | ||
526 | + } | ||
527 | + else | ||
528 | + { | ||
529 | + /* Create a_T = (NEW_TYPE) a_t; */ | ||
530 | + *oprnd = gimple_assign_rhs1 (def_stmt); | ||
531 | + tmp = create_tmp_var (new_type, NULL); | ||
532 | + add_referenced_var (tmp); | ||
533 | + new_oprnd = make_ssa_name (tmp, NULL); | ||
534 | + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, | ||
535 | + NULL_TREE); | ||
536 | + SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; | ||
537 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; | ||
538 | + VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
539 | + *oprnd = new_oprnd; | ||
540 | + } | ||
541 | + | ||
542 | + *half_type = new_type; | ||
543 | + return true; | ||
544 | +} | ||
545 | + | ||
546 | + | ||
547 | /* Function vect_recog_widen_mult_pattern | ||
548 | |||
549 | Try to find the following pattern: | ||
550 | @@ -361,28 +433,47 @@ | ||
551 | S3 a_T = (TYPE) a_t; | ||
552 | S5 prod_T = a_T * CONST; | ||
553 | |||
554 | - Input: | ||
555 | - | ||
556 | - * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
557 | - when this function is called with S5, the pattern {S3,S4,S5,(S6)} is | ||
558 | - detected. | ||
559 | - | ||
560 | - Output: | ||
561 | - | ||
562 | - * TYPE_IN: The type of the input arguments to the pattern. | ||
563 | - | ||
564 | - * TYPE_OUT: The type of the output of this pattern. | ||
565 | - | ||
566 | - * Return value: A new stmt that will be used to replace the sequence of | ||
567 | - stmts that constitute the pattern. In this case it will be: | ||
568 | - WIDEN_MULT <a_t, b_t> | ||
569 | - */ | ||
570 | + A special case of multiplication by constants is when 'TYPE' is 4 times | ||
571 | + bigger than 'type', but CONST fits an intermediate type 2 times smaller | ||
572 | + than 'TYPE'. In that case we create an additional pattern stmt for S3 | ||
573 | + to create a variable of the intermediate type, and perform widen-mult | ||
574 | + on the intermediate type as well: | ||
575 | + | ||
576 | + type a_t; | ||
577 | + interm_type a_it; | ||
578 | + TYPE a_T, prod_T, prod_T'; | ||
579 | + | ||
580 | + S1 a_t = ; | ||
581 | + S3 a_T = (TYPE) a_t; | ||
582 | + '--> a_it = (interm_type) a_t; | ||
583 | + S5 prod_T = a_T * CONST; | ||
584 | + '--> prod_T' = a_it w* CONST; | ||
585 | + | ||
586 | + Input/Output: | ||
587 | + | ||
588 | + * STMTS: Contains a stmt from which the pattern search begins. In the | ||
589 | + example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} | ||
590 | + is detected. In case of unsigned widen-mult, the original stmt (S5) is | ||
591 | + replaced with S6 in STMTS. In case of multiplication by a constant | ||
592 | + of an intermediate type (the last case above), STMTS also contains S3 | ||
593 | + (inserted before S5). | ||
594 | + | ||
595 | + Output: | ||
596 | + | ||
597 | + * TYPE_IN: The type of the input arguments to the pattern. | ||
598 | + | ||
599 | + * TYPE_OUT: The type of the output of this pattern. | ||
600 | + | ||
601 | + * Return value: A new stmt that will be used to replace the sequence of | ||
602 | + stmts that constitute the pattern. In this case it will be: | ||
603 | + WIDEN_MULT <a_t, b_t> | ||
604 | +*/ | ||
605 | |||
606 | static gimple | ||
607 | -vect_recog_widen_mult_pattern (gimple *last_stmt, | ||
608 | - tree *type_in, | ||
609 | - tree *type_out) | ||
610 | +vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, | ||
611 | + tree *type_in, tree *type_out) | ||
612 | { | ||
613 | + gimple last_stmt = VEC_pop (gimple, *stmts); | ||
614 | gimple def_stmt0, def_stmt1; | ||
615 | tree oprnd0, oprnd1; | ||
616 | tree type, half_type0, half_type1; | ||
617 | @@ -395,27 +486,27 @@ | ||
618 | VEC (tree, heap) *dummy_vec; | ||
619 | bool op0_ok, op1_ok; | ||
620 | |||
621 | - if (!is_gimple_assign (*last_stmt)) | ||
622 | + if (!is_gimple_assign (last_stmt)) | ||
623 | return NULL; | ||
624 | |||
625 | - type = gimple_expr_type (*last_stmt); | ||
626 | + type = gimple_expr_type (last_stmt); | ||
627 | |||
628 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
629 | of the above pattern. */ | ||
630 | |||
631 | - if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) | ||
632 | + if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) | ||
633 | return NULL; | ||
634 | |||
635 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
636 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
637 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
638 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
639 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
640 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
641 | return NULL; | ||
642 | |||
643 | /* Check argument 0. */ | ||
644 | - op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); | ||
645 | + op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); | ||
646 | /* Check argument 1. */ | ||
647 | - op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); | ||
648 | + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); | ||
649 | |||
650 | /* In case of multiplication by a constant one of the operands may not match | ||
651 | the pattern, but not both. */ | ||
652 | @@ -429,29 +520,21 @@ | ||
653 | } | ||
654 | else if (!op0_ok) | ||
655 | { | ||
656 | - if (CONSTANT_CLASS_P (oprnd0) | ||
657 | - && TREE_CODE (half_type1) == INTEGER_TYPE | ||
658 | - && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) | ||
659 | - && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) | ||
660 | - { | ||
661 | - /* OPRND0 is a constant of HALF_TYPE1. */ | ||
662 | - half_type0 = half_type1; | ||
663 | - oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
664 | - } | ||
665 | + if (TREE_CODE (oprnd0) == INTEGER_CST | ||
666 | + && TREE_CODE (half_type1) == INTEGER_TYPE | ||
667 | + && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, | ||
668 | + &half_type1, def_stmt1)) | ||
669 | + half_type0 = half_type1; | ||
670 | else | ||
671 | return NULL; | ||
672 | } | ||
673 | else if (!op1_ok) | ||
674 | { | ||
675 | - if (CONSTANT_CLASS_P (oprnd1) | ||
676 | + if (TREE_CODE (oprnd1) == INTEGER_CST | ||
677 | && TREE_CODE (half_type0) == INTEGER_TYPE | ||
678 | - && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) | ||
679 | - && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) | ||
680 | - { | ||
681 | - /* OPRND1 is a constant of HALF_TYPE0. */ | ||
682 | - half_type1 = half_type0; | ||
683 | - oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
684 | - } | ||
685 | + && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, | ||
686 | + &half_type0, def_stmt0)) | ||
687 | + half_type1 = half_type0; | ||
688 | else | ||
689 | return NULL; | ||
690 | } | ||
691 | @@ -461,7 +544,7 @@ | ||
692 | Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ | ||
693 | if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) | ||
694 | { | ||
695 | - tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; | ||
696 | + tree lhs = gimple_assign_lhs (last_stmt), use_lhs; | ||
697 | imm_use_iterator imm_iter; | ||
698 | use_operand_p use_p; | ||
699 | int nuses = 0; | ||
700 | @@ -491,7 +574,7 @@ | ||
701 | return NULL; | ||
702 | |||
703 | type = use_type; | ||
704 | - *last_stmt = use_stmt; | ||
705 | + last_stmt = use_stmt; | ||
706 | } | ||
707 | |||
708 | if (!types_compatible_p (half_type0, half_type1)) | ||
709 | @@ -506,7 +589,7 @@ | ||
710 | vectype_out = get_vectype_for_scalar_type (type); | ||
711 | if (!vectype | ||
712 | || !vectype_out | ||
713 | - || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, | ||
714 | + || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, | ||
715 | vectype_out, vectype, | ||
716 | &dummy, &dummy, &dummy_code, | ||
717 | &dummy_code, &dummy_int, &dummy_vec)) | ||
718 | @@ -524,6 +607,7 @@ | ||
719 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
720 | print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
721 | |||
722 | + VEC_safe_push (gimple, heap, *stmts, last_stmt); | ||
723 | return pattern_stmt; | ||
724 | } | ||
725 | |||
726 | @@ -555,16 +639,17 @@ | ||
727 | */ | ||
728 | |||
729 | static gimple | ||
730 | -vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
731 | +vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) | ||
732 | { | ||
733 | + gimple last_stmt = VEC_index (gimple, *stmts, 0); | ||
734 | tree fn, base, exp = NULL; | ||
735 | gimple stmt; | ||
736 | tree var; | ||
737 | |||
738 | - if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) | ||
739 | + if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) | ||
740 | return NULL; | ||
741 | |||
742 | - fn = gimple_call_fndecl (*last_stmt); | ||
743 | + fn = gimple_call_fndecl (last_stmt); | ||
744 | if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) | ||
745 | return NULL; | ||
746 | |||
747 | @@ -574,8 +659,8 @@ | ||
748 | case BUILT_IN_POWI: | ||
749 | case BUILT_IN_POWF: | ||
750 | case BUILT_IN_POW: | ||
751 | - base = gimple_call_arg (*last_stmt, 0); | ||
752 | - exp = gimple_call_arg (*last_stmt, 1); | ||
753 | + base = gimple_call_arg (last_stmt, 0); | ||
754 | + exp = gimple_call_arg (last_stmt, 1); | ||
755 | if (TREE_CODE (exp) != REAL_CST | ||
756 | && TREE_CODE (exp) != INTEGER_CST) | ||
757 | return NULL; | ||
758 | @@ -667,21 +752,23 @@ | ||
759 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
760 | |||
761 | static gimple | ||
762 | -vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
763 | +vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, | ||
764 | + tree *type_out) | ||
765 | { | ||
766 | + gimple last_stmt = VEC_index (gimple, *stmts, 0); | ||
767 | gimple stmt; | ||
768 | tree oprnd0, oprnd1; | ||
769 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
770 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
771 | tree type, half_type; | ||
772 | gimple pattern_stmt; | ||
773 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
774 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
775 | tree var; | ||
776 | |||
777 | - if (!is_gimple_assign (*last_stmt)) | ||
778 | + if (!is_gimple_assign (last_stmt)) | ||
779 | return NULL; | ||
780 | |||
781 | - type = gimple_expr_type (*last_stmt); | ||
782 | + type = gimple_expr_type (last_stmt); | ||
783 | |||
784 | /* Look for the following pattern | ||
785 | DX = (TYPE) X; | ||
786 | @@ -693,25 +780,25 @@ | ||
787 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
788 | of the above pattern. */ | ||
789 | |||
790 | - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
791 | + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
792 | return NULL; | ||
793 | |||
794 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
795 | return NULL; | ||
796 | |||
797 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
798 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
799 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
800 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
801 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
802 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
803 | return NULL; | ||
804 | |||
805 | - /* So far so good. Since *last_stmt was detected as a (summation) reduction, | ||
806 | + /* So far so good. Since last_stmt was detected as a (summation) reduction, | ||
807 | we know that oprnd1 is the reduction variable (defined by a loop-header | ||
808 | phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | ||
809 | Left to check that oprnd0 is defined by a cast from type 'type' to type | ||
810 | 'TYPE'. */ | ||
811 | |||
812 | - if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) | ||
813 | + if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) | ||
814 | return NULL; | ||
815 | |||
816 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
817 | @@ -732,8 +819,9 @@ | ||
818 | |||
819 | /* We don't allow changing the order of the computation in the inner-loop | ||
820 | when doing outer-loop vectorization. */ | ||
821 | - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
822 | + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
823 | |||
824 | + VEC_safe_push (gimple, heap, *stmts, last_stmt); | ||
825 | return pattern_stmt; | ||
826 | } | ||
827 | |||
828 | @@ -762,7 +850,7 @@ | ||
829 | |||
830 | static void | ||
831 | vect_pattern_recog_1 ( | ||
832 | - gimple (* vect_recog_func) (gimple *, tree *, tree *), | ||
833 | + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), | ||
834 | gimple_stmt_iterator si) | ||
835 | { | ||
836 | gimple stmt = gsi_stmt (si), pattern_stmt; | ||
837 | @@ -774,12 +862,14 @@ | ||
838 | enum tree_code code; | ||
839 | int i; | ||
840 | gimple next; | ||
841 | + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); | ||
842 | |||
843 | - pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); | ||
844 | + VEC_quick_push (gimple, stmts_to_replace, stmt); | ||
845 | + pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); | ||
846 | if (!pattern_stmt) | ||
847 | return; | ||
848 | |||
849 | - si = gsi_for_stmt (stmt); | ||
850 | + stmt = VEC_last (gimple, stmts_to_replace); | ||
851 | stmt_info = vinfo_for_stmt (stmt); | ||
852 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
853 | |||
854 | @@ -849,6 +939,35 @@ | ||
855 | FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) | ||
856 | if (next == stmt) | ||
857 | VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); | ||
858 | + | ||
859 | + /* In case of widen-mult by a constant, it is possible that an additional | ||
860 | + pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a | ||
861 | + stmt_info for it, and mark the relevant statements. */ | ||
862 | + for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) | ||
863 | + && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); | ||
864 | + i++) | ||
865 | + { | ||
866 | + stmt_info = vinfo_for_stmt (stmt); | ||
867 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
868 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
869 | + { | ||
870 | + fprintf (vect_dump, "additional pattern stmt: "); | ||
871 | + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
872 | + } | ||
873 | + | ||
874 | + set_vinfo_for_stmt (pattern_stmt, | ||
875 | + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
876 | + gimple_set_bb (pattern_stmt, gimple_bb (stmt)); | ||
877 | + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
878 | + | ||
879 | + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | ||
880 | + STMT_VINFO_DEF_TYPE (pattern_stmt_info) | ||
881 | + = STMT_VINFO_DEF_TYPE (stmt_info); | ||
882 | + STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); | ||
883 | + STMT_VINFO_IN_PATTERN_P (stmt_info) = true; | ||
884 | + } | ||
885 | + | ||
886 | + VEC_free (gimple, heap, stmts_to_replace); | ||
887 | } | ||
888 | |||
889 | |||
890 | @@ -896,10 +1015,8 @@ | ||
891 | |||
892 | If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} | ||
893 | (because they are marked as irrelevant). It will vectorize S6, and record | ||
894 | - a pointer to the new vector stmt VS6 both from S6 (as usual), and also | ||
895 | - from S4. We do that so that when we get to vectorizing stmts that use the | ||
896 | - def of S4 (like S5 that uses a_0), we'll know where to take the relevant | ||
897 | - vector-def from. S4 will be skipped, and S5 will be vectorized as usual: | ||
898 | + a pointer to the new vector stmt VS6 from S6 (as usual). | ||
899 | + S4 will be skipped, and S5 will be vectorized as usual: | ||
900 | |||
901 | in_pattern_p related_stmt vec_stmt | ||
902 | S1: a_i = .... - - - | ||
903 | @@ -915,7 +1032,21 @@ | ||
904 | elsewhere), and we'll end up with: | ||
905 | |||
906 | VS6: va_new = .... | ||
907 | - VS5: ... = ..vuse(va_new).. */ | ||
908 | + VS5: ... = ..vuse(va_new).. | ||
909 | + | ||
910 | + In case of more than one pattern statement, e.g., widen-mult with | ||
911 | + intermediate type: | ||
912 | + | ||
913 | + S1 a_t = ; | ||
914 | + S2 a_T = (TYPE) a_t; | ||
915 | + '--> S3: a_it = (interm_type) a_t; | ||
916 | + S4 prod_T = a_T * CONST; | ||
917 | + '--> S5: prod_T' = a_it w* CONST; | ||
918 | + | ||
919 | + there may be other users of a_T outside the pattern. In that case S2 will | ||
920 | + be marked as relevant (as well as S3), and both S2 and S3 will be analyzed | ||
921 | + and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will | ||
922 | + be recorded in S3. */ | ||
923 | |||
924 | void | ||
925 | vect_pattern_recog (loop_vec_info loop_vinfo) | ||
926 | @@ -925,7 +1056,7 @@ | ||
927 | unsigned int nbbs = loop->num_nodes; | ||
928 | gimple_stmt_iterator si; | ||
929 | unsigned int i, j; | ||
930 | - gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
931 | + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
932 | |||
933 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
934 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | ||
935 | |||
936 | === modified file 'gcc/tree-vect-slp.c' | ||
937 | --- old/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000 | ||
938 | +++ new/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 | ||
939 | @@ -152,7 +152,9 @@ | ||
940 | if (loop && def_stmt && gimple_bb (def_stmt) | ||
941 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
942 | && vinfo_for_stmt (def_stmt) | ||
943 | - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) | ||
944 | + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) | ||
945 | + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | ||
946 | + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | ||
947 | { | ||
948 | if (!*first_stmt_dt0) | ||
949 | *pattern0 = true; | ||
950 | |||
951 | === modified file 'gcc/tree-vect-stmts.c' | ||
952 | --- old/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000 | ||
953 | +++ new/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000 | ||
954 | @@ -126,33 +126,72 @@ | ||
955 | |||
956 | static void | ||
957 | vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, | ||
958 | - enum vect_relevant relevant, bool live_p) | ||
959 | + enum vect_relevant relevant, bool live_p, | ||
960 | + bool used_in_pattern) | ||
961 | { | ||
962 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
963 | enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
964 | bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
965 | + gimple pattern_stmt; | ||
966 | |||
967 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
968 | fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); | ||
969 | |||
970 | + /* If this stmt is an original stmt in a pattern, we might need to mark its | ||
971 | + related pattern stmt instead of the original stmt. However, such stmts | ||
972 | + may have their own uses that are not in any pattern; in such cases the | ||
973 | + stmt itself should be marked. */ | ||
974 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | ||
975 | { | ||
976 | - gimple pattern_stmt; | ||
977 | - | ||
978 | - /* This is the last stmt in a sequence that was detected as a | ||
979 | - pattern that can potentially be vectorized. Don't mark the stmt | ||
980 | - as relevant/live because it's not going to be vectorized. | ||
981 | - Instead mark the pattern-stmt that replaces it. */ | ||
982 | - | ||
983 | - pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
984 | - | ||
985 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
986 | - fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); | ||
987 | - stmt_info = vinfo_for_stmt (pattern_stmt); | ||
988 | - gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); | ||
989 | - save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
990 | - save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
991 | - stmt = pattern_stmt; | ||
992 | + bool found = false; | ||
993 | + if (!used_in_pattern) | ||
994 | + { | ||
995 | + imm_use_iterator imm_iter; | ||
996 | + use_operand_p use_p; | ||
997 | + gimple use_stmt; | ||
998 | + tree lhs; | ||
999 | + | ||
1000 | + if (is_gimple_assign (stmt)) | ||
1001 | + lhs = gimple_assign_lhs (stmt); | ||
1002 | + else | ||
1003 | + lhs = gimple_call_lhs (stmt); | ||
1004 | + | ||
1005 | + /* This use is an out-of-pattern use. If LHS has other uses that are | ||
1006 | + pattern uses, we should mark the stmt itself, and not the pattern | ||
1007 | + stmt. */ | ||
1008 | + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | ||
1009 | + { | ||
1010 | + if (is_gimple_debug (USE_STMT (use_p))) | ||
1011 | + continue; | ||
1012 | + use_stmt = USE_STMT (use_p); | ||
1013 | + | ||
1014 | + if (vinfo_for_stmt (use_stmt) | ||
1015 | + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) | ||
1016 | + { | ||
1017 | + found = true; | ||
1018 | + break; | ||
1019 | + } | ||
1020 | + } | ||
1021 | + } | ||
1022 | + | ||
1023 | + if (!found) | ||
1024 | + { | ||
1025 | + /* This is the last stmt in a sequence that was detected as a | ||
1026 | + pattern that can potentially be vectorized. Don't mark the stmt | ||
1027 | + as relevant/live because it's not going to be vectorized. | ||
1028 | + Instead mark the pattern-stmt that replaces it. */ | ||
1029 | + | ||
1030 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1031 | + | ||
1032 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1033 | + fprintf (vect_dump, "last stmt in pattern. don't mark" | ||
1034 | + " relevant/live."); | ||
1035 | + stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1036 | + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); | ||
1037 | + save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
1038 | + save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
1039 | + stmt = pattern_stmt; | ||
1040 | + } | ||
1041 | } | ||
1042 | |||
1043 | STMT_VINFO_LIVE_P (stmt_info) |= live_p; | ||
1044 | @@ -437,7 +476,8 @@ | ||
1045 | } | ||
1046 | } | ||
1047 | |||
1048 | - vect_mark_relevant (worklist, def_stmt, relevant, live_p); | ||
1049 | + vect_mark_relevant (worklist, def_stmt, relevant, live_p, | ||
1050 | + is_pattern_stmt_p (stmt_vinfo)); | ||
1051 | return true; | ||
1052 | } | ||
1053 | |||
1054 | @@ -494,7 +534,7 @@ | ||
1055 | } | ||
1056 | |||
1057 | if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) | ||
1058 | - vect_mark_relevant (&worklist, phi, relevant, live_p); | ||
1059 | + vect_mark_relevant (&worklist, phi, relevant, live_p, false); | ||
1060 | } | ||
1061 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
1062 | { | ||
1063 | @@ -506,7 +546,7 @@ | ||
1064 | } | ||
1065 | |||
1066 | if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) | ||
1067 | - vect_mark_relevant (&worklist, stmt, relevant, live_p); | ||
1068 | + vect_mark_relevant (&worklist, stmt, relevant, live_p, false); | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | @@ -613,42 +653,55 @@ | ||
1073 | if (is_gimple_assign (stmt)) | ||
1074 | { | ||
1075 | tree rhs = gimple_assign_rhs1 (stmt); | ||
1076 | - if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1077 | - == GIMPLE_SINGLE_RHS) | ||
1078 | + unsigned int op_num; | ||
1079 | + tree op; | ||
1080 | + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) | ||
1081 | { | ||
1082 | - unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 | ||
1083 | - (stmt)); | ||
1084 | - for (i = 0; i < op_num; i++) | ||
1085 | - { | ||
1086 | - tree op = TREE_OPERAND (rhs, i); | ||
1087 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1088 | - &worklist)) | ||
1089 | + case GIMPLE_SINGLE_RHS: | ||
1090 | + op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); | ||
1091 | + for (i = 0; i < op_num; i++) | ||
1092 | { | ||
1093 | - VEC_free (gimple, heap, worklist); | ||
1094 | - return false; | ||
1095 | + op = TREE_OPERAND (rhs, i); | ||
1096 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1097 | + &worklist)) | ||
1098 | + { | ||
1099 | + VEC_free (gimple, heap, worklist); | ||
1100 | + return false; | ||
1101 | + } | ||
1102 | } | ||
1103 | - } | ||
1104 | - } | ||
1105 | - else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1106 | - == GIMPLE_BINARY_RHS) | ||
1107 | - { | ||
1108 | - tree op = gimple_assign_rhs1 (stmt); | ||
1109 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1110 | - &worklist)) | ||
1111 | - { | ||
1112 | - VEC_free (gimple, heap, worklist); | ||
1113 | - return false; | ||
1114 | - } | ||
1115 | - op = gimple_assign_rhs2 (stmt); | ||
1116 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1117 | - &worklist)) | ||
1118 | - { | ||
1119 | - VEC_free (gimple, heap, worklist); | ||
1120 | - return false; | ||
1121 | - } | ||
1122 | - } | ||
1123 | - else | ||
1124 | - return false; | ||
1125 | + break; | ||
1126 | + | ||
1127 | + case GIMPLE_BINARY_RHS: | ||
1128 | + op = gimple_assign_rhs1 (stmt); | ||
1129 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1130 | + &worklist)) | ||
1131 | + { | ||
1132 | + VEC_free (gimple, heap, worklist); | ||
1133 | + return false; | ||
1134 | + } | ||
1135 | + op = gimple_assign_rhs2 (stmt); | ||
1136 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1137 | + &worklist)) | ||
1138 | + { | ||
1139 | + VEC_free (gimple, heap, worklist); | ||
1140 | + return false; | ||
1141 | + } | ||
1142 | + break; | ||
1143 | + | ||
1144 | + case GIMPLE_UNARY_RHS: | ||
1145 | + op = gimple_assign_rhs1 (stmt); | ||
1146 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1147 | + &worklist)) | ||
1148 | + { | ||
1149 | + VEC_free (gimple, heap, worklist); | ||
1150 | + return false; | ||
1151 | + } | ||
1152 | + | ||
1153 | + break; | ||
1154 | + | ||
1155 | + default: | ||
1156 | + return false; | ||
1157 | + } | ||
1158 | } | ||
1159 | else if (is_gimple_call (stmt)) | ||
1160 | { | ||
1161 | @@ -1210,7 +1263,14 @@ | ||
1162 | |||
1163 | /* Get the def from the vectorized stmt. */ | ||
1164 | def_stmt_info = vinfo_for_stmt (def_stmt); | ||
1165 | + | ||
1166 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | ||
1167 | + /* Get vectorized pattern statement. */ | ||
1168 | + if (!vec_stmt | ||
1169 | + && STMT_VINFO_IN_PATTERN_P (def_stmt_info) | ||
1170 | + && !STMT_VINFO_RELEVANT (def_stmt_info)) | ||
1171 | + vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( | ||
1172 | + STMT_VINFO_RELATED_STMT (def_stmt_info))); | ||
1173 | gcc_assert (vec_stmt); | ||
1174 | if (gimple_code (vec_stmt) == GIMPLE_PHI) | ||
1175 | vec_oprnd = PHI_RESULT (vec_stmt); | ||
1176 | @@ -4886,6 +4946,7 @@ | ||
1177 | enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); | ||
1178 | bool ok; | ||
1179 | tree scalar_type, vectype; | ||
1180 | + gimple pattern_stmt; | ||
1181 | |||
1182 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1183 | { | ||
1184 | @@ -4907,16 +4968,22 @@ | ||
1185 | - any LABEL_EXPRs in the loop | ||
1186 | - computations that are used only for array indexing or loop control. | ||
1187 | In basic blocks we only analyze statements that are a part of some SLP | ||
1188 | - instance, therefore, all the statements are relevant. */ | ||
1189 | - | ||
1190 | + instance, therefore, all the statements are relevant. | ||
1191 | + | ||
1192 | + A pattern statement needs to be analyzed instead of the original statement | ||
1193 | + if the original statement is not relevant. Otherwise, we analyze both | ||
1194 | + statements. */ | ||
1195 | + | ||
1196 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1197 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | ||
1198 | && !STMT_VINFO_LIVE_P (stmt_info)) | ||
1199 | { | ||
1200 | - gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1201 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1202 | + && pattern_stmt | ||
1203 | && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
1204 | || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
1205 | { | ||
1206 | + /* Analyze PATTERN_STMT instead of the original stmt. */ | ||
1207 | stmt = pattern_stmt; | ||
1208 | stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1209 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1210 | @@ -4933,6 +5000,21 @@ | ||
1211 | return true; | ||
1212 | } | ||
1213 | } | ||
1214 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1215 | + && pattern_stmt | ||
1216 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
1217 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
1218 | + { | ||
1219 | + /* Analyze PATTERN_STMT too. */ | ||
1220 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1221 | + { | ||
1222 | + fprintf (vect_dump, "==> examining pattern statement: "); | ||
1223 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
1224 | + } | ||
1225 | + | ||
1226 | + if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) | ||
1227 | + return false; | ||
1228 | + } | ||
1229 | |||
1230 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) | ||
1231 | { | ||
1232 | @@ -5066,7 +5148,6 @@ | ||
1233 | bool is_store = false; | ||
1234 | gimple vec_stmt = NULL; | ||
1235 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
1236 | - gimple orig_stmt_in_pattern, orig_scalar_stmt = stmt; | ||
1237 | bool done; | ||
1238 | |||
1239 | switch (STMT_VINFO_TYPE (stmt_info)) | ||
1240 | @@ -5205,25 +5286,7 @@ | ||
1241 | } | ||
1242 | |||
1243 | if (vec_stmt) | ||
1244 | - { | ||
1245 | - STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | ||
1246 | - orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1247 | - if (orig_stmt_in_pattern) | ||
1248 | - { | ||
1249 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); | ||
1250 | - /* STMT was inserted by the vectorizer to replace a computation idiom. | ||
1251 | - ORIG_STMT_IN_PATTERN is a stmt in the original sequence that | ||
1252 | - computed this idiom. We need to record a pointer to VEC_STMT in | ||
1253 | - the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the | ||
1254 | - documentation of vect_pattern_recog. */ | ||
1255 | - if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | ||
1256 | - { | ||
1257 | - gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) | ||
1258 | - == orig_scalar_stmt); | ||
1259 | - STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; | ||
1260 | - } | ||
1261 | - } | ||
1262 | - } | ||
1263 | + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | ||
1264 | |||
1265 | return is_store; | ||
1266 | } | ||
1267 | @@ -5601,8 +5664,12 @@ | ||
1268 | || *dt == vect_nested_cycle) | ||
1269 | { | ||
1270 | stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); | ||
1271 | - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | ||
1272 | + | ||
1273 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1274 | + && !STMT_VINFO_RELEVANT (stmt_info) | ||
1275 | + && !STMT_VINFO_LIVE_P (stmt_info)) | ||
1276 | stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
1277 | + | ||
1278 | *vectype = STMT_VINFO_VECTYPE (stmt_info); | ||
1279 | gcc_assert (*vectype != NULL_TREE); | ||
1280 | } | ||
1281 | |||
1282 | === modified file 'gcc/tree-vectorizer.h' | ||
1283 | --- old/gcc/tree-vectorizer.h 2011-07-04 11:13:51 +0000 | ||
1284 | +++ new/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000 | ||
1285 | @@ -890,7 +890,7 @@ | ||
1286 | /* Pattern recognition functions. | ||
1287 | Additional pattern recognition functions can (and will) be added | ||
1288 | in the future. */ | ||
1289 | -typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
1290 | +typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
1291 | #define NUM_PATTERNS 4 | ||
1292 | void vect_pattern_recog (loop_vec_info); | ||
1293 | |||
1294 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch new file mode 100644 index 000000000..82ae3a132 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106770.patch | |||
@@ -0,0 +1,138 @@ | |||
1 | 2011-07-11 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | Backport from mainline -r175090. | ||
4 | gcc/ | ||
5 | * ddg.c (add_intra_loop_mem_dep): New function. | ||
6 | (build_intra_loop_deps): Call it. | ||
7 | |||
8 | gcc/testsuite | ||
9 | * gcc.dg/sms-9.c: New file. | ||
10 | |||
11 | === modified file 'gcc/ddg.c' | ||
12 | --- old/gcc/ddg.c 2011-05-13 16:03:40 +0000 | ||
13 | +++ new/gcc/ddg.c 2011-07-04 11:00:06 +0000 | ||
14 | @@ -390,6 +390,33 @@ | ||
15 | &PATTERN (insn2)); | ||
16 | } | ||
17 | |||
18 | +/* Given two nodes, analyze their RTL insns and add intra-loop mem deps | ||
19 | + to ddg G. */ | ||
20 | +static void | ||
21 | +add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to) | ||
22 | +{ | ||
23 | + | ||
24 | + if ((from->cuid == to->cuid) | ||
25 | + || !insns_may_alias_p (from->insn, to->insn)) | ||
26 | + /* Do not create edge if memory references have disjoint alias sets | ||
27 | + or 'to' and 'from' are the same instruction. */ | ||
28 | + return; | ||
29 | + | ||
30 | + if (mem_write_insn_p (from->insn)) | ||
31 | + { | ||
32 | + if (mem_read_insn_p (to->insn)) | ||
33 | + create_ddg_dep_no_link (g, from, to, | ||
34 | + DEBUG_INSN_P (to->insn) | ||
35 | + ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0); | ||
36 | + else | ||
37 | + create_ddg_dep_no_link (g, from, to, | ||
38 | + DEBUG_INSN_P (to->insn) | ||
39 | + ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0); | ||
40 | + } | ||
41 | + else if (!mem_read_insn_p (to->insn)) | ||
42 | + create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0); | ||
43 | +} | ||
44 | + | ||
45 | /* Given two nodes, analyze their RTL insns and add inter-loop mem deps | ||
46 | to ddg G. */ | ||
47 | static void | ||
48 | @@ -477,10 +504,22 @@ | ||
49 | if (DEBUG_INSN_P (j_node->insn)) | ||
50 | continue; | ||
51 | if (mem_access_insn_p (j_node->insn)) | ||
52 | - /* Don't bother calculating inter-loop dep if an intra-loop dep | ||
53 | - already exists. */ | ||
54 | + { | ||
55 | + /* Don't bother calculating inter-loop dep if an intra-loop dep | ||
56 | + already exists. */ | ||
57 | if (! TEST_BIT (dest_node->successors, j)) | ||
58 | add_inter_loop_mem_dep (g, dest_node, j_node); | ||
59 | + /* If -fmodulo-sched-allow-regmoves | ||
60 | + is set, certain anti-dep edges are not created. | ||
61 | + It might be that these anti-dep edges are on the | ||
62 | + path from one memory instruction to another such that | ||
63 | + removing these edges could cause a violation of the | ||
64 | + memory dependencies. Thus we add intra edges between | ||
65 | + every two memory instructions in this case. */ | ||
66 | + if (flag_modulo_sched_allow_regmoves | ||
67 | + && !TEST_BIT (dest_node->predecessors, j)) | ||
68 | + add_intra_loop_mem_dep (g, j_node, dest_node); | ||
69 | + } | ||
70 | } | ||
71 | } | ||
72 | } | ||
73 | |||
74 | === added file 'gcc/testsuite/gcc.dg/sms-9.c' | ||
75 | --- old/gcc/testsuite/gcc.dg/sms-9.c 1970-01-01 00:00:00 +0000 | ||
76 | +++ new/gcc/testsuite/gcc.dg/sms-9.c 2011-07-04 11:00:06 +0000 | ||
77 | @@ -0,0 +1,60 @@ | ||
78 | +/* { dg-do run } */ | ||
79 | +/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */ | ||
80 | + | ||
81 | +#include <stdlib.h> | ||
82 | +#include <stdarg.h> | ||
83 | + | ||
84 | +struct df_ref_info | ||
85 | +{ | ||
86 | + unsigned int *begin; | ||
87 | + unsigned int *count; | ||
88 | +}; | ||
89 | + | ||
90 | +extern void *memset (void *s, int c, __SIZE_TYPE__ n); | ||
91 | + | ||
92 | + | ||
93 | +__attribute__ ((noinline)) | ||
94 | + int | ||
95 | + df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info, | ||
96 | + int num, unsigned int start) | ||
97 | +{ | ||
98 | + unsigned int m = num; | ||
99 | + unsigned int offset = 77; | ||
100 | + unsigned int r; | ||
101 | + | ||
102 | + for (r = start; r < m; r++) | ||
103 | + { | ||
104 | + ref_info->begin[r] = offset; | ||
105 | + offset += ref_info->count[r]; | ||
106 | + ref_info->count[r] = 0; | ||
107 | + } | ||
108 | + | ||
109 | + return offset; | ||
110 | +} | ||
111 | + | ||
112 | +int | ||
113 | +main () | ||
114 | +{ | ||
115 | + struct df_ref_info temp; | ||
116 | + int num = 100; | ||
117 | + unsigned int start = 5; | ||
118 | + int i, offset; | ||
119 | + | ||
120 | + temp.begin = malloc (100 * sizeof (unsigned int)); | ||
121 | + temp.count = malloc (100 * sizeof (unsigned int)); | ||
122 | + | ||
123 | + memset (temp.begin, 0, sizeof (unsigned int) * num); | ||
124 | + memset (temp.count, 0, sizeof (unsigned int) * num); | ||
125 | + | ||
126 | + for (i = 0; i < num; i++) | ||
127 | + temp.count[i] = i + 1; | ||
128 | + | ||
129 | + offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start); | ||
130 | + | ||
131 | + if (offset != 5112) | ||
132 | + abort (); | ||
133 | + | ||
134 | + free (temp.begin); | ||
135 | + free (temp.count); | ||
136 | + return 0; | ||
137 | +} | ||
138 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch new file mode 100644 index 000000000..70c8638cd --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106771.patch | |||
@@ -0,0 +1,211 @@ | |||
1 | 2011-07-11 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | Backport from mainline -r175091 | ||
4 | gcc/ | ||
5 | * modulo-sched.c (struct ps_insn): Remove row_rest_count | ||
6 | field. | ||
7 | (struct partial_schedule): Add rows_length field. | ||
8 | (verify_partial_schedule): Check rows_length. | ||
9 | (ps_insert_empty_row): Handle rows_length. | ||
10 | (create_partial_schedule): Likewise. | ||
11 | (free_partial_schedule): Likewise. | ||
12 | (reset_partial_schedule): Likewise. | ||
13 | (create_ps_insn): Remove rest_count argument. | ||
14 | (remove_node_from_ps): Update rows_length. | ||
15 | (add_node_to_ps): Update rows_length and call create_ps_insn without | ||
16 | passing row_rest_count. | ||
17 | (rotate_partial_schedule): Update rows_length. | ||
18 | |||
19 | === modified file 'gcc/modulo-sched.c' | ||
20 | --- old/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000 | ||
21 | +++ new/gcc/modulo-sched.c 2011-07-04 12:01:34 +0000 | ||
22 | @@ -134,8 +134,6 @@ | ||
23 | ps_insn_ptr next_in_row, | ||
24 | prev_in_row; | ||
25 | |||
26 | - /* The number of nodes in the same row that come after this node. */ | ||
27 | - int row_rest_count; | ||
28 | }; | ||
29 | |||
30 | /* Holds the partial schedule as an array of II rows. Each entry of the | ||
31 | @@ -149,6 +147,12 @@ | ||
32 | /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ | ||
33 | ps_insn_ptr *rows; | ||
34 | |||
35 | + /* rows_length[i] holds the number of instructions in the row. | ||
36 | + It is used only (as an optimization) to back off quickly from | ||
37 | + trying to schedule a node in a full row; that is, to avoid running | ||
38 | + through futile DFA state transitions. */ | ||
39 | + int *rows_length; | ||
40 | + | ||
41 | /* The earliest absolute cycle of an insn in the partial schedule. */ | ||
42 | int min_cycle; | ||
43 | |||
44 | @@ -1907,6 +1911,7 @@ | ||
45 | int ii = ps->ii; | ||
46 | int new_ii = ii + 1; | ||
47 | int row; | ||
48 | + int *rows_length_new; | ||
49 | |||
50 | verify_partial_schedule (ps, sched_nodes); | ||
51 | |||
52 | @@ -1921,9 +1926,11 @@ | ||
53 | rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | ||
54 | |||
55 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | ||
56 | + rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); | ||
57 | for (row = 0; row < split_row; row++) | ||
58 | { | ||
59 | rows_new[row] = ps->rows[row]; | ||
60 | + rows_length_new[row] = ps->rows_length[row]; | ||
61 | ps->rows[row] = NULL; | ||
62 | for (crr_insn = rows_new[row]; | ||
63 | crr_insn; crr_insn = crr_insn->next_in_row) | ||
64 | @@ -1944,6 +1951,7 @@ | ||
65 | for (row = split_row; row < ii; row++) | ||
66 | { | ||
67 | rows_new[row + 1] = ps->rows[row]; | ||
68 | + rows_length_new[row + 1] = ps->rows_length[row]; | ||
69 | ps->rows[row] = NULL; | ||
70 | for (crr_insn = rows_new[row + 1]; | ||
71 | crr_insn; crr_insn = crr_insn->next_in_row) | ||
72 | @@ -1965,6 +1973,8 @@ | ||
73 | + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); | ||
74 | free (ps->rows); | ||
75 | ps->rows = rows_new; | ||
76 | + free (ps->rows_length); | ||
77 | + ps->rows_length = rows_length_new; | ||
78 | ps->ii = new_ii; | ||
79 | gcc_assert (ps->min_cycle >= 0); | ||
80 | |||
81 | @@ -2040,16 +2050,23 @@ | ||
82 | ps_insn_ptr crr_insn; | ||
83 | |||
84 | for (row = 0; row < ps->ii; row++) | ||
85 | - for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | ||
86 | - { | ||
87 | - ddg_node_ptr u = crr_insn->node; | ||
88 | - | ||
89 | - gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | ||
90 | - /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | ||
91 | - popcount (sched_nodes) == number of insns in ps. */ | ||
92 | - gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | ||
93 | - gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | ||
94 | - } | ||
95 | + { | ||
96 | + int length = 0; | ||
97 | + | ||
98 | + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | ||
99 | + { | ||
100 | + ddg_node_ptr u = crr_insn->node; | ||
101 | + | ||
102 | + length++; | ||
103 | + gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | ||
104 | + /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | ||
105 | + popcount (sched_nodes) == number of insns in ps. */ | ||
106 | + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | ||
107 | + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | ||
108 | + } | ||
109 | + | ||
110 | + gcc_assert (ps->rows_length[row] == length); | ||
111 | + } | ||
112 | } | ||
113 | |||
114 | |||
115 | @@ -2455,6 +2472,7 @@ | ||
116 | { | ||
117 | partial_schedule_ptr ps = XNEW (struct partial_schedule); | ||
118 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); | ||
119 | + ps->rows_length = (int *) xcalloc (ii, sizeof (int)); | ||
120 | ps->ii = ii; | ||
121 | ps->history = history; | ||
122 | ps->min_cycle = INT_MAX; | ||
123 | @@ -2493,6 +2511,7 @@ | ||
124 | return; | ||
125 | free_ps_insns (ps); | ||
126 | free (ps->rows); | ||
127 | + free (ps->rows_length); | ||
128 | free (ps); | ||
129 | } | ||
130 | |||
131 | @@ -2510,6 +2529,8 @@ | ||
132 | ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii | ||
133 | * sizeof (ps_insn_ptr)); | ||
134 | memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); | ||
135 | + ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); | ||
136 | + memset (ps->rows_length, 0, new_ii * sizeof (int)); | ||
137 | ps->ii = new_ii; | ||
138 | ps->min_cycle = INT_MAX; | ||
139 | ps->max_cycle = INT_MIN; | ||
140 | @@ -2538,14 +2559,13 @@ | ||
141 | |||
142 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ | ||
143 | static ps_insn_ptr | ||
144 | -create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) | ||
145 | +create_ps_insn (ddg_node_ptr node, int cycle) | ||
146 | { | ||
147 | ps_insn_ptr ps_i = XNEW (struct ps_insn); | ||
148 | |||
149 | ps_i->node = node; | ||
150 | ps_i->next_in_row = NULL; | ||
151 | ps_i->prev_in_row = NULL; | ||
152 | - ps_i->row_rest_count = rest_count; | ||
153 | ps_i->cycle = cycle; | ||
154 | |||
155 | return ps_i; | ||
156 | @@ -2578,6 +2598,8 @@ | ||
157 | if (ps_i->next_in_row) | ||
158 | ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | ||
159 | } | ||
160 | + | ||
161 | + ps->rows_length[row] -= 1; | ||
162 | free (ps_i); | ||
163 | return true; | ||
164 | } | ||
165 | @@ -2734,17 +2756,12 @@ | ||
166 | sbitmap must_precede, sbitmap must_follow) | ||
167 | { | ||
168 | ps_insn_ptr ps_i; | ||
169 | - int rest_count = 1; | ||
170 | int row = SMODULO (cycle, ps->ii); | ||
171 | |||
172 | - if (ps->rows[row] | ||
173 | - && ps->rows[row]->row_rest_count >= issue_rate) | ||
174 | + if (ps->rows_length[row] >= issue_rate) | ||
175 | return NULL; | ||
176 | |||
177 | - if (ps->rows[row]) | ||
178 | - rest_count += ps->rows[row]->row_rest_count; | ||
179 | - | ||
180 | - ps_i = create_ps_insn (node, rest_count, cycle); | ||
181 | + ps_i = create_ps_insn (node, cycle); | ||
182 | |||
183 | /* Finds and inserts PS_I according to MUST_FOLLOW and | ||
184 | MUST_PRECEDE. */ | ||
185 | @@ -2754,6 +2771,7 @@ | ||
186 | return NULL; | ||
187 | } | ||
188 | |||
189 | + ps->rows_length[row] += 1; | ||
190 | return ps_i; | ||
191 | } | ||
192 | |||
193 | @@ -2909,11 +2927,16 @@ | ||
194 | for (i = 0; i < backward_rotates; i++) | ||
195 | { | ||
196 | ps_insn_ptr first_row = ps->rows[0]; | ||
197 | + int first_row_length = ps->rows_length[0]; | ||
198 | |||
199 | for (row = 0; row < last_row; row++) | ||
200 | - ps->rows[row] = ps->rows[row+1]; | ||
201 | + { | ||
202 | + ps->rows[row] = ps->rows[row + 1]; | ||
203 | + ps->rows_length[row] = ps->rows_length[row + 1]; | ||
204 | + } | ||
205 | |||
206 | ps->rows[last_row] = first_row; | ||
207 | + ps->rows_length[last_row] = first_row_length; | ||
208 | } | ||
209 | |||
210 | ps->max_cycle -= start_cycle; | ||
211 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch new file mode 100644 index 000000000..d918f9c58 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106772.patch | |||
@@ -0,0 +1,350 @@ | |||
1 | 2011-07-11 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | 2011-06-22 Dmitry Plotnikov <dplotnikov@ispras.ru> | ||
5 | Dmitry Melnik <dm@ispras.ru> | ||
6 | |||
7 | * config/arm/arm.c (neon_immediate_valid_for_shift): New function. | ||
8 | (neon_output_shift_immediate): Ditto. | ||
9 | * config/arm/arm-protos.h (neon_immediate_valid_for_shift): New | ||
10 | prototype. | ||
11 | (neon_output_shift_immediate): Ditto. | ||
12 | * config/arm/neon.md (vashl<mode>3): Modified constraint. | ||
13 | (vashr<mode>3_imm): New insn pattern. | ||
14 | (vlshr<mode>3_imm): Ditto. | ||
15 | (vashr<mode>3): Modified constraint. | ||
16 | (vlshr<mode>3): Ditto. | ||
17 | * config/arm/predicates.md (imm_for_neon_lshift_operand): New | ||
18 | predicate. | ||
19 | (imm_for_neon_rshift_operand): Ditto. | ||
20 | (imm_lshift_or_reg_neon): Ditto. | ||
21 | (imm_rshift_or_reg_neon): Ditto. | ||
22 | |||
23 | * optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr. | ||
24 | |||
25 | === modified file 'gcc/config/arm/arm-protos.h' | ||
26 | --- old/gcc/config/arm/arm-protos.h 2011-06-14 16:00:30 +0000 | ||
27 | +++ new/gcc/config/arm/arm-protos.h 2011-07-04 14:03:49 +0000 | ||
28 | @@ -64,8 +64,12 @@ | ||
29 | extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); | ||
30 | extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, | ||
31 | int *); | ||
32 | +extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, | ||
33 | + int *, bool); | ||
34 | extern char *neon_output_logic_immediate (const char *, rtx *, | ||
35 | enum machine_mode, int, int); | ||
36 | +extern char *neon_output_shift_immediate (const char *, char, rtx *, | ||
37 | + enum machine_mode, int, bool); | ||
38 | extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, | ||
39 | rtx (*) (rtx, rtx, rtx)); | ||
40 | extern rtx neon_make_constant (rtx); | ||
41 | |||
42 | === modified file 'gcc/config/arm/arm.c' | ||
43 | --- old/gcc/config/arm/arm.c 2011-06-29 09:13:17 +0000 | ||
44 | +++ new/gcc/config/arm/arm.c 2011-07-04 14:03:49 +0000 | ||
45 | @@ -8863,6 +8863,66 @@ | ||
46 | return 1; | ||
47 | } | ||
48 | |||
49 | +/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If | ||
50 | + the immediate is valid, write a constant suitable for using as an operand | ||
51 | + to VSHR/VSHL to *MODCONST and the corresponding element width to | ||
52 | + *ELEMENTWIDTH. ISLEFTSHIFT tells whether the shift is left or right, | ||
53 | + because they have different limitations. */ | ||
54 | + | ||
55 | +int | ||
56 | +neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, | ||
57 | + rtx *modconst, int *elementwidth, | ||
58 | + bool isleftshift) | ||
59 | +{ | ||
60 | + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); | ||
61 | + unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; | ||
62 | + unsigned HOST_WIDE_INT last_elt = 0; | ||
63 | + unsigned HOST_WIDE_INT maxshift; | ||
64 | + | ||
65 | + /* Require every element to be the same CONST_INT shift amount. */ | ||
66 | + for (i = 0; i < n_elts; i++) | ||
67 | + { | ||
68 | + rtx el = CONST_VECTOR_ELT (op, i); | ||
69 | + unsigned HOST_WIDE_INT elpart; | ||
70 | + | ||
71 | + if (GET_CODE (el) == CONST_INT) | ||
72 | + elpart = INTVAL (el); | ||
73 | + else if (GET_CODE (el) == CONST_DOUBLE) | ||
74 | + return 0; | ||
75 | + else | ||
76 | + gcc_unreachable (); | ||
77 | + | ||
78 | + if (i != 0 && elpart != last_elt) | ||
79 | + return 0; | ||
80 | + | ||
81 | + last_elt = elpart; | ||
82 | + } | ||
83 | + | ||
84 | + /* Shift less than element size. */ | ||
85 | + maxshift = innersize * 8; | ||
86 | + | ||
87 | + if (isleftshift) | ||
88 | + { | ||
89 | + /* Left shift immediate value can be from 0 to <size>-1. */ | ||
90 | + if (last_elt >= maxshift) | ||
91 | + return 0; | ||
92 | + } | ||
93 | + else | ||
94 | + { | ||
95 | + /* Right shift immediate value can be from 1 to <size>. */ | ||
96 | + if (last_elt == 0 || last_elt > maxshift) | ||
97 | + return 0; | ||
98 | + } | ||
99 | + | ||
100 | + if (elementwidth) | ||
101 | + *elementwidth = innersize * 8; | ||
102 | + | ||
103 | + if (modconst) | ||
104 | + *modconst = CONST_VECTOR_ELT (op, 0); | ||
105 | + | ||
106 | + return 1; | ||
107 | +} | ||
108 | + | ||
109 | /* Return a string suitable for output of Neon immediate logic operation | ||
110 | MNEM. */ | ||
111 | |||
112 | @@ -8885,6 +8945,28 @@ | ||
113 | return templ; | ||
114 | } | ||
115 | |||
116 | +/* Return a string suitable for output of Neon immediate shift operation | ||
117 | + (VSHR or VSHL) MNEM. */ | ||
118 | + | ||
119 | +char * | ||
120 | +neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, | ||
121 | + enum machine_mode mode, int quad, | ||
122 | + bool isleftshift) | ||
123 | +{ | ||
124 | + int width, is_valid; | ||
125 | + static char templ[40]; | ||
126 | + | ||
127 | + is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); | ||
128 | + gcc_assert (is_valid != 0); | ||
129 | + | ||
130 | + if (quad) | ||
131 | + sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); | ||
132 | + else | ||
133 | + sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); | ||
134 | + | ||
135 | + return templ; | ||
136 | +} | ||
137 | + | ||
138 | /* Output a sequence of pairwise operations to implement a reduction. | ||
139 | NOTE: We do "too much work" here, because pairwise operations work on two | ||
140 | registers-worth of operands in one go. Unfortunately we can't exploit those | ||
141 | |||
142 | === modified file 'gcc/config/arm/neon.md' | ||
143 | --- old/gcc/config/arm/neon.md 2011-07-01 09:19:21 +0000 | ||
144 | +++ new/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 | ||
145 | @@ -956,15 +956,57 @@ | ||
146 | ; SImode elements. | ||
147 | |||
148 | (define_insn "vashl<mode>3" | ||
149 | - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | ||
150 | - (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | ||
151 | - (match_operand:VDQIW 2 "s_register_operand" "w")))] | ||
152 | - "TARGET_NEON" | ||
153 | - "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | ||
154 | - [(set (attr "neon_type") | ||
155 | - (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
156 | - (const_string "neon_vshl_ddd") | ||
157 | - (const_string "neon_shift_3")))] | ||
158 | + [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") | ||
159 | + (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") | ||
160 | + (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] | ||
161 | + "TARGET_NEON" | ||
162 | + { | ||
163 | + switch (which_alternative) | ||
164 | + { | ||
165 | + case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | ||
166 | + case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], | ||
167 | + <MODE>mode, | ||
168 | + VALID_NEON_QREG_MODE (<MODE>mode), | ||
169 | + true); | ||
170 | + default: gcc_unreachable (); | ||
171 | + } | ||
172 | + } | ||
173 | + [(set (attr "neon_type") | ||
174 | + (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
175 | + (const_string "neon_vshl_ddd") | ||
176 | + (const_string "neon_shift_3")))] | ||
177 | +) | ||
178 | + | ||
179 | +(define_insn "vashr<mode>3_imm" | ||
180 | + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | ||
181 | + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | ||
182 | + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] | ||
183 | + "TARGET_NEON" | ||
184 | + { | ||
185 | + return neon_output_shift_immediate ("vshr", 's', &operands[2], | ||
186 | + <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | ||
187 | + false); | ||
188 | + } | ||
189 | + [(set (attr "neon_type") | ||
190 | + (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
191 | + (const_string "neon_vshl_ddd") | ||
192 | + (const_string "neon_shift_3")))] | ||
193 | +) | ||
194 | + | ||
195 | +(define_insn "vlshr<mode>3_imm" | ||
196 | + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | ||
197 | + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | ||
198 | + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] | ||
199 | + "TARGET_NEON" | ||
200 | + { | ||
201 | + return neon_output_shift_immediate ("vshr", 'u', &operands[2], | ||
202 | + <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | ||
203 | + false); | ||
204 | + } | ||
205 | + [(set (attr "neon_type") | ||
206 | + (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
207 | + (const_string "neon_vshl_ddd") | ||
208 | + (const_string "neon_shift_3")))] | ||
209 | ) | ||
210 | |||
211 | ; Used for implementing logical shift-right, which is a left-shift by a negative | ||
212 | @@ -1004,28 +1046,34 @@ | ||
213 | (define_expand "vashr<mode>3" | ||
214 | [(set (match_operand:VDQIW 0 "s_register_operand" "") | ||
215 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") | ||
216 | - (match_operand:VDQIW 2 "s_register_operand" "")))] | ||
217 | + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] | ||
218 | "TARGET_NEON" | ||
219 | { | ||
220 | rtx neg = gen_reg_rtx (<MODE>mode); | ||
221 | - | ||
222 | - emit_insn (gen_neg<mode>2 (neg, operands[2])); | ||
223 | - emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); | ||
224 | - | ||
225 | + if (REG_P (operands[2])) | ||
226 | + { | ||
227 | + emit_insn (gen_neg<mode>2 (neg, operands[2])); | ||
228 | + emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); | ||
229 | + } | ||
230 | + else | ||
231 | + emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); | ||
232 | DONE; | ||
233 | }) | ||
234 | |||
235 | (define_expand "vlshr<mode>3" | ||
236 | [(set (match_operand:VDQIW 0 "s_register_operand" "") | ||
237 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") | ||
238 | - (match_operand:VDQIW 2 "s_register_operand" "")))] | ||
239 | + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] | ||
240 | "TARGET_NEON" | ||
241 | { | ||
242 | rtx neg = gen_reg_rtx (<MODE>mode); | ||
243 | - | ||
244 | - emit_insn (gen_neg<mode>2 (neg, operands[2])); | ||
245 | - emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); | ||
246 | - | ||
247 | + if (REG_P (operands[2])) | ||
248 | + { | ||
249 | + emit_insn (gen_neg<mode>2 (neg, operands[2])); | ||
250 | + emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); | ||
251 | + } | ||
252 | + else | ||
253 | + emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); | ||
254 | DONE; | ||
255 | }) | ||
256 | |||
257 | |||
258 | === modified file 'gcc/config/arm/predicates.md' | ||
259 | --- old/gcc/config/arm/predicates.md 2011-06-22 15:50:23 +0000 | ||
260 | +++ new/gcc/config/arm/predicates.md 2011-07-04 14:03:49 +0000 | ||
261 | @@ -585,6 +585,26 @@ | ||
262 | return neon_immediate_valid_for_move (op, mode, NULL, NULL); | ||
263 | }) | ||
264 | |||
265 | +(define_predicate "imm_for_neon_lshift_operand" | ||
266 | + (match_code "const_vector") | ||
267 | +{ | ||
268 | + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true); | ||
269 | +}) | ||
270 | + | ||
271 | +(define_predicate "imm_for_neon_rshift_operand" | ||
272 | + (match_code "const_vector") | ||
273 | +{ | ||
274 | + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false); | ||
275 | +}) | ||
276 | + | ||
277 | +(define_predicate "imm_lshift_or_reg_neon" | ||
278 | + (ior (match_operand 0 "s_register_operand") | ||
279 | + (match_operand 0 "imm_for_neon_lshift_operand"))) | ||
280 | + | ||
281 | +(define_predicate "imm_rshift_or_reg_neon" | ||
282 | + (ior (match_operand 0 "s_register_operand") | ||
283 | + (match_operand 0 "imm_for_neon_rshift_operand"))) | ||
284 | + | ||
285 | (define_predicate "imm_for_neon_logic_operand" | ||
286 | (match_code "const_vector") | ||
287 | { | ||
288 | |||
289 | === modified file 'gcc/optabs.c' | ||
290 | --- old/gcc/optabs.c 2011-03-04 10:27:10 +0000 | ||
291 | +++ new/gcc/optabs.c 2011-07-04 14:03:49 +0000 | ||
292 | @@ -6171,6 +6171,9 @@ | ||
293 | init_optab (usashl_optab, US_ASHIFT); | ||
294 | init_optab (ashr_optab, ASHIFTRT); | ||
295 | init_optab (lshr_optab, LSHIFTRT); | ||
296 | + init_optabv (vashl_optab, ASHIFT); | ||
297 | + init_optabv (vashr_optab, ASHIFTRT); | ||
298 | + init_optabv (vlshr_optab, LSHIFTRT); | ||
299 | init_optab (rotl_optab, ROTATE); | ||
300 | init_optab (rotr_optab, ROTATERT); | ||
301 | init_optab (smin_optab, SMIN); | ||
302 | |||
303 | === added file 'gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c' | ||
304 | --- old/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 1970-01-01 00:00:00 +0000 | ||
305 | +++ new/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c 2011-07-04 14:03:49 +0000 | ||
306 | @@ -0,0 +1,11 @@ | ||
307 | +/* { dg-do compile } */ | ||
308 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
309 | +/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ | ||
310 | +/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */ | ||
311 | + | ||
312 | +/* Verify that VSHR immediate is used. */ | ||
313 | +void f1(int n, unsigned int x[], unsigned int y[]) { | ||
314 | + int i; | ||
315 | + for (i = 0; i < n; ++i) | ||
316 | + y[i] = x[i] >> 3; | ||
317 | +} | ||
318 | |||
319 | === added file 'gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c' | ||
320 | --- old/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 1970-01-01 00:00:00 +0000 | ||
321 | +++ new/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c 2011-07-04 14:03:49 +0000 | ||
322 | @@ -0,0 +1,11 @@ | ||
323 | +/* { dg-do compile } */ | ||
324 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
325 | +/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ | ||
326 | +/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */ | ||
327 | + | ||
328 | +/* Verify that VSHL immediate is used. */ | ||
329 | +void f1(int n, int x[], int y[]) { | ||
330 | + int i; | ||
331 | + for (i = 0; i < n; ++i) | ||
332 | + y[i] = x[i] << 3; | ||
333 | +} | ||
334 | |||
335 | === added file 'gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c' | ||
336 | --- old/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 1970-01-01 00:00:00 +0000 | ||
337 | +++ new/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c 2011-07-04 14:03:49 +0000 | ||
338 | @@ -0,0 +1,11 @@ | ||
339 | +/* { dg-do compile } */ | ||
340 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
341 | +/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ | ||
342 | +/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */ | ||
343 | + | ||
344 | +/* Verify that VSHR immediate is used. */ | ||
345 | +void f1(int n, int x[], int y[]) { | ||
346 | + int i; | ||
347 | + for (i = 0; i < n; ++i) | ||
348 | + y[i] = x[i] >> 3; | ||
349 | +} | ||
350 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch new file mode 100644 index 000000000..de3f29e19 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106773.patch | |||
@@ -0,0 +1,119 @@ | |||
1 | 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | gcc/ | ||
5 | 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
6 | |||
7 | * reload1.c (choose_reload_regs): Use mode sizes to check whether | ||
8 | an old reload register completely defines the required value. | ||
9 | |||
10 | gcc/testsuite/ | ||
11 | 2011-07-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
12 | |||
13 | * gcc.target/arm/neon-modes-3.c: New test. | ||
14 | |||
15 | === modified file 'gcc/reload1.c' | ||
16 | --- old/gcc/reload1.c 2011-07-01 09:19:21 +0000 | ||
17 | +++ new/gcc/reload1.c 2011-07-11 10:06:50 +0000 | ||
18 | @@ -6451,6 +6451,8 @@ | ||
19 | |||
20 | if (regno >= 0 | ||
21 | && reg_last_reload_reg[regno] != 0 | ||
22 | + && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno])) | ||
23 | + >= GET_MODE_SIZE (mode) + byte) | ||
24 | #ifdef CANNOT_CHANGE_MODE_CLASS | ||
25 | /* Verify that the register it's in can be used in | ||
26 | mode MODE. */ | ||
27 | @@ -6462,24 +6464,12 @@ | ||
28 | { | ||
29 | enum reg_class rclass = rld[r].rclass, last_class; | ||
30 | rtx last_reg = reg_last_reload_reg[regno]; | ||
31 | - enum machine_mode need_mode; | ||
32 | |||
33 | i = REGNO (last_reg); | ||
34 | i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode); | ||
35 | last_class = REGNO_REG_CLASS (i); | ||
36 | |||
37 | - if (byte == 0) | ||
38 | - need_mode = mode; | ||
39 | - else | ||
40 | - need_mode | ||
41 | - = smallest_mode_for_size | ||
42 | - (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT, | ||
43 | - GET_MODE_CLASS (mode) == MODE_PARTIAL_INT | ||
44 | - ? MODE_INT : GET_MODE_CLASS (mode)); | ||
45 | - | ||
46 | - if ((GET_MODE_SIZE (GET_MODE (last_reg)) | ||
47 | - >= GET_MODE_SIZE (need_mode)) | ||
48 | - && reg_reloaded_contents[i] == regno | ||
49 | + if (reg_reloaded_contents[i] == regno | ||
50 | && TEST_HARD_REG_BIT (reg_reloaded_valid, i) | ||
51 | && HARD_REGNO_MODE_OK (i, rld[r].mode) | ||
52 | && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i) | ||
53 | |||
54 | === added file 'gcc/testsuite/gcc.target/arm/neon-modes-3.c' | ||
55 | --- old/gcc/testsuite/gcc.target/arm/neon-modes-3.c 1970-01-01 00:00:00 +0000 | ||
56 | +++ new/gcc/testsuite/gcc.target/arm/neon-modes-3.c 2011-07-11 10:06:50 +0000 | ||
57 | @@ -0,0 +1,61 @@ | ||
58 | +/* { dg-do compile } */ | ||
59 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
60 | +/* { dg-options "-O" } */ | ||
61 | +/* { dg-add-options arm_neon } */ | ||
62 | + | ||
63 | +#include <arm_neon.h> | ||
64 | + | ||
65 | +void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n) | ||
66 | +{ | ||
67 | + float32x4x4_t a5, a6, a7, a8, a9; | ||
68 | + int i; | ||
69 | + | ||
70 | + a5 = *src; | ||
71 | + a6 = *src; | ||
72 | + a7 = *src; | ||
73 | + a8 = *src; | ||
74 | + a9 = *src; | ||
75 | + while (n--) | ||
76 | + { | ||
77 | + for (i = 0; i < 8; i++) | ||
78 | + { | ||
79 | + float32x4x4_t a0, a1, a2, a3, a4; | ||
80 | + | ||
81 | + a0 = *src; | ||
82 | + a1 = *src; | ||
83 | + a2 = *src; | ||
84 | + a3 = *src; | ||
85 | + a4 = *src; | ||
86 | + *src = a0; | ||
87 | + *dest = a0.val[0]; | ||
88 | + *dest = a0.val[3]; | ||
89 | + *src = a1; | ||
90 | + *dest = a1.val[0]; | ||
91 | + *dest = a1.val[3]; | ||
92 | + *src = a2; | ||
93 | + *dest = a2.val[0]; | ||
94 | + *dest = a2.val[3]; | ||
95 | + *src = a3; | ||
96 | + *dest = a3.val[0]; | ||
97 | + *dest = a3.val[3]; | ||
98 | + *src = a4; | ||
99 | + *dest = a4.val[0]; | ||
100 | + *dest = a4.val[3]; | ||
101 | + } | ||
102 | + *src = a5; | ||
103 | + *dest = a5.val[0]; | ||
104 | + *dest = a5.val[3]; | ||
105 | + *src = a6; | ||
106 | + *dest = a6.val[0]; | ||
107 | + *dest = a6.val[3]; | ||
108 | + *src = a7; | ||
109 | + *dest = a7.val[0]; | ||
110 | + *dest = a7.val[3]; | ||
111 | + *src = a8; | ||
112 | + *dest = a8.val[0]; | ||
113 | + *dest = a8.val[3]; | ||
114 | + *src = a9; | ||
115 | + *dest = a9.val[0]; | ||
116 | + *dest = a9.val[3]; | ||
117 | + } | ||
118 | +} | ||
119 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch new file mode 100644 index 000000000..0b05c3824 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106775.patch | |||
@@ -0,0 +1,67 @@ | |||
1 | 2011-07-15 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | |||
6 | 2011-04-05 Eric Botcazou <ebotcazou@adacore.com> | ||
7 | |||
8 | * ifcvt.c (cond_exec_process_insns): Disallow converting a block | ||
9 | that contains the prologue. | ||
10 | |||
11 | gcc/testsuite/ | ||
12 | Backport from mainline: | ||
13 | |||
14 | 2011-04-01 Bernd Schmidt <bernds@codesourcery.com> | ||
15 | |||
16 | * gcc.c-torture/compile/20110401-1.c: New test. | ||
17 | |||
18 | === modified file 'gcc/ifcvt.c' | ||
19 | --- old/gcc/ifcvt.c 2010-12-14 00:23:40 +0000 | ||
20 | +++ new/gcc/ifcvt.c 2011-07-11 04:02:28 +0000 | ||
21 | @@ -1,5 +1,6 @@ | ||
22 | /* If-conversion support. | ||
23 | - Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 | ||
24 | + Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, | ||
25 | + 2011 | ||
26 | Free Software Foundation, Inc. | ||
27 | |||
28 | This file is part of GCC. | ||
29 | @@ -304,6 +305,10 @@ | ||
30 | |||
31 | for (insn = start; ; insn = NEXT_INSN (insn)) | ||
32 | { | ||
33 | + /* dwarf2out can't cope with conditional prologues. */ | ||
34 | + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) | ||
35 | + return FALSE; | ||
36 | + | ||
37 | if (NOTE_P (insn) || DEBUG_INSN_P (insn)) | ||
38 | goto insn_done; | ||
39 | |||
40 | |||
41 | === added file 'gcc/testsuite/gcc.c-torture/compile/20110401-1.c' | ||
42 | --- old/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 1970-01-01 00:00:00 +0000 | ||
43 | +++ new/gcc/testsuite/gcc.c-torture/compile/20110401-1.c 2011-07-11 04:02:28 +0000 | ||
44 | @@ -0,0 +1,22 @@ | ||
45 | +void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len) | ||
46 | +{ | ||
47 | + int k; | ||
48 | + unsigned char temp[4]; | ||
49 | + if (len < 128) { | ||
50 | + if (ans != ((void *) 0)) | ||
51 | + ans[0] = (unsigned char) len; | ||
52 | + *ans_len = 1; | ||
53 | + } else { | ||
54 | + k = 0; | ||
55 | + while (len) { | ||
56 | + temp[k++] = len & 0xFF; | ||
57 | + len = len >> 8; | ||
58 | + } | ||
59 | + *ans_len = k + 1; | ||
60 | + if (ans != ((void *) 0)) { | ||
61 | + ans[0] = ((unsigned char) k & 0x7F) + 128; | ||
62 | + while (k--) | ||
63 | + ans[*ans_len - 1 - k] = temp[k]; | ||
64 | + } | ||
65 | + } | ||
66 | +} | ||
67 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch new file mode 100644 index 000000000..3d4d5c504 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106776.patch | |||
@@ -0,0 +1,46 @@ | |||
1 | 2011-07-15 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | 2011-03-22 Eric Botcazou <ebotcazou@adacore.com> | ||
6 | |||
7 | * combine.c (simplify_set): Try harder to find the best CC mode when | ||
8 | simplifying a nested COMPARE on the RHS. | ||
9 | |||
10 | === modified file 'gcc/combine.c' | ||
11 | --- old/gcc/combine.c 2011-05-27 14:31:18 +0000 | ||
12 | +++ new/gcc/combine.c 2011-07-11 03:52:31 +0000 | ||
13 | @@ -6287,10 +6287,18 @@ | ||
14 | enum rtx_code new_code; | ||
15 | rtx op0, op1, tmp; | ||
16 | int other_changed = 0; | ||
17 | + rtx inner_compare = NULL_RTX; | ||
18 | enum machine_mode compare_mode = GET_MODE (dest); | ||
19 | |||
20 | if (GET_CODE (src) == COMPARE) | ||
21 | - op0 = XEXP (src, 0), op1 = XEXP (src, 1); | ||
22 | + { | ||
23 | + op0 = XEXP (src, 0), op1 = XEXP (src, 1); | ||
24 | + if (GET_CODE (op0) == COMPARE && op1 == const0_rtx) | ||
25 | + { | ||
26 | + inner_compare = op0; | ||
27 | + op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1); | ||
28 | + } | ||
29 | + } | ||
30 | else | ||
31 | op0 = src, op1 = CONST0_RTX (GET_MODE (src)); | ||
32 | |||
33 | @@ -6332,6 +6340,12 @@ | ||
34 | need to use a different CC mode here. */ | ||
35 | if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) | ||
36 | compare_mode = GET_MODE (op0); | ||
37 | + else if (inner_compare | ||
38 | + && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC | ||
39 | + && new_code == old_code | ||
40 | + && op0 == XEXP (inner_compare, 0) | ||
41 | + && op1 == XEXP (inner_compare, 1)) | ||
42 | + compare_mode = GET_MODE (inner_compare); | ||
43 | else | ||
44 | compare_mode = SELECT_CC_MODE (new_code, op0, op1); | ||
45 | |||
46 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch new file mode 100644 index 000000000..68b682b3c --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106777.patch | |||
@@ -0,0 +1,192 @@ | |||
1 | 2011-07-15 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> | ||
6 | |||
7 | * config/arm/unwind-arm.c (enum __cxa_type_match_result): New. | ||
8 | (__cxa_type_match): Correct declaration. | ||
9 | (__gnu_unwind_pr_common): Reconstruct | ||
10 | additional indirection when __cxa_type_match returns | ||
11 | succeeded_with_ptr_to_base. | ||
12 | |||
13 | libstdc++-v3/ | ||
14 | Backport from mainline: | ||
15 | |||
16 | 2011-06-29 Nathan Sidwell <nathan@codesourcery.com> | ||
17 | |||
18 | * libsupc++/eh_arm.cc (__cxa_type_match): Construct address of | ||
19 | thrown object here. Return succeeded_with_ptr_to_base for all | ||
20 | pointer cases. | ||
21 | |||
22 | === modified file 'gcc/config/arm/unwind-arm.c' | ||
23 | --- old/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000 | ||
24 | +++ new/gcc/config/arm/unwind-arm.c 2011-07-11 03:35:44 +0000 | ||
25 | @@ -32,13 +32,18 @@ | ||
26 | typedef unsigned char bool; | ||
27 | |||
28 | typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */ | ||
29 | +enum __cxa_type_match_result | ||
30 | + { | ||
31 | + ctm_failed = 0, | ||
32 | + ctm_succeeded = 1, | ||
33 | + ctm_succeeded_with_ptr_to_base = 2 | ||
34 | + }; | ||
35 | |||
36 | void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp); | ||
37 | bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp); | ||
38 | -bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp, | ||
39 | - const type_info *rttip, | ||
40 | - bool is_reference, | ||
41 | - void **matched_object); | ||
42 | +enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match | ||
43 | + (_Unwind_Control_Block *ucbp, const type_info *rttip, | ||
44 | + bool is_reference, void **matched_object); | ||
45 | |||
46 | _Unwind_Ptr __attribute__((weak)) | ||
47 | __gnu_Unwind_Find_exidx (_Unwind_Ptr, int *); | ||
48 | @@ -1107,6 +1112,7 @@ | ||
49 | _uw rtti; | ||
50 | bool is_reference = (data[0] & uint32_highbit) != 0; | ||
51 | void *matched; | ||
52 | + enum __cxa_type_match_result match_type; | ||
53 | |||
54 | /* Check for no-throw areas. */ | ||
55 | if (data[1] == (_uw) -2) | ||
56 | @@ -1118,17 +1124,31 @@ | ||
57 | { | ||
58 | /* Match a catch specification. */ | ||
59 | rtti = _Unwind_decode_target2 ((_uw) &data[1]); | ||
60 | - if (!__cxa_type_match (ucbp, (type_info *) rtti, | ||
61 | - is_reference, | ||
62 | - &matched)) | ||
63 | - matched = (void *)0; | ||
64 | + match_type = __cxa_type_match (ucbp, | ||
65 | + (type_info *) rtti, | ||
66 | + is_reference, | ||
67 | + &matched); | ||
68 | } | ||
69 | + else | ||
70 | + match_type = ctm_succeeded; | ||
71 | |||
72 | - if (matched) | ||
73 | + if (match_type) | ||
74 | { | ||
75 | ucbp->barrier_cache.sp = | ||
76 | _Unwind_GetGR (context, R_SP); | ||
77 | - ucbp->barrier_cache.bitpattern[0] = (_uw) matched; | ||
78 | + // ctm_succeeded_with_ptr_to_base really | ||
79 | + // means _c_t_m indirected the pointer | ||
80 | + // object. We have to reconstruct the | ||
81 | + // additional pointer layer by using a temporary. | ||
82 | + if (match_type == ctm_succeeded_with_ptr_to_base) | ||
83 | + { | ||
84 | + ucbp->barrier_cache.bitpattern[2] | ||
85 | + = (_uw) matched; | ||
86 | + ucbp->barrier_cache.bitpattern[0] | ||
87 | + = (_uw) &ucbp->barrier_cache.bitpattern[2]; | ||
88 | + } | ||
89 | + else | ||
90 | + ucbp->barrier_cache.bitpattern[0] = (_uw) matched; | ||
91 | ucbp->barrier_cache.bitpattern[1] = (_uw) data; | ||
92 | return _URC_HANDLER_FOUND; | ||
93 | } | ||
94 | |||
95 | === modified file 'libstdc++-v3/libsupc++/eh_arm.cc' | ||
96 | --- old/libstdc++-v3/libsupc++/eh_arm.cc 2011-01-03 20:52:22 +0000 | ||
97 | +++ new/libstdc++-v3/libsupc++/eh_arm.cc 2011-07-11 03:35:44 +0000 | ||
98 | @@ -30,10 +30,11 @@ | ||
99 | using namespace __cxxabiv1; | ||
100 | |||
101 | |||
102 | -// Given the thrown type THROW_TYPE, pointer to a variable containing a | ||
103 | -// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to | ||
104 | -// compare against, return whether or not there is a match and if so, | ||
105 | -// update *THROWN_PTR_P. | ||
106 | +// Given the thrown type THROW_TYPE, exception object UE_HEADER and a | ||
107 | +// type CATCH_TYPE to compare against, return whether or not there is | ||
108 | +// a match and if so, update *THROWN_PTR_P to point to either the | ||
109 | +// type-matched object, or in the case of a pointer type, the object | ||
110 | +// pointed to by the pointer. | ||
111 | |||
112 | extern "C" __cxa_type_match_result | ||
113 | __cxa_type_match(_Unwind_Exception* ue_header, | ||
114 | @@ -41,51 +42,51 @@ | ||
115 | bool is_reference __attribute__((__unused__)), | ||
116 | void** thrown_ptr_p) | ||
117 | { | ||
118 | - bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class); | ||
119 | - bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); | ||
120 | - bool dependent_exception = | ||
121 | - __is_dependent_exception(ue_header->exception_class); | ||
122 | + bool forced_unwind | ||
123 | + = __is_gxx_forced_unwind_class(ue_header->exception_class); | ||
124 | + bool foreign_exception | ||
125 | + = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class); | ||
126 | + bool dependent_exception | ||
127 | + = __is_dependent_exception(ue_header->exception_class); | ||
128 | __cxa_exception* xh = __get_exception_header_from_ue(ue_header); | ||
129 | __cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header); | ||
130 | const std::type_info* throw_type; | ||
131 | + void *thrown_ptr = 0; | ||
132 | |||
133 | if (forced_unwind) | ||
134 | throw_type = &typeid(abi::__forced_unwind); | ||
135 | else if (foreign_exception) | ||
136 | throw_type = &typeid(abi::__foreign_exception); | ||
137 | - else if (dependent_exception) | ||
138 | - throw_type = __get_exception_header_from_obj | ||
139 | - (dx->primaryException)->exceptionType; | ||
140 | else | ||
141 | - throw_type = xh->exceptionType; | ||
142 | - | ||
143 | - void* thrown_ptr = *thrown_ptr_p; | ||
144 | + { | ||
145 | + if (dependent_exception) | ||
146 | + xh = __get_exception_header_from_obj (dx->primaryException); | ||
147 | + throw_type = xh->exceptionType; | ||
148 | + // We used to require the caller set the target of thrown_ptr_p, | ||
149 | + // but that's incorrect -- the EHABI makes no such requirement | ||
150 | + // -- and not all callers will set it. Fortunately callers that | ||
151 | + // do initialize will always pass us the value we calculate | ||
152 | + // here, so there's no backwards compatibility problem. | ||
153 | + thrown_ptr = __get_object_from_ue (ue_header); | ||
154 | + } | ||
155 | + | ||
156 | + __cxa_type_match_result result = ctm_succeeded; | ||
157 | |||
158 | // Pointer types need to adjust the actual pointer, not | ||
159 | // the pointer to pointer that is the exception object. | ||
160 | // This also has the effect of passing pointer types | ||
161 | // "by value" through the __cxa_begin_catch return value. | ||
162 | if (throw_type->__is_pointer_p()) | ||
163 | - thrown_ptr = *(void**) thrown_ptr; | ||
164 | + { | ||
165 | + thrown_ptr = *(void**) thrown_ptr; | ||
166 | + // We need to indicate the indirection to our caller. | ||
167 | + result = ctm_succeeded_with_ptr_to_base; | ||
168 | + } | ||
169 | |||
170 | if (catch_type->__do_catch(throw_type, &thrown_ptr, 1)) | ||
171 | { | ||
172 | *thrown_ptr_p = thrown_ptr; | ||
173 | - | ||
174 | - if (typeid(*catch_type) == typeid (typeid(void*))) | ||
175 | - { | ||
176 | - const __pointer_type_info *catch_pointer_type = | ||
177 | - static_cast<const __pointer_type_info *> (catch_type); | ||
178 | - const __pointer_type_info *throw_pointer_type = | ||
179 | - static_cast<const __pointer_type_info *> (throw_type); | ||
180 | - | ||
181 | - if (typeid (*catch_pointer_type->__pointee) != typeid (void) | ||
182 | - && (*catch_pointer_type->__pointee != | ||
183 | - *throw_pointer_type->__pointee)) | ||
184 | - return ctm_succeeded_with_ptr_to_base; | ||
185 | - } | ||
186 | - | ||
187 | - return ctm_succeeded; | ||
188 | + return result; | ||
189 | } | ||
190 | |||
191 | return ctm_failed; | ||
192 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch new file mode 100644 index 000000000..b42b425dc --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106778.patch | |||
@@ -0,0 +1,225 @@ | |||
1 | 2011-07-15 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | Backport from mainline r174540 | ||
4 | LP: #807573 | ||
5 | |||
6 | gcc/ | ||
7 | 2011-06-01 Richard Sandiford <rdsandiford@googlemail.com> | ||
8 | |||
9 | PR rtl-optimization/48830 | ||
10 | PR rtl-optimization/48808 | ||
11 | PR rtl-optimization/48792 | ||
12 | * reload.c (push_reload): Check contains_reg_of_mode. | ||
13 | * reload1.c (strip_paradoxical_subreg): New function. | ||
14 | (gen_reload_chain_without_interm_reg_p): Use it to handle | ||
15 | paradoxical subregs. | ||
16 | (emit_output_reload_insns, gen_reload): Likewise. | ||
17 | |||
18 | gcc/testsuite/ | ||
19 | 2011-06-01 Eric Botcazou <ebotcazou@adacore.com> | ||
20 | Hans-Peter Nilsson <hp@axis.com> | ||
21 | |||
22 | PR rtl-optimization/48830 | ||
23 | * gcc.target/sparc/ultrasp12.c: New test. | ||
24 | |||
25 | === modified file 'gcc/reload.c' | ||
26 | --- old/gcc/reload.c 2011-07-01 09:19:21 +0000 | ||
27 | +++ new/gcc/reload.c 2011-07-13 02:09:08 +0000 | ||
28 | @@ -1017,6 +1017,7 @@ | ||
29 | #ifdef CANNOT_CHANGE_MODE_CLASS | ||
30 | && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass) | ||
31 | #endif | ||
32 | + && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))] | ||
33 | && (CONSTANT_P (SUBREG_REG (in)) | ||
34 | || GET_CODE (SUBREG_REG (in)) == PLUS | ||
35 | || strict_low | ||
36 | @@ -1123,6 +1124,7 @@ | ||
37 | #ifdef CANNOT_CHANGE_MODE_CLASS | ||
38 | && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass) | ||
39 | #endif | ||
40 | + && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))] | ||
41 | && (CONSTANT_P (SUBREG_REG (out)) | ||
42 | || strict_low | ||
43 | || (((REG_P (SUBREG_REG (out)) | ||
44 | |||
45 | === modified file 'gcc/reload1.c' | ||
46 | --- old/gcc/reload1.c 2011-07-11 10:06:50 +0000 | ||
47 | +++ new/gcc/reload1.c 2011-07-14 22:14:45 +0000 | ||
48 | @@ -4476,6 +4476,43 @@ | ||
49 | } | ||
50 | } | ||
51 | } | ||
52 | + | ||
53 | +/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload. | ||
54 | + If *OP_PTR is a paradoxical subreg, try to remove that subreg | ||
55 | + and apply the corresponding narrowing subreg to *OTHER_PTR. | ||
56 | + Return true if the operands were changed, false otherwise. */ | ||
57 | + | ||
58 | +static bool | ||
59 | +strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr) | ||
60 | +{ | ||
61 | + rtx op, inner, other, tem; | ||
62 | + | ||
63 | + op = *op_ptr; | ||
64 | + if (GET_CODE (op) != SUBREG) | ||
65 | + return false; | ||
66 | + | ||
67 | + inner = SUBREG_REG (op); | ||
68 | + if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner))) | ||
69 | + return false; | ||
70 | + | ||
71 | + other = *other_ptr; | ||
72 | + tem = gen_lowpart_common (GET_MODE (inner), other); | ||
73 | + if (!tem) | ||
74 | + return false; | ||
75 | + | ||
76 | + /* If the lowpart operation turned a hard register into a subreg, | ||
77 | + rather than simplifying it to another hard register, then the | ||
78 | + mode change cannot be properly represented. For example, OTHER | ||
79 | + might be valid in its current mode, but not in the new one. */ | ||
80 | + if (GET_CODE (tem) == SUBREG | ||
81 | + && REG_P (other) | ||
82 | + && HARD_REGISTER_P (other)) | ||
83 | + return false; | ||
84 | + | ||
85 | + *op_ptr = inner; | ||
86 | + *other_ptr = tem; | ||
87 | + return true; | ||
88 | +} | ||
89 | |||
90 | /* A subroutine of reload_as_needed. If INSN has a REG_EH_REGION note, | ||
91 | examine all of the reload insns between PREV and NEXT exclusive, and | ||
92 | @@ -5556,7 +5593,7 @@ | ||
93 | chain reloads or do need an intermediate hard registers. */ | ||
94 | bool result = true; | ||
95 | int regno, n, code; | ||
96 | - rtx out, in, tem, insn; | ||
97 | + rtx out, in, insn; | ||
98 | rtx last = get_last_insn (); | ||
99 | |||
100 | /* Make r2 a component of r1. */ | ||
101 | @@ -5575,11 +5612,7 @@ | ||
102 | |||
103 | /* If IN is a paradoxical SUBREG, remove it and try to put the | ||
104 | opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ | ||
105 | - if (GET_CODE (in) == SUBREG | ||
106 | - && (GET_MODE_SIZE (GET_MODE (in)) | ||
107 | - > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) | ||
108 | - && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) | ||
109 | - in = SUBREG_REG (in), out = tem; | ||
110 | + strip_paradoxical_subreg (&in, &out); | ||
111 | |||
112 | if (GET_CODE (in) == PLUS | ||
113 | && (REG_P (XEXP (in, 0)) | ||
114 | @@ -7571,7 +7604,6 @@ | ||
115 | if (tertiary_icode != CODE_FOR_nothing) | ||
116 | { | ||
117 | rtx third_reloadreg = rld[tertiary_reload].reg_rtx; | ||
118 | - rtx tem; | ||
119 | |||
120 | /* Copy primary reload reg to secondary reload reg. | ||
121 | (Note that these have been swapped above, then | ||
122 | @@ -7580,13 +7612,7 @@ | ||
123 | /* If REAL_OLD is a paradoxical SUBREG, remove it | ||
124 | and try to put the opposite SUBREG on | ||
125 | RELOADREG. */ | ||
126 | - if (GET_CODE (real_old) == SUBREG | ||
127 | - && (GET_MODE_SIZE (GET_MODE (real_old)) | ||
128 | - > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old)))) | ||
129 | - && 0 != (tem = gen_lowpart_common | ||
130 | - (GET_MODE (SUBREG_REG (real_old)), | ||
131 | - reloadreg))) | ||
132 | - real_old = SUBREG_REG (real_old), reloadreg = tem; | ||
133 | + strip_paradoxical_subreg (&real_old, &reloadreg); | ||
134 | |||
135 | gen_reload (reloadreg, second_reloadreg, | ||
136 | rl->opnum, rl->when_needed); | ||
137 | @@ -8402,16 +8428,8 @@ | ||
138 | |||
139 | /* If IN is a paradoxical SUBREG, remove it and try to put the | ||
140 | opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ | ||
141 | - if (GET_CODE (in) == SUBREG | ||
142 | - && (GET_MODE_SIZE (GET_MODE (in)) | ||
143 | - > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) | ||
144 | - && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) | ||
145 | - in = SUBREG_REG (in), out = tem; | ||
146 | - else if (GET_CODE (out) == SUBREG | ||
147 | - && (GET_MODE_SIZE (GET_MODE (out)) | ||
148 | - > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))) | ||
149 | - && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0) | ||
150 | - out = SUBREG_REG (out), in = tem; | ||
151 | + if (!strip_paradoxical_subreg (&in, &out)) | ||
152 | + strip_paradoxical_subreg (&out, &in); | ||
153 | |||
154 | /* How to do this reload can get quite tricky. Normally, we are being | ||
155 | asked to reload a simple operand, such as a MEM, a constant, or a pseudo | ||
156 | |||
157 | === added file 'gcc/testsuite/gcc.target/sparc/ultrasp12.c' | ||
158 | --- old/gcc/testsuite/gcc.target/sparc/ultrasp12.c 1970-01-01 00:00:00 +0000 | ||
159 | +++ new/gcc/testsuite/gcc.target/sparc/ultrasp12.c 2011-07-13 02:09:08 +0000 | ||
160 | @@ -0,0 +1,64 @@ | ||
161 | +/* PR rtl-optimization/48830 */ | ||
162 | +/* Testcase by Hans-Peter Nilsson <hp@gcc.gnu.org> */ | ||
163 | + | ||
164 | +/* { dg-do compile } */ | ||
165 | +/* { dg-require-effective-target lp64 } */ | ||
166 | +/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */ | ||
167 | + | ||
168 | +typedef unsigned char uint8_t; | ||
169 | +typedef unsigned int uint32_t; | ||
170 | +typedef unsigned long int uint64_t; | ||
171 | +typedef unsigned long int uintmax_t; | ||
172 | +typedef unsigned char rc_vec_t __attribute__((__vector_size__(8))); | ||
173 | +typedef short rc_svec_type_ __attribute__((__vector_size__(8))); | ||
174 | +typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4))); | ||
175 | + | ||
176 | +void | ||
177 | +rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim, | ||
178 | + const uint8_t *__restrict src2, int src2_dim, | ||
179 | + int len, int height, uintmax_t sum[5]) | ||
180 | +{ | ||
181 | + uint32_t s1 = 0; | ||
182 | + uint32_t s2 = 0; | ||
183 | + uintmax_t s11 = 0; | ||
184 | + uintmax_t s22 = 0; | ||
185 | + uintmax_t s12 = 0; | ||
186 | + int full = len / ((1024) < (1024) ? (1024) : (1024)); | ||
187 | + int rem = len % ((1024) < (1024) ? (1024) : (1024)); | ||
188 | + int rem1 = rem / 1; | ||
189 | + int y; | ||
190 | + unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0; | ||
191 | + for (y = 0; y < height; y++) { | ||
192 | + rc_vec_t a1, a2, a11, a22, a12; | ||
193 | + int i1 = (y)*(src1_dim); | ||
194 | + int i2 = (y)*(src2_dim); | ||
195 | + int x; | ||
196 | + ((a1) = ((rc_vec_t) {0})); | ||
197 | + ((a2) = ((rc_vec_t) {0})); | ||
198 | + ((a11) = ((rc_vec_t) {0})); | ||
199 | + ((a22) = ((rc_vec_t) {0})); | ||
200 | + ((a12) = ((rc_vec_t) {0})); | ||
201 | + for (x = 0; x < full; x++) { | ||
202 | + int k; | ||
203 | + for (k = 0; k < ((1024) < (1024) ? (1024) : (1024)) / | ||
204 | + 1; k++) | ||
205 | + { | ||
206 | + do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep 
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = 
hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || 
!__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = 
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, 
mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t 
v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" 
(rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); | ||
207 | + | ||
208 | + } | ||
209 | + do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); | ||
210 | + } | ||
211 | + for (x = 0; x < rem1; x++) { | ||
212 | + do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep 
%[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = 
hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || 
!__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = 
__builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, 
mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t 
v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" 
(rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); | ||
213 | + } | ||
214 | + do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); | ||
215 | + | ||
216 | + do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); | ||
217 | + } | ||
218 | + sum[0] = s1; | ||
219 | + sum[1] = s2; | ||
220 | + sum[2] = s11; | ||
221 | + sum[3] = s22; | ||
222 | + sum[4] = s12; | ||
223 | + ; | ||
224 | +} | ||
225 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch new file mode 100644 index 000000000..a86ddfdec --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106781.patch | |||
@@ -0,0 +1,741 @@ | |||
1 | 2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | PR middle-end/49736 | ||
5 | * expr.c (all_zeros_p): Undo bogus part of last change. | ||
6 | |||
7 | 2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> | ||
8 | |||
9 | Backport from mainline: | ||
10 | gcc/cp/ | ||
11 | 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> | ||
12 | |||
13 | * typeck2.c (split_nonconstant_init_1): Pass the initializer directly, | ||
14 | rather than a pointer to it. Return true if the whole of the value | ||
15 | was initialized by the generated statements. Use | ||
16 | complete_ctor_at_level_p instead of count_type_elements. | ||
17 | |||
18 | gcc/ | ||
19 | 2011-07-13 Richard Sandiford <richard.sandiford@linaro.org> | ||
20 | |||
21 | * tree.h (categorize_ctor_elements): Remove comment. Fix long line. | ||
22 | (count_type_elements): Delete. | ||
23 | (complete_ctor_at_level_p): Declare. | ||
24 | * expr.c (flexible_array_member_p): New function, split out from... | ||
25 | (count_type_elements): ...here. Make static. Replace allow_flexarr | ||
26 | parameter with for_ctor_p. When for_ctor_p is true, return the | ||
27 | number of elements that should appear in the top-level constructor, | ||
28 | otherwise return an estimate of the number of scalars. | ||
29 | (categorize_ctor_elements): Replace p_must_clear with p_complete. | ||
30 | (categorize_ctor_elements_1): Likewise. Use complete_ctor_at_level_p. | ||
31 | (complete_ctor_at_level_p): New function, borrowing union logic | ||
32 | from old categorize_ctor_elements_1. | ||
33 | (mostly_zeros_p): Return true if the constructor is not complete. | ||
34 | (all_zeros_p): Update call to categorize_ctor_elements. | ||
35 | * gimplify.c (gimplify_init_constructor): Update call to | ||
36 | categorize_ctor_elements. Don't call count_type_elements. | ||
37 | Unconditionally prevent clearing for variable-sized types, | ||
38 | otherwise rely on categorize_ctor_elements to detect | ||
39 | incomplete initializers. | ||
40 | |||
41 | gcc/testsuite/ | ||
42 | 2011-07-13 Chung-Lin Tang <cltang@codesourcery.com> | ||
43 | |||
44 | * gcc.target/arm/pr48183.c: New test. | ||
45 | |||
46 | === modified file 'gcc/cp/typeck2.c' | ||
47 | --- old/gcc/cp/typeck2.c 2011-05-20 21:29:14 +0000 | ||
48 | +++ new/gcc/cp/typeck2.c 2011-07-13 13:17:31 +0000 | ||
49 | @@ -473,18 +473,20 @@ | ||
50 | |||
51 | |||
52 | /* The recursive part of split_nonconstant_init. DEST is an lvalue | ||
53 | - expression to which INIT should be assigned. INIT is a CONSTRUCTOR. */ | ||
54 | + expression to which INIT should be assigned. INIT is a CONSTRUCTOR. | ||
55 | + Return true if the whole of the value was initialized by the | ||
56 | + generated statements. */ | ||
57 | |||
58 | -static void | ||
59 | -split_nonconstant_init_1 (tree dest, tree *initp) | ||
60 | +static bool | ||
61 | +split_nonconstant_init_1 (tree dest, tree init) | ||
62 | { | ||
63 | unsigned HOST_WIDE_INT idx; | ||
64 | - tree init = *initp; | ||
65 | tree field_index, value; | ||
66 | tree type = TREE_TYPE (dest); | ||
67 | tree inner_type = NULL; | ||
68 | bool array_type_p = false; | ||
69 | - HOST_WIDE_INT num_type_elements, num_initialized_elements; | ||
70 | + bool complete_p = true; | ||
71 | + HOST_WIDE_INT num_split_elts = 0; | ||
72 | |||
73 | switch (TREE_CODE (type)) | ||
74 | { | ||
75 | @@ -496,7 +498,6 @@ | ||
76 | case RECORD_TYPE: | ||
77 | case UNION_TYPE: | ||
78 | case QUAL_UNION_TYPE: | ||
79 | - num_initialized_elements = 0; | ||
80 | FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, | ||
81 | field_index, value) | ||
82 | { | ||
83 | @@ -519,13 +520,14 @@ | ||
84 | sub = build3 (COMPONENT_REF, inner_type, dest, field_index, | ||
85 | NULL_TREE); | ||
86 | |||
87 | - split_nonconstant_init_1 (sub, &value); | ||
88 | + if (!split_nonconstant_init_1 (sub, value)) | ||
89 | + complete_p = false; | ||
90 | + num_split_elts++; | ||
91 | } | ||
92 | else if (!initializer_constant_valid_p (value, inner_type)) | ||
93 | { | ||
94 | tree code; | ||
95 | tree sub; | ||
96 | - HOST_WIDE_INT inner_elements; | ||
97 | |||
98 | /* FIXME: Ordered removal is O(1) so the whole function is | ||
99 | worst-case quadratic. This could be fixed using an aside | ||
100 | @@ -549,21 +551,9 @@ | ||
101 | code = build_stmt (input_location, EXPR_STMT, code); | ||
102 | add_stmt (code); | ||
103 | |||
104 | - inner_elements = count_type_elements (inner_type, true); | ||
105 | - if (inner_elements < 0) | ||
106 | - num_initialized_elements = -1; | ||
107 | - else if (num_initialized_elements >= 0) | ||
108 | - num_initialized_elements += inner_elements; | ||
109 | - continue; | ||
110 | + num_split_elts++; | ||
111 | } | ||
112 | } | ||
113 | - | ||
114 | - num_type_elements = count_type_elements (type, true); | ||
115 | - /* If all elements of the initializer are non-constant and | ||
116 | - have been split out, we don't need the empty CONSTRUCTOR. */ | ||
117 | - if (num_type_elements > 0 | ||
118 | - && num_type_elements == num_initialized_elements) | ||
119 | - *initp = NULL; | ||
120 | break; | ||
121 | |||
122 | case VECTOR_TYPE: | ||
123 | @@ -575,6 +565,7 @@ | ||
124 | code = build2 (MODIFY_EXPR, type, dest, cons); | ||
125 | code = build_stmt (input_location, EXPR_STMT, code); | ||
126 | add_stmt (code); | ||
127 | + num_split_elts += CONSTRUCTOR_NELTS (init); | ||
128 | } | ||
129 | break; | ||
130 | |||
131 | @@ -584,6 +575,8 @@ | ||
132 | |||
133 | /* The rest of the initializer is now a constant. */ | ||
134 | TREE_CONSTANT (init) = 1; | ||
135 | + return complete_p && complete_ctor_at_level_p (TREE_TYPE (init), | ||
136 | + num_split_elts, inner_type); | ||
137 | } | ||
138 | |||
139 | /* A subroutine of store_init_value. Splits non-constant static | ||
140 | @@ -599,7 +592,8 @@ | ||
141 | if (TREE_CODE (init) == CONSTRUCTOR) | ||
142 | { | ||
143 | code = push_stmt_list (); | ||
144 | - split_nonconstant_init_1 (dest, &init); | ||
145 | + if (split_nonconstant_init_1 (dest, init)) | ||
146 | + init = NULL_TREE; | ||
147 | code = pop_stmt_list (code); | ||
148 | DECL_INITIAL (dest) = init; | ||
149 | TREE_READONLY (dest) = 0; | ||
150 | |||
151 | === modified file 'gcc/expr.c' | ||
152 | --- old/gcc/expr.c 2011-06-02 12:12:00 +0000 | ||
153 | +++ new/gcc/expr.c 2011-07-14 11:52:32 +0000 | ||
154 | @@ -4866,16 +4866,136 @@ | ||
155 | return NULL_RTX; | ||
156 | } | ||
157 | |||
158 | +/* Return true if field F of structure TYPE is a flexible array. */ | ||
159 | + | ||
160 | +static bool | ||
161 | +flexible_array_member_p (const_tree f, const_tree type) | ||
162 | +{ | ||
163 | + const_tree tf; | ||
164 | + | ||
165 | + tf = TREE_TYPE (f); | ||
166 | + return (DECL_CHAIN (f) == NULL | ||
167 | + && TREE_CODE (tf) == ARRAY_TYPE | ||
168 | + && TYPE_DOMAIN (tf) | ||
169 | + && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) | ||
170 | + && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) | ||
171 | + && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) | ||
172 | + && int_size_in_bytes (type) >= 0); | ||
173 | +} | ||
174 | + | ||
175 | +/* If FOR_CTOR_P, return the number of top-level elements that a constructor | ||
176 | + must have in order for it to completely initialize a value of type TYPE. | ||
177 | + Return -1 if the number isn't known. | ||
178 | + | ||
179 | + If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE. */ | ||
180 | + | ||
181 | +static HOST_WIDE_INT | ||
182 | +count_type_elements (const_tree type, bool for_ctor_p) | ||
183 | +{ | ||
184 | + switch (TREE_CODE (type)) | ||
185 | + { | ||
186 | + case ARRAY_TYPE: | ||
187 | + { | ||
188 | + tree nelts; | ||
189 | + | ||
190 | + nelts = array_type_nelts (type); | ||
191 | + if (nelts && host_integerp (nelts, 1)) | ||
192 | + { | ||
193 | + unsigned HOST_WIDE_INT n; | ||
194 | + | ||
195 | + n = tree_low_cst (nelts, 1) + 1; | ||
196 | + if (n == 0 || for_ctor_p) | ||
197 | + return n; | ||
198 | + else | ||
199 | + return n * count_type_elements (TREE_TYPE (type), false); | ||
200 | + } | ||
201 | + return for_ctor_p ? -1 : 1; | ||
202 | + } | ||
203 | + | ||
204 | + case RECORD_TYPE: | ||
205 | + { | ||
206 | + unsigned HOST_WIDE_INT n; | ||
207 | + tree f; | ||
208 | + | ||
209 | + n = 0; | ||
210 | + for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | ||
211 | + if (TREE_CODE (f) == FIELD_DECL) | ||
212 | + { | ||
213 | + if (!for_ctor_p) | ||
214 | + n += count_type_elements (TREE_TYPE (f), false); | ||
215 | + else if (!flexible_array_member_p (f, type)) | ||
216 | + /* Don't count flexible arrays, which are not supposed | ||
217 | + to be initialized. */ | ||
218 | + n += 1; | ||
219 | + } | ||
220 | + | ||
221 | + return n; | ||
222 | + } | ||
223 | + | ||
224 | + case UNION_TYPE: | ||
225 | + case QUAL_UNION_TYPE: | ||
226 | + { | ||
227 | + tree f; | ||
228 | + HOST_WIDE_INT n, m; | ||
229 | + | ||
230 | + gcc_assert (!for_ctor_p); | ||
231 | + /* Estimate the number of scalars in each field and pick the | ||
232 | + maximum. Other estimates would do instead; the idea is simply | ||
233 | + to make sure that the estimate is not sensitive to the ordering | ||
234 | + of the fields. */ | ||
235 | + n = 1; | ||
236 | + for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | ||
237 | + if (TREE_CODE (f) == FIELD_DECL) | ||
238 | + { | ||
239 | + m = count_type_elements (TREE_TYPE (f), false); | ||
240 | + /* If the field doesn't span the whole union, add an extra | ||
241 | + scalar for the rest. */ | ||
242 | + if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)), | ||
243 | + TYPE_SIZE (type)) != 1) | ||
244 | + m++; | ||
245 | + if (n < m) | ||
246 | + n = m; | ||
247 | + } | ||
248 | + return n; | ||
249 | + } | ||
250 | + | ||
251 | + case COMPLEX_TYPE: | ||
252 | + return 2; | ||
253 | + | ||
254 | + case VECTOR_TYPE: | ||
255 | + return TYPE_VECTOR_SUBPARTS (type); | ||
256 | + | ||
257 | + case INTEGER_TYPE: | ||
258 | + case REAL_TYPE: | ||
259 | + case FIXED_POINT_TYPE: | ||
260 | + case ENUMERAL_TYPE: | ||
261 | + case BOOLEAN_TYPE: | ||
262 | + case POINTER_TYPE: | ||
263 | + case OFFSET_TYPE: | ||
264 | + case REFERENCE_TYPE: | ||
265 | + return 1; | ||
266 | + | ||
267 | + case ERROR_MARK: | ||
268 | + return 0; | ||
269 | + | ||
270 | + case VOID_TYPE: | ||
271 | + case METHOD_TYPE: | ||
272 | + case FUNCTION_TYPE: | ||
273 | + case LANG_TYPE: | ||
274 | + default: | ||
275 | + gcc_unreachable (); | ||
276 | + } | ||
277 | +} | ||
278 | + | ||
279 | /* Helper for categorize_ctor_elements. Identical interface. */ | ||
280 | |||
281 | static bool | ||
282 | categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, | ||
283 | - HOST_WIDE_INT *p_elt_count, | ||
284 | - bool *p_must_clear) | ||
285 | + HOST_WIDE_INT *p_init_elts, bool *p_complete) | ||
286 | { | ||
287 | unsigned HOST_WIDE_INT idx; | ||
288 | - HOST_WIDE_INT nz_elts, elt_count; | ||
289 | - tree value, purpose; | ||
290 | + HOST_WIDE_INT nz_elts, init_elts, num_fields; | ||
291 | + tree value, purpose, elt_type; | ||
292 | |||
293 | /* Whether CTOR is a valid constant initializer, in accordance with what | ||
294 | initializer_constant_valid_p does. If inferred from the constructor | ||
295 | @@ -4884,7 +5004,9 @@ | ||
296 | bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); | ||
297 | |||
298 | nz_elts = 0; | ||
299 | - elt_count = 0; | ||
300 | + init_elts = 0; | ||
301 | + num_fields = 0; | ||
302 | + elt_type = NULL_TREE; | ||
303 | |||
304 | FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value) | ||
305 | { | ||
306 | @@ -4899,6 +5021,8 @@ | ||
307 | mult = (tree_low_cst (hi_index, 1) | ||
308 | - tree_low_cst (lo_index, 1) + 1); | ||
309 | } | ||
310 | + num_fields += mult; | ||
311 | + elt_type = TREE_TYPE (value); | ||
312 | |||
313 | switch (TREE_CODE (value)) | ||
314 | { | ||
315 | @@ -4906,11 +5030,11 @@ | ||
316 | { | ||
317 | HOST_WIDE_INT nz = 0, ic = 0; | ||
318 | |||
319 | - bool const_elt_p | ||
320 | - = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear); | ||
321 | + bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic, | ||
322 | + p_complete); | ||
323 | |||
324 | nz_elts += mult * nz; | ||
325 | - elt_count += mult * ic; | ||
326 | + init_elts += mult * ic; | ||
327 | |||
328 | if (const_from_elts_p && const_p) | ||
329 | const_p = const_elt_p; | ||
330 | @@ -4922,12 +5046,12 @@ | ||
331 | case FIXED_CST: | ||
332 | if (!initializer_zerop (value)) | ||
333 | nz_elts += mult; | ||
334 | - elt_count += mult; | ||
335 | + init_elts += mult; | ||
336 | break; | ||
337 | |||
338 | case STRING_CST: | ||
339 | nz_elts += mult * TREE_STRING_LENGTH (value); | ||
340 | - elt_count += mult * TREE_STRING_LENGTH (value); | ||
341 | + init_elts += mult * TREE_STRING_LENGTH (value); | ||
342 | break; | ||
343 | |||
344 | case COMPLEX_CST: | ||
345 | @@ -4935,7 +5059,7 @@ | ||
346 | nz_elts += mult; | ||
347 | if (!initializer_zerop (TREE_IMAGPART (value))) | ||
348 | nz_elts += mult; | ||
349 | - elt_count += mult; | ||
350 | + init_elts += mult; | ||
351 | break; | ||
352 | |||
353 | case VECTOR_CST: | ||
354 | @@ -4945,65 +5069,31 @@ | ||
355 | { | ||
356 | if (!initializer_zerop (TREE_VALUE (v))) | ||
357 | nz_elts += mult; | ||
358 | - elt_count += mult; | ||
359 | + init_elts += mult; | ||
360 | } | ||
361 | } | ||
362 | break; | ||
363 | |||
364 | default: | ||
365 | { | ||
366 | - HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true); | ||
367 | - if (tc < 1) | ||
368 | - tc = 1; | ||
369 | + HOST_WIDE_INT tc = count_type_elements (elt_type, false); | ||
370 | nz_elts += mult * tc; | ||
371 | - elt_count += mult * tc; | ||
372 | + init_elts += mult * tc; | ||
373 | |||
374 | if (const_from_elts_p && const_p) | ||
375 | - const_p = initializer_constant_valid_p (value, TREE_TYPE (value)) | ||
376 | + const_p = initializer_constant_valid_p (value, elt_type) | ||
377 | != NULL_TREE; | ||
378 | } | ||
379 | break; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | - if (!*p_must_clear | ||
384 | - && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE | ||
385 | - || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE)) | ||
386 | - { | ||
387 | - tree init_sub_type; | ||
388 | - bool clear_this = true; | ||
389 | - | ||
390 | - if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor))) | ||
391 | - { | ||
392 | - /* We don't expect more than one element of the union to be | ||
393 | - initialized. Not sure what we should do otherwise... */ | ||
394 | - gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor)) | ||
395 | - == 1); | ||
396 | - | ||
397 | - init_sub_type = TREE_TYPE (VEC_index (constructor_elt, | ||
398 | - CONSTRUCTOR_ELTS (ctor), | ||
399 | - 0)->value); | ||
400 | - | ||
401 | - /* ??? We could look at each element of the union, and find the | ||
402 | - largest element. Which would avoid comparing the size of the | ||
403 | - initialized element against any tail padding in the union. | ||
404 | - Doesn't seem worth the effort... */ | ||
405 | - if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)), | ||
406 | - TYPE_SIZE (init_sub_type)) == 1) | ||
407 | - { | ||
408 | - /* And now we have to find out if the element itself is fully | ||
409 | - constructed. E.g. for union { struct { int a, b; } s; } u | ||
410 | - = { .s = { .a = 1 } }. */ | ||
411 | - if (elt_count == count_type_elements (init_sub_type, false)) | ||
412 | - clear_this = false; | ||
413 | - } | ||
414 | - } | ||
415 | - | ||
416 | - *p_must_clear = clear_this; | ||
417 | - } | ||
418 | + if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor), | ||
419 | + num_fields, elt_type)) | ||
420 | + *p_complete = false; | ||
421 | |||
422 | *p_nz_elts += nz_elts; | ||
423 | - *p_elt_count += elt_count; | ||
424 | + *p_init_elts += init_elts; | ||
425 | |||
426 | return const_p; | ||
427 | } | ||
428 | @@ -5013,111 +5103,50 @@ | ||
429 | and place it in *P_NZ_ELTS; | ||
430 | * how many scalar fields in total are in CTOR, | ||
431 | and place it in *P_ELT_COUNT. | ||
432 | - * if a type is a union, and the initializer from the constructor | ||
433 | - is not the largest element in the union, then set *p_must_clear. | ||
434 | + * whether the constructor is complete -- in the sense that every | ||
435 | + meaningful byte is explicitly given a value -- | ||
436 | + and place it in *P_COMPLETE. | ||
437 | |||
438 | Return whether or not CTOR is a valid static constant initializer, the same | ||
439 | as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ | ||
440 | |||
441 | bool | ||
442 | categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, | ||
443 | - HOST_WIDE_INT *p_elt_count, | ||
444 | - bool *p_must_clear) | ||
445 | + HOST_WIDE_INT *p_init_elts, bool *p_complete) | ||
446 | { | ||
447 | *p_nz_elts = 0; | ||
448 | - *p_elt_count = 0; | ||
449 | - *p_must_clear = false; | ||
450 | + *p_init_elts = 0; | ||
451 | + *p_complete = true; | ||
452 | |||
453 | - return | ||
454 | - categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear); | ||
455 | + return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete); | ||
456 | } | ||
457 | |||
458 | -/* Count the number of scalars in TYPE. Return -1 on overflow or | ||
459 | - variable-sized. If ALLOW_FLEXARR is true, don't count flexible | ||
460 | - array member at the end of the structure. */ | ||
461 | +/* TYPE is initialized by a constructor with NUM_ELTS elements, the last | ||
462 | + of which had type LAST_TYPE. Each element was itself a complete | ||
463 | + initializer, in the sense that every meaningful byte was explicitly | ||
464 | + given a value. Return true if the same is true for the constructor | ||
465 | + as a whole. */ | ||
466 | |||
467 | -HOST_WIDE_INT | ||
468 | -count_type_elements (const_tree type, bool allow_flexarr) | ||
469 | +bool | ||
470 | +complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts, | ||
471 | + const_tree last_type) | ||
472 | { | ||
473 | - const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1)); | ||
474 | - switch (TREE_CODE (type)) | ||
475 | + if (TREE_CODE (type) == UNION_TYPE | ||
476 | + || TREE_CODE (type) == QUAL_UNION_TYPE) | ||
477 | { | ||
478 | - case ARRAY_TYPE: | ||
479 | - { | ||
480 | - tree telts = array_type_nelts (type); | ||
481 | - if (telts && host_integerp (telts, 1)) | ||
482 | - { | ||
483 | - HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1; | ||
484 | - HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false); | ||
485 | - if (n == 0) | ||
486 | - return 0; | ||
487 | - else if (max / n > m) | ||
488 | - return n * m; | ||
489 | - } | ||
490 | - return -1; | ||
491 | - } | ||
492 | - | ||
493 | - case RECORD_TYPE: | ||
494 | - { | ||
495 | - HOST_WIDE_INT n = 0, t; | ||
496 | - tree f; | ||
497 | - | ||
498 | - for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | ||
499 | - if (TREE_CODE (f) == FIELD_DECL) | ||
500 | - { | ||
501 | - t = count_type_elements (TREE_TYPE (f), false); | ||
502 | - if (t < 0) | ||
503 | - { | ||
504 | - /* Check for structures with flexible array member. */ | ||
505 | - tree tf = TREE_TYPE (f); | ||
506 | - if (allow_flexarr | ||
507 | - && DECL_CHAIN (f) == NULL | ||
508 | - && TREE_CODE (tf) == ARRAY_TYPE | ||
509 | - && TYPE_DOMAIN (tf) | ||
510 | - && TYPE_MIN_VALUE (TYPE_DOMAIN (tf)) | ||
511 | - && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf))) | ||
512 | - && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf)) | ||
513 | - && int_size_in_bytes (type) >= 0) | ||
514 | - break; | ||
515 | - | ||
516 | - return -1; | ||
517 | - } | ||
518 | - n += t; | ||
519 | - } | ||
520 | - | ||
521 | - return n; | ||
522 | - } | ||
523 | - | ||
524 | - case UNION_TYPE: | ||
525 | - case QUAL_UNION_TYPE: | ||
526 | - return -1; | ||
527 | - | ||
528 | - case COMPLEX_TYPE: | ||
529 | - return 2; | ||
530 | - | ||
531 | - case VECTOR_TYPE: | ||
532 | - return TYPE_VECTOR_SUBPARTS (type); | ||
533 | - | ||
534 | - case INTEGER_TYPE: | ||
535 | - case REAL_TYPE: | ||
536 | - case FIXED_POINT_TYPE: | ||
537 | - case ENUMERAL_TYPE: | ||
538 | - case BOOLEAN_TYPE: | ||
539 | - case POINTER_TYPE: | ||
540 | - case OFFSET_TYPE: | ||
541 | - case REFERENCE_TYPE: | ||
542 | - return 1; | ||
543 | - | ||
544 | - case ERROR_MARK: | ||
545 | - return 0; | ||
546 | - | ||
547 | - case VOID_TYPE: | ||
548 | - case METHOD_TYPE: | ||
549 | - case FUNCTION_TYPE: | ||
550 | - case LANG_TYPE: | ||
551 | - default: | ||
552 | - gcc_unreachable (); | ||
553 | + if (num_elts == 0) | ||
554 | + return false; | ||
555 | + | ||
556 | + gcc_assert (num_elts == 1 && last_type); | ||
557 | + | ||
558 | + /* ??? We could look at each element of the union, and find the | ||
559 | + largest element. Which would avoid comparing the size of the | ||
560 | + initialized element against any tail padding in the union. | ||
561 | + Doesn't seem worth the effort... */ | ||
562 | + return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1; | ||
563 | } | ||
564 | + | ||
565 | + return count_type_elements (type, true) == num_elts; | ||
566 | } | ||
567 | |||
568 | /* Return 1 if EXP contains mostly (3/4) zeros. */ | ||
569 | @@ -5126,18 +5155,12 @@ | ||
570 | mostly_zeros_p (const_tree exp) | ||
571 | { | ||
572 | if (TREE_CODE (exp) == CONSTRUCTOR) | ||
573 | - | ||
574 | { | ||
575 | - HOST_WIDE_INT nz_elts, count, elts; | ||
576 | - bool must_clear; | ||
577 | - | ||
578 | - categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); | ||
579 | - if (must_clear) | ||
580 | - return 1; | ||
581 | - | ||
582 | - elts = count_type_elements (TREE_TYPE (exp), false); | ||
583 | - | ||
584 | - return nz_elts < elts / 4; | ||
585 | + HOST_WIDE_INT nz_elts, init_elts; | ||
586 | + bool complete_p; | ||
587 | + | ||
588 | + categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); | ||
589 | + return !complete_p || nz_elts < init_elts / 4; | ||
590 | } | ||
591 | |||
592 | return initializer_zerop (exp); | ||
593 | @@ -5149,12 +5172,11 @@ | ||
594 | all_zeros_p (const_tree exp) | ||
595 | { | ||
596 | if (TREE_CODE (exp) == CONSTRUCTOR) | ||
597 | - | ||
598 | { | ||
599 | - HOST_WIDE_INT nz_elts, count; | ||
600 | - bool must_clear; | ||
601 | + HOST_WIDE_INT nz_elts, init_elts; | ||
602 | + bool complete_p; | ||
603 | |||
604 | - categorize_ctor_elements (exp, &nz_elts, &count, &must_clear); | ||
605 | + categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); | ||
606 | return nz_elts == 0; | ||
607 | } | ||
608 | |||
609 | |||
610 | === modified file 'gcc/gimplify.c' | ||
611 | --- old/gcc/gimplify.c 2011-05-26 10:27:57 +0000 | ||
612 | +++ new/gcc/gimplify.c 2011-07-13 13:17:31 +0000 | ||
613 | @@ -3693,9 +3693,8 @@ | ||
614 | case ARRAY_TYPE: | ||
615 | { | ||
616 | struct gimplify_init_ctor_preeval_data preeval_data; | ||
617 | - HOST_WIDE_INT num_type_elements, num_ctor_elements; | ||
618 | - HOST_WIDE_INT num_nonzero_elements; | ||
619 | - bool cleared, valid_const_initializer; | ||
620 | + HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; | ||
621 | + bool cleared, complete_p, valid_const_initializer; | ||
622 | |||
623 | /* Aggregate types must lower constructors to initialization of | ||
624 | individual elements. The exception is that a CONSTRUCTOR node | ||
625 | @@ -3712,7 +3711,7 @@ | ||
626 | can only do so if it known to be a valid constant initializer. */ | ||
627 | valid_const_initializer | ||
628 | = categorize_ctor_elements (ctor, &num_nonzero_elements, | ||
629 | - &num_ctor_elements, &cleared); | ||
630 | + &num_ctor_elements, &complete_p); | ||
631 | |||
632 | /* If a const aggregate variable is being initialized, then it | ||
633 | should never be a lose to promote the variable to be static. */ | ||
634 | @@ -3750,26 +3749,29 @@ | ||
635 | parts in, then generate code for the non-constant parts. */ | ||
636 | /* TODO. There's code in cp/typeck.c to do this. */ | ||
637 | |||
638 | - num_type_elements = count_type_elements (type, true); | ||
639 | + if (int_size_in_bytes (TREE_TYPE (ctor)) < 0) | ||
640 | + /* store_constructor will ignore the clearing of variable-sized | ||
641 | + objects. Initializers for such objects must explicitly set | ||
642 | + every field that needs to be set. */ | ||
643 | + cleared = false; | ||
644 | + else if (!complete_p) | ||
645 | + /* If the constructor isn't complete, clear the whole object | ||
646 | + beforehand. | ||
647 | |||
648 | - /* If count_type_elements could not determine number of type elements | ||
649 | - for a constant-sized object, assume clearing is needed. | ||
650 | - Don't do this for variable-sized objects, as store_constructor | ||
651 | - will ignore the clearing of variable-sized objects. */ | ||
652 | - if (num_type_elements < 0 && int_size_in_bytes (type) >= 0) | ||
653 | + ??? This ought not to be needed. For any element not present | ||
654 | + in the initializer, we should simply set them to zero. Except | ||
655 | + we'd need to *find* the elements that are not present, and that | ||
656 | + requires trickery to avoid quadratic compile-time behavior in | ||
657 | + large cases or excessive memory use in small cases. */ | ||
658 | cleared = true; | ||
659 | - /* If there are "lots" of zeros, then block clear the object first. */ | ||
660 | - else if (num_type_elements - num_nonzero_elements | ||
661 | + else if (num_ctor_elements - num_nonzero_elements | ||
662 | > CLEAR_RATIO (optimize_function_for_speed_p (cfun)) | ||
663 | - && num_nonzero_elements < num_type_elements/4) | ||
664 | - cleared = true; | ||
665 | - /* ??? This bit ought not be needed. For any element not present | ||
666 | - in the initializer, we should simply set them to zero. Except | ||
667 | - we'd need to *find* the elements that are not present, and that | ||
668 | - requires trickery to avoid quadratic compile-time behavior in | ||
669 | - large cases or excessive memory use in small cases. */ | ||
670 | - else if (num_ctor_elements < num_type_elements) | ||
671 | - cleared = true; | ||
672 | + && num_nonzero_elements < num_ctor_elements / 4) | ||
673 | + /* If there are "lots" of zeros, it's more efficient to clear | ||
674 | + the memory and then set the nonzero elements. */ | ||
675 | + cleared = true; | ||
676 | + else | ||
677 | + cleared = false; | ||
678 | |||
679 | /* If there are "lots" of initialized elements, and all of them | ||
680 | are valid address constants, then the entire initializer can | ||
681 | |||
682 | === added file 'gcc/testsuite/gcc.target/arm/pr48183.c' | ||
683 | --- old/gcc/testsuite/gcc.target/arm/pr48183.c 1970-01-01 00:00:00 +0000 | ||
684 | +++ new/gcc/testsuite/gcc.target/arm/pr48183.c 2011-07-13 13:17:31 +0000 | ||
685 | @@ -0,0 +1,25 @@ | ||
686 | +/* testsuite/gcc.target/arm/pr48183.c */ | ||
687 | + | ||
688 | +/* { dg-do compile } */ | ||
689 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
690 | +/* { dg-options "-O -g" } */ | ||
691 | +/* { dg-add-options arm_neon } */ | ||
692 | + | ||
693 | +#include <arm_neon.h> | ||
694 | + | ||
695 | +void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n) | ||
696 | +{ | ||
697 | + unsigned i; | ||
698 | + int16x4x2_t input; | ||
699 | + int32x4x2_t mid; | ||
700 | + int32x4x2_t output; | ||
701 | + | ||
702 | + for (i = 0; i < n/2; i += 8) { | ||
703 | + input = vld2_s16(src + i); | ||
704 | + mid.val[0] = vmovl_s16(input.val[0]); | ||
705 | + mid.val[1] = vmovl_s16(input.val[1]); | ||
706 | + output.val[0] = vshlq_n_s32(mid.val[0], 8); | ||
707 | + output.val[1] = vshlq_n_s32(mid.val[1], 8); | ||
708 | + vst2q_s32((int32_t *)dst + i, output); | ||
709 | + } | ||
710 | +} | ||
711 | |||
712 | === modified file 'gcc/tree.h' | ||
713 | --- old/gcc/tree.h 2011-07-01 09:19:21 +0000 | ||
714 | +++ new/gcc/tree.h 2011-07-13 13:17:31 +0000 | ||
715 | @@ -4627,21 +4627,10 @@ | ||
716 | |||
717 | extern VEC(tree,gc) *ctor_to_vec (tree); | ||
718 | |||
719 | -/* Examine CTOR to discover: | ||
720 | - * how many scalar fields are set to nonzero values, | ||
721 | - and place it in *P_NZ_ELTS; | ||
722 | - * how many scalar fields in total are in CTOR, | ||
723 | - and place it in *P_ELT_COUNT. | ||
724 | - * if a type is a union, and the initializer from the constructor | ||
725 | - is not the largest element in the union, then set *p_must_clear. | ||
726 | - | ||
727 | - Return whether or not CTOR is a valid static constant initializer, the same | ||
728 | - as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ | ||
729 | - | ||
730 | -extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, | ||
731 | - bool *); | ||
732 | - | ||
733 | -extern HOST_WIDE_INT count_type_elements (const_tree, bool); | ||
734 | +extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, | ||
735 | + HOST_WIDE_INT *, bool *); | ||
736 | + | ||
737 | +extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree); | ||
738 | |||
739 | /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */ | ||
740 | |||
741 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch new file mode 100644 index 000000000..5335a9e37 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106782.patch | |||
@@ -0,0 +1,27 @@ | |||
1 | 2011-07-21 Richard Sandiford <rdsandiford@googlemail.com> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | |||
6 | 2011-07-21 Richard Sandiford <richard.sandiford@linaro.org> | ||
7 | |||
8 | * regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK. | ||
9 | |||
10 | === modified file 'gcc/regcprop.c' | ||
11 | --- old/gcc/regcprop.c 2010-12-17 22:51:25 +0000 | ||
12 | +++ new/gcc/regcprop.c 2011-07-21 11:30:53 +0000 | ||
13 | @@ -418,10 +418,9 @@ | ||
14 | |||
15 | offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0) | ||
16 | + (BYTES_BIG_ENDIAN ? byteoffset : 0)); | ||
17 | - return gen_rtx_raw_REG (new_mode, | ||
18 | - regno + subreg_regno_offset (regno, orig_mode, | ||
19 | - offset, | ||
20 | - new_mode)); | ||
21 | + regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); | ||
22 | + if (HARD_REGNO_MODE_OK (regno, new_mode)) | ||
23 | + return gen_rtx_raw_REG (new_mode, regno); | ||
24 | } | ||
25 | return NULL_RTX; | ||
26 | } | ||
27 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc index e3f6114e5..86dceabc3 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc | |||
@@ -18,4 +18,22 @@ file://linaro/gcc-4.6-linaro-r106751.patch \ | |||
18 | file://linaro/gcc-4.6-linaro-r106753.patch \ | 18 | file://linaro/gcc-4.6-linaro-r106753.patch \ |
19 | file://linaro/gcc-4.6-linaro-r106754.patch \ | 19 | file://linaro/gcc-4.6-linaro-r106754.patch \ |
20 | file://linaro/gcc-4.6-linaro-r106755.patch \ | 20 | file://linaro/gcc-4.6-linaro-r106755.patch \ |
21 | file://linaro/gcc-4.6-linaro-r106759.patch \ | ||
22 | file://linaro/gcc-4.6-linaro-r106761.patch \ | ||
23 | file://linaro/gcc-4.6-linaro-r106762.patch \ | ||
24 | file://linaro/gcc-4.6-linaro-r106763.patch \ | ||
25 | file://linaro/gcc-4.6-linaro-r106764.patch \ | ||
26 | file://linaro/gcc-4.6-linaro-r106766.patch \ | ||
27 | file://linaro/gcc-4.6-linaro-r106768.patch \ | ||
28 | file://linaro/gcc-4.6-linaro-r106769.patch \ | ||
29 | file://linaro/gcc-4.6-linaro-r106770.patch \ | ||
30 | file://linaro/gcc-4.6-linaro-r106771.patch \ | ||
31 | file://linaro/gcc-4.6-linaro-r106772.patch \ | ||
32 | file://linaro/gcc-4.6-linaro-r106773.patch \ | ||
33 | file://linaro/gcc-4.6-linaro-r106775.patch \ | ||
34 | file://linaro/gcc-4.6-linaro-r106776.patch \ | ||
35 | file://linaro/gcc-4.6-linaro-r106777.patch \ | ||
36 | file://linaro/gcc-4.6-linaro-r106778.patch \ | ||
37 | file://linaro/gcc-4.6-linaro-r106781.patch \ | ||
38 | file://linaro/gcc-4.6-linaro-r106782.patch \ | ||
21 | " | 39 | " |
diff --git a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc index e5a1fba59..0faf45e93 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc | |||
@@ -1,4 +1,4 @@ | |||
1 | # this will prepend this layer to FILESPATH | 1 | # this will prepend this layer to FILESPATH |
2 | FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" | 2 | FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" |
3 | PRINC = "1" | 3 | PRINC = "2" |
4 | ARM_INSTRUCTION_SET = "arm" | 4 | ARM_INSTRUCTION_SET = "arm" |