diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch | 1270 |
1 files changed, 0 insertions, 1270 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch deleted file mode 100644 index dfdeec7245..0000000000 --- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch +++ /dev/null | |||
@@ -1,1270 +0,0 @@ | |||
1 | 2011-09-07 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-08-04 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-vectorizer.h (struct _stmt_vec_info): Add new field for | ||
9 | pattern def statement, and its access macro. | ||
10 | (NUM_PATTERNS): Set to 5. | ||
11 | * tree-vect-loop.c (vect_determine_vectorization_factor): Handle | ||
12 | pattern def statement. | ||
13 | (vect_transform_loop): Likewise. | ||
14 | * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new | ||
15 | function vect_recog_over_widening_pattern (). | ||
16 | (vect_operation_fits_smaller_type): New function. | ||
17 | (vect_recog_over_widening_pattern, vect_mark_pattern_stmts): | ||
18 | Likewise. | ||
19 | (vect_pattern_recog_1): Move the code that marks pattern | ||
20 | statements to vect_mark_pattern_stmts (), and call it. Update | ||
21 | documentation. | ||
22 | * tree-vect-stmts.c (vect_supportable_shift): New function. | ||
23 | (vect_analyze_stmt): Handle pattern def statement. | ||
24 | (new_stmt_vec_info): Initialize pattern def statement. | ||
25 | |||
26 | gcc/testsuite/ | ||
27 | * gcc.dg/vect/vect-over-widen-1.c: New test. | ||
28 | * gcc.dg/vect/vect-over-widen-2.c: New test. | ||
29 | * gcc.dg/vect/vect-over-widen-3.c: New test. | ||
30 | * gcc.dg/vect/vect-over-widen-4.c: New test. | ||
31 | |||
32 | |||
33 | 2011-08-09 Ira Rosen <ira.rosen@linaro.org> | ||
34 | |||
35 | gcc/ | ||
36 | PR tree-optimization/50014 | ||
37 | * tree-vect-loop.c (vectorizable_reduction): Get def type before | ||
38 | calling vect_get_vec_def_for_stmt_copy (). | ||
39 | |||
40 | gcc/testsuite/ | ||
41 | PR tree-optimization/50014 | ||
42 | * gcc.dg/vect/pr50014.c: New test. | ||
43 | |||
44 | |||
45 | 2011-08-11 Ira Rosen <ira.rosen@linaro.org> | ||
46 | |||
47 | gcc/ | ||
48 | PR tree-optimization/50039 | ||
49 | * tree-vect-patterns.c (vect_operation_fits_smaller_type): Check | ||
50 | that DEF_STMT has a stmt_vec_info. | ||
51 | |||
52 | gcc/testsuite/ | ||
53 | PR tree-optimization/50039 | ||
54 | * gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre. | ||
55 | * gcc.dg/vect/no-tree-fre-pr50039.c: New test. | ||
56 | |||
57 | |||
58 | 2011-09-04 Jakub Jelinek <jakub@redhat.com> | ||
59 | Ira Rosen <ira.rosen@linaro.org> | ||
60 | |||
61 | gcc/ | ||
62 | PR tree-optimization/50208 | ||
63 | * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add an | ||
64 | argument. Check that def_stmt is inside the loop. | ||
65 | (vect_recog_widen_mult_pattern): Update calls to | ||
66 | vect_handle_widen_mult_by_const. | ||
67 | (vect_operation_fits_smaller_type): Check that def_stmt is | ||
68 | inside the loop. | ||
69 | |||
70 | gcc/testsuite/ | ||
71 | PR tree-optimization/50208 | ||
72 | * gcc.dg/vect/no-fre-pre-pr50208.c: New test. | ||
73 | * gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with | ||
74 | -fno-tree-fre -fno-tree-pre. | ||
75 | |||
76 | === added file 'gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c' | ||
77 | --- old/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 1970-01-01 00:00:00 +0000 | ||
78 | +++ new/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 2011-09-05 06:23:37 +0000 | ||
79 | @@ -0,0 +1,17 @@ | ||
80 | +/* { dg-do compile } */ | ||
81 | + | ||
82 | +char c; | ||
83 | +int a, b; | ||
84 | + | ||
85 | +void foo (int j) | ||
86 | +{ | ||
87 | + int i; | ||
88 | + while (--j) | ||
89 | + { | ||
90 | + b = 3; | ||
91 | + for (i = 0; i < 2; ++i) | ||
92 | + a = b ^ c; | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
97 | |||
98 | === added file 'gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c' | ||
99 | --- old/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 1970-01-01 00:00:00 +0000 | ||
100 | +++ new/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 2011-09-05 06:23:37 +0000 | ||
101 | @@ -0,0 +1,15 @@ | ||
102 | +/* { dg-do compile } */ | ||
103 | + | ||
104 | +extern unsigned char g_5; | ||
105 | +extern int g_31, g_76; | ||
106 | +int main(void) { | ||
107 | + int i, j; | ||
108 | + for (j=0; j < 2; ++j) { | ||
109 | + g_31 = -3; | ||
110 | + for (i=0; i < 2; ++i) | ||
111 | + g_76 = (g_31 ? g_31+1 : 0) ^ g_5; | ||
112 | + } | ||
113 | +} | ||
114 | + | ||
115 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
116 | + | ||
117 | |||
118 | === added file 'gcc/testsuite/gcc.dg/vect/pr50014.c' | ||
119 | --- old/gcc/testsuite/gcc.dg/vect/pr50014.c 1970-01-01 00:00:00 +0000 | ||
120 | +++ new/gcc/testsuite/gcc.dg/vect/pr50014.c 2011-09-05 06:23:37 +0000 | ||
121 | @@ -0,0 +1,16 @@ | ||
122 | +/* { dg-do compile } */ | ||
123 | +/* { dg-require-effective-target vect_int } */ | ||
124 | + | ||
125 | +int f(unsigned char *s, int n) | ||
126 | +{ | ||
127 | + int sum = 0; | ||
128 | + int i; | ||
129 | + | ||
130 | + for (i = 0; i < n; i++) | ||
131 | + sum += 256 * s[i]; | ||
132 | + | ||
133 | + return sum; | ||
134 | +} | ||
135 | + | ||
136 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
137 | + | ||
138 | |||
139 | === added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c' | ||
140 | --- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 1970-01-01 00:00:00 +0000 | ||
141 | +++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 2011-09-05 06:23:37 +0000 | ||
142 | @@ -0,0 +1,64 @@ | ||
143 | +/* { dg-require-effective-target vect_int } */ | ||
144 | +/* { dg-require-effective-target vect_shift } */ | ||
145 | + | ||
146 | +#include <stdlib.h> | ||
147 | +#include <stdarg.h> | ||
148 | +#include "tree-vect.h" | ||
149 | + | ||
150 | +#define N 64 | ||
151 | + | ||
152 | +/* Modified rgb to rgb conversion from FFmpeg. */ | ||
153 | +__attribute__ ((noinline)) void | ||
154 | +foo (unsigned char *src, unsigned char *dst) | ||
155 | +{ | ||
156 | + unsigned char *s = src; | ||
157 | + unsigned short *d = (unsigned short *)dst; | ||
158 | + int i; | ||
159 | + | ||
160 | + for (i = 0; i < N/4; i++) | ||
161 | + { | ||
162 | + const int b = *s++; | ||
163 | + const int g = *s++; | ||
164 | + const int r = *s++; | ||
165 | + const int a = *s++; | ||
166 | + *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); | ||
167 | + d++; | ||
168 | + } | ||
169 | + | ||
170 | + s = src; | ||
171 | + d = (unsigned short *)dst; | ||
172 | + for (i = 0; i < N/4; i++) | ||
173 | + { | ||
174 | + const int b = *s++; | ||
175 | + const int g = *s++; | ||
176 | + const int r = *s++; | ||
177 | + const int a = *s++; | ||
178 | + if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) | ||
179 | + abort (); | ||
180 | + d++; | ||
181 | + } | ||
182 | +} | ||
183 | + | ||
184 | +int main (void) | ||
185 | +{ | ||
186 | + int i; | ||
187 | + unsigned char in[N], out[N]; | ||
188 | + | ||
189 | + check_vect (); | ||
190 | + | ||
191 | + for (i = 0; i < N; i++) | ||
192 | + { | ||
193 | + in[i] = i; | ||
194 | + out[i] = 255; | ||
195 | + __asm__ volatile (""); | ||
196 | + } | ||
197 | + | ||
198 | + foo (in, out); | ||
199 | + | ||
200 | + return 0; | ||
201 | +} | ||
202 | + | ||
203 | +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ | ||
204 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
205 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
206 | + | ||
207 | |||
208 | === added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c' | ||
209 | --- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 1970-01-01 00:00:00 +0000 | ||
210 | +++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 2011-09-05 06:23:37 +0000 | ||
211 | @@ -0,0 +1,65 @@ | ||
212 | +/* { dg-require-effective-target vect_int } */ | ||
213 | +/* { dg-require-effective-target vect_shift } */ | ||
214 | + | ||
215 | +#include <stdlib.h> | ||
216 | +#include <stdarg.h> | ||
217 | +#include "tree-vect.h" | ||
218 | + | ||
219 | +#define N 64 | ||
220 | + | ||
221 | +/* Modified rgb to rgb conversion from FFmpeg. */ | ||
222 | +__attribute__ ((noinline)) void | ||
223 | +foo (unsigned char *src, unsigned char *dst) | ||
224 | +{ | ||
225 | + unsigned char *s = src; | ||
226 | + int *d = (int *)dst; | ||
227 | + int i; | ||
228 | + | ||
229 | + for (i = 0; i < N/4; i++) | ||
230 | + { | ||
231 | + const int b = *s++; | ||
232 | + const int g = *s++; | ||
233 | + const int r = *s++; | ||
234 | + const int a = *s++; | ||
235 | + *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); | ||
236 | + d++; | ||
237 | + } | ||
238 | + | ||
239 | + s = src; | ||
240 | + d = (int *)dst; | ||
241 | + for (i = 0; i < N/4; i++) | ||
242 | + { | ||
243 | + const int b = *s++; | ||
244 | + const int g = *s++; | ||
245 | + const int r = *s++; | ||
246 | + const int a = *s++; | ||
247 | + if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) | ||
248 | + abort (); | ||
249 | + d++; | ||
250 | + } | ||
251 | +} | ||
252 | + | ||
253 | +int main (void) | ||
254 | +{ | ||
255 | + int i; | ||
256 | + unsigned char in[N], out[N]; | ||
257 | + | ||
258 | + check_vect (); | ||
259 | + | ||
260 | + for (i = 0; i < N; i++) | ||
261 | + { | ||
262 | + in[i] = i; | ||
263 | + out[i] = 255; | ||
264 | + __asm__ volatile (""); | ||
265 | + } | ||
266 | + | ||
267 | + foo (in, out); | ||
268 | + | ||
269 | + return 0; | ||
270 | +} | ||
271 | + | ||
272 | +/* Final value stays in int, so no over-widening is detected at the moment. */ | ||
273 | +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */ | ||
274 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
275 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
276 | + | ||
277 | |||
278 | === added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c' | ||
279 | --- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 1970-01-01 00:00:00 +0000 | ||
280 | +++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 2011-09-05 06:23:37 +0000 | ||
281 | @@ -0,0 +1,64 @@ | ||
282 | +/* { dg-require-effective-target vect_int } */ | ||
283 | +/* { dg-require-effective-target vect_shift } */ | ||
284 | + | ||
285 | +#include <stdlib.h> | ||
286 | +#include <stdarg.h> | ||
287 | +#include "tree-vect.h" | ||
288 | + | ||
289 | +#define N 64 | ||
290 | + | ||
291 | +/* Modified rgb to rgb conversion from FFmpeg. */ | ||
292 | +__attribute__ ((noinline)) void | ||
293 | +foo (unsigned char *src, unsigned char *dst) | ||
294 | +{ | ||
295 | + unsigned char *s = src; | ||
296 | + unsigned short *d = (unsigned short *)dst; | ||
297 | + int i; | ||
298 | + | ||
299 | + for (i = 0; i < N/4; i++) | ||
300 | + { | ||
301 | + const int b = *s++; | ||
302 | + const int g = *s++; | ||
303 | + const int r = *s++; | ||
304 | + const int a = *s++; | ||
305 | + *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)); | ||
306 | + d++; | ||
307 | + } | ||
308 | + | ||
309 | + s = src; | ||
310 | + d = (unsigned short *)dst; | ||
311 | + for (i = 0; i < N/4; i++) | ||
312 | + { | ||
313 | + const int b = *s++; | ||
314 | + const int g = *s++; | ||
315 | + const int r = *s++; | ||
316 | + const int a = *s++; | ||
317 | + if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9))) | ||
318 | + abort (); | ||
319 | + d++; | ||
320 | + } | ||
321 | +} | ||
322 | + | ||
323 | +int main (void) | ||
324 | +{ | ||
325 | + int i; | ||
326 | + unsigned char in[N], out[N]; | ||
327 | + | ||
328 | + check_vect (); | ||
329 | + | ||
330 | + for (i = 0; i < N; i++) | ||
331 | + { | ||
332 | + in[i] = i; | ||
333 | + out[i] = 255; | ||
334 | + __asm__ volatile (""); | ||
335 | + } | ||
336 | + | ||
337 | + foo (in, out); | ||
338 | + | ||
339 | + return 0; | ||
340 | +} | ||
341 | + | ||
342 | +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */ | ||
343 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
344 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
345 | + | ||
346 | |||
347 | === added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c' | ||
348 | --- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 1970-01-01 00:00:00 +0000 | ||
349 | +++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 2011-09-05 06:23:37 +0000 | ||
350 | @@ -0,0 +1,68 @@ | ||
351 | +/* { dg-require-effective-target vect_int } */ | ||
352 | +/* { dg-require-effective-target vect_shift } */ | ||
353 | + | ||
354 | +#include <stdlib.h> | ||
355 | +#include <stdarg.h> | ||
356 | +#include "tree-vect.h" | ||
357 | + | ||
358 | +#define N 64 | ||
359 | + | ||
360 | +/* Modified rgb to rgb conversion from FFmpeg. */ | ||
361 | +__attribute__ ((noinline)) int | ||
362 | +foo (unsigned char *src, unsigned char *dst) | ||
363 | +{ | ||
364 | + unsigned char *s = src; | ||
365 | + unsigned short *d = (unsigned short *)dst, res; | ||
366 | + int i, result = 0; | ||
367 | + | ||
368 | + for (i = 0; i < N/4; i++) | ||
369 | + { | ||
370 | + const int b = *s++; | ||
371 | + const int g = *s++; | ||
372 | + const int r = *s++; | ||
373 | + const int a = *s++; | ||
374 | + res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)); | ||
375 | + *d = res; | ||
376 | + result += res; | ||
377 | + d++; | ||
378 | + } | ||
379 | + | ||
380 | + s = src; | ||
381 | + d = (unsigned short *)dst; | ||
382 | + for (i = 0; i < N/4; i++) | ||
383 | + { | ||
384 | + const int b = *s++; | ||
385 | + const int g = *s++; | ||
386 | + const int r = *s++; | ||
387 | + const int a = *s++; | ||
388 | + if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5))) | ||
389 | + abort (); | ||
390 | + d++; | ||
391 | + } | ||
392 | + | ||
393 | + return result; | ||
394 | +} | ||
395 | + | ||
396 | +int main (void) | ||
397 | +{ | ||
398 | + int i; | ||
399 | + unsigned char in[N], out[N]; | ||
400 | + | ||
401 | + check_vect (); | ||
402 | + | ||
403 | + for (i = 0; i < N; i++) | ||
404 | + { | ||
405 | + in[i] = i; | ||
406 | + out[i] = 255; | ||
407 | + __asm__ volatile (""); | ||
408 | + } | ||
409 | + | ||
410 | + foo (in, out); | ||
411 | + | ||
412 | + return 0; | ||
413 | +} | ||
414 | + | ||
415 | +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */ | ||
416 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
417 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
418 | + | ||
419 | |||
420 | === modified file 'gcc/testsuite/gcc.dg/vect/vect.exp' | ||
421 | --- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000 | ||
422 | +++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-09-05 06:23:37 +0000 | ||
423 | @@ -245,6 +245,18 @@ | ||
424 | dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \ | ||
425 | "" $VECT_SLP_CFLAGS | ||
426 | |||
427 | +# -fno-tree-fre | ||
428 | +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS | ||
429 | +lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" | ||
430 | +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \ | ||
431 | + "" $DEFAULT_VECTCFLAGS | ||
432 | + | ||
433 | +# -fno-tree-fre -fno-tree-pre | ||
434 | +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS | ||
435 | +lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre" | ||
436 | +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \ | ||
437 | + "" $DEFAULT_VECTCFLAGS | ||
438 | + | ||
439 | # Clean up. | ||
440 | set dg-do-what-default ${save-dg-do-what-default} | ||
441 | |||
442 | |||
443 | === modified file 'gcc/tree-vect-loop.c' | ||
444 | --- old/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000 | ||
445 | +++ new/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 | ||
446 | @@ -181,8 +181,8 @@ | ||
447 | stmt_vec_info stmt_info; | ||
448 | int i; | ||
449 | HOST_WIDE_INT dummy; | ||
450 | - gimple stmt, pattern_stmt = NULL; | ||
451 | - bool analyze_pattern_stmt = false; | ||
452 | + gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL; | ||
453 | + bool analyze_pattern_stmt = false, pattern_def = false; | ||
454 | |||
455 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
456 | fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); | ||
457 | @@ -297,6 +297,29 @@ | ||
458 | || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
459 | analyze_pattern_stmt = true; | ||
460 | |||
461 | + /* If a pattern statement has a def stmt, analyze it too. */ | ||
462 | + if (is_pattern_stmt_p (stmt_info) | ||
463 | + && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) | ||
464 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) | ||
465 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) | ||
466 | + { | ||
467 | + if (pattern_def) | ||
468 | + pattern_def = false; | ||
469 | + else | ||
470 | + { | ||
471 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
472 | + { | ||
473 | + fprintf (vect_dump, "==> examining pattern def stmt: "); | ||
474 | + print_gimple_stmt (vect_dump, pattern_def_stmt, 0, | ||
475 | + TDF_SLIM); | ||
476 | + } | ||
477 | + | ||
478 | + pattern_def = true; | ||
479 | + stmt = pattern_def_stmt; | ||
480 | + stmt_info = vinfo_for_stmt (stmt); | ||
481 | + } | ||
482 | + } | ||
483 | + | ||
484 | if (gimple_get_lhs (stmt) == NULL_TREE) | ||
485 | { | ||
486 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | ||
487 | @@ -401,7 +424,7 @@ | ||
488 | || (nunits > vectorization_factor)) | ||
489 | vectorization_factor = nunits; | ||
490 | |||
491 | - if (!analyze_pattern_stmt) | ||
492 | + if (!analyze_pattern_stmt && !pattern_def) | ||
493 | gsi_next (&si); | ||
494 | } | ||
495 | } | ||
496 | @@ -3985,7 +4008,7 @@ | ||
497 | VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; | ||
498 | VEC (gimple, heap) *phis = NULL; | ||
499 | int vec_num; | ||
500 | - tree def0, def1, tem; | ||
501 | + tree def0, def1, tem, op0, op1 = NULL_TREE; | ||
502 | |||
503 | if (nested_in_vect_loop_p (loop, stmt)) | ||
504 | { | ||
505 | @@ -4418,8 +4441,6 @@ | ||
506 | /* Handle uses. */ | ||
507 | if (j == 0) | ||
508 | { | ||
509 | - tree op0, op1 = NULL_TREE; | ||
510 | - | ||
511 | op0 = ops[!reduc_index]; | ||
512 | if (op_type == ternary_op) | ||
513 | { | ||
514 | @@ -4449,11 +4470,19 @@ | ||
515 | { | ||
516 | if (!slp_node) | ||
517 | { | ||
518 | - enum vect_def_type dt = vect_unknown_def_type; /* Dummy */ | ||
519 | - loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); | ||
520 | + enum vect_def_type dt; | ||
521 | + gimple dummy_stmt; | ||
522 | + tree dummy; | ||
523 | + | ||
524 | + vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL, | ||
525 | + &dummy_stmt, &dummy, &dt); | ||
526 | + loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, | ||
527 | + loop_vec_def0); | ||
528 | VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0); | ||
529 | if (op_type == ternary_op) | ||
530 | { | ||
531 | + vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt, | ||
532 | + &dummy, &dt); | ||
533 | loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, | ||
534 | loop_vec_def1); | ||
535 | VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1); | ||
536 | @@ -4758,8 +4787,8 @@ | ||
537 | tree cond_expr = NULL_TREE; | ||
538 | gimple_seq cond_expr_stmt_list = NULL; | ||
539 | bool do_peeling_for_loop_bound; | ||
540 | - gimple stmt, pattern_stmt; | ||
541 | - bool transform_pattern_stmt = false; | ||
542 | + gimple stmt, pattern_stmt, pattern_def_stmt; | ||
543 | + bool transform_pattern_stmt = false, pattern_def = false; | ||
544 | |||
545 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
546 | fprintf (vect_dump, "=== vec_transform_loop ==="); | ||
547 | @@ -4903,6 +4932,30 @@ | ||
548 | || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
549 | transform_pattern_stmt = true; | ||
550 | |||
551 | + /* If a pattern statement has a def stmt, vectorize it too. */ | ||
552 | + if (is_pattern_stmt_p (stmt_info) | ||
553 | + && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) | ||
554 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) | ||
555 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) | ||
556 | + { | ||
557 | + if (pattern_def) | ||
558 | + pattern_def = false; | ||
559 | + else | ||
560 | + { | ||
561 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
562 | + { | ||
563 | + fprintf (vect_dump, "==> vectorizing pattern def" | ||
564 | + " stmt: "); | ||
565 | + print_gimple_stmt (vect_dump, pattern_def_stmt, 0, | ||
566 | + TDF_SLIM); | ||
567 | + } | ||
568 | + | ||
569 | + pattern_def = true; | ||
570 | + stmt = pattern_def_stmt; | ||
571 | + stmt_info = vinfo_for_stmt (stmt); | ||
572 | + } | ||
573 | + } | ||
574 | + | ||
575 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); | ||
576 | nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( | ||
577 | STMT_VINFO_VECTYPE (stmt_info)); | ||
578 | @@ -4930,7 +4983,7 @@ | ||
579 | /* Hybrid SLP stmts must be vectorized in addition to SLP. */ | ||
580 | if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) | ||
581 | { | ||
582 | - if (!transform_pattern_stmt) | ||
583 | + if (!transform_pattern_stmt && !pattern_def) | ||
584 | gsi_next (&si); | ||
585 | continue; | ||
586 | } | ||
587 | @@ -4962,7 +5015,7 @@ | ||
588 | } | ||
589 | } | ||
590 | |||
591 | - if (!transform_pattern_stmt) | ||
592 | + if (!transform_pattern_stmt && !pattern_def) | ||
593 | gsi_next (&si); | ||
594 | } /* stmts in BB */ | ||
595 | } /* BBs in loop */ | ||
596 | |||
597 | === modified file 'gcc/tree-vect-patterns.c' | ||
598 | --- old/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000 | ||
599 | +++ new/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 | ||
600 | @@ -46,11 +46,14 @@ | ||
601 | static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, | ||
602 | tree *); | ||
603 | static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); | ||
604 | +static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, | ||
605 | + tree *); | ||
606 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
607 | vect_recog_widen_mult_pattern, | ||
608 | vect_recog_widen_sum_pattern, | ||
609 | vect_recog_dot_prod_pattern, | ||
610 | - vect_recog_pow_pattern}; | ||
611 | + vect_recog_pow_pattern, | ||
612 | + vect_recog_over_widening_pattern}; | ||
613 | |||
614 | |||
615 | /* Function widened_name_p | ||
616 | @@ -339,12 +342,14 @@ | ||
617 | replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ | ||
618 | |||
619 | static bool | ||
620 | -vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, | ||
621 | +vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, | ||
622 | VEC (gimple, heap) **stmts, tree type, | ||
623 | tree *half_type, gimple def_stmt) | ||
624 | { | ||
625 | tree new_type, new_oprnd, tmp; | ||
626 | gimple new_stmt; | ||
627 | + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
628 | + struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
629 | |||
630 | if (int_fits_type_p (const_oprnd, *half_type)) | ||
631 | { | ||
632 | @@ -354,6 +359,8 @@ | ||
633 | } | ||
634 | |||
635 | if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) | ||
636 | + || !gimple_bb (def_stmt) | ||
637 | + || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
638 | || !vinfo_for_stmt (def_stmt)) | ||
639 | return false; | ||
640 | |||
641 | @@ -522,7 +529,8 @@ | ||
642 | { | ||
643 | if (TREE_CODE (oprnd0) == INTEGER_CST | ||
644 | && TREE_CODE (half_type1) == INTEGER_TYPE | ||
645 | - && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, | ||
646 | + && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, | ||
647 | + stmts, type, | ||
648 | &half_type1, def_stmt1)) | ||
649 | half_type0 = half_type1; | ||
650 | else | ||
651 | @@ -532,7 +540,8 @@ | ||
652 | { | ||
653 | if (TREE_CODE (oprnd1) == INTEGER_CST | ||
654 | && TREE_CODE (half_type0) == INTEGER_TYPE | ||
655 | - && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, | ||
656 | + && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, | ||
657 | + stmts, type, | ||
658 | &half_type0, def_stmt0)) | ||
659 | half_type1 = half_type0; | ||
660 | else | ||
661 | @@ -826,6 +835,424 @@ | ||
662 | } | ||
663 | |||
664 | |||
665 | +/* Return TRUE if the operation in STMT can be performed on a smaller type. | ||
666 | + | ||
667 | + Input: | ||
668 | + STMT - a statement to check. | ||
669 | + DEF - we support operations with two operands, one of which is constant. | ||
670 | + The other operand can be defined by a demotion operation, or by a | ||
671 | + previous statement in a sequence of over-promoted operations. In the | ||
672 | + latter case DEF is used to replace that operand. (It is defined by a | ||
673 | + pattern statement we created for the previous statement in the | ||
674 | + sequence). | ||
675 | + | ||
676 | + Input/output: | ||
677 | + NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not | ||
678 | + NULL, it's the type of DEF. | ||
679 | + STMTS - additional pattern statements. If a pattern statement (type | ||
680 | + conversion) is created in this function, its original statement is | ||
681 | + added to STMTS. | ||
682 | + | ||
683 | + Output: | ||
684 | + OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new | ||
685 | + operands to use in the new pattern statement for STMT (will be created | ||
686 | + in vect_recog_over_widening_pattern ()). | ||
687 | + NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern | ||
688 | + statements for STMT: the first one is a type promotion and the second | ||
689 | + one is the operation itself. We return the type promotion statement | ||
690 | + in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of | ||
691 | + the second pattern statement. */ | ||
692 | + | ||
693 | +static bool | ||
694 | +vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, | ||
695 | + tree *op0, tree *op1, gimple *new_def_stmt, | ||
696 | + VEC (gimple, heap) **stmts) | ||
697 | +{ | ||
698 | + enum tree_code code; | ||
699 | + tree const_oprnd, oprnd; | ||
700 | + tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; | ||
701 | + gimple def_stmt, new_stmt; | ||
702 | + bool first = false; | ||
703 | + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
704 | + struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
705 | + | ||
706 | + *new_def_stmt = NULL; | ||
707 | + | ||
708 | + if (!is_gimple_assign (stmt)) | ||
709 | + return false; | ||
710 | + | ||
711 | + code = gimple_assign_rhs_code (stmt); | ||
712 | + if (code != LSHIFT_EXPR && code != RSHIFT_EXPR | ||
713 | + && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) | ||
714 | + return false; | ||
715 | + | ||
716 | + oprnd = gimple_assign_rhs1 (stmt); | ||
717 | + const_oprnd = gimple_assign_rhs2 (stmt); | ||
718 | + type = gimple_expr_type (stmt); | ||
719 | + | ||
720 | + if (TREE_CODE (oprnd) != SSA_NAME | ||
721 | + || TREE_CODE (const_oprnd) != INTEGER_CST) | ||
722 | + return false; | ||
723 | + | ||
724 | + /* If we are in the middle of a sequence, we use DEF from a previous | ||
725 | + statement. Otherwise, OPRND has to be a result of type promotion. */ | ||
726 | + if (*new_type) | ||
727 | + { | ||
728 | + half_type = *new_type; | ||
729 | + oprnd = def; | ||
730 | + } | ||
731 | + else | ||
732 | + { | ||
733 | + first = true; | ||
734 | + if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) | ||
735 | + || !gimple_bb (def_stmt) | ||
736 | + || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
737 | + || !vinfo_for_stmt (def_stmt)) | ||
738 | + return false; | ||
739 | + } | ||
740 | + | ||
741 | + /* Can we perform the operation on a smaller type? */ | ||
742 | + switch (code) | ||
743 | + { | ||
744 | + case BIT_IOR_EXPR: | ||
745 | + case BIT_XOR_EXPR: | ||
746 | + case BIT_AND_EXPR: | ||
747 | + if (!int_fits_type_p (const_oprnd, half_type)) | ||
748 | + { | ||
749 | + /* HALF_TYPE is not enough. Try a bigger type if possible. */ | ||
750 | + if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) | ||
751 | + return false; | ||
752 | + | ||
753 | + interm_type = build_nonstandard_integer_type ( | ||
754 | + TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); | ||
755 | + if (!int_fits_type_p (const_oprnd, interm_type)) | ||
756 | + return false; | ||
757 | + } | ||
758 | + | ||
759 | + break; | ||
760 | + | ||
761 | + case LSHIFT_EXPR: | ||
762 | + /* Try intermediate type - HALF_TYPE is not enough for sure. */ | ||
763 | + if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) | ||
764 | + return false; | ||
765 | + | ||
766 | + /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. | ||
767 | + (e.g., if the original value was char, the shift amount is at most 8 | ||
768 | + if we want to use short). */ | ||
769 | + if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) | ||
770 | + return false; | ||
771 | + | ||
772 | + interm_type = build_nonstandard_integer_type ( | ||
773 | + TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); | ||
774 | + | ||
775 | + if (!vect_supportable_shift (code, interm_type)) | ||
776 | + return false; | ||
777 | + | ||
778 | + break; | ||
779 | + | ||
780 | + case RSHIFT_EXPR: | ||
781 | + if (vect_supportable_shift (code, half_type)) | ||
782 | + break; | ||
783 | + | ||
784 | + /* Try intermediate type - HALF_TYPE is not supported. */ | ||
785 | + if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) | ||
786 | + return false; | ||
787 | + | ||
788 | + interm_type = build_nonstandard_integer_type ( | ||
789 | + TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); | ||
790 | + | ||
791 | + if (!vect_supportable_shift (code, interm_type)) | ||
792 | + return false; | ||
793 | + | ||
794 | + break; | ||
795 | + | ||
796 | + default: | ||
797 | + gcc_unreachable (); | ||
798 | + } | ||
799 | + | ||
800 | + /* There are four possible cases: | ||
801 | + 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's | ||
802 | + the first statement in the sequence) | ||
803 | + a. The original, HALF_TYPE, is not enough - we replace the promotion | ||
804 | + from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. | ||
805 | + b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original | ||
806 | + promotion. | ||
807 | + 2. OPRND is defined by a pattern statement we created. | ||
808 | + a. Its type is not sufficient for the operation, we create a new stmt: | ||
809 | + a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store | ||
810 | + this statement in NEW_DEF_STMT, and it is later put in | ||
811 | + STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT. | ||
812 | + b. OPRND is good to use in the new statement. */ | ||
813 | + if (first) | ||
814 | + { | ||
815 | + if (interm_type) | ||
816 | + { | ||
817 | + /* Replace the original type conversion HALF_TYPE->TYPE with | ||
818 | + HALF_TYPE->INTERM_TYPE. */ | ||
819 | + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) | ||
820 | + { | ||
821 | + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
822 | + /* Check if the already created pattern stmt is what we need. */ | ||
823 | + if (!is_gimple_assign (new_stmt) | ||
824 | + || gimple_assign_rhs_code (new_stmt) != NOP_EXPR | ||
825 | + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) | ||
826 | + return false; | ||
827 | + | ||
828 | + oprnd = gimple_assign_lhs (new_stmt); | ||
829 | + } | ||
830 | + else | ||
831 | + { | ||
832 | + /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ | ||
833 | + oprnd = gimple_assign_rhs1 (def_stmt); | ||
834 | + tmp = create_tmp_reg (interm_type, NULL); | ||
835 | + add_referenced_var (tmp); | ||
836 | + new_oprnd = make_ssa_name (tmp, NULL); | ||
837 | + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, | ||
838 | + oprnd, NULL_TREE); | ||
839 | + SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; | ||
840 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; | ||
841 | + VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
842 | + oprnd = new_oprnd; | ||
843 | + } | ||
844 | + } | ||
845 | + else | ||
846 | + { | ||
847 | + /* Retrieve the operand before the type promotion. */ | ||
848 | + oprnd = gimple_assign_rhs1 (def_stmt); | ||
849 | + } | ||
850 | + } | ||
851 | + else | ||
852 | + { | ||
853 | + if (interm_type) | ||
854 | + { | ||
855 | + /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ | ||
856 | + tmp = create_tmp_reg (interm_type, NULL); | ||
857 | + add_referenced_var (tmp); | ||
858 | + new_oprnd = make_ssa_name (tmp, NULL); | ||
859 | + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, | ||
860 | + oprnd, NULL_TREE); | ||
861 | + SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; | ||
862 | + oprnd = new_oprnd; | ||
863 | + *new_def_stmt = new_stmt; | ||
864 | + } | ||
865 | + | ||
866 | + /* Otherwise, OPRND is already set. */ | ||
867 | + } | ||
868 | + | ||
869 | + if (interm_type) | ||
870 | + *new_type = interm_type; | ||
871 | + else | ||
872 | + *new_type = half_type; | ||
873 | + | ||
874 | + *op0 = oprnd; | ||
875 | + *op1 = fold_convert (*new_type, const_oprnd); | ||
876 | + | ||
877 | + return true; | ||
878 | +} | ||
879 | + | ||
880 | + | ||
881 | +/* Try to find a statement or a sequence of statements that can be performed | ||
882 | + on a smaller type: | ||
883 | + | ||
884 | + type x_t; | ||
885 | + TYPE x_T, res0_T, res1_T; | ||
886 | + loop: | ||
887 | + S1 x_t = *p; | ||
888 | + S2 x_T = (TYPE) x_t; | ||
889 | + S3 res0_T = op (x_T, C0); | ||
890 | + S4 res1_T = op (res0_T, C1); | ||
891 | + S5 ... = () res1_T; - type demotion | ||
892 | + | ||
893 | + where type 'TYPE' is at least double the size of type 'type', C0 and C1 are | ||
894 | + constants. | ||
895 | + Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either | ||
896 | + be 'type' or some intermediate type. For now, we expect S5 to be a type | ||
897 | + demotion operation. We also check that S3 and S4 have only one use. | ||
898 | + | ||
899 | + | ||
900 | +*/ | ||
901 | +static gimple | ||
902 | +vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, | ||
903 | + tree *type_in, tree *type_out) | ||
904 | +{ | ||
905 | + gimple stmt = VEC_pop (gimple, *stmts); | ||
906 | + gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; | ||
907 | + tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type; | ||
908 | + imm_use_iterator imm_iter; | ||
909 | + use_operand_p use_p; | ||
910 | + int nuses = 0; | ||
911 | + tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; | ||
912 | + bool first; | ||
913 | + struct loop *loop = (gimple_bb (stmt))->loop_father; | ||
914 | + | ||
915 | + first = true; | ||
916 | + while (1) | ||
917 | + { | ||
918 | + if (!vinfo_for_stmt (stmt) | ||
919 | + || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) | ||
920 | + return NULL; | ||
921 | + | ||
922 | + new_def_stmt = NULL; | ||
923 | + if (!vect_operation_fits_smaller_type (stmt, var, &new_type, | ||
924 | + &op0, &op1, &new_def_stmt, | ||
925 | + stmts)) | ||
926 | + { | ||
927 | + if (first) | ||
928 | + return NULL; | ||
929 | + else | ||
930 | + break; | ||
931 | + } | ||
932 | + | ||
933 | + /* STMT can be performed on a smaller type. Check its uses. */ | ||
934 | + lhs = gimple_assign_lhs (stmt); | ||
935 | + nuses = 0; | ||
936 | + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | ||
937 | + { | ||
938 | + if (is_gimple_debug (USE_STMT (use_p))) | ||
939 | + continue; | ||
940 | + use_stmt = USE_STMT (use_p); | ||
941 | + nuses++; | ||
942 | + } | ||
943 | + | ||
944 | + if (nuses != 1 || !is_gimple_assign (use_stmt) | ||
945 | + || !gimple_bb (use_stmt) | ||
946 | + || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | ||
947 | + return NULL; | ||
948 | + | ||
949 | + /* Create pattern statement for STMT. */ | ||
950 | + vectype = get_vectype_for_scalar_type (new_type); | ||
951 | + if (!vectype) | ||
952 | + return NULL; | ||
953 | + | ||
954 | + /* We want to collect all the statements for which we create pattern | ||
955 | + statements, except for the case when the last statement in the | ||
956 | + sequence doesn't have a corresponding pattern statement. In such | ||
957 | + case we associate the last pattern statement with the last statement | ||
958 | + in the sequence. Therefore, we only add an original statement to | ||
959 | + the list if we know that it is not the last. */ | ||
960 | + if (prev_stmt) | ||
961 | + VEC_safe_push (gimple, heap, *stmts, prev_stmt); | ||
962 | + | ||
963 | + var = vect_recog_temp_ssa_var (new_type, NULL); | ||
964 | + pattern_stmt = gimple_build_assign_with_ops ( | ||
965 | + gimple_assign_rhs_code (stmt), var, op0, op1); | ||
966 | + SSA_NAME_DEF_STMT (var) = pattern_stmt; | ||
967 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; | ||
968 | + STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt; | ||
969 | + | ||
970 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
971 | + { | ||
972 | + fprintf (vect_dump, "created pattern stmt: "); | ||
973 | + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
974 | + } | ||
975 | + | ||
976 | + prev_stmt = stmt; | ||
977 | + stmt = use_stmt; | ||
978 | + | ||
979 | + first = false; | ||
980 | + } | ||
981 | + | ||
982 | + /* We got a sequence. We expect it to end with a type demotion operation. | ||
983 | + Otherwise, we quit (for now). There are three possible cases: the | ||
984 | + conversion is to NEW_TYPE (we don't do anything), the conversion is to | ||
985 | + a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and | ||
986 | + NEW_TYPE differs (we create a new conversion statement). */ | ||
987 | + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) | ||
988 | + { | ||
989 | + use_lhs = gimple_assign_lhs (use_stmt); | ||
990 | + use_type = TREE_TYPE (use_lhs); | ||
991 | + /* Support only type promotion or signedness change. */ | ||
992 | + if (!INTEGRAL_TYPE_P (use_type) | ||
993 | + || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) | ||
994 | + return NULL; | ||
995 | + | ||
996 | + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) | ||
997 | + || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) | ||
998 | + { | ||
999 | + /* Create NEW_TYPE->USE_TYPE conversion. */ | ||
1000 | + tmp = create_tmp_reg (use_type, NULL); | ||
1001 | + add_referenced_var (tmp); | ||
1002 | + new_oprnd = make_ssa_name (tmp, NULL); | ||
1003 | + pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, | ||
1004 | + var, NULL_TREE); | ||
1005 | + SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt; | ||
1006 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; | ||
1007 | + | ||
1008 | + *type_in = get_vectype_for_scalar_type (new_type); | ||
1009 | + *type_out = get_vectype_for_scalar_type (use_type); | ||
1010 | + | ||
1011 | + /* We created a pattern statement for the last statement in the | ||
1012 | + sequence, so we don't need to associate it with the pattern | ||
1013 | + statement created for PREV_STMT. Therefore, we add PREV_STMT | ||
1014 | + to the list in order to mark it later in vect_pattern_recog_1. */ | ||
1015 | + if (prev_stmt) | ||
1016 | + VEC_safe_push (gimple, heap, *stmts, prev_stmt); | ||
1017 | + } | ||
1018 | + else | ||
1019 | + { | ||
1020 | + if (prev_stmt) | ||
1021 | + STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt)) | ||
1022 | + = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt)); | ||
1023 | + | ||
1024 | + *type_in = vectype; | ||
1025 | + *type_out = NULL_TREE; | ||
1026 | + } | ||
1027 | + | ||
1028 | + VEC_safe_push (gimple, heap, *stmts, use_stmt); | ||
1029 | + } | ||
1030 | + else | ||
1031 | + /* TODO: support general case, create a conversion to the correct type. */ | ||
1032 | + return NULL; | ||
1033 | + | ||
1034 | + /* Pattern detected. */ | ||
1035 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1036 | + { | ||
1037 | + fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); | ||
1038 | + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
1039 | + } | ||
1040 | + | ||
1041 | + return pattern_stmt; | ||
1042 | +} | ||
1043 | + | ||
1044 | + | ||
1045 | +/* Mark statements that are involved in a pattern. */ | ||
1046 | + | ||
1047 | +static inline void | ||
1048 | +vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, | ||
1049 | + tree pattern_vectype) | ||
1050 | +{ | ||
1051 | + stmt_vec_info pattern_stmt_info, def_stmt_info; | ||
1052 | + stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); | ||
1053 | + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); | ||
1054 | + gimple def_stmt; | ||
1055 | + | ||
1056 | + set_vinfo_for_stmt (pattern_stmt, | ||
1057 | + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
1058 | + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); | ||
1059 | + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1060 | + | ||
1061 | + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; | ||
1062 | + STMT_VINFO_DEF_TYPE (pattern_stmt_info) | ||
1063 | + = STMT_VINFO_DEF_TYPE (orig_stmt_info); | ||
1064 | + STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; | ||
1065 | + STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; | ||
1066 | + STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; | ||
1067 | + STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) | ||
1068 | + = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); | ||
1069 | + if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) | ||
1070 | + { | ||
1071 | + def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); | ||
1072 | + set_vinfo_for_stmt (def_stmt, | ||
1073 | + new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); | ||
1074 | + gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); | ||
1075 | + def_stmt_info = vinfo_for_stmt (def_stmt); | ||
1076 | + STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; | ||
1077 | + STMT_VINFO_DEF_TYPE (def_stmt_info) | ||
1078 | + = STMT_VINFO_DEF_TYPE (orig_stmt_info); | ||
1079 | + STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; | ||
1080 | + } | ||
1081 | +} | ||
1082 | + | ||
1083 | /* Function vect_pattern_recog_1 | ||
1084 | |||
1085 | Input: | ||
1086 | @@ -855,7 +1282,6 @@ | ||
1087 | { | ||
1088 | gimple stmt = gsi_stmt (si), pattern_stmt; | ||
1089 | stmt_vec_info stmt_info; | ||
1090 | - stmt_vec_info pattern_stmt_info; | ||
1091 | loop_vec_info loop_vinfo; | ||
1092 | tree pattern_vectype; | ||
1093 | tree type_in, type_out; | ||
1094 | @@ -923,16 +1349,7 @@ | ||
1095 | } | ||
1096 | |||
1097 | /* Mark the stmts that are involved in the pattern. */ | ||
1098 | - set_vinfo_for_stmt (pattern_stmt, | ||
1099 | - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
1100 | - gimple_set_bb (pattern_stmt, gimple_bb (stmt)); | ||
1101 | - pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1102 | - | ||
1103 | - STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | ||
1104 | - STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); | ||
1105 | - STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; | ||
1106 | - STMT_VINFO_IN_PATTERN_P (stmt_info) = true; | ||
1107 | - STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt; | ||
1108 | + vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); | ||
1109 | |||
1110 | /* Patterns cannot be vectorized using SLP, because they change the order of | ||
1111 | computation. */ | ||
1112 | @@ -940,9 +1357,9 @@ | ||
1113 | if (next == stmt) | ||
1114 | VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); | ||
1115 | |||
1116 | - /* In case of widen-mult by a constant, it is possible that an additional | ||
1117 | - pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a | ||
1118 | - stmt_info for it, and mark the relevant statements. */ | ||
1119 | + /* It is possible that additional pattern stmts are created and inserted in | ||
1120 | + STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the | ||
1121 | + relevant statements. */ | ||
1122 | for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) | ||
1123 | && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); | ||
1124 | i++) | ||
1125 | @@ -955,16 +1372,7 @@ | ||
1126 | print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
1127 | } | ||
1128 | |||
1129 | - set_vinfo_for_stmt (pattern_stmt, | ||
1130 | - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
1131 | - gimple_set_bb (pattern_stmt, gimple_bb (stmt)); | ||
1132 | - pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1133 | - | ||
1134 | - STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | ||
1135 | - STMT_VINFO_DEF_TYPE (pattern_stmt_info) | ||
1136 | - = STMT_VINFO_DEF_TYPE (stmt_info); | ||
1137 | - STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); | ||
1138 | - STMT_VINFO_IN_PATTERN_P (stmt_info) = true; | ||
1139 | + vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); | ||
1140 | } | ||
1141 | |||
1142 | VEC_free (gimple, heap, stmts_to_replace); | ||
1143 | |||
1144 | === modified file 'gcc/tree-vect-stmts.c' | ||
1145 | --- old/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000 | ||
1146 | +++ new/gcc/tree-vect-stmts.c 2011-09-05 06:23:37 +0000 | ||
1147 | @@ -2246,6 +2246,42 @@ | ||
1148 | } | ||
1149 | |||
1150 | |||
1151 | +/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE | ||
1152 | + either as shift by a scalar or by a vector. */ | ||
1153 | + | ||
1154 | +bool | ||
1155 | +vect_supportable_shift (enum tree_code code, tree scalar_type) | ||
1156 | +{ | ||
1157 | + | ||
1158 | + enum machine_mode vec_mode; | ||
1159 | + optab optab; | ||
1160 | + int icode; | ||
1161 | + tree vectype; | ||
1162 | + | ||
1163 | + vectype = get_vectype_for_scalar_type (scalar_type); | ||
1164 | + if (!vectype) | ||
1165 | + return false; | ||
1166 | + | ||
1167 | + optab = optab_for_tree_code (code, vectype, optab_scalar); | ||
1168 | + if (!optab | ||
1169 | + || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) | ||
1170 | + { | ||
1171 | + optab = optab_for_tree_code (code, vectype, optab_vector); | ||
1172 | + if (!optab | ||
1173 | + || (optab_handler (optab, TYPE_MODE (vectype)) | ||
1174 | + == CODE_FOR_nothing)) | ||
1175 | + return false; | ||
1176 | + } | ||
1177 | + | ||
1178 | + vec_mode = TYPE_MODE (vectype); | ||
1179 | + icode = (int) optab_handler (optab, vec_mode); | ||
1180 | + if (icode == CODE_FOR_nothing) | ||
1181 | + return false; | ||
1182 | + | ||
1183 | + return true; | ||
1184 | +} | ||
1185 | + | ||
1186 | + | ||
1187 | /* Function vectorizable_shift. | ||
1188 | |||
1189 | Check if STMT performs a shift operation that can be vectorized. | ||
1190 | @@ -4946,7 +4982,7 @@ | ||
1191 | enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); | ||
1192 | bool ok; | ||
1193 | tree scalar_type, vectype; | ||
1194 | - gimple pattern_stmt; | ||
1195 | + gimple pattern_stmt, pattern_def_stmt; | ||
1196 | |||
1197 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1198 | { | ||
1199 | @@ -5016,6 +5052,23 @@ | ||
1200 | return false; | ||
1201 | } | ||
1202 | |||
1203 | + if (is_pattern_stmt_p (stmt_info) | ||
1204 | + && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) | ||
1205 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) | ||
1206 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) | ||
1207 | + { | ||
1208 | + /* Analyze def stmt of STMT if it's a pattern stmt. */ | ||
1209 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1210 | + { | ||
1211 | + fprintf (vect_dump, "==> examining pattern def statement: "); | ||
1212 | + print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); | ||
1213 | + } | ||
1214 | + | ||
1215 | + if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node)) | ||
1216 | + return false; | ||
1217 | + } | ||
1218 | + | ||
1219 | + | ||
1220 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) | ||
1221 | { | ||
1222 | case vect_internal_def: | ||
1223 | @@ -5336,6 +5389,7 @@ | ||
1224 | STMT_VINFO_VECTORIZABLE (res) = true; | ||
1225 | STMT_VINFO_IN_PATTERN_P (res) = false; | ||
1226 | STMT_VINFO_RELATED_STMT (res) = NULL; | ||
1227 | + STMT_VINFO_PATTERN_DEF_STMT (res) = NULL; | ||
1228 | STMT_VINFO_DATA_REF (res) = NULL; | ||
1229 | |||
1230 | STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; | ||
1231 | |||
1232 | === modified file 'gcc/tree-vectorizer.h' | ||
1233 | --- old/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000 | ||
1234 | +++ new/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 | ||
1235 | @@ -464,6 +464,9 @@ | ||
1236 | pattern). */ | ||
1237 | gimple related_stmt; | ||
1238 | |||
1239 | + /* Used to keep a def stmt of a pattern stmt if such exists. */ | ||
1240 | + gimple pattern_def_stmt; | ||
1241 | + | ||
1242 | /* List of datarefs that are known to have the same alignment as the dataref | ||
1243 | of this stmt. */ | ||
1244 | VEC(dr_p,heap) *same_align_refs; | ||
1245 | @@ -531,6 +534,7 @@ | ||
1246 | |||
1247 | #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p | ||
1248 | #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt | ||
1249 | +#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt | ||
1250 | #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs | ||
1251 | #define STMT_VINFO_DEF_TYPE(S) (S)->def_type | ||
1252 | #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr | ||
1253 | @@ -814,6 +818,7 @@ | ||
1254 | extern void vect_get_load_cost (struct data_reference *, int, bool, | ||
1255 | unsigned int *, unsigned int *); | ||
1256 | extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); | ||
1257 | +extern bool vect_supportable_shift (enum tree_code, tree); | ||
1258 | |||
1259 | /* In tree-vect-data-refs.c. */ | ||
1260 | extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); | ||
1261 | @@ -891,7 +896,7 @@ | ||
1262 | Additional pattern recognition functions can (and will) be added | ||
1263 | in the future. */ | ||
1264 | typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
1265 | -#define NUM_PATTERNS 4 | ||
1266 | +#define NUM_PATTERNS 5 | ||
1267 | void vect_pattern_recog (loop_vec_info); | ||
1268 | |||
1269 | /* In tree-vectorizer.c. */ | ||
1270 | |||