summaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch1270
1 files changed, 0 insertions, 1270 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch
deleted file mode 100644
index dfdeec7245..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106800.patch
+++ /dev/null
@@ -1,1270 +0,0 @@
12011-09-07 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-08-04 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
9 pattern def statement, and its access macro.
10 (NUM_PATTERNS): Set to 5.
11 * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
12 pattern def statement.
13 (vect_transform_loop): Likewise.
14 * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
15 function vect_recog_over_widening_pattern ().
16 (vect_operation_fits_smaller_type): New function.
17 (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
18 Likewise.
19 (vect_pattern_recog_1): Move the code that marks pattern
20 statements to vect_mark_pattern_stmts (), and call it. Update
21 documentation.
22 * tree-vect-stmts.c (vect_supportable_shift): New function.
23 (vect_analyze_stmt): Handle pattern def statement.
24 (new_stmt_vec_info): Initialize pattern def statement.
25
26 gcc/testsuite/
27 * gcc.dg/vect/vect-over-widen-1.c: New test.
28 * gcc.dg/vect/vect-over-widen-2.c: New test.
29 * gcc.dg/vect/vect-over-widen-3.c: New test.
30 * gcc.dg/vect/vect-over-widen-4.c: New test.
31
32
33 2011-08-09 Ira Rosen <ira.rosen@linaro.org>
34
35 gcc/
36 PR tree-optimization/50014
37 * tree-vect-loop.c (vectorizable_reduction): Get def type before
38 calling vect_get_vec_def_for_stmt_copy ().
39
40 gcc/testsuite/
41 PR tree-optimization/50014
42 * gcc.dg/vect/pr50014.c: New test.
43
44
45 2011-08-11 Ira Rosen <ira.rosen@linaro.org>
46
47 gcc/
48 PR tree-optimization/50039
49 * tree-vect-patterns.c (vect_operation_fits_smaller_type): Check
50 that DEF_STMT has a stmt_vec_info.
51
52 gcc/testsuite/
53 PR tree-optimization/50039
54 * gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre.
55 * gcc.dg/vect/no-tree-fre-pr50039.c: New test.
56
57
58 2011-09-04 Jakub Jelinek <jakub@redhat.com>
59 Ira Rosen <ira.rosen@linaro.org>
60
61 gcc/
62 PR tree-optimization/50208
63 * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add an
64 argument. Check that def_stmt is inside the loop.
65 (vect_recog_widen_mult_pattern): Update calls to
66 vect_handle_widen_mult_by_const.
67 (vect_operation_fits_smaller_type): Check that def_stmt is
68 inside the loop.
69
70 gcc/testsuite/
71 PR tree-optimization/50208
72 * gcc.dg/vect/no-fre-pre-pr50208.c: New test.
73 * gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with
74 -fno-tree-fre -fno-tree-pre.
75
76=== added file 'gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c'
77--- old/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 1970-01-01 00:00:00 +0000
78+++ new/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c 2011-09-05 06:23:37 +0000
79@@ -0,0 +1,17 @@
80+/* { dg-do compile } */
81+
82+char c;
83+int a, b;
84+
85+void foo (int j)
86+{
87+ int i;
88+ while (--j)
89+ {
90+ b = 3;
91+ for (i = 0; i < 2; ++i)
92+ a = b ^ c;
93+ }
94+}
95+
96+/* { dg-final { cleanup-tree-dump "vect" } } */
97
98=== added file 'gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c'
99--- old/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 1970-01-01 00:00:00 +0000
100+++ new/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c 2011-09-05 06:23:37 +0000
101@@ -0,0 +1,15 @@
102+/* { dg-do compile } */
103+
104+extern unsigned char g_5;
105+extern int g_31, g_76;
106+int main(void) {
107+ int i, j;
108+ for (j=0; j < 2; ++j) {
109+ g_31 = -3;
110+ for (i=0; i < 2; ++i)
111+ g_76 = (g_31 ? g_31+1 : 0) ^ g_5;
112+ }
113+}
114+
115+/* { dg-final { cleanup-tree-dump "vect" } } */
116+
117
118=== added file 'gcc/testsuite/gcc.dg/vect/pr50014.c'
119--- old/gcc/testsuite/gcc.dg/vect/pr50014.c 1970-01-01 00:00:00 +0000
120+++ new/gcc/testsuite/gcc.dg/vect/pr50014.c 2011-09-05 06:23:37 +0000
121@@ -0,0 +1,16 @@
122+/* { dg-do compile } */
123+/* { dg-require-effective-target vect_int } */
124+
125+int f(unsigned char *s, int n)
126+{
127+ int sum = 0;
128+ int i;
129+
130+ for (i = 0; i < n; i++)
131+ sum += 256 * s[i];
132+
133+ return sum;
134+}
135+
136+/* { dg-final { cleanup-tree-dump "vect" } } */
137+
138
139=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c'
140--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 1970-01-01 00:00:00 +0000
141+++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c 2011-09-05 06:23:37 +0000
142@@ -0,0 +1,64 @@
143+/* { dg-require-effective-target vect_int } */
144+/* { dg-require-effective-target vect_shift } */
145+
146+#include <stdlib.h>
147+#include <stdarg.h>
148+#include "tree-vect.h"
149+
150+#define N 64
151+
152+/* Modified rgb to rgb conversion from FFmpeg. */
153+__attribute__ ((noinline)) void
154+foo (unsigned char *src, unsigned char *dst)
155+{
156+ unsigned char *s = src;
157+ unsigned short *d = (unsigned short *)dst;
158+ int i;
159+
160+ for (i = 0; i < N/4; i++)
161+ {
162+ const int b = *s++;
163+ const int g = *s++;
164+ const int r = *s++;
165+ const int a = *s++;
166+ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
167+ d++;
168+ }
169+
170+ s = src;
171+ d = (unsigned short *)dst;
172+ for (i = 0; i < N/4; i++)
173+ {
174+ const int b = *s++;
175+ const int g = *s++;
176+ const int r = *s++;
177+ const int a = *s++;
178+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
179+ abort ();
180+ d++;
181+ }
182+}
183+
184+int main (void)
185+{
186+ int i;
187+ unsigned char in[N], out[N];
188+
189+ check_vect ();
190+
191+ for (i = 0; i < N; i++)
192+ {
193+ in[i] = i;
194+ out[i] = 255;
195+ __asm__ volatile ("");
196+ }
197+
198+ foo (in, out);
199+
200+ return 0;
201+}
202+
203+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
204+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
205+/* { dg-final { cleanup-tree-dump "vect" } } */
206+
207
208=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c'
209--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 1970-01-01 00:00:00 +0000
210+++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c 2011-09-05 06:23:37 +0000
211@@ -0,0 +1,65 @@
212+/* { dg-require-effective-target vect_int } */
213+/* { dg-require-effective-target vect_shift } */
214+
215+#include <stdlib.h>
216+#include <stdarg.h>
217+#include "tree-vect.h"
218+
219+#define N 64
220+
221+/* Modified rgb to rgb conversion from FFmpeg. */
222+__attribute__ ((noinline)) void
223+foo (unsigned char *src, unsigned char *dst)
224+{
225+ unsigned char *s = src;
226+ int *d = (int *)dst;
227+ int i;
228+
229+ for (i = 0; i < N/4; i++)
230+ {
231+ const int b = *s++;
232+ const int g = *s++;
233+ const int r = *s++;
234+ const int a = *s++;
235+ *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
236+ d++;
237+ }
238+
239+ s = src;
240+ d = (int *)dst;
241+ for (i = 0; i < N/4; i++)
242+ {
243+ const int b = *s++;
244+ const int g = *s++;
245+ const int r = *s++;
246+ const int a = *s++;
247+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
248+ abort ();
249+ d++;
250+ }
251+}
252+
253+int main (void)
254+{
255+ int i;
256+ unsigned char in[N], out[N];
257+
258+ check_vect ();
259+
260+ for (i = 0; i < N; i++)
261+ {
262+ in[i] = i;
263+ out[i] = 255;
264+ __asm__ volatile ("");
265+ }
266+
267+ foo (in, out);
268+
269+ return 0;
270+}
271+
272+/* Final value stays in int, so no over-widening is detected at the moment. */
273+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
274+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
275+/* { dg-final { cleanup-tree-dump "vect" } } */
276+
277
278=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c'
279--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 1970-01-01 00:00:00 +0000
280+++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c 2011-09-05 06:23:37 +0000
281@@ -0,0 +1,64 @@
282+/* { dg-require-effective-target vect_int } */
283+/* { dg-require-effective-target vect_shift } */
284+
285+#include <stdlib.h>
286+#include <stdarg.h>
287+#include "tree-vect.h"
288+
289+#define N 64
290+
291+/* Modified rgb to rgb conversion from FFmpeg. */
292+__attribute__ ((noinline)) void
293+foo (unsigned char *src, unsigned char *dst)
294+{
295+ unsigned char *s = src;
296+ unsigned short *d = (unsigned short *)dst;
297+ int i;
298+
299+ for (i = 0; i < N/4; i++)
300+ {
301+ const int b = *s++;
302+ const int g = *s++;
303+ const int r = *s++;
304+ const int a = *s++;
305+ *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
306+ d++;
307+ }
308+
309+ s = src;
310+ d = (unsigned short *)dst;
311+ for (i = 0; i < N/4; i++)
312+ {
313+ const int b = *s++;
314+ const int g = *s++;
315+ const int r = *s++;
316+ const int a = *s++;
317+ if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
318+ abort ();
319+ d++;
320+ }
321+}
322+
323+int main (void)
324+{
325+ int i;
326+ unsigned char in[N], out[N];
327+
328+ check_vect ();
329+
330+ for (i = 0; i < N; i++)
331+ {
332+ in[i] = i;
333+ out[i] = 255;
334+ __asm__ volatile ("");
335+ }
336+
337+ foo (in, out);
338+
339+ return 0;
340+}
341+
342+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
343+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
344+/* { dg-final { cleanup-tree-dump "vect" } } */
345+
346
347=== added file 'gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c'
348--- old/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 1970-01-01 00:00:00 +0000
349+++ new/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c 2011-09-05 06:23:37 +0000
350@@ -0,0 +1,68 @@
351+/* { dg-require-effective-target vect_int } */
352+/* { dg-require-effective-target vect_shift } */
353+
354+#include <stdlib.h>
355+#include <stdarg.h>
356+#include "tree-vect.h"
357+
358+#define N 64
359+
360+/* Modified rgb to rgb conversion from FFmpeg. */
361+__attribute__ ((noinline)) int
362+foo (unsigned char *src, unsigned char *dst)
363+{
364+ unsigned char *s = src;
365+ unsigned short *d = (unsigned short *)dst, res;
366+ int i, result = 0;
367+
368+ for (i = 0; i < N/4; i++)
369+ {
370+ const int b = *s++;
371+ const int g = *s++;
372+ const int r = *s++;
373+ const int a = *s++;
374+ res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
375+ *d = res;
376+ result += res;
377+ d++;
378+ }
379+
380+ s = src;
381+ d = (unsigned short *)dst;
382+ for (i = 0; i < N/4; i++)
383+ {
384+ const int b = *s++;
385+ const int g = *s++;
386+ const int r = *s++;
387+ const int a = *s++;
388+ if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
389+ abort ();
390+ d++;
391+ }
392+
393+ return result;
394+}
395+
396+int main (void)
397+{
398+ int i;
399+ unsigned char in[N], out[N];
400+
401+ check_vect ();
402+
403+ for (i = 0; i < N; i++)
404+ {
405+ in[i] = i;
406+ out[i] = 255;
407+ __asm__ volatile ("");
408+ }
409+
410+ foo (in, out);
411+
412+ return 0;
413+}
414+
415+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
416+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
417+/* { dg-final { cleanup-tree-dump "vect" } } */
418+
419
420=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp'
421--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2011-05-05 15:43:31 +0000
422+++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-09-05 06:23:37 +0000
423@@ -245,6 +245,18 @@
424 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \
425 "" $VECT_SLP_CFLAGS
426
427+# -fno-tree-fre
428+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
429+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre"
430+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \
431+ "" $DEFAULT_VECTCFLAGS
432+
433+# -fno-tree-fre -fno-tree-pre
434+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
435+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre"
436+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \
437+ "" $DEFAULT_VECTCFLAGS
438+
439 # Clean up.
440 set dg-do-what-default ${save-dg-do-what-default}
441
442
443=== modified file 'gcc/tree-vect-loop.c'
444--- old/gcc/tree-vect-loop.c 2011-07-11 11:02:55 +0000
445+++ new/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000
446@@ -181,8 +181,8 @@
447 stmt_vec_info stmt_info;
448 int i;
449 HOST_WIDE_INT dummy;
450- gimple stmt, pattern_stmt = NULL;
451- bool analyze_pattern_stmt = false;
452+ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
453+ bool analyze_pattern_stmt = false, pattern_def = false;
454
455 if (vect_print_dump_info (REPORT_DETAILS))
456 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
457@@ -297,6 +297,29 @@
458 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
459 analyze_pattern_stmt = true;
460
461+ /* If a pattern statement has a def stmt, analyze it too. */
462+ if (is_pattern_stmt_p (stmt_info)
463+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
464+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
465+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
466+ {
467+ if (pattern_def)
468+ pattern_def = false;
469+ else
470+ {
471+ if (vect_print_dump_info (REPORT_DETAILS))
472+ {
473+ fprintf (vect_dump, "==> examining pattern def stmt: ");
474+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
475+ TDF_SLIM);
476+ }
477+
478+ pattern_def = true;
479+ stmt = pattern_def_stmt;
480+ stmt_info = vinfo_for_stmt (stmt);
481+ }
482+ }
483+
484 if (gimple_get_lhs (stmt) == NULL_TREE)
485 {
486 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
487@@ -401,7 +424,7 @@
488 || (nunits > vectorization_factor))
489 vectorization_factor = nunits;
490
491- if (!analyze_pattern_stmt)
492+ if (!analyze_pattern_stmt && !pattern_def)
493 gsi_next (&si);
494 }
495 }
496@@ -3985,7 +4008,7 @@
497 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
498 VEC (gimple, heap) *phis = NULL;
499 int vec_num;
500- tree def0, def1, tem;
501+ tree def0, def1, tem, op0, op1 = NULL_TREE;
502
503 if (nested_in_vect_loop_p (loop, stmt))
504 {
505@@ -4418,8 +4441,6 @@
506 /* Handle uses. */
507 if (j == 0)
508 {
509- tree op0, op1 = NULL_TREE;
510-
511 op0 = ops[!reduc_index];
512 if (op_type == ternary_op)
513 {
514@@ -4449,11 +4470,19 @@
515 {
516 if (!slp_node)
517 {
518- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
519- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
520+ enum vect_def_type dt;
521+ gimple dummy_stmt;
522+ tree dummy;
523+
524+ vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL,
525+ &dummy_stmt, &dummy, &dt);
526+ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
527+ loop_vec_def0);
528 VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
529 if (op_type == ternary_op)
530 {
531+ vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt,
532+ &dummy, &dt);
533 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
534 loop_vec_def1);
535 VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
536@@ -4758,8 +4787,8 @@
537 tree cond_expr = NULL_TREE;
538 gimple_seq cond_expr_stmt_list = NULL;
539 bool do_peeling_for_loop_bound;
540- gimple stmt, pattern_stmt;
541- bool transform_pattern_stmt = false;
542+ gimple stmt, pattern_stmt, pattern_def_stmt;
543+ bool transform_pattern_stmt = false, pattern_def = false;
544
545 if (vect_print_dump_info (REPORT_DETAILS))
546 fprintf (vect_dump, "=== vec_transform_loop ===");
547@@ -4903,6 +4932,30 @@
548 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
549 transform_pattern_stmt = true;
550
551+ /* If pattern statement has a def stmt, vectorize it too. */
552+ if (is_pattern_stmt_p (stmt_info)
553+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
554+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
555+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
556+ {
557+ if (pattern_def)
558+ pattern_def = false;
559+ else
560+ {
561+ if (vect_print_dump_info (REPORT_DETAILS))
562+ {
563+ fprintf (vect_dump, "==> vectorizing pattern def"
564+ " stmt: ");
565+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
566+ TDF_SLIM);
567+ }
568+
569+ pattern_def = true;
570+ stmt = pattern_def_stmt;
571+ stmt_info = vinfo_for_stmt (stmt);
572+ }
573+ }
574+
575 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
576 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
577 STMT_VINFO_VECTYPE (stmt_info));
578@@ -4930,7 +4983,7 @@
579 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
580 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
581 {
582- if (!transform_pattern_stmt)
583+ if (!transform_pattern_stmt && !pattern_def)
584 gsi_next (&si);
585 continue;
586 }
587@@ -4962,7 +5015,7 @@
588 }
589 }
590
591- if (!transform_pattern_stmt)
592+ if (!transform_pattern_stmt && !pattern_def)
593 gsi_next (&si);
594 } /* stmts in BB */
595 } /* BBs in loop */
596
597=== modified file 'gcc/tree-vect-patterns.c'
598--- old/gcc/tree-vect-patterns.c 2011-07-06 12:04:10 +0000
599+++ new/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000
600@@ -46,11 +46,14 @@
601 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
602 tree *);
603 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
604+static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
605+ tree *);
606 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
607 vect_recog_widen_mult_pattern,
608 vect_recog_widen_sum_pattern,
609 vect_recog_dot_prod_pattern,
610- vect_recog_pow_pattern};
611+ vect_recog_pow_pattern,
612+ vect_recog_over_widening_pattern};
613
614
615 /* Function widened_name_p
616@@ -339,12 +342,14 @@
617 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
618
619 static bool
620-vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd,
621+vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
622 VEC (gimple, heap) **stmts, tree type,
623 tree *half_type, gimple def_stmt)
624 {
625 tree new_type, new_oprnd, tmp;
626 gimple new_stmt;
627+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
628+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
629
630 if (int_fits_type_p (const_oprnd, *half_type))
631 {
632@@ -354,6 +359,8 @@
633 }
634
635 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
636+ || !gimple_bb (def_stmt)
637+ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
638 || !vinfo_for_stmt (def_stmt))
639 return false;
640
641@@ -522,7 +529,8 @@
642 {
643 if (TREE_CODE (oprnd0) == INTEGER_CST
644 && TREE_CODE (half_type1) == INTEGER_TYPE
645- && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type,
646+ && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
647+ stmts, type,
648 &half_type1, def_stmt1))
649 half_type0 = half_type1;
650 else
651@@ -532,7 +540,8 @@
652 {
653 if (TREE_CODE (oprnd1) == INTEGER_CST
654 && TREE_CODE (half_type0) == INTEGER_TYPE
655- && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type,
656+ && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
657+ stmts, type,
658 &half_type0, def_stmt0))
659 half_type1 = half_type0;
660 else
661@@ -826,6 +835,424 @@
662 }
663
664
665+/* Return TRUE if the operation in STMT can be performed on a smaller type.
666+
667+ Input:
668+ STMT - a statement to check.
669+ DEF - we support operations with two operands, one of which is constant.
670+ The other operand can be defined by a demotion operation, or by a
671+ previous statement in a sequence of over-promoted operations. In the
672+ later case DEF is used to replace that operand. (It is defined by a
673+ pattern statement we created for the previous statement in the
674+ sequence).
675+
676+ Input/output:
677+ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
678+ NULL, it's the type of DEF.
679+ STMTS - additional pattern statements. If a pattern statement (type
680+ conversion) is created in this function, its original statement is
681+ added to STMTS.
682+
683+ Output:
684+ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
685+ operands to use in the new pattern statement for STMT (will be created
686+ in vect_recog_over_widening_pattern ()).
687+ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
688+ statements for STMT: the first one is a type promotion and the second
689+ one is the operation itself. We return the type promotion statement
690+ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
691+ the second pattern statement. */
692+
693+static bool
694+vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
695+ tree *op0, tree *op1, gimple *new_def_stmt,
696+ VEC (gimple, heap) **stmts)
697+{
698+ enum tree_code code;
699+ tree const_oprnd, oprnd;
700+ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
701+ gimple def_stmt, new_stmt;
702+ bool first = false;
703+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
704+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
705+
706+ *new_def_stmt = NULL;
707+
708+ if (!is_gimple_assign (stmt))
709+ return false;
710+
711+ code = gimple_assign_rhs_code (stmt);
712+ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
713+ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
714+ return false;
715+
716+ oprnd = gimple_assign_rhs1 (stmt);
717+ const_oprnd = gimple_assign_rhs2 (stmt);
718+ type = gimple_expr_type (stmt);
719+
720+ if (TREE_CODE (oprnd) != SSA_NAME
721+ || TREE_CODE (const_oprnd) != INTEGER_CST)
722+ return false;
723+
724+ /* If we are in the middle of a sequence, we use DEF from a previous
725+ statement. Otherwise, OPRND has to be a result of type promotion. */
726+ if (*new_type)
727+ {
728+ half_type = *new_type;
729+ oprnd = def;
730+ }
731+ else
732+ {
733+ first = true;
734+ if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
735+ || !gimple_bb (def_stmt)
736+ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
737+ || !vinfo_for_stmt (def_stmt))
738+ return false;
739+ }
740+
741+ /* Can we perform the operation on a smaller type? */
742+ switch (code)
743+ {
744+ case BIT_IOR_EXPR:
745+ case BIT_XOR_EXPR:
746+ case BIT_AND_EXPR:
747+ if (!int_fits_type_p (const_oprnd, half_type))
748+ {
749+ /* HALF_TYPE is not enough. Try a bigger type if possible. */
750+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
751+ return false;
752+
753+ interm_type = build_nonstandard_integer_type (
754+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
755+ if (!int_fits_type_p (const_oprnd, interm_type))
756+ return false;
757+ }
758+
759+ break;
760+
761+ case LSHIFT_EXPR:
762+ /* Try intermediate type - HALF_TYPE is not enough for sure. */
763+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
764+ return false;
765+
766+ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
767+ (e.g., if the original value was char, the shift amount is at most 8
768+ if we want to use short). */
769+ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
770+ return false;
771+
772+ interm_type = build_nonstandard_integer_type (
773+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
774+
775+ if (!vect_supportable_shift (code, interm_type))
776+ return false;
777+
778+ break;
779+
780+ case RSHIFT_EXPR:
781+ if (vect_supportable_shift (code, half_type))
782+ break;
783+
784+ /* Try intermediate type - HALF_TYPE is not supported. */
785+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
786+ return false;
787+
788+ interm_type = build_nonstandard_integer_type (
789+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
790+
791+ if (!vect_supportable_shift (code, interm_type))
792+ return false;
793+
794+ break;
795+
796+ default:
797+ gcc_unreachable ();
798+ }
799+
800+ /* There are four possible cases:
801+ 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
802+ the first statement in the sequence)
803+ a. The original, HALF_TYPE, is not enough - we replace the promotion
804+ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
805+ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
806+ promotion.
807+ 2. OPRND is defined by a pattern statement we created.
808+ a. Its type is not sufficient for the operation, we create a new stmt:
809+ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
810+ this statement in NEW_DEF_STMT, and it is later put in
811+ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
812+ b. OPRND is good to use in the new statement. */
813+ if (first)
814+ {
815+ if (interm_type)
816+ {
817+ /* Replace the original type conversion HALF_TYPE->TYPE with
818+ HALF_TYPE->INTERM_TYPE. */
819+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
820+ {
821+ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
822+ /* Check if the already created pattern stmt is what we need. */
823+ if (!is_gimple_assign (new_stmt)
824+ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
825+ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
826+ return false;
827+
828+ oprnd = gimple_assign_lhs (new_stmt);
829+ }
830+ else
831+ {
832+ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
833+ oprnd = gimple_assign_rhs1 (def_stmt);
834+ tmp = create_tmp_reg (interm_type, NULL);
835+ add_referenced_var (tmp);
836+ new_oprnd = make_ssa_name (tmp, NULL);
837+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
838+ oprnd, NULL_TREE);
839+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
840+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
841+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
842+ oprnd = new_oprnd;
843+ }
844+ }
845+ else
846+ {
847+ /* Retrieve the operand before the type promotion. */
848+ oprnd = gimple_assign_rhs1 (def_stmt);
849+ }
850+ }
851+ else
852+ {
853+ if (interm_type)
854+ {
855+ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
856+ tmp = create_tmp_reg (interm_type, NULL);
857+ add_referenced_var (tmp);
858+ new_oprnd = make_ssa_name (tmp, NULL);
859+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
860+ oprnd, NULL_TREE);
861+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
862+ oprnd = new_oprnd;
863+ *new_def_stmt = new_stmt;
864+ }
865+
866+ /* Otherwise, OPRND is already set. */
867+ }
868+
869+ if (interm_type)
870+ *new_type = interm_type;
871+ else
872+ *new_type = half_type;
873+
874+ *op0 = oprnd;
875+ *op1 = fold_convert (*new_type, const_oprnd);
876+
877+ return true;
878+}
879+
880+
881+/* Try to find a statement or a sequence of statements that can be performed
882+ on a smaller type:
883+
884+ type x_t;
885+ TYPE x_T, res0_T, res1_T;
886+ loop:
887+ S1 x_t = *p;
888+ S2 x_T = (TYPE) x_t;
889+ S3 res0_T = op (x_T, C0);
890+ S4 res1_T = op (res0_T, C1);
891+ S5 ... = () res1_T; - type demotion
892+
893+ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
894+ constants.
895+ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
896+ be 'type' or some intermediate type. For now, we expect S5 to be a type
897+ demotion operation. We also check that S3 and S4 have only one use.
898+
899+
900+*/
901+static gimple
902+vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
903+ tree *type_in, tree *type_out)
904+{
905+ gimple stmt = VEC_pop (gimple, *stmts);
906+ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
907+ tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
908+ imm_use_iterator imm_iter;
909+ use_operand_p use_p;
910+ int nuses = 0;
911+ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
912+ bool first;
913+ struct loop *loop = (gimple_bb (stmt))->loop_father;
914+
915+ first = true;
916+ while (1)
917+ {
918+ if (!vinfo_for_stmt (stmt)
919+ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
920+ return NULL;
921+
922+ new_def_stmt = NULL;
923+ if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
924+ &op0, &op1, &new_def_stmt,
925+ stmts))
926+ {
927+ if (first)
928+ return NULL;
929+ else
930+ break;
931+ }
932+
933+ /* STMT can be performed on a smaller type. Check its uses. */
934+ lhs = gimple_assign_lhs (stmt);
935+ nuses = 0;
936+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
937+ {
938+ if (is_gimple_debug (USE_STMT (use_p)))
939+ continue;
940+ use_stmt = USE_STMT (use_p);
941+ nuses++;
942+ }
943+
944+ if (nuses != 1 || !is_gimple_assign (use_stmt)
945+ || !gimple_bb (use_stmt)
946+ || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
947+ return NULL;
948+
949+ /* Create pattern statement for STMT. */
950+ vectype = get_vectype_for_scalar_type (new_type);
951+ if (!vectype)
952+ return NULL;
953+
954+ /* We want to collect all the statements for which we create pattern
955+ statements, except for the case when the last statement in the
956+ sequence doesn't have a corresponding pattern statement. In such
957+ case we associate the last pattern statement with the last statement
958+ in the sequence. Therefore, we only add an original statement to
959+ the list if we know that it is not the last. */
960+ if (prev_stmt)
961+ VEC_safe_push (gimple, heap, *stmts, prev_stmt);
962+
963+ var = vect_recog_temp_ssa_var (new_type, NULL);
964+ pattern_stmt = gimple_build_assign_with_ops (
965+ gimple_assign_rhs_code (stmt), var, op0, op1);
966+ SSA_NAME_DEF_STMT (var) = pattern_stmt;
967+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
968+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
969+
970+ if (vect_print_dump_info (REPORT_DETAILS))
971+ {
972+ fprintf (vect_dump, "created pattern stmt: ");
973+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
974+ }
975+
976+ prev_stmt = stmt;
977+ stmt = use_stmt;
978+
979+ first = false;
980+ }
981+
982+ /* We got a sequence. We expect it to end with a type demotion operation.
983+ Otherwise, we quit (for now). There are three possible cases: the
984+ conversion is to NEW_TYPE (we don't do anything), the conversion is to
985+ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
986+ NEW_TYPE differs (we create a new conversion statement). */
987+ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
988+ {
989+ use_lhs = gimple_assign_lhs (use_stmt);
990+ use_type = TREE_TYPE (use_lhs);
991+ /* Support only type promotion or signedness change. */
992+ if (!INTEGRAL_TYPE_P (use_type)
993+ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
994+ return NULL;
995+
996+ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
997+ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
998+ {
999+ /* Create NEW_TYPE->USE_TYPE conversion. */
1000+ tmp = create_tmp_reg (use_type, NULL);
1001+ add_referenced_var (tmp);
1002+ new_oprnd = make_ssa_name (tmp, NULL);
1003+ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1004+ var, NULL_TREE);
1005+ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
1006+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1007+
1008+ *type_in = get_vectype_for_scalar_type (new_type);
1009+ *type_out = get_vectype_for_scalar_type (use_type);
1010+
1011+ /* We created a pattern statement for the last statement in the
1012+ sequence, so we don't need to associate it with the pattern
1013+ statement created for PREV_STMT. Therefore, we add PREV_STMT
1014+ to the list in order to mark it later in vect_pattern_recog_1. */
1015+ if (prev_stmt)
1016+ VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1017+ }
1018+ else
1019+ {
1020+ if (prev_stmt)
1021+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1022+ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1023+
1024+ *type_in = vectype;
1025+ *type_out = NULL_TREE;
1026+ }
1027+
1028+ VEC_safe_push (gimple, heap, *stmts, use_stmt);
1029+ }
1030+ else
1031+ /* TODO: support general case, create a conversion to the correct type. */
1032+ return NULL;
1033+
1034+ /* Pattern detected. */
1035+ if (vect_print_dump_info (REPORT_DETAILS))
1036+ {
1037+ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1038+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1039+ }
1040+
1041+ return pattern_stmt;
1042+}
1043+
1044+
1045+/* Mark statements that are involved in a pattern. */
1046+
1047+static inline void
1048+vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1049+ tree pattern_vectype)
1050+{
1051+ stmt_vec_info pattern_stmt_info, def_stmt_info;
1052+ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1053+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1054+ gimple def_stmt;
1055+
1056+ set_vinfo_for_stmt (pattern_stmt,
1057+ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1058+ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1059+ pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1060+
1061+ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1062+ STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1063+ = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1064+ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1065+ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1066+ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1067+ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1068+ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1069+ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1070+ {
1071+ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1072+ set_vinfo_for_stmt (def_stmt,
1073+ new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
1074+ gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1075+ def_stmt_info = vinfo_for_stmt (def_stmt);
1076+ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1077+ STMT_VINFO_DEF_TYPE (def_stmt_info)
1078+ = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1079+ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1080+ }
1081+}
1082+
1083 /* Function vect_pattern_recog_1
1084
1085 Input:
1086@@ -855,7 +1282,6 @@
1087 {
1088 gimple stmt = gsi_stmt (si), pattern_stmt;
1089 stmt_vec_info stmt_info;
1090- stmt_vec_info pattern_stmt_info;
1091 loop_vec_info loop_vinfo;
1092 tree pattern_vectype;
1093 tree type_in, type_out;
1094@@ -923,16 +1349,7 @@
1095 }
1096
1097 /* Mark the stmts that are involved in the pattern. */
1098- set_vinfo_for_stmt (pattern_stmt,
1099- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1100- gimple_set_bb (pattern_stmt, gimple_bb (stmt));
1101- pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1102-
1103- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
1104- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
1105- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1106- STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
1107- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
1108+ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1109
1110 /* Patterns cannot be vectorized using SLP, because they change the order of
1111 computation. */
1112@@ -940,9 +1357,9 @@
1113 if (next == stmt)
1114 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1115
1116- /* In case of widen-mult by a constant, it is possible that an additional
1117- pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a
1118- stmt_info for it, and mark the relevant statements. */
1119+ /* It is possible that additional pattern stmts are created and inserted in
1120+ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1121+ relevant statements. */
1122 for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
1123 && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
1124 i++)
1125@@ -955,16 +1372,7 @@
1126 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1127 }
1128
1129- set_vinfo_for_stmt (pattern_stmt,
1130- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1131- gimple_set_bb (pattern_stmt, gimple_bb (stmt));
1132- pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1133-
1134- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
1135- STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1136- = STMT_VINFO_DEF_TYPE (stmt_info);
1137- STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info);
1138- STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
1139+ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1140 }
1141
1142 VEC_free (gimple, heap, stmts_to_replace);
1143
1144=== modified file 'gcc/tree-vect-stmts.c'
1145--- old/gcc/tree-vect-stmts.c 2011-07-06 12:04:10 +0000
1146+++ new/gcc/tree-vect-stmts.c 2011-09-05 06:23:37 +0000
1147@@ -2246,6 +2246,42 @@
1148 }
1149
1150
1151+/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
1152+ either as shift by a scalar or by a vector. */
1153+
1154+bool
1155+vect_supportable_shift (enum tree_code code, tree scalar_type)
1156+{
1157+
1158+ enum machine_mode vec_mode;
1159+ optab optab;
1160+ int icode;
1161+ tree vectype;
1162+
1163+ vectype = get_vectype_for_scalar_type (scalar_type);
1164+ if (!vectype)
1165+ return false;
1166+
1167+ optab = optab_for_tree_code (code, vectype, optab_scalar);
1168+ if (!optab
1169+ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1170+ {
1171+ optab = optab_for_tree_code (code, vectype, optab_vector);
1172+ if (!optab
1173+ || (optab_handler (optab, TYPE_MODE (vectype))
1174+ == CODE_FOR_nothing))
1175+ return false;
1176+ }
1177+
1178+ vec_mode = TYPE_MODE (vectype);
1179+ icode = (int) optab_handler (optab, vec_mode);
1180+ if (icode == CODE_FOR_nothing)
1181+ return false;
1182+
1183+ return true;
1184+}
1185+
1186+
1187 /* Function vectorizable_shift.
1188
1189 Check if STMT performs a shift operation that can be vectorized.
1190@@ -4946,7 +4982,7 @@
1191 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
1192 bool ok;
1193 tree scalar_type, vectype;
1194- gimple pattern_stmt;
1195+ gimple pattern_stmt, pattern_def_stmt;
1196
1197 if (vect_print_dump_info (REPORT_DETAILS))
1198 {
1199@@ -5016,6 +5052,23 @@
1200 return false;
1201 }
1202
1203+ if (is_pattern_stmt_p (stmt_info)
1204+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
1205+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
1206+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
1207+ {
1208+ /* Analyze def stmt of STMT if it's a pattern stmt. */
1209+ if (vect_print_dump_info (REPORT_DETAILS))
1210+ {
1211+ fprintf (vect_dump, "==> examining pattern def statement: ");
1212+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
1213+ }
1214+
1215+ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
1216+ return false;
1217+ }
1218+
1219+
1220 switch (STMT_VINFO_DEF_TYPE (stmt_info))
1221 {
1222 case vect_internal_def:
1223@@ -5336,6 +5389,7 @@
1224 STMT_VINFO_VECTORIZABLE (res) = true;
1225 STMT_VINFO_IN_PATTERN_P (res) = false;
1226 STMT_VINFO_RELATED_STMT (res) = NULL;
1227+ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
1228 STMT_VINFO_DATA_REF (res) = NULL;
1229
1230 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
1231
1232=== modified file 'gcc/tree-vectorizer.h'
1233--- old/gcc/tree-vectorizer.h 2011-07-11 11:02:55 +0000
1234+++ new/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000
1235@@ -464,6 +464,9 @@
1236 pattern). */
1237 gimple related_stmt;
1238
1239+ /* Used to keep a def stmt of a pattern stmt if such exists. */
1240+ gimple pattern_def_stmt;
1241+
1242 /* List of datarefs that are known to have the same alignment as the dataref
1243 of this stmt. */
1244 VEC(dr_p,heap) *same_align_refs;
1245@@ -531,6 +534,7 @@
1246
1247 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
1248 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
1249+#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt
1250 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
1251 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type
1252 #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr
1253@@ -814,6 +818,7 @@
1254 extern void vect_get_load_cost (struct data_reference *, int, bool,
1255 unsigned int *, unsigned int *);
1256 extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
1257+extern bool vect_supportable_shift (enum tree_code, tree);
1258
1259 /* In tree-vect-data-refs.c. */
1260 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
1261@@ -891,7 +896,7 @@
1262 Additional pattern recognition functions can (and will) be added
1263 in the future. */
1264 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1265-#define NUM_PATTERNS 4
1266+#define NUM_PATTERNS 5
1267 void vect_pattern_recog (loop_vec_info);
1268
1269 /* In tree-vectorizer.c. */
1270