summary refs log tree commit diff stats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch')
-rw-r--r-- toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch 1355
1 files changed, 0 insertions, 1355 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch
deleted file mode 100644
index bbf9819ec..000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106762.patch
+++ /dev/null
@@ -1,1355 +0,0 @@
12011-06-28 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from FSF:
4
5 2011-06-07 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be
9 a pointer.
10 * tree-vect-patterns.c (vect_recog_widen_sum_pattern,
11 vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern,
12 vect_recog_pow_pattern): Likewise.
13 (vect_pattern_recog_1): Remove declaration.
14 (widened_name_p): Remove declaration. Add new argument to specify
15 whether to check that both types are either signed or unsigned.
16 (vect_recog_widen_mult_pattern): Update documentation. Handle
17 unsigned patterns and multiplication by constants.
18 (vect_pattern_recog_1): Update vect_recog_func references. Use
19 statement information from the statement returned from pattern
20 detection functions.
21 (vect_pattern_recog): Update vect_recog_func reference.
22 * tree-vect-stmts.c (vectorizable_type_promotion): For widening
23 multiplication by a constant use the type of the other operand.
24
25 gcc/testsuite
26 * lib/target-supports.exp
27 (check_effective_target_vect_widen_mult_qi_to_hi):
28 Add NEON as supporting target.
29 (check_effective_target_vect_widen_mult_hi_to_si): Likewise.
30 (check_effective_target_vect_widen_mult_qi_to_hi_pattern): New.
31 (check_effective_target_vect_widen_mult_hi_to_si_pattern): New.
32 * gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized
33 using widening multiplication on targets that support it.
34 * gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
35 * gcc.dg/vect/vect-widen-mult-const-s16.c: New test.
36 * gcc.dg/vect/vect-widen-mult-const-u16.c: New test.
37
38 and
39
40 2011-06-15 Ira Rosen <ira.rosen@linaro.org>
41
42 gcc/
43 * tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove.
44 (slpeel_tree_peel_loop_to_edge): Don't call
45 remove_dead_stmts_from_loop.
46 * tree-vect-loop.c (vect_determine_vectorization_factor): Don't
47 remove irrelevant pattern statements. For irrelevant statements
48 check if it is the last statement of a detected pattern, use
49 corresponding pattern statement instead.
50 (destroy_loop_vec_info): No need to remove pattern statements,
51 only free stmt_vec_info.
52 (vect_transform_loop): For irrelevant statements check if it is
53 the last statement of a detected pattern, use corresponding
54 pattern statement instead.
55 * tree-vect-patterns.c (vect_pattern_recog_1): Don't insert
56 pattern statements. Set basic block for the new statement.
57 (vect_pattern_recog): Update documentation.
58 * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan
59 operands of pattern statements.
60 (vectorizable_call): Fix printing. In case of a pattern statement
61 use the lhs of the original statement when creating a dummy
62 statement to replace the original call.
63 (vect_analyze_stmt): For irrelevant statements check if it is
64 the last statement of a detected pattern, use corresponding
65 pattern statement instead.
66 * tree-vect-slp.c (vect_schedule_slp_instance): For pattern
67 statements use gsi of the original statement.
68
69 and
70 2011-06-21 Ira Rosen <ira.rosen@linaro.org>
71
72 PR tree-optimization/49478
73 gcc/
74
75 * tree-vect-loop.c (vectorizable_reduction): Handle DOT_PROD_EXPR
76 with constant operand.
77
78=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c'
79--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 1970-01-01 00:00:00 +0000
80+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c 2011-06-19 10:59:13 +0000
81@@ -0,0 +1,60 @@
82+/* { dg-require-effective-target vect_int } */
83+
84+#include "tree-vect.h"
85+#include <stdlib.h>
86+
87+#define N 32
88+
89+__attribute__ ((noinline)) void
90+foo (int *__restrict a,
91+ short *__restrict b,
92+ int n)
93+{
94+ int i;
95+
96+ for (i = 0; i < n; i++)
97+ a[i] = b[i] * 2333;
98+
99+ for (i = 0; i < n; i++)
100+ if (a[i] != b[i] * 2333)
101+ abort ();
102+}
103+
104+__attribute__ ((noinline)) void
105+bar (int *__restrict a,
106+ short *__restrict b,
107+ int n)
108+{
109+ int i;
110+
111+ for (i = 0; i < n; i++)
112+ a[i] = b[i] * (short) 2333;
113+
114+ for (i = 0; i < n; i++)
115+ if (a[i] != b[i] * (short) 2333)
116+ abort ();
117+}
118+
119+int main (void)
120+{
121+ int i;
122+ int a[N];
123+ short b[N];
124+
125+ for (i = 0; i < N; i++)
126+ {
127+ a[i] = 0;
128+ b[i] = i;
129+ __asm__ volatile ("");
130+ }
131+
132+ foo (a, b, N);
133+ bar (a, b, N);
134+ return 0;
135+}
136+
137+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
138+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
139+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
140+/* { dg-final { cleanup-tree-dump "vect" } } */
141+
142
143=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c'
144--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 1970-01-01 00:00:00 +0000
145+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c 2011-06-19 10:59:13 +0000
146@@ -0,0 +1,77 @@
147+/* { dg-require-effective-target vect_int } */
148+
149+#include "tree-vect.h"
150+#include <stdlib.h>
151+
152+#define N 32
153+
154+__attribute__ ((noinline)) void
155+foo (unsigned int *__restrict a,
156+ unsigned short *__restrict b,
157+ int n)
158+{
159+ int i;
160+
161+ for (i = 0; i < n; i++)
162+ a[i] = b[i] * 2333;
163+
164+ for (i = 0; i < n; i++)
165+ if (a[i] != b[i] * 2333)
166+ abort ();
167+}
168+
169+__attribute__ ((noinline)) void
170+bar (unsigned int *__restrict a,
171+ unsigned short *__restrict b,
172+ int n)
173+{
174+ int i;
175+
176+ for (i = 0; i < n; i++)
177+ a[i] = (unsigned short) 2333 * b[i];
178+
179+ for (i = 0; i < n; i++)
180+ if (a[i] != b[i] * (unsigned short) 2333)
181+ abort ();
182+}
183+
184+__attribute__ ((noinline)) void
185+baz (unsigned int *__restrict a,
186+ unsigned short *__restrict b,
187+ int n)
188+{
189+ int i;
190+
191+ for (i = 0; i < n; i++)
192+ a[i] = b[i] * 233333333;
193+
194+ for (i = 0; i < n; i++)
195+ if (a[i] != b[i] * 233333333)
196+ abort ();
197+}
198+
199+
200+int main (void)
201+{
202+ int i;
203+ unsigned int a[N];
204+ unsigned short b[N];
205+
206+ for (i = 0; i < N; i++)
207+ {
208+ a[i] = 0;
209+ b[i] = i;
210+ __asm__ volatile ("");
211+ }
212+
213+ foo (a, b, N);
214+ bar (a, b, N);
215+ baz (a, b, N);
216+ return 0;
217+}
218+
219+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */
220+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
221+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
222+/* { dg-final { cleanup-tree-dump "vect" } } */
223+
224
225=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c'
226--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2010-05-27 12:23:45 +0000
227+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c 2011-06-19 10:59:13 +0000
228@@ -9,13 +9,11 @@
229 unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
230 unsigned int result[N];
231
232-/* short->int widening-mult */
233+/* unsigned short->unsigned int widening-mult. */
234 __attribute__ ((noinline)) int
235 foo1(int len) {
236 int i;
237
238- /* Not vectorized because X[i] and Y[i] are casted to 'int'
239- so the widening multiplication pattern is not recognized. */
240 for (i=0; i<len; i++) {
241 result[i] = (unsigned int)(X[i] * Y[i]);
242 }
243@@ -43,8 +41,8 @@
244 return 0;
245 }
246
247-/*The induction loop is vectorized */
248-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
249-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
250+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
251+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
252+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
253 /* { dg-final { cleanup-tree-dump "vect" } } */
254
255
256=== modified file 'gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c'
257--- old/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2009-05-08 12:39:01 +0000
258+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c 2011-06-19 10:59:13 +0000
259@@ -9,7 +9,7 @@
260 unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
261 unsigned short result[N];
262
263-/* char->short widening-mult */
264+/* unsigned char-> unsigned short widening-mult. */
265 __attribute__ ((noinline)) int
266 foo1(int len) {
267 int i;
268@@ -28,8 +28,7 @@
269 for (i=0; i<N; i++) {
270 X[i] = i;
271 Y[i] = 64-i;
272- if (i%4 == 0)
273- X[i] = 5;
274+ __asm__ volatile ("");
275 }
276
277 foo1 (N);
278@@ -43,5 +42,7 @@
279 }
280
281 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
282+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
283+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
284 /* { dg-final { cleanup-tree-dump "vect" } } */
285
286
287=== modified file 'gcc/testsuite/lib/target-supports.exp'
288--- old/gcc/testsuite/lib/target-supports.exp 2011-06-02 12:12:00 +0000
289+++ new/gcc/testsuite/lib/target-supports.exp 2011-06-19 10:59:13 +0000
290@@ -2663,7 +2663,8 @@
291 } else {
292 set et_vect_widen_mult_qi_to_hi_saved 0
293 }
294- if { [istarget powerpc*-*-*] } {
295+ if { [istarget powerpc*-*-*]
296+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
297 set et_vect_widen_mult_qi_to_hi_saved 1
298 }
299 }
300@@ -2696,7 +2697,8 @@
301 || [istarget spu-*-*]
302 || [istarget ia64-*-*]
303 || [istarget i?86-*-*]
304- || [istarget x86_64-*-*] } {
305+ || [istarget x86_64-*-*]
306+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
307 set et_vect_widen_mult_hi_to_si_saved 1
308 }
309 }
310@@ -2705,6 +2707,52 @@
311 }
312
313 # Return 1 if the target plus current options supports a vector
314+# widening multiplication of *char* args into *short* result, 0 otherwise.
315+#
316+# This won't change for different subtargets so cache the result.
317+
318+proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } {
319+ global et_vect_widen_mult_qi_to_hi_pattern
320+
321+ if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] {
322+ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2
323+ } else {
324+ set et_vect_widen_mult_qi_to_hi_pattern_saved 0
325+ if { [istarget powerpc*-*-*]
326+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
327+ set et_vect_widen_mult_qi_to_hi_pattern_saved 1
328+ }
329+ }
330+ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2
331+ return $et_vect_widen_mult_qi_to_hi_pattern_saved
332+}
333+
334+# Return 1 if the target plus current options supports a vector
335+# widening multiplication of *short* args into *int* result, 0 otherwise.
336+#
337+# This won't change for different subtargets so cache the result.
338+
339+proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } {
340+ global et_vect_widen_mult_hi_to_si_pattern
341+
342+ if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] {
343+ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2
344+ } else {
345+ set et_vect_widen_mult_hi_to_si_pattern_saved 0
346+ if { [istarget powerpc*-*-*]
347+ || [istarget spu-*-*]
348+ || [istarget ia64-*-*]
349+ || [istarget i?86-*-*]
350+ || [istarget x86_64-*-*]
351+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
352+ set et_vect_widen_mult_hi_to_si_pattern_saved 1
353+ }
354+ }
355+ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2
356+ return $et_vect_widen_mult_hi_to_si_pattern_saved
357+}
358+
359+# Return 1 if the target plus current options supports a vector
360 # dot-product of signed chars, 0 otherwise.
361 #
362 # This won't change for different subtargets so cache the result.
363
364=== modified file 'gcc/tree-vect-loop-manip.c'
365--- old/gcc/tree-vect-loop-manip.c 2011-05-18 13:24:05 +0000
366+++ new/gcc/tree-vect-loop-manip.c 2011-06-19 10:59:13 +0000
367@@ -1105,35 +1105,6 @@
368 first_niters = PHI_RESULT (newphi);
369 }
370
371-
372-/* Remove dead assignments from loop NEW_LOOP. */
373-
374-static void
375-remove_dead_stmts_from_loop (struct loop *new_loop)
376-{
377- basic_block *bbs = get_loop_body (new_loop);
378- unsigned i;
379- for (i = 0; i < new_loop->num_nodes; ++i)
380- {
381- gimple_stmt_iterator gsi;
382- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
383- {
384- gimple stmt = gsi_stmt (gsi);
385- if (is_gimple_assign (stmt)
386- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
387- && has_zero_uses (gimple_assign_lhs (stmt)))
388- {
389- gsi_remove (&gsi, true);
390- release_defs (stmt);
391- }
392- else
393- gsi_next (&gsi);
394- }
395- }
396- free (bbs);
397-}
398-
399-
400 /* Function slpeel_tree_peel_loop_to_edge.
401
402 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
403@@ -1445,13 +1416,6 @@
404 BITMAP_FREE (definitions);
405 delete_update_ssa ();
406
407- /* Remove all pattern statements from the loop copy. They will confuse
408- the expander if DCE is disabled.
409- ??? The pattern recognizer should be split into an analysis and
410- a transformation phase that is then run only on the loop that is
411- going to be transformed. */
412- remove_dead_stmts_from_loop (new_loop);
413-
414 adjust_vec_debug_stmts ();
415
416 return new_loop;
417
418=== modified file 'gcc/tree-vect-loop.c'
419--- old/gcc/tree-vect-loop.c 2011-03-01 13:18:25 +0000
420+++ new/gcc/tree-vect-loop.c 2011-06-22 06:21:13 +0000
421@@ -244,7 +244,7 @@
422 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
423 {
424 tree vf_vectype;
425- gimple stmt = gsi_stmt (si);
426+ gimple stmt = gsi_stmt (si), pattern_stmt;
427 stmt_info = vinfo_for_stmt (stmt);
428
429 if (vect_print_dump_info (REPORT_DETAILS))
430@@ -259,9 +259,25 @@
431 if (!STMT_VINFO_RELEVANT_P (stmt_info)
432 && !STMT_VINFO_LIVE_P (stmt_info))
433 {
434- if (vect_print_dump_info (REPORT_DETAILS))
435- fprintf (vect_dump, "skip.");
436- continue;
437+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
438+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
439+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
440+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
441+ {
442+ stmt = pattern_stmt;
443+ stmt_info = vinfo_for_stmt (pattern_stmt);
444+ if (vect_print_dump_info (REPORT_DETAILS))
445+ {
446+ fprintf (vect_dump, "==> examining pattern statement: ");
447+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
448+ }
449+ }
450+ else
451+ {
452+ if (vect_print_dump_info (REPORT_DETAILS))
453+ fprintf (vect_dump, "skip.");
454+ continue;
455+ }
456 }
457
458 if (gimple_get_lhs (stmt) == NULL_TREE)
459@@ -816,25 +832,17 @@
460
461 if (stmt_info)
462 {
463- /* Check if this is a "pattern stmt" (introduced by the
464- vectorizer during the pattern recognition pass). */
465- bool remove_stmt_p = false;
466- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
467- if (orig_stmt)
468- {
469- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
470- if (orig_stmt_info
471- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
472- remove_stmt_p = true;
473- }
474+ /* Check if this statement has a related "pattern stmt"
475+ (introduced by the vectorizer during the pattern recognition
476+ pass). Free pattern's stmt_vec_info. */
477+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
478+ && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)))
479+ free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
480
481 /* Free stmt_vec_info. */
482 free_stmt_vec_info (stmt);
483+ }
484
485- /* Remove dead "pattern stmts". */
486- if (remove_stmt_p)
487- gsi_remove (&si, true);
488- }
489 gsi_next (&si);
490 }
491 }
492@@ -4262,6 +4270,25 @@
493 return false;
494 }
495
496+ /* In case of widening multiplication by a constant, we update the type
497+ of the constant to be the type of the other operand. We check that the
498+ constant fits the type in the pattern recognition pass. */
499+ if (code == DOT_PROD_EXPR
500+ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
501+ {
502+ if (TREE_CODE (ops[0]) == INTEGER_CST)
503+ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
504+ else if (TREE_CODE (ops[1]) == INTEGER_CST)
505+ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
506+ else
507+ {
508+ if (vect_print_dump_info (REPORT_DETAILS))
509+ fprintf (vect_dump, "invalid types in dot-prod");
510+
511+ return false;
512+ }
513+ }
514+
515 if (!vec_stmt) /* transformation not required. */
516 {
517 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
518@@ -4796,7 +4823,7 @@
519
520 for (si = gsi_start_bb (bb); !gsi_end_p (si);)
521 {
522- gimple stmt = gsi_stmt (si);
523+ gimple stmt = gsi_stmt (si), pattern_stmt;
524 bool is_store;
525
526 if (vect_print_dump_info (REPORT_DETAILS))
527@@ -4821,14 +4848,25 @@
528
529 if (!STMT_VINFO_RELEVANT_P (stmt_info)
530 && !STMT_VINFO_LIVE_P (stmt_info))
531- {
532- gsi_next (&si);
533- continue;
534+ {
535+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
536+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
537+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
538+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
539+ {
540+ stmt = pattern_stmt;
541+ stmt_info = vinfo_for_stmt (stmt);
542+ }
543+ else
544+ {
545+ gsi_next (&si);
546+ continue;
547+ }
548 }
549
550 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
551- nunits =
552- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
553+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
554+ STMT_VINFO_VECTYPE (stmt_info));
555 if (!STMT_SLP_TYPE (stmt_info)
556 && nunits != (unsigned int) vectorization_factor
557 && vect_print_dump_info (REPORT_DETAILS))
558
559=== modified file 'gcc/tree-vect-patterns.c'
560--- old/gcc/tree-vect-patterns.c 2010-12-02 11:47:12 +0000
561+++ new/gcc/tree-vect-patterns.c 2011-06-22 12:10:44 +0000
562@@ -38,16 +38,11 @@
563 #include "recog.h"
564 #include "diagnostic-core.h"
565
566-/* Function prototypes */
567-static void vect_pattern_recog_1
568- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
569-static bool widened_name_p (tree, gimple, tree *, gimple *);
570-
571 /* Pattern recognition functions */
572-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
573-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
574-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
575-static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
576+static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
577+static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
578+static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
579+static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
580 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
581 vect_recog_widen_mult_pattern,
582 vect_recog_widen_sum_pattern,
583@@ -61,10 +56,12 @@
584 is a result of a type-promotion, such that:
585 DEF_STMT: NAME = NOP (name0)
586 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
587-*/
588+ If CHECK_SIGN is TRUE, check that either both types are signed or both are
589+ unsigned. */
590
591 static bool
592-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
593+widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
594+ bool check_sign)
595 {
596 tree dummy;
597 gimple dummy_gimple;
598@@ -98,7 +95,7 @@
599
600 *half_type = TREE_TYPE (oprnd0);
601 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
602- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
603+ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
604 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
605 return false;
606
607@@ -168,12 +165,12 @@
608 inner-loop nested in an outer-loop that us being vectorized). */
609
610 static gimple
611-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
612+vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
613 {
614 gimple stmt;
615 tree oprnd0, oprnd1;
616 tree oprnd00, oprnd01;
617- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
618+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
619 tree type, half_type;
620 gimple pattern_stmt;
621 tree prod_type;
622@@ -181,10 +178,10 @@
623 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
624 tree var, rhs;
625
626- if (!is_gimple_assign (last_stmt))
627+ if (!is_gimple_assign (*last_stmt))
628 return NULL;
629
630- type = gimple_expr_type (last_stmt);
631+ type = gimple_expr_type (*last_stmt);
632
633 /* Look for the following pattern
634 DX = (TYPE1) X;
635@@ -210,7 +207,7 @@
636 /* Starting from LAST_STMT, follow the defs of its uses in search
637 of the above pattern. */
638
639- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
640+ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
641 return NULL;
642
643 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
644@@ -231,14 +228,14 @@
645
646 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
647 return NULL;
648- oprnd0 = gimple_assign_rhs1 (last_stmt);
649- oprnd1 = gimple_assign_rhs2 (last_stmt);
650+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
651+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
652 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
653 || !types_compatible_p (TREE_TYPE (oprnd1), type))
654 return NULL;
655- stmt = last_stmt;
656+ stmt = *last_stmt;
657
658- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
659+ if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
660 {
661 stmt = def_stmt;
662 oprnd0 = gimple_assign_rhs1 (stmt);
663@@ -293,10 +290,10 @@
664 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
665 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
666 return NULL;
667- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
668+ if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
669 return NULL;
670 oprnd00 = gimple_assign_rhs1 (def_stmt);
671- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
672+ if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
673 return NULL;
674 oprnd01 = gimple_assign_rhs1 (def_stmt);
675 if (!types_compatible_p (half_type0, half_type1))
676@@ -322,7 +319,7 @@
677
678 /* We don't allow changing the order of the computation in the inner-loop
679 when doing outer-loop vectorization. */
680- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
681+ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
682
683 return pattern_stmt;
684 }
685@@ -342,24 +339,47 @@
686
687 where type 'TYPE' is at least double the size of type 'type'.
688
689- Input:
690-
691- * LAST_STMT: A stmt from which the pattern search begins. In the example,
692- when this function is called with S5, the pattern {S3,S4,S5} is be detected.
693-
694- Output:
695-
696- * TYPE_IN: The type of the input arguments to the pattern.
697-
698- * TYPE_OUT: The type of the output of this pattern.
699-
700- * Return value: A new stmt that will be used to replace the sequence of
701- stmts that constitute the pattern. In this case it will be:
702- WIDEN_MULT <a_t, b_t>
703-*/
704+ Also detect unsigned cases:
705+
706+ unsigned type a_t, b_t;
707+ unsigned TYPE u_prod_T;
708+ TYPE a_T, b_T, prod_T;
709+
710+ S1 a_t = ;
711+ S2 b_t = ;
712+ S3 a_T = (TYPE) a_t;
713+ S4 b_T = (TYPE) b_t;
714+ S5 prod_T = a_T * b_T;
715+ S6 u_prod_T = (unsigned TYPE) prod_T;
716+
717+ and multiplication by constants:
718+
719+ type a_t;
720+ TYPE a_T, prod_T;
721+
722+ S1 a_t = ;
723+ S3 a_T = (TYPE) a_t;
724+ S5 prod_T = a_T * CONST;
725+
726+ Input:
727+
728+ * LAST_STMT: A stmt from which the pattern search begins. In the example,
729+ when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
730+ detected.
731+
732+ Output:
733+
734+ * TYPE_IN: The type of the input arguments to the pattern.
735+
736+ * TYPE_OUT: The type of the output of this pattern.
737+
738+ * Return value: A new stmt that will be used to replace the sequence of
739+ stmts that constitute the pattern. In this case it will be:
740+ WIDEN_MULT <a_t, b_t>
741+ */
742
743 static gimple
744-vect_recog_widen_mult_pattern (gimple last_stmt,
745+vect_recog_widen_mult_pattern (gimple *last_stmt,
746 tree *type_in,
747 tree *type_out)
748 {
749@@ -367,39 +387,112 @@
750 tree oprnd0, oprnd1;
751 tree type, half_type0, half_type1;
752 gimple pattern_stmt;
753- tree vectype, vectype_out;
754+ tree vectype, vectype_out = NULL_TREE;
755 tree dummy;
756 tree var;
757 enum tree_code dummy_code;
758 int dummy_int;
759 VEC (tree, heap) *dummy_vec;
760+ bool op0_ok, op1_ok;
761
762- if (!is_gimple_assign (last_stmt))
763+ if (!is_gimple_assign (*last_stmt))
764 return NULL;
765
766- type = gimple_expr_type (last_stmt);
767+ type = gimple_expr_type (*last_stmt);
768
769 /* Starting from LAST_STMT, follow the defs of its uses in search
770 of the above pattern. */
771
772- if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
773+ if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
774 return NULL;
775
776- oprnd0 = gimple_assign_rhs1 (last_stmt);
777- oprnd1 = gimple_assign_rhs2 (last_stmt);
778+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
779+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
780 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
781 || !types_compatible_p (TREE_TYPE (oprnd1), type))
782 return NULL;
783
784- /* Check argument 0 */
785- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
786- return NULL;
787- oprnd0 = gimple_assign_rhs1 (def_stmt0);
788-
789- /* Check argument 1 */
790- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
791- return NULL;
792- oprnd1 = gimple_assign_rhs1 (def_stmt1);
793+ /* Check argument 0. */
794+ op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
795+ /* Check argument 1. */
796+ op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
797+
798+ /* In case of multiplication by a constant one of the operands may not match
799+ the pattern, but not both. */
800+ if (!op0_ok && !op1_ok)
801+ return NULL;
802+
803+ if (op0_ok && op1_ok)
804+ {
805+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
806+ oprnd1 = gimple_assign_rhs1 (def_stmt1);
807+ }
808+ else if (!op0_ok)
809+ {
810+ if (CONSTANT_CLASS_P (oprnd0)
811+ && TREE_CODE (half_type1) == INTEGER_TYPE
812+ && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
813+ && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
814+ {
815+ /* OPRND0 is a constant of HALF_TYPE1. */
816+ half_type0 = half_type1;
817+ oprnd1 = gimple_assign_rhs1 (def_stmt1);
818+ }
819+ else
820+ return NULL;
821+ }
822+ else if (!op1_ok)
823+ {
824+ if (CONSTANT_CLASS_P (oprnd1)
825+ && TREE_CODE (half_type0) == INTEGER_TYPE
826+ && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
827+ && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
828+ {
829+ /* OPRND1 is a constant of HALF_TYPE0. */
830+ half_type1 = half_type0;
831+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
832+ }
833+ else
834+ return NULL;
835+ }
836+
837+ /* Handle unsigned case. Look for
838+ S6 u_prod_T = (unsigned TYPE) prod_T;
839+ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
840+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
841+ {
842+ tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
843+ imm_use_iterator imm_iter;
844+ use_operand_p use_p;
845+ int nuses = 0;
846+ gimple use_stmt = NULL;
847+ tree use_type;
848+
849+ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
850+ return NULL;
851+
852+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
853+ {
854+ if (is_gimple_debug (USE_STMT (use_p)))
855+ continue;
856+ use_stmt = USE_STMT (use_p);
857+ nuses++;
858+ }
859+
860+ if (nuses != 1 || !is_gimple_assign (use_stmt)
861+ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
862+ return NULL;
863+
864+ use_lhs = gimple_assign_lhs (use_stmt);
865+ use_type = TREE_TYPE (use_lhs);
866+ if (!INTEGRAL_TYPE_P (use_type)
867+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
868+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
869+ return NULL;
870+
871+ type = use_type;
872+ *last_stmt = use_stmt;
873+ }
874
875 if (!types_compatible_p (half_type0, half_type1))
876 return NULL;
877@@ -413,7 +506,7 @@
878 vectype_out = get_vectype_for_scalar_type (type);
879 if (!vectype
880 || !vectype_out
881- || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
882+ || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
883 vectype_out, vectype,
884 &dummy, &dummy, &dummy_code,
885 &dummy_code, &dummy_int, &dummy_vec))
886@@ -462,16 +555,16 @@
887 */
888
889 static gimple
890-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
891+vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
892 {
893 tree fn, base, exp = NULL;
894 gimple stmt;
895 tree var;
896
897- if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
898+ if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
899 return NULL;
900
901- fn = gimple_call_fndecl (last_stmt);
902+ fn = gimple_call_fndecl (*last_stmt);
903 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
904 return NULL;
905
906@@ -481,8 +574,8 @@
907 case BUILT_IN_POWI:
908 case BUILT_IN_POWF:
909 case BUILT_IN_POW:
910- base = gimple_call_arg (last_stmt, 0);
911- exp = gimple_call_arg (last_stmt, 1);
912+ base = gimple_call_arg (*last_stmt, 0);
913+ exp = gimple_call_arg (*last_stmt, 1);
914 if (TREE_CODE (exp) != REAL_CST
915 && TREE_CODE (exp) != INTEGER_CST)
916 return NULL;
917@@ -574,21 +667,21 @@
918 inner-loop nested in an outer-loop that us being vectorized). */
919
920 static gimple
921-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
922+vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
923 {
924 gimple stmt;
925 tree oprnd0, oprnd1;
926- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
927+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
928 tree type, half_type;
929 gimple pattern_stmt;
930 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
931 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
932 tree var;
933
934- if (!is_gimple_assign (last_stmt))
935+ if (!is_gimple_assign (*last_stmt))
936 return NULL;
937
938- type = gimple_expr_type (last_stmt);
939+ type = gimple_expr_type (*last_stmt);
940
941 /* Look for the following pattern
942 DX = (TYPE) X;
943@@ -600,25 +693,25 @@
944 /* Starting from LAST_STMT, follow the defs of its uses in search
945 of the above pattern. */
946
947- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
948+ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
949 return NULL;
950
951 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
952 return NULL;
953
954- oprnd0 = gimple_assign_rhs1 (last_stmt);
955- oprnd1 = gimple_assign_rhs2 (last_stmt);
956+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
957+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
958 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
959 || !types_compatible_p (TREE_TYPE (oprnd1), type))
960 return NULL;
961
962- /* So far so good. Since last_stmt was detected as a (summation) reduction,
963+ /* So far so good. Since *last_stmt was detected as a (summation) reduction,
964 we know that oprnd1 is the reduction variable (defined by a loop-header
965 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
966 Left to check that oprnd0 is defined by a cast from type 'type' to type
967 'TYPE'. */
968
969- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
970+ if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
971 return NULL;
972
973 oprnd0 = gimple_assign_rhs1 (stmt);
974@@ -639,7 +732,7 @@
975
976 /* We don't allow changing the order of the computation in the inner-loop
977 when doing outer-loop vectorization. */
978- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
979+ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
980
981 return pattern_stmt;
982 }
983@@ -669,23 +762,27 @@
984
985 static void
986 vect_pattern_recog_1 (
987- gimple (* vect_recog_func) (gimple, tree *, tree *),
988+ gimple (* vect_recog_func) (gimple *, tree *, tree *),
989 gimple_stmt_iterator si)
990 {
991 gimple stmt = gsi_stmt (si), pattern_stmt;
992- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
993+ stmt_vec_info stmt_info;
994 stmt_vec_info pattern_stmt_info;
995- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
996+ loop_vec_info loop_vinfo;
997 tree pattern_vectype;
998 tree type_in, type_out;
999 enum tree_code code;
1000 int i;
1001 gimple next;
1002
1003- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
1004+ pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
1005 if (!pattern_stmt)
1006 return;
1007
1008+ si = gsi_for_stmt (stmt);
1009+ stmt_info = vinfo_for_stmt (stmt);
1010+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1011+
1012 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1013 {
1014 /* No need to check target support (already checked by the pattern
1015@@ -736,9 +833,9 @@
1016 }
1017
1018 /* Mark the stmts that are involved in the pattern. */
1019- gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
1020 set_vinfo_for_stmt (pattern_stmt,
1021 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1022+ gimple_set_bb (pattern_stmt, gimple_bb (stmt));
1023 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1024
1025 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
1026@@ -761,8 +858,8 @@
1027 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1028 computation idioms.
1029
1030- Output - for each computation idiom that is detected we insert a new stmt
1031- that provides the same functionality and that can be vectorized. We
1032+ Output - for each computation idiom that is detected we create a new stmt
1033+ that provides the same functionality and that can be vectorized. We
1034 also record some information in the struct_stmt_info of the relevant
1035 stmts, as explained below:
1036
1037@@ -777,52 +874,48 @@
1038 S5: ... = ..use(a_0).. - - -
1039
1040 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1041- represented by a single stmt. We then:
1042- - create a new stmt S6 that will replace the pattern.
1043- - insert the new stmt S6 before the last stmt in the pattern
1044+ represented by a single stmt. We then:
1045+ - create a new stmt S6 equivalent to the pattern (the stmt is not
1046+ inserted into the code)
1047 - fill in the STMT_VINFO fields as follows:
1048
1049 in_pattern_p related_stmt vec_stmt
1050 S1: a_i = .... - - -
1051 S2: a_2 = ..use(a_i).. - - -
1052 S3: a_1 = ..use(a_2).. - - -
1053- > S6: a_new = .... - S4 -
1054 S4: a_0 = ..use(a_1).. true S6 -
1055+ '---> S6: a_new = .... - S4 -
1056 S5: ... = ..use(a_0).. - - -
1057
1058 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1059- to each other through the RELATED_STMT field).
1060+ to each other through the RELATED_STMT field).
1061
1062 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1063 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1064 remain irrelevant unless used by stmts other than S4.
1065
1066 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1067- (because they are marked as irrelevant). It will vectorize S6, and record
1068+ (because they are marked as irrelevant). It will vectorize S6, and record
1069 a pointer to the new vector stmt VS6 both from S6 (as usual), and also
1070- from S4. We do that so that when we get to vectorizing stmts that use the
1071+ from S4. We do that so that when we get to vectorizing stmts that use the
1072 def of S4 (like S5 that uses a_0), we'll know where to take the relevant
1073- vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
1074+ vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
1075
1076 in_pattern_p related_stmt vec_stmt
1077 S1: a_i = .... - - -
1078 S2: a_2 = ..use(a_i).. - - -
1079 S3: a_1 = ..use(a_2).. - - -
1080 > VS6: va_new = .... - - -
1081- S6: a_new = .... - S4 VS6
1082 S4: a_0 = ..use(a_1).. true S6 VS6
1083+ '---> S6: a_new = .... - S4 VS6
1084 > VS5: ... = ..vuse(va_new).. - - -
1085 S5: ... = ..use(a_0).. - - -
1086
1087- DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
1088+ DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1089 elsewhere), and we'll end up with:
1090
1091 VS6: va_new = ....
1092- VS5: ... = ..vuse(va_new)..
1093-
1094- If vectorization does not succeed, DCE will clean S6 away (its def is
1095- not used), and we'll end up with the original sequence.
1096-*/
1097+ VS5: ... = ..vuse(va_new).. */
1098
1099 void
1100 vect_pattern_recog (loop_vec_info loop_vinfo)
1101@@ -832,7 +925,7 @@
1102 unsigned int nbbs = loop->num_nodes;
1103 gimple_stmt_iterator si;
1104 unsigned int i, j;
1105- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
1106+ gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
1107
1108 if (vect_print_dump_info (REPORT_DETAILS))
1109 fprintf (vect_dump, "=== vect_pattern_recog ===");
1110
1111=== modified file 'gcc/tree-vect-slp.c'
1112--- old/gcc/tree-vect-slp.c 2011-05-05 15:43:06 +0000
1113+++ new/gcc/tree-vect-slp.c 2011-06-19 10:59:13 +0000
1114@@ -2510,6 +2510,8 @@
1115 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
1116 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
1117 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
1118+ else if (is_pattern_stmt_p (stmt_info))
1119+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1120 else
1121 si = gsi_for_stmt (stmt);
1122
1123
1124=== modified file 'gcc/tree-vect-stmts.c'
1125--- old/gcc/tree-vect-stmts.c 2011-06-02 12:12:00 +0000
1126+++ new/gcc/tree-vect-stmts.c 2011-06-22 06:21:13 +0000
1127@@ -605,15 +605,76 @@
1128 break;
1129 }
1130
1131- FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1132- {
1133- tree op = USE_FROM_PTR (use_p);
1134- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
1135- {
1136- VEC_free (gimple, heap, worklist);
1137- return false;
1138- }
1139- }
1140+ if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1141+ {
1142+ /* Pattern statements are not inserted into the code, so
1143+ FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
1144+ have to scan the RHS or function arguments instead. */
1145+ if (is_gimple_assign (stmt))
1146+ {
1147+ tree rhs = gimple_assign_rhs1 (stmt);
1148+ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1149+ == GIMPLE_SINGLE_RHS)
1150+ {
1151+ unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1
1152+ (stmt));
1153+ for (i = 0; i < op_num; i++)
1154+ {
1155+ tree op = TREE_OPERAND (rhs, i);
1156+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1157+ &worklist))
1158+ {
1159+ VEC_free (gimple, heap, worklist);
1160+ return false;
1161+ }
1162+ }
1163+ }
1164+ else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1165+ == GIMPLE_BINARY_RHS)
1166+ {
1167+ tree op = gimple_assign_rhs1 (stmt);
1168+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1169+ &worklist))
1170+ {
1171+ VEC_free (gimple, heap, worklist);
1172+ return false;
1173+ }
1174+ op = gimple_assign_rhs2 (stmt);
1175+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1176+ &worklist))
1177+ {
1178+ VEC_free (gimple, heap, worklist);
1179+ return false;
1180+ }
1181+ }
1182+ else
1183+ return false;
1184+ }
1185+ else if (is_gimple_call (stmt))
1186+ {
1187+ for (i = 0; i < gimple_call_num_args (stmt); i++)
1188+ {
1189+ tree arg = gimple_call_arg (stmt, i);
1190+ if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
1191+ &worklist))
1192+ {
1193+ VEC_free (gimple, heap, worklist);
1194+ return false;
1195+ }
1196+ }
1197+ }
1198+ }
1199+ else
1200+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1201+ {
1202+ tree op = USE_FROM_PTR (use_p);
1203+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1204+ &worklist))
1205+ {
1206+ VEC_free (gimple, heap, worklist);
1207+ return false;
1208+ }
1209+ }
1210 } /* while worklist */
1211
1212 VEC_free (gimple, heap, worklist);
1213@@ -1405,6 +1466,7 @@
1214 VEC(tree, heap) *vargs = NULL;
1215 enum { NARROW, NONE, WIDEN } modifier;
1216 size_t i, nargs;
1217+ tree lhs;
1218
1219 /* FORNOW: unsupported in basic block SLP. */
1220 gcc_assert (loop_vinfo);
1221@@ -1542,7 +1604,7 @@
1222 /** Transform. **/
1223
1224 if (vect_print_dump_info (REPORT_DETAILS))
1225- fprintf (vect_dump, "transform operation.");
1226+ fprintf (vect_dump, "transform call.");
1227
1228 /* Handle def. */
1229 scalar_dest = gimple_call_lhs (stmt);
1230@@ -1661,8 +1723,11 @@
1231 rhs of the statement with something harmless. */
1232
1233 type = TREE_TYPE (scalar_dest);
1234- new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1235- build_zero_cst (type));
1236+ if (is_pattern_stmt_p (stmt_info))
1237+ lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1238+ else
1239+ lhs = gimple_call_lhs (stmt);
1240+ new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1241 set_vinfo_for_stmt (new_stmt, stmt_info);
1242 set_vinfo_for_stmt (stmt, NULL);
1243 STMT_VINFO_STMT (stmt_info) = new_stmt;
1244@@ -3231,6 +3296,33 @@
1245 fprintf (vect_dump, "use not simple.");
1246 return false;
1247 }
1248+
1249+ op_type = TREE_CODE_LENGTH (code);
1250+ if (op_type == binary_op)
1251+ {
1252+ bool ok;
1253+
1254+ op1 = gimple_assign_rhs2 (stmt);
1255+ if (code == WIDEN_MULT_EXPR)
1256+ {
1257+ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
1258+ OP1. */
1259+ if (CONSTANT_CLASS_P (op0))
1260+ ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
1261+ &def_stmt, &def, &dt[1], &vectype_in);
1262+ else
1263+ ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
1264+ &dt[1]);
1265+
1266+ if (!ok)
1267+ {
1268+ if (vect_print_dump_info (REPORT_DETAILS))
1269+ fprintf (vect_dump, "use not simple.");
1270+ return false;
1271+ }
1272+ }
1273+ }
1274+
1275 /* If op0 is an external or constant def use a vector type with
1276 the same size as the output vector type. */
1277 if (!vectype_in)
1278@@ -3263,18 +3355,6 @@
1279
1280 gcc_assert (ncopies >= 1);
1281
1282- op_type = TREE_CODE_LENGTH (code);
1283- if (op_type == binary_op)
1284- {
1285- op1 = gimple_assign_rhs2 (stmt);
1286- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
1287- {
1288- if (vect_print_dump_info (REPORT_DETAILS))
1289- fprintf (vect_dump, "use not simple.");
1290- return false;
1291- }
1292- }
1293-
1294 /* Supportable by target? */
1295 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
1296 &decl1, &decl2, &code1, &code2,
1297@@ -3300,6 +3380,14 @@
1298 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
1299 ncopies);
1300
1301+ if (code == WIDEN_MULT_EXPR)
1302+ {
1303+ if (CONSTANT_CLASS_P (op0))
1304+ op0 = fold_convert (TREE_TYPE (op1), op0);
1305+ else if (CONSTANT_CLASS_P (op1))
1306+ op1 = fold_convert (TREE_TYPE (op0), op1);
1307+ }
1308+
1309 /* Handle def. */
1310 /* In case of multi-step promotion, we first generate promotion operations
1311 to the intermediate types, and then from that types to the final one.
1312@@ -4824,10 +4912,26 @@
1313 if (!STMT_VINFO_RELEVANT_P (stmt_info)
1314 && !STMT_VINFO_LIVE_P (stmt_info))
1315 {
1316- if (vect_print_dump_info (REPORT_DETAILS))
1317- fprintf (vect_dump, "irrelevant.");
1318+ gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1319+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1320+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
1321+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
1322+ {
1323+ stmt = pattern_stmt;
1324+ stmt_info = vinfo_for_stmt (pattern_stmt);
1325+ if (vect_print_dump_info (REPORT_DETAILS))
1326+ {
1327+ fprintf (vect_dump, "==> examining pattern statement: ");
1328+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1329+ }
1330+ }
1331+ else
1332+ {
1333+ if (vect_print_dump_info (REPORT_DETAILS))
1334+ fprintf (vect_dump, "irrelevant.");
1335
1336- return true;
1337+ return true;
1338+ }
1339 }
1340
1341 switch (STMT_VINFO_DEF_TYPE (stmt_info))
1342
1343=== modified file 'gcc/tree-vectorizer.h'
1344--- old/gcc/tree-vectorizer.h 2011-05-05 15:43:06 +0000
1345+++ new/gcc/tree-vectorizer.h 2011-06-19 10:59:13 +0000
1346@@ -884,7 +884,7 @@
1347 /* Pattern recognition functions.
1348 Additional pattern recognition functions can (and will) be added
1349 in the future. */
1350-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
1351+typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
1352 #define NUM_PATTERNS 4
1353 void vect_pattern_recog (loop_vec_info);
1354
1355