Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch')
-rw-r--r--  toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch  1281
1 files changed, 0 insertions, 1281 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch
deleted file mode 100644
index 3a149231f..000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch
+++ /dev/null
@@ -1,1281 +0,0 @@
2011-07-11  Ira Rosen  <ira.rosen@linaro.org>

Backport from FSF:
2011-06-16  Ira Rosen  <ira.rosen@linaro.org>

gcc/
* tree-vectorizer.h (vect_recog_func_ptr): Change the first
argument to be a VEC of statements.
* tree-vect-loop.c (vect_determine_vectorization_factor): Remove the
assert that pattern statements have to have their vector type set.
* tree-vect-patterns.c (vect_recog_widen_sum_pattern):
Change the first argument to be a VEC of statements. Update
documentation.
(vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise.
(vect_handle_widen_mult_by_const): New function.
(vect_recog_widen_mult_pattern): Change the first argument to be a
VEC of statements. Update documentation. Check that the constant is
INTEGER_CST. Support multiplication by a constant that fits an
intermediate type - call vect_handle_widen_mult_by_const.
(vect_pattern_recog_1): Update vect_recog_func_ptr and its
call. Handle additional pattern statements if necessary.

gcc/testsuite/
* gcc.dg/vect/vect-widen-mult-half-u8.c: New test.

and
2011-06-30  Ira Rosen  <ira.rosen@linaro.org>

gcc/
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
both pattern and original statements if necessary.
(vect_transform_loop): Likewise.
* tree-vect-patterns.c (vect_pattern_recog): Update documentation.
* tree-vect-stmts.c (vect_mark_relevant): Add new argument.
Mark the pattern statement only if the original statement doesn't
have its own uses.
(process_use): Call vect_mark_relevant with additional parameter.
(vect_mark_stmts_to_be_vectorized): Likewise.
(vect_get_vec_def_for_operand): Use vectorized pattern statement.
(vect_analyze_stmt): Handle both pattern and original statements
if necessary.
(vect_transform_stmt): Don't store vectorized pattern statement
in the original statement.
(vect_is_simple_use_1): Use related pattern statement only if the
original statement is irrelevant.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise.

gcc/testsuite/
* gcc.dg/vect/slp-widen-mult-half.c: New test.
* gcc.dg/vect/vect-widen-mult-half.c: New test.

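(Illustrative note, not part of the deleted patch: the C sketch below shows the source idiom this backport targets. The operand is a narrow unsigned char, the result is int, and the constant 32470 does not fit the operand's type but does fit a 16-bit intermediate type, which is the case the new vect_handle_widen_mult_by_const support handles. The function name and array names are made up for illustration.)

/* Sketch only, assuming an 8-bit input type, a 32-bit result type and a
   16-bit intermediate type; not taken from the patch.  */
#define N 32
unsigned char in[N];
int out[N];

void
widen_mult_by_const_sketch (void)
{
  int i;

  /* 32470 is too large for unsigned char but fits a 16-bit type, so
     after this patch the vectorizer can still use a widening multiply.  */
  for (i = 0; i < N; i++)
    out[i] = in[i] * 32470;
}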
52=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c'
53Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
54===================================================================
55--- /dev/null 1970-01-01 00:00:00.000000000 +0000
56+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800
57@@ -0,0 +1,52 @@
58+/* { dg-require-effective-target vect_int } */
59+
60+#include "tree-vect.h"
61+#include <stdlib.h>
62+
63+#define N 32
64+#define COEF 32470
65+#define COEF2 324700
66+
67+unsigned char in[N];
68+int out[N];
69+int out2[N];
70+
71+__attribute__ ((noinline)) void
72+foo ()
73+{
74+ int i;
75+
76+ for (i = 0; i < N/2; i++)
77+ {
78+ out[2*i] = in[2*i] * COEF;
79+ out2[2*i] = in[2*i] + COEF2;
80+ out[2*i+1] = in[2*i+1] * COEF;
81+ out2[2*i+1] = in[2*i+1] + COEF2;
82+ }
83+}
84+
85+int main (void)
86+{
87+ int i;
88+
89+ for (i = 0; i < N; i++)
90+ {
91+ in[i] = i;
92+ __asm__ volatile ("");
93+ }
94+
95+ foo ();
96+
97+ for (i = 0; i < N; i++)
98+ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
99+ abort ();
100+
101+ return 0;
102+}
103+
104+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
105+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
106+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
107+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
108+/* { dg-final { cleanup-tree-dump "vect" } } */
109+
110Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
111===================================================================
112--- /dev/null 1970-01-01 00:00:00.000000000 +0000
113+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800
114@@ -0,0 +1,59 @@
115+/* { dg-require-effective-target vect_int } */
116+
117+#include "tree-vect.h"
118+#include <stdlib.h>
119+
120+#define N 32
121+#define COEF 32470
122+
123+unsigned char in[N];
124+int out[N];
125+
126+__attribute__ ((noinline)) void
127+foo ()
128+{
129+ int i;
130+
131+ for (i = 0; i < N; i++)
132+ out[i] = in[i] * COEF;
133+}
134+
135+__attribute__ ((noinline)) void
136+bar ()
137+{
138+ int i;
139+
140+ for (i = 0; i < N; i++)
141+ out[i] = COEF * in[i];
142+}
143+
144+int main (void)
145+{
146+ int i;
147+
148+ for (i = 0; i < N; i++)
149+ {
150+ in[i] = i;
151+ __asm__ volatile ("");
152+ }
153+
154+ foo ();
155+
156+ for (i = 0; i < N; i++)
157+ if (out[i] != in[i] * COEF)
158+ abort ();
159+
160+ bar ();
161+
162+ for (i = 0; i < N; i++)
163+ if (out[i] != in[i] * COEF)
164+ abort ();
165+
166+ return 0;
167+}
168+
169+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
170+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
171+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
172+/* { dg-final { cleanup-tree-dump "vect" } } */
173+
174Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
175===================================================================
176--- /dev/null 1970-01-01 00:00:00.000000000 +0000
177+++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800
178@@ -0,0 +1,49 @@
179+/* { dg-require-effective-target vect_int } */
180+
181+#include "tree-vect.h"
182+#include <stdlib.h>
183+
184+#define N 32
185+#define COEF 32470
186+#define COEF2 324700
187+
188+unsigned char in[N];
189+int out[N];
190+int out2[N];
191+
192+__attribute__ ((noinline)) void
193+foo (int a)
194+{
195+ int i;
196+
197+ for (i = 0; i < N; i++)
198+ {
199+ out[i] = in[i] * COEF;
200+ out2[i] = in[i] + a;
201+ }
202+}
203+
204+int main (void)
205+{
206+ int i;
207+
208+ for (i = 0; i < N; i++)
209+ {
210+ in[i] = i;
211+ __asm__ volatile ("");
212+ }
213+
214+ foo (COEF2);
215+
216+ for (i = 0; i < N; i++)
217+ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
218+ abort ();
219+
220+ return 0;
221+}
222+
223+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
224+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
225+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
226+/* { dg-final { cleanup-tree-dump "vect" } } */
227+
228Index: gcc-4_6-branch/gcc/tree-vect-loop.c
229===================================================================
230--- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800
231+++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800
232@@ -181,6 +181,8 @@
233 stmt_vec_info stmt_info;
234 int i;
235 HOST_WIDE_INT dummy;
236+ gimple stmt, pattern_stmt = NULL;
237+ bool analyze_pattern_stmt = false;
238
239 if (vect_print_dump_info (REPORT_DETAILS))
240 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
241@@ -241,12 +243,20 @@
242 }
243 }
244
245- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
246+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
247 {
248- tree vf_vectype;
249- gimple stmt = gsi_stmt (si), pattern_stmt;
250- stmt_info = vinfo_for_stmt (stmt);
251+ tree vf_vectype;
252+
253+ if (analyze_pattern_stmt)
254+ {
255+ stmt = pattern_stmt;
256+ analyze_pattern_stmt = false;
257+ }
258+ else
259+ stmt = gsi_stmt (si);
260
261+ stmt_info = vinfo_for_stmt (stmt);
262+
263 if (vect_print_dump_info (REPORT_DETAILS))
264 {
265 fprintf (vect_dump, "==> examining statement: ");
266@@ -276,10 +286,17 @@
267 {
268 if (vect_print_dump_info (REPORT_DETAILS))
269 fprintf (vect_dump, "skip.");
270+ gsi_next (&si);
271 continue;
272 }
273 }
274
275+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
276+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
277+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
278+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
279+ analyze_pattern_stmt = true;
280+
281 if (gimple_get_lhs (stmt) == NULL_TREE)
282 {
283 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
284@@ -311,9 +328,7 @@
285 }
286 else
287 {
288- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
289- && !is_pattern_stmt_p (stmt_info));
290-
291+ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
292 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
293 if (vect_print_dump_info (REPORT_DETAILS))
294 {
295@@ -385,6 +400,9 @@
296 if (!vectorization_factor
297 || (nunits > vectorization_factor))
298 vectorization_factor = nunits;
299+
300+ if (!analyze_pattern_stmt)
301+ gsi_next (&si);
302 }
303 }
304
305@@ -4740,6 +4758,8 @@
306 tree cond_expr = NULL_TREE;
307 gimple_seq cond_expr_stmt_list = NULL;
308 bool do_peeling_for_loop_bound;
309+ gimple stmt, pattern_stmt;
310+ bool transform_pattern_stmt = false;
311
312 if (vect_print_dump_info (REPORT_DETAILS))
313 fprintf (vect_dump, "=== vec_transform_loop ===");
314@@ -4827,11 +4847,19 @@
315 }
316 }
317
318- for (si = gsi_start_bb (bb); !gsi_end_p (si);)
319+ pattern_stmt = NULL;
320+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
321 {
322- gimple stmt = gsi_stmt (si), pattern_stmt;
323 bool is_store;
324
325+ if (transform_pattern_stmt)
326+ {
327+ stmt = pattern_stmt;
328+ transform_pattern_stmt = false;
329+ }
330+ else
331+ stmt = gsi_stmt (si);
332+
333 if (vect_print_dump_info (REPORT_DETAILS))
334 {
335 fprintf (vect_dump, "------>vectorizing statement: ");
336@@ -4869,6 +4897,11 @@
337 continue;
338 }
339 }
340+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
341+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
342+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
343+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
344+ transform_pattern_stmt = true;
345
346 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
347 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
348@@ -4897,8 +4930,9 @@
349 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
350 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
351 {
352- gsi_next (&si);
353- continue;
354+ if (!transform_pattern_stmt)
355+ gsi_next (&si);
356+ continue;
357 }
358 }
359
360@@ -4917,7 +4951,7 @@
361 the chain. */
362 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
363 gsi_remove (&si, true);
364- continue;
365+ continue;
366 }
367 else
368 {
369@@ -4927,7 +4961,9 @@
370 continue;
371 }
372 }
373- gsi_next (&si);
374+
375+ if (!transform_pattern_stmt)
376+ gsi_next (&si);
377 } /* stmts in BB */
378 } /* BBs in loop */
379
380Index: gcc-4_6-branch/gcc/tree-vect-patterns.c
381===================================================================
382--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800
383+++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800
384@@ -39,10 +39,13 @@
385 #include "diagnostic-core.h"
386
387 /* Pattern recognition functions */
388-static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
389-static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
390-static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
391-static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
392+static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
393+ tree *);
394+static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
395+ tree *);
396+static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
397+ tree *);
398+static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
399 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
400 vect_recog_widen_mult_pattern,
401 vect_recog_widen_sum_pattern,
402@@ -142,9 +145,9 @@
403
404 Input:
405
406- * LAST_STMT: A stmt from which the pattern search begins. In the example,
407- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
408- detected.
409+ * STMTS: Contains a stmt from which the pattern search begins. In the
410+ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
411+ will be detected.
412
413 Output:
414
415@@ -165,12 +168,13 @@
416 inner-loop nested in an outer-loop that us being vectorized). */
417
418 static gimple
419-vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
420+vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
421+ tree *type_out)
422 {
423- gimple stmt;
424+ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
425 tree oprnd0, oprnd1;
426 tree oprnd00, oprnd01;
427- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
428+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
429 tree type, half_type;
430 gimple pattern_stmt;
431 tree prod_type;
432@@ -178,10 +182,10 @@
433 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
434 tree var, rhs;
435
436- if (!is_gimple_assign (*last_stmt))
437+ if (!is_gimple_assign (last_stmt))
438 return NULL;
439
440- type = gimple_expr_type (*last_stmt);
441+ type = gimple_expr_type (last_stmt);
442
443 /* Look for the following pattern
444 DX = (TYPE1) X;
445@@ -207,7 +211,7 @@
446 /* Starting from LAST_STMT, follow the defs of its uses in search
447 of the above pattern. */
448
449- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
450+ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
451 return NULL;
452
453 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
454@@ -228,12 +232,12 @@
455
456 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
457 return NULL;
458- oprnd0 = gimple_assign_rhs1 (*last_stmt);
459- oprnd1 = gimple_assign_rhs2 (*last_stmt);
460+ oprnd0 = gimple_assign_rhs1 (last_stmt);
461+ oprnd1 = gimple_assign_rhs2 (last_stmt);
462 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
463 || !types_compatible_p (TREE_TYPE (oprnd1), type))
464 return NULL;
465- stmt = *last_stmt;
466+ stmt = last_stmt;
467
468 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
469 {
470@@ -319,11 +323,79 @@
471
472 /* We don't allow changing the order of the computation in the inner-loop
473 when doing outer-loop vectorization. */
474- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
475+ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
476
477 return pattern_stmt;
478 }
479
480+/* Handle two cases of multiplication by a constant. The first one is when
481+ the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
482+ operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
483+ TYPE.
484+
485+ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
486+ HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
487+ TYPE), we can perform widen-mult from the intermediate type to TYPE and
488+ replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
489+
490+static bool
491+vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd,
492+ VEC (gimple, heap) **stmts, tree type,
493+ tree *half_type, gimple def_stmt)
494+{
495+ tree new_type, new_oprnd, tmp;
496+ gimple new_stmt;
497+
498+ if (int_fits_type_p (const_oprnd, *half_type))
499+ {
500+ /* CONST_OPRND is a constant of HALF_TYPE. */
501+ *oprnd = gimple_assign_rhs1 (def_stmt);
502+ return true;
503+ }
504+
505+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
506+ || !vinfo_for_stmt (def_stmt))
507+ return false;
508+
509+ /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
510+ a type 2 times bigger than HALF_TYPE. */
511+ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
512+ TYPE_UNSIGNED (type));
513+ if (!int_fits_type_p (const_oprnd, new_type))
514+ return false;
515+
516+ /* Use NEW_TYPE for widen_mult. */
517+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
518+ {
519+ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
520+ /* Check if the already created pattern stmt is what we need. */
521+ if (!is_gimple_assign (new_stmt)
522+ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
523+ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
524+ return false;
525+
526+ *oprnd = gimple_assign_lhs (new_stmt);
527+ }
528+ else
529+ {
530+ /* Create a_T = (NEW_TYPE) a_t; */
531+ *oprnd = gimple_assign_rhs1 (def_stmt);
532+ tmp = create_tmp_var (new_type, NULL);
533+ add_referenced_var (tmp);
534+ new_oprnd = make_ssa_name (tmp, NULL);
535+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
536+ NULL_TREE);
537+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
538+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
539+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
540+ *oprnd = new_oprnd;
541+ }
542+
543+ *half_type = new_type;
544+ return true;
545+}
546+
547+
548 /* Function vect_recog_widen_mult_pattern
549
550 Try to find the following pattern:
551@@ -361,28 +433,47 @@
552 S3 a_T = (TYPE) a_t;
553 S5 prod_T = a_T * CONST;
554
555- Input:
556+ A special case of multiplication by constants is when 'TYPE' is 4 times
557+ bigger than 'type', but CONST fits an intermediate type 2 times smaller
558+ than 'TYPE'. In that case we create an additional pattern stmt for S3
559+ to create a variable of the intermediate type, and perform widen-mult
560+ on the intermediate type as well:
561+
562+ type a_t;
563+ interm_type a_it;
564+ TYPE a_T, prod_T, prod_T';
565+
566+ S1 a_t = ;
567+ S3 a_T = (TYPE) a_t;
568+ '--> a_it = (interm_type) a_t;
569+ S5 prod_T = a_T * CONST;
570+ '--> prod_T' = a_it w* CONST;
571+
572+ Input/Output:
573
574- * LAST_STMT: A stmt from which the pattern search begins. In the example,
575- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
576- detected.
577+ * STMTS: Contains a stmt from which the pattern search begins. In the
578+ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
579+ is detected. In case of unsigned widen-mult, the original stmt (S5) is
580+ replaced with S6 in STMTS. In case of multiplication by a constant
581+ of an intermediate type (the last case above), STMTS also contains S3
582+ (inserted before S5).
583
584- Output:
585+ Output:
586
587- * TYPE_IN: The type of the input arguments to the pattern.
588+ * TYPE_IN: The type of the input arguments to the pattern.
589
590- * TYPE_OUT: The type of the output of this pattern.
591+ * TYPE_OUT: The type of the output of this pattern.
592
593- * Return value: A new stmt that will be used to replace the sequence of
594- stmts that constitute the pattern. In this case it will be:
595- WIDEN_MULT <a_t, b_t>
596- */
597+ * Return value: A new stmt that will be used to replace the sequence of
598+ stmts that constitute the pattern. In this case it will be:
599+ WIDEN_MULT <a_t, b_t>
600+*/
601
602 static gimple
603-vect_recog_widen_mult_pattern (gimple *last_stmt,
604- tree *type_in,
605- tree *type_out)
606+vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
607+ tree *type_in, tree *type_out)
608 {
609+ gimple last_stmt = VEC_pop (gimple, *stmts);
610 gimple def_stmt0, def_stmt1;
611 tree oprnd0, oprnd1;
612 tree type, half_type0, half_type1;
613@@ -395,27 +486,27 @@
614 VEC (tree, heap) *dummy_vec;
615 bool op0_ok, op1_ok;
616
617- if (!is_gimple_assign (*last_stmt))
618+ if (!is_gimple_assign (last_stmt))
619 return NULL;
620
621- type = gimple_expr_type (*last_stmt);
622+ type = gimple_expr_type (last_stmt);
623
624 /* Starting from LAST_STMT, follow the defs of its uses in search
625 of the above pattern. */
626
627- if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
628+ if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
629 return NULL;
630
631- oprnd0 = gimple_assign_rhs1 (*last_stmt);
632- oprnd1 = gimple_assign_rhs2 (*last_stmt);
633+ oprnd0 = gimple_assign_rhs1 (last_stmt);
634+ oprnd1 = gimple_assign_rhs2 (last_stmt);
635 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
636 || !types_compatible_p (TREE_TYPE (oprnd1), type))
637 return NULL;
638
639 /* Check argument 0. */
640- op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
641+ op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
642 /* Check argument 1. */
643- op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
644+ op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
645
646 /* In case of multiplication by a constant one of the operands may not match
647 the pattern, but not both. */
648@@ -429,29 +520,21 @@
649 }
650 else if (!op0_ok)
651 {
652- if (CONSTANT_CLASS_P (oprnd0)
653- && TREE_CODE (half_type1) == INTEGER_TYPE
654- && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
655- && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
656- {
657- /* OPRND0 is a constant of HALF_TYPE1. */
658- half_type0 = half_type1;
659- oprnd1 = gimple_assign_rhs1 (def_stmt1);
660- }
661+ if (TREE_CODE (oprnd0) == INTEGER_CST
662+ && TREE_CODE (half_type1) == INTEGER_TYPE
663+ && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type,
664+ &half_type1, def_stmt1))
665+ half_type0 = half_type1;
666 else
667 return NULL;
668 }
669 else if (!op1_ok)
670 {
671- if (CONSTANT_CLASS_P (oprnd1)
672+ if (TREE_CODE (oprnd1) == INTEGER_CST
673 && TREE_CODE (half_type0) == INTEGER_TYPE
674- && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
675- && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
676- {
677- /* OPRND1 is a constant of HALF_TYPE0. */
678- half_type1 = half_type0;
679- oprnd0 = gimple_assign_rhs1 (def_stmt0);
680- }
681+ && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type,
682+ &half_type0, def_stmt0))
683+ half_type1 = half_type0;
684 else
685 return NULL;
686 }
687@@ -461,7 +544,7 @@
688 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
689 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
690 {
691- tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
692+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
693 imm_use_iterator imm_iter;
694 use_operand_p use_p;
695 int nuses = 0;
696@@ -491,7 +574,7 @@
697 return NULL;
698
699 type = use_type;
700- *last_stmt = use_stmt;
701+ last_stmt = use_stmt;
702 }
703
704 if (!types_compatible_p (half_type0, half_type1))
705@@ -506,7 +589,7 @@
706 vectype_out = get_vectype_for_scalar_type (type);
707 if (!vectype
708 || !vectype_out
709- || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
710+ || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
711 vectype_out, vectype,
712 &dummy, &dummy, &dummy_code,
713 &dummy_code, &dummy_int, &dummy_vec))
714@@ -524,6 +607,7 @@
715 if (vect_print_dump_info (REPORT_DETAILS))
716 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
717
718+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
719 return pattern_stmt;
720 }
721
722@@ -555,16 +639,17 @@
723 */
724
725 static gimple
726-vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
727+vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out)
728 {
729+ gimple last_stmt = VEC_index (gimple, *stmts, 0);
730 tree fn, base, exp = NULL;
731 gimple stmt;
732 tree var;
733
734- if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
735+ if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
736 return NULL;
737
738- fn = gimple_call_fndecl (*last_stmt);
739+ fn = gimple_call_fndecl (last_stmt);
740 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
741 return NULL;
742
743@@ -574,8 +659,8 @@
744 case BUILT_IN_POWI:
745 case BUILT_IN_POWF:
746 case BUILT_IN_POW:
747- base = gimple_call_arg (*last_stmt, 0);
748- exp = gimple_call_arg (*last_stmt, 1);
749+ base = gimple_call_arg (last_stmt, 0);
750+ exp = gimple_call_arg (last_stmt, 1);
751 if (TREE_CODE (exp) != REAL_CST
752 && TREE_CODE (exp) != INTEGER_CST)
753 return NULL;
754@@ -667,21 +752,23 @@
755 inner-loop nested in an outer-loop that us being vectorized). */
756
757 static gimple
758-vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
759+vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
760+ tree *type_out)
761 {
762+ gimple last_stmt = VEC_index (gimple, *stmts, 0);
763 gimple stmt;
764 tree oprnd0, oprnd1;
765- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
766+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
767 tree type, half_type;
768 gimple pattern_stmt;
769 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
770 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
771 tree var;
772
773- if (!is_gimple_assign (*last_stmt))
774+ if (!is_gimple_assign (last_stmt))
775 return NULL;
776
777- type = gimple_expr_type (*last_stmt);
778+ type = gimple_expr_type (last_stmt);
779
780 /* Look for the following pattern
781 DX = (TYPE) X;
782@@ -693,25 +780,25 @@
783 /* Starting from LAST_STMT, follow the defs of its uses in search
784 of the above pattern. */
785
786- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
787+ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
788 return NULL;
789
790 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
791 return NULL;
792
793- oprnd0 = gimple_assign_rhs1 (*last_stmt);
794- oprnd1 = gimple_assign_rhs2 (*last_stmt);
795+ oprnd0 = gimple_assign_rhs1 (last_stmt);
796+ oprnd1 = gimple_assign_rhs2 (last_stmt);
797 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
798 || !types_compatible_p (TREE_TYPE (oprnd1), type))
799 return NULL;
800
801- /* So far so good. Since *last_stmt was detected as a (summation) reduction,
802+ /* So far so good. Since last_stmt was detected as a (summation) reduction,
803 we know that oprnd1 is the reduction variable (defined by a loop-header
804 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
805 Left to check that oprnd0 is defined by a cast from type 'type' to type
806 'TYPE'. */
807
808- if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
809+ if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
810 return NULL;
811
812 oprnd0 = gimple_assign_rhs1 (stmt);
813@@ -732,8 +819,9 @@
814
815 /* We don't allow changing the order of the computation in the inner-loop
816 when doing outer-loop vectorization. */
817- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
818+ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
819
820+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
821 return pattern_stmt;
822 }
823
824@@ -762,7 +850,7 @@
825
826 static void
827 vect_pattern_recog_1 (
828- gimple (* vect_recog_func) (gimple *, tree *, tree *),
829+ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
830 gimple_stmt_iterator si)
831 {
832 gimple stmt = gsi_stmt (si), pattern_stmt;
833@@ -774,12 +862,14 @@
834 enum tree_code code;
835 int i;
836 gimple next;
837+ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
838
839- pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
840+ VEC_quick_push (gimple, stmts_to_replace, stmt);
841+ pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
842 if (!pattern_stmt)
843 return;
844
845- si = gsi_for_stmt (stmt);
846+ stmt = VEC_last (gimple, stmts_to_replace);
847 stmt_info = vinfo_for_stmt (stmt);
848 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
849
850@@ -849,6 +939,35 @@
851 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
852 if (next == stmt)
853 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
854+
855+ /* In case of widen-mult by a constant, it is possible that an additional
856+ pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a
857+ stmt_info for it, and mark the relevant statements. */
858+ for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
859+ && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
860+ i++)
861+ {
862+ stmt_info = vinfo_for_stmt (stmt);
863+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
864+ if (vect_print_dump_info (REPORT_DETAILS))
865+ {
866+ fprintf (vect_dump, "additional pattern stmt: ");
867+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
868+ }
869+
870+ set_vinfo_for_stmt (pattern_stmt,
871+ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
872+ gimple_set_bb (pattern_stmt, gimple_bb (stmt));
873+ pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
874+
875+ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
876+ STMT_VINFO_DEF_TYPE (pattern_stmt_info)
877+ = STMT_VINFO_DEF_TYPE (stmt_info);
878+ STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info);
879+ STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
880+ }
881+
882+ VEC_free (gimple, heap, stmts_to_replace);
883 }
884
885
886@@ -896,10 +1015,8 @@
887
888 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
889 (because they are marked as irrelevant). It will vectorize S6, and record
890- a pointer to the new vector stmt VS6 both from S6 (as usual), and also
891- from S4. We do that so that when we get to vectorizing stmts that use the
892- def of S4 (like S5 that uses a_0), we'll know where to take the relevant
893- vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
894+ a pointer to the new vector stmt VS6 from S6 (as usual).
895+ S4 will be skipped, and S5 will be vectorized as usual:
896
897 in_pattern_p related_stmt vec_stmt
898 S1: a_i = .... - - -
899@@ -915,7 +1032,21 @@
900 elsewhere), and we'll end up with:
901
902 VS6: va_new = ....
903- VS5: ... = ..vuse(va_new).. */
904+ VS5: ... = ..vuse(va_new)..
905+
906+ In case of more than one pattern statements, e.g., widen-mult with
907+ intermediate type:
908+
909+ S1 a_t = ;
910+ S2 a_T = (TYPE) a_t;
911+ '--> S3: a_it = (interm_type) a_t;
912+ S4 prod_T = a_T * CONST;
913+ '--> S5: prod_T' = a_it w* CONST;
914+
915+ there may be other users of a_T outside the pattern. In that case S2 will
916+ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
917+ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
918+ be recorded in S3. */
919
920 void
921 vect_pattern_recog (loop_vec_info loop_vinfo)
922@@ -925,7 +1056,7 @@
923 unsigned int nbbs = loop->num_nodes;
924 gimple_stmt_iterator si;
925 unsigned int i, j;
926- gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
927+ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
928
929 if (vect_print_dump_info (REPORT_DETAILS))
930 fprintf (vect_dump, "=== vect_pattern_recog ===");
931Index: gcc-4_6-branch/gcc/tree-vect-slp.c
932===================================================================
933--- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800
934+++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800
935@@ -152,7 +152,9 @@
936 if (loop && def_stmt && gimple_bb (def_stmt)
937 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
938 && vinfo_for_stmt (def_stmt)
939- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
940+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
941+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
942+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
943 {
944 if (!*first_stmt_dt0)
945 *pattern0 = true;
946Index: gcc-4_6-branch/gcc/tree-vect-stmts.c
947===================================================================
948--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800
949+++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800
950@@ -126,33 +126,72 @@
951
952 static void
953 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
954- enum vect_relevant relevant, bool live_p)
955+ enum vect_relevant relevant, bool live_p,
956+ bool used_in_pattern)
957 {
958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
959 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
960 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
961+ gimple pattern_stmt;
962
963 if (vect_print_dump_info (REPORT_DETAILS))
964 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
965
966+ /* If this stmt is an original stmt in a pattern, we might need to mark its
967+ related pattern stmt instead of the original stmt. However, such stmts
968+ may have their own uses that are not in any pattern, in such cases the
969+ stmt itself should be marked. */
970 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
971 {
972- gimple pattern_stmt;
973+ bool found = false;
974+ if (!used_in_pattern)
975+ {
976+ imm_use_iterator imm_iter;
977+ use_operand_p use_p;
978+ gimple use_stmt;
979+ tree lhs;
980+
981+ if (is_gimple_assign (stmt))
982+ lhs = gimple_assign_lhs (stmt);
983+ else
984+ lhs = gimple_call_lhs (stmt);
985+
986+ /* This use is out of pattern use, if LHS has other uses that are
987+ pattern uses, we should mark the stmt itself, and not the pattern
988+ stmt. */
989+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
990+ {
991+ if (is_gimple_debug (USE_STMT (use_p)))
992+ continue;
993+ use_stmt = USE_STMT (use_p);
994+
995+ if (vinfo_for_stmt (use_stmt)
996+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
997+ {
998+ found = true;
999+ break;
1000+ }
1001+ }
1002+ }
1003+
1004+ if (!found)
1005+ {
1006+ /* This is the last stmt in a sequence that was detected as a
1007+ pattern that can potentially be vectorized. Don't mark the stmt
1008+ as relevant/live because it's not going to be vectorized.
1009+ Instead mark the pattern-stmt that replaces it. */
1010+
1011+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1012
1013- /* This is the last stmt in a sequence that was detected as a
1014- pattern that can potentially be vectorized. Don't mark the stmt
1015- as relevant/live because it's not going to be vectorized.
1016- Instead mark the pattern-stmt that replaces it. */
1017-
1018- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1019-
1020- if (vect_print_dump_info (REPORT_DETAILS))
1021- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1022- stmt_info = vinfo_for_stmt (pattern_stmt);
1023- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1024- save_relevant = STMT_VINFO_RELEVANT (stmt_info);
1025- save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1026- stmt = pattern_stmt;
1027+ if (vect_print_dump_info (REPORT_DETAILS))
1028+ fprintf (vect_dump, "last stmt in pattern. don't mark"
1029+ " relevant/live.");
1030+ stmt_info = vinfo_for_stmt (pattern_stmt);
1031+ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1032+ save_relevant = STMT_VINFO_RELEVANT (stmt_info);
1033+ save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1034+ stmt = pattern_stmt;
1035+ }
1036 }
1037
1038 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1039@@ -437,7 +476,8 @@
1040 }
1041 }
1042
1043- vect_mark_relevant (worklist, def_stmt, relevant, live_p);
1044+ vect_mark_relevant (worklist, def_stmt, relevant, live_p,
1045+ is_pattern_stmt_p (stmt_vinfo));
1046 return true;
1047 }
1048
1049@@ -494,7 +534,7 @@
1050 }
1051
1052 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
1053- vect_mark_relevant (&worklist, phi, relevant, live_p);
1054+ vect_mark_relevant (&worklist, phi, relevant, live_p, false);
1055 }
1056 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1057 {
1058@@ -506,7 +546,7 @@
1059 }
1060
1061 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
1062- vect_mark_relevant (&worklist, stmt, relevant, live_p);
1063+ vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
1064 }
1065 }
1066
1067@@ -613,42 +653,55 @@
1068 if (is_gimple_assign (stmt))
1069 {
1070 tree rhs = gimple_assign_rhs1 (stmt);
1071- if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1072- == GIMPLE_SINGLE_RHS)
1073+ unsigned int op_num;
1074+ tree op;
1075+ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
1076 {
1077- unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1
1078- (stmt));
1079- for (i = 0; i < op_num; i++)
1080- {
1081- tree op = TREE_OPERAND (rhs, i);
1082- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1083- &worklist))
1084+ case GIMPLE_SINGLE_RHS:
1085+ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
1086+ for (i = 0; i < op_num; i++)
1087 {
1088- VEC_free (gimple, heap, worklist);
1089- return false;
1090+ op = TREE_OPERAND (rhs, i);
1091+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1092+ &worklist))
1093+ {
1094+ VEC_free (gimple, heap, worklist);
1095+ return false;
1096+ }
1097 }
1098- }
1099- }
1100- else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1101- == GIMPLE_BINARY_RHS)
1102- {
1103- tree op = gimple_assign_rhs1 (stmt);
1104- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1105- &worklist))
1106- {
1107- VEC_free (gimple, heap, worklist);
1108- return false;
1109- }
1110- op = gimple_assign_rhs2 (stmt);
1111- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1112- &worklist))
1113- {
1114- VEC_free (gimple, heap, worklist);
1115- return false;
1116- }
1117+ break;
1118+
1119+ case GIMPLE_BINARY_RHS:
1120+ op = gimple_assign_rhs1 (stmt);
1121+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1122+ &worklist))
1123+ {
1124+ VEC_free (gimple, heap, worklist);
1125+ return false;
1126+ }
1127+ op = gimple_assign_rhs2 (stmt);
1128+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1129+ &worklist))
1130+ {
1131+ VEC_free (gimple, heap, worklist);
1132+ return false;
1133+ }
1134+ break;
1135+
1136+ case GIMPLE_UNARY_RHS:
1137+ op = gimple_assign_rhs1 (stmt);
1138+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
1139+ &worklist))
1140+ {
1141+ VEC_free (gimple, heap, worklist);
1142+ return false;
1143+ }
1144+
1145+ break;
1146+
1147+ default:
1148+ return false;
1149 }
1150- else
1151- return false;
1152 }
1153 else if (is_gimple_call (stmt))
1154 {
1155@@ -1210,7 +1263,14 @@
1156
1157 /* Get the def from the vectorized stmt. */
1158 def_stmt_info = vinfo_for_stmt (def_stmt);
1159+
1160 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1161+ /* Get vectorized pattern statement. */
1162+ if (!vec_stmt
1163+ && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1164+ && !STMT_VINFO_RELEVANT (def_stmt_info))
1165+ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1166+ STMT_VINFO_RELATED_STMT (def_stmt_info)));
1167 gcc_assert (vec_stmt);
1168 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1169 vec_oprnd = PHI_RESULT (vec_stmt);
1170@@ -4894,6 +4954,7 @@
1171 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
1172 bool ok;
1173 tree scalar_type, vectype;
1174+ gimple pattern_stmt;
1175
1176 if (vect_print_dump_info (REPORT_DETAILS))
1177 {
1178@@ -4915,16 +4976,22 @@
1179 - any LABEL_EXPRs in the loop
1180 - computations that are used only for array indexing or loop control.
1181 In basic blocks we only analyze statements that are a part of some SLP
1182- instance, therefore, all the statements are relevant. */
1183+ instance, therefore, all the statements are relevant.
1184+
1185+ Pattern statement need to be analyzed instead of the original statement
1186+ if the original statement is not relevant. Otherwise, we analyze both
1187+ statements. */
1188
1189+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1190 if (!STMT_VINFO_RELEVANT_P (stmt_info)
1191 && !STMT_VINFO_LIVE_P (stmt_info))
1192 {
1193- gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1194 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1195+ && pattern_stmt
1196 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
1197 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
1198 {
1199+ /* Analyze PATTERN_STMT instead of the original stmt. */
1200 stmt = pattern_stmt;
1201 stmt_info = vinfo_for_stmt (pattern_stmt);
1202 if (vect_print_dump_info (REPORT_DETAILS))
1203@@ -4941,6 +5008,21 @@
1204 return true;
1205 }
1206 }
1207+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1208+ && pattern_stmt
1209+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
1210+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
1211+ {
1212+ /* Analyze PATTERN_STMT too. */
1213+ if (vect_print_dump_info (REPORT_DETAILS))
1214+ {
1215+ fprintf (vect_dump, "==> examining pattern statement: ");
1216+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1217+ }
1218+
1219+ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
1220+ return false;
1221+ }
1222
1223 switch (STMT_VINFO_DEF_TYPE (stmt_info))
1224 {
1225@@ -5074,7 +5156,6 @@
1226 bool is_store = false;
1227 gimple vec_stmt = NULL;
1228 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1229- gimple orig_stmt_in_pattern;
1230 bool done;
1231
1232 switch (STMT_VINFO_TYPE (stmt_info))
1233@@ -5213,21 +5294,7 @@
1234 }
1235
1236 if (vec_stmt)
1237- {
1238 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1239- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
1240- if (orig_stmt_in_pattern)
1241- {
1242- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
1243- /* STMT was inserted by the vectorizer to replace a computation idiom.
1244- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
1245- computed this idiom. We need to record a pointer to VEC_STMT in
1246- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
1247- documentation of vect_pattern_recog. */
1248- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1249- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
1250- }
1251- }
1252
1253 return is_store;
1254 }
1255@@ -5605,8 +5672,12 @@
1256 || *dt == vect_nested_cycle)
1257 {
1258 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
1259- if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1260+
1261+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1262+ && !STMT_VINFO_RELEVANT (stmt_info)
1263+ && !STMT_VINFO_LIVE_P (stmt_info))
1264 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1265+
1266 *vectype = STMT_VINFO_VECTYPE (stmt_info);
1267 gcc_assert (*vectype != NULL_TREE);
1268 }
1269Index: gcc-4_6-branch/gcc/tree-vectorizer.h
1270===================================================================
1271--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800
1272+++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800
1273@@ -890,7 +890,7 @@
1274 /* Pattern recognition functions.
1275 Additional pattern recognition functions can (and will) be added
1276 in the future. */
1277-typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
1278+typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1279 #define NUM_PATTERNS 4
1280 void vect_pattern_recog (loop_vec_info);
1281
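(Illustrative note, not part of the deleted patch: conceptually, the additional pattern statement created by vect_handle_widen_mult_by_const rewrites the widening multiplication through the intermediate type as sketched below, so a 16-bit to 32-bit widening multiply can be used. All names are hypothetical and only mirror the a_it/prod_T' notation used in the patch comments.)

/* Scalar model of the rewritten computation, assuming an 8-bit input,
   a 32-bit result and a 16-bit intermediate type; not generated code.  */
#define N 32
unsigned char in[N];
int out[N];

void
widen_mult_intermediate_model (void)
{
  int i;

  for (i = 0; i < N; i++)
    {
      short a_it = (short) in[i];    /* extra pattern stmt: a_it = (interm_type) a_t  */
      out[i] = (int) a_it * 32470;   /* prod_T' = a_it w* CONST  */
    }
}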