summaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-devtools/gcc
diff options
context:
space:
mode:
author Khem Raj <raj.khem@gmail.com> 2012-01-09 23:44:16 +0000
committer Koen Kooi <koen@dominion.thruhere.net> 2012-01-10 21:43:59 +0100
commit 8413bf3c5de39d830969ad181b4dd4e136d91482 (patch)
tree 2328801dc6da7f78f77a211925ab9b5ba913a2ec /meta-oe/recipes-devtools/gcc
parent f2179dabaaba99ef5b9fe48e38c73b2178d50390 (diff)
download meta-openembedded-8413bf3c5de39d830969ad181b4dd4e136d91482.tar.gz
gcc-4.6: Update linaro patches past 2011.12 release
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Diffstat (limited to 'meta-oe/recipes-devtools/gcc')
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch 388
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch 805
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch 495
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch 1818
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch 487
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch 276
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch 69
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch 354
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch 22
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc 9
-rw-r--r-- meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc 2
11 files changed, 4724 insertions, 1 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch
new file mode 100644
index 000000000..74f139d33
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch
@@ -0,0 +1,388 @@
12011-11-22 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-10-06 Jakub Jelinek <jakub@redhat.com>
6
7 gcc/
8 PR tree-optimization/50596
9 * tree-vectorizer.h (vect_is_simple_cond): New prototype.
10 (NUM_PATTERNS): Change to 6.
11 * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New
12 function.
13 (vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern.
14 (vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt
15 if it already has one, and don't set STMT_VINFO_VECTYPE in it
16 if it is already set.
17 * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle
18 COND_EXPR in pattern stmts.
19 (vect_is_simple_cond): No longer static.
20
21 gcc/testsuite:
22 PR tree-optimization/50596
23 * gcc.dg/vect/vect-cond-8.c: New test.
24
25 2011-10-07 Jakub Jelinek <jakub@redhat.com>
26
27 gcc/
28 PR tree-optimization/50650
29 * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't
30 call vect_is_simple_cond here, instead fail if cond_expr isn't
31 COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL
32 for cond_expr's first operand.
33 * tree-vect-stmts.c (vect_is_simple_cond): Static again.
34 * tree-vectorizer.h (vect_is_simple_cond): Remove prototype.
35
36
37 gcc/
38 * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce
39 it to integral types only.
40
41 gcc/testsuite/
42 * gcc.dg/vect/pr30858.c: Expect the error message twice for targets
43 with multiple vector sizes.
44 * gcc.dg/vect/vect-cond-8.c: Rename to...
45 * gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float
46 to int.
47 * lib/target-supports.exp (check_effective_target_vect_condition):
48 Return true for NEON.
49
50=== modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c'
51--- old/gcc/testsuite/gcc.dg/vect/pr30858.c 2007-02-22 08:16:18 +0000
52+++ new/gcc/testsuite/gcc.dg/vect/pr30858.c 2011-11-20 09:11:09 +0000
53@@ -11,5 +11,6 @@
54 }
55
56 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
57-/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */
58+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" { xfail vect_multiple_sizes } } } */
59+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */
60 /* { dg-final { cleanup-tree-dump "vect" } } */
61
62=== added file 'gcc/testsuite/gcc.dg/vect/vect-cond-8a.c'
63--- old/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 1970-01-01 00:00:00 +0000
64+++ new/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2011-11-20 09:11:09 +0000
65@@ -0,0 +1,75 @@
66+/* { dg-require-effective-target vect_condition } */
67+
68+#include "tree-vect.h"
69+
70+#define N 1024
71+int a[N], b[N], c[N];
72+char d[N], e[N], f[N];
73+unsigned char k[N];
74+
75+__attribute__((noinline, noclone)) void
76+f1 (void)
77+{
78+ int i;
79+ for (i = 0; i < N; ++i)
80+ k[i] = a[i] < b[i] ? 17 : 0;
81+}
82+
83+__attribute__((noinline, noclone)) void
84+f2 (void)
85+{
86+ int i;
87+ for (i = 0; i < N; ++i)
88+ k[i] = a[i] < b[i] ? 0 : 24;
89+}
90+
91+__attribute__((noinline, noclone)) void
92+f3 (void)
93+{
94+ int i;
95+ for (i = 0; i < N; ++i)
96+ k[i] = a[i] < b[i] ? 51 : 12;
97+}
98+
99+int
100+main ()
101+{
102+ int i;
103+
104+ check_vect ();
105+
106+ for (i = 0; i < N; i++)
107+ {
108+ switch (i % 9)
109+ {
110+ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
111+ case 1: a[i] = 0; b[i] = 0; break;
112+ case 2: a[i] = i + 1; b[i] = - i - 1; break;
113+ case 3: a[i] = i; b[i] = i + 7; break;
114+ case 4: a[i] = i; b[i] = i; break;
115+ case 5: a[i] = i + 16; b[i] = i + 3; break;
116+ case 6: a[i] = - i - 5; b[i] = - i; break;
117+ case 7: a[i] = - i; b[i] = - i; break;
118+ case 8: a[i] = - i; b[i] = - i - 7; break;
119+ }
120+ d[i] = i;
121+ e[i] = 2 * i;
122+ }
123+ f1 ();
124+ for (i = 0; i < N; i++)
125+ if (k[i] != ((i % 3) == 0 ? 17 : 0))
126+ abort ();
127+ f2 ();
128+ for (i = 0; i < N; i++)
129+ if (k[i] != ((i % 3) == 0 ? 0 : 24))
130+ abort ();
131+ f3 ();
132+ for (i = 0; i < N; i++)
133+ if (k[i] != ((i % 3) == 0 ? 51 : 12))
134+ abort ();
135+
136+ return 0;
137+}
138+
139+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */
140+/* { dg-final { cleanup-tree-dump "vect" } } */
141
142=== modified file 'gcc/testsuite/lib/target-supports.exp'
143--- old/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
144+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-22 16:52:23 +0000
145@@ -3150,7 +3150,8 @@
146 || [istarget ia64-*-*]
147 || [istarget i?86-*-*]
148 || [istarget spu-*-*]
149- || [istarget x86_64-*-*] } {
150+ || [istarget x86_64-*-*]
151+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
152 set et_vect_cond_saved 1
153 }
154 }
155
156=== modified file 'gcc/tree-vect-patterns.c'
157--- old/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000
158+++ new/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000
159@@ -50,13 +50,16 @@
160 tree *);
161 static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
162 tree *, tree *);
163+static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
164+ tree *, tree *);
165 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
166 vect_recog_widen_mult_pattern,
167 vect_recog_widen_sum_pattern,
168 vect_recog_dot_prod_pattern,
169 vect_recog_pow_pattern,
170 vect_recog_over_widening_pattern,
171- vect_recog_widen_shift_pattern};
172+ vect_recog_widen_shift_pattern,
173+ vect_recog_mixed_size_cond_pattern};
174
175
176 /* Function widened_name_p
177@@ -1441,6 +1444,118 @@
178 return pattern_stmt;
179 }
180
181+/* Function vect_recog_mixed_size_cond_pattern
182+
183+ Try to find the following pattern:
184+
185+ type x_t, y_t;
186+ TYPE a_T, b_T, c_T;
187+ loop:
188+ S1 a_T = x_t CMP y_t ? b_T : c_T;
189+
190+ where type 'TYPE' is an integral type which has different size
191+ from 'type'. b_T and c_T are constants and if 'TYPE' is wider
192+ than 'type', the constants need to fit into an integer type
193+ with the same width as 'type'.
194+
195+ Input:
196+
197+ * LAST_STMT: A stmt from which the pattern search begins.
198+
199+ Output:
200+
201+ * TYPE_IN: The type of the input arguments to the pattern.
202+
203+ * TYPE_OUT: The type of the output of this pattern.
204+
205+ * Return value: A new stmt that will be used to replace the pattern.
206+ Additionally a def_stmt is added.
207+
208+ a_it = x_t CMP y_t ? b_it : c_it;
209+ a_T = (TYPE) a_it; */
210+
211+static gimple
212+vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
213+ tree *type_out)
214+{
215+ gimple last_stmt = VEC_index (gimple, *stmts, 0);
216+ tree cond_expr, then_clause, else_clause;
217+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
218+ tree type, vectype, comp_vectype, comp_type, op, tmp;
219+ enum machine_mode cmpmode;
220+ gimple pattern_stmt, def_stmt;
221+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
222+
223+ if (!is_gimple_assign (last_stmt)
224+ || gimple_assign_rhs_code (last_stmt) != COND_EXPR
225+ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
226+ return NULL;
227+
228+ op = gimple_assign_rhs1 (last_stmt);
229+ cond_expr = TREE_OPERAND (op, 0);
230+ then_clause = TREE_OPERAND (op, 1);
231+ else_clause = TREE_OPERAND (op, 2);
232+
233+ if (TREE_CODE (then_clause) != INTEGER_CST
234+ || TREE_CODE (else_clause) != INTEGER_CST)
235+ return NULL;
236+
237+ if (!COMPARISON_CLASS_P (cond_expr))
238+ return NULL;
239+
240+ type = gimple_expr_type (last_stmt);
241+ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
242+ if (!INTEGRAL_TYPE_P (comp_type)
243+ || !INTEGRAL_TYPE_P (type))
244+ return NULL;
245+
246+ comp_vectype = get_vectype_for_scalar_type (comp_type);
247+ if (comp_vectype == NULL_TREE)
248+ return NULL;
249+
250+ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
251+
252+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
253+ return NULL;
254+
255+ vectype = get_vectype_for_scalar_type (type);
256+ if (vectype == NULL_TREE)
257+ return NULL;
258+
259+ if (types_compatible_p (vectype, comp_vectype))
260+ return NULL;
261+
262+ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype)))
263+ return NULL;
264+
265+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
266+ {
267+ if (!int_fits_type_p (then_clause, comp_type)
268+ || !int_fits_type_p (else_clause, comp_type))
269+ return NULL;
270+ }
271+
272+ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
273+ fold_convert (comp_type, then_clause),
274+ fold_convert (comp_type, else_clause));
275+ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp);
276+
277+ pattern_stmt
278+ = gimple_build_assign_with_ops (NOP_EXPR,
279+ vect_recog_temp_ssa_var (type, NULL),
280+ gimple_assign_lhs (def_stmt), NULL_TREE);
281+
282+ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
283+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
284+ set_vinfo_for_stmt (def_stmt, def_stmt_info);
285+ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype;
286+ *type_in = vectype;
287+ *type_out = vectype;
288+
289+ return pattern_stmt;
290+}
291+
292+
293 /* Mark statements that are involved in a pattern. */
294
295 static inline void
296@@ -1468,14 +1583,18 @@
297 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
298 {
299 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
300- set_vinfo_for_stmt (def_stmt,
301- new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
302+ def_stmt_info = vinfo_for_stmt (def_stmt);
303+ if (def_stmt_info == NULL)
304+ {
305+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
306+ set_vinfo_for_stmt (def_stmt, def_stmt_info);
307+ }
308 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
309- def_stmt_info = vinfo_for_stmt (def_stmt);
310 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
311 STMT_VINFO_DEF_TYPE (def_stmt_info)
312 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
313- STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
314+ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
315+ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
316 }
317 }
318
319
320=== modified file 'gcc/tree-vect-stmts.c'
321--- old/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000
322+++ new/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000
323@@ -655,20 +655,40 @@
324 tree rhs = gimple_assign_rhs1 (stmt);
325 unsigned int op_num;
326 tree op;
327+ enum tree_code rhs_code;
328 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
329 {
330 case GIMPLE_SINGLE_RHS:
331- op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
332- for (i = 0; i < op_num; i++)
333- {
334- op = TREE_OPERAND (rhs, i);
335- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
336- &worklist))
337- {
338- VEC_free (gimple, heap, worklist);
339- return false;
340- }
341- }
342+ op = gimple_assign_rhs1 (stmt);
343+ rhs_code = gimple_assign_rhs_code (stmt);
344+ i = 0;
345+ if (rhs_code == COND_EXPR
346+ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0)))
347+ {
348+ op = TREE_OPERAND (op, 0);
349+ if (!process_use (stmt, TREE_OPERAND (op, 0),
350+ loop_vinfo,
351+ live_p, relevant, &worklist)
352+ || !process_use (stmt, TREE_OPERAND (op, 1),
353+ loop_vinfo,
354+ live_p, relevant, &worklist))
355+ {
356+ VEC_free (gimple, heap, worklist);
357+ return false;
358+ }
359+ i = 1;
360+ }
361+ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
362+ for (i; i < op_num; i++)
363+ {
364+ op = TREE_OPERAND (rhs, i);
365+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
366+ &worklist))
367+ {
368+ VEC_free (gimple, heap, worklist);
369+ return false;
370+ }
371+ }
372 break;
373
374 case GIMPLE_BINARY_RHS:
375
376=== modified file 'gcc/tree-vectorizer.h'
377--- old/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000
378+++ new/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000
379@@ -917,7 +917,7 @@
380 Additional pattern recognition functions can (and will) be added
381 in the future. */
382 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
383-#define NUM_PATTERNS 6
384+#define NUM_PATTERNS 7
385 void vect_pattern_recog (loop_vec_info);
386
387 /* In tree-vectorizer.c. */
388
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch
new file mode 100644
index 000000000..ea42ad6ee
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch
@@ -0,0 +1,805 @@
12011-11-22 Ira Rosen <ira.rosen@linaro.org>
2 Backport from mainline:
3
4 2011-11-06 Ira Rosen <ira.rosen@linaro.org>
5
6 gcc/
7 * tree-vectorizer.h (vectorizable_condition): Add argument.
8 * tree-vect-loop.c (vectorizable_reduction): Fail for condition
9 in SLP. Update calls to vectorizable_condition.
10 * tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to
11 the arguments. Pass it to vect_is_simple_use_1.
12 (vectorizable_condition): Add slp_node to the arguments. Support
13 vectorization of basic blocks. Fail for reduction in SLP. Update
14 calls to vect_is_simple_cond and vect_is_simple_use. Support SLP:
15 call vect_get_slp_defs to get vector operands.
16 (vect_analyze_stmt): Update calls to vectorizable_condition.
17 (vect_transform_stmt): Likewise.
18 * tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR.
19 (vect_get_and_check_slp_defs): Handle COND_EXPR. Allow pattern
20 def stmts.
21 (vect_build_slp_tree): Handle COND_EXPR.
22 (vect_analyze_slp_instance): Push pattern statements to root node.
23 (vect_get_constant_vectors): Fix comments. Handle COND_EXPR.
24
25 gcc/testsuite/
26 * gcc.dg/vect/bb-slp-cond-1.c: New test.
27 * gcc.dg/vect/slp-cond-1.c: New test.
28
29=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c'
30--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 1970-01-01 00:00:00 +0000
31+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 2011-11-20 08:24:08 +0000
32@@ -0,0 +1,46 @@
33+/* { dg-require-effective-target vect_condition } */
34+
35+#include "tree-vect.h"
36+
37+#define N 128
38+
39+__attribute__((noinline, noclone)) void
40+foo (int *a, int stride)
41+{
42+ int i;
43+
44+ for (i = 0; i < N/stride; i++, a += stride)
45+ {
46+ a[0] = a[0] ? 1 : 5;
47+ a[1] = a[1] ? 2 : 6;
48+ a[2] = a[2] ? 3 : 7;
49+ a[3] = a[3] ? 4 : 8;
50+ }
51+}
52+
53+
54+int a[N];
55+int main ()
56+{
57+ int i;
58+
59+ check_vect ();
60+
61+ for (i = 0; i < N; i++)
62+ a[i] = i;
63+
64+ foo (a, 4);
65+
66+ for (i = 1; i < N; i++)
67+ if (a[i] != i%4 + 1)
68+ abort ();
69+
70+ if (a[0] != 5)
71+ abort ();
72+
73+ return 0;
74+}
75+
76+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
77+/* { dg-final { cleanup-tree-dump "slp" } } */
78+
79
80=== added file 'gcc/testsuite/gcc.dg/vect/slp-cond-1.c'
81--- old/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 1970-01-01 00:00:00 +0000
82+++ new/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2011-11-20 08:24:08 +0000
83@@ -0,0 +1,126 @@
84+/* { dg-require-effective-target vect_condition } */
85+#include "tree-vect.h"
86+
87+#define N 32
88+int a[N], b[N];
89+int d[N], e[N];
90+int k[N];
91+
92+__attribute__((noinline, noclone)) void
93+f1 (void)
94+{
95+ int i;
96+ for (i = 0; i < N/4; i++)
97+ {
98+ k[4*i] = a[4*i] < b[4*i] ? 17 : 0;
99+ k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0;
100+ k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0;
101+ k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0;
102+ }
103+}
104+
105+__attribute__((noinline, noclone)) void
106+f2 (void)
107+{
108+ int i;
109+ for (i = 0; i < N/2; ++i)
110+ {
111+ k[2*i] = a[2*i] < b[2*i] ? 0 : 24;
112+ k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4;
113+ }
114+}
115+
116+__attribute__((noinline, noclone)) void
117+f3 (void)
118+{
119+ int i;
120+ for (i = 0; i < N/2; ++i)
121+ {
122+ k[2*i] = a[2*i] < b[2*i] ? 51 : 12;
123+ k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12;
124+ }
125+}
126+
127+__attribute__((noinline, noclone)) void
128+f4 (void)
129+{
130+ int i;
131+ for (i = 0; i < N/2; ++i)
132+ {
133+ int d0 = d[2*i], e0 = e[2*i];
134+ int d1 = d[2*i+1], e1 = e[2*i+1];
135+ k[2*i] = a[2*i] >= b[2*i] ? d0 : e0;
136+ k[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1;
137+ }
138+}
139+
140+int
141+main ()
142+{
143+ int i;
144+
145+ check_vect ();
146+
147+ for (i = 0; i < N; i++)
148+ {
149+ switch (i % 9)
150+ {
151+ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
152+ case 1: a[i] = 0; b[i] = 0; break;
153+ case 2: a[i] = i + 1; b[i] = - i - 1; break;
154+ case 3: a[i] = i; b[i] = i + 7; break;
155+ case 4: a[i] = i; b[i] = i; break;
156+ case 5: a[i] = i + 16; b[i] = i + 3; break;
157+ case 6: a[i] = - i - 5; b[i] = - i; break;
158+ case 7: a[i] = - i; b[i] = - i; break;
159+ case 8: a[i] = - i; b[i] = - i - 7; break;
160+ }
161+ d[i] = i;
162+ e[i] = 2 * i;
163+ }
164+ f1 ();
165+ for (i = 0; i < N; i++)
166+ if (k[i] != ((i % 3) == 0 ? 17 : 0))
167+ abort ();
168+
169+ f2 ();
170+ for (i = 0; i < N; i++)
171+ {
172+ switch (i % 9)
173+ {
174+ case 0:
175+ case 6:
176+ if (k[i] != ((i/9 % 2) == 0 ? 0 : 7))
177+ abort ();
178+ break;
179+ case 1:
180+ case 5:
181+ case 7:
182+ if (k[i] != ((i/9 % 2) == 0 ? 4 : 24))
183+ abort ();
184+ break;
185+ case 2:
186+ case 4:
187+ case 8:
188+ if (k[i] != ((i/9 % 2) == 0 ? 24 : 4))
189+ abort ();
190+ break;
191+ case 3:
192+ if (k[i] != ((i/9 % 2) == 0 ? 7 : 0))
193+ abort ();
194+ break;
195+ }
196+ }
197+
198+ f3 ();
199+
200+ f4 ();
201+ for (i = 0; i < N; i++)
202+ if (k[i] != ((i % 3) == 0 ? e[i] : d[i]))
203+ abort ();
204+
205+ return 0;
206+}
207+
208+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
209+/* { dg-final { cleanup-tree-dump "vect" } } */
210
211=== modified file 'gcc/tree-vect-loop.c'
212--- old/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000
213+++ new/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000
214@@ -4087,6 +4087,9 @@
215 gcc_unreachable ();
216 }
217
218+ if (code == COND_EXPR && slp_node)
219+ return false;
220+
221 scalar_dest = gimple_assign_lhs (stmt);
222 scalar_type = TREE_TYPE (scalar_dest);
223 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
224@@ -4161,7 +4164,7 @@
225
226 if (code == COND_EXPR)
227 {
228- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
229+ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
230 {
231 if (vect_print_dump_info (REPORT_DETAILS))
232 fprintf (vect_dump, "unsupported condition in reduction");
233@@ -4433,7 +4436,7 @@
234 gcc_assert (!slp_node);
235 vectorizable_condition (stmt, gsi, vec_stmt,
236 PHI_RESULT (VEC_index (gimple, phis, 0)),
237- reduc_index);
238+ reduc_index, NULL);
239 /* Multiple types are not supported for condition. */
240 break;
241 }
242
243=== modified file 'gcc/tree-vect-slp.c'
244--- old/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000
245+++ new/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000
246@@ -109,7 +109,11 @@
247 if (is_gimple_call (stmt))
248 nops = gimple_call_num_args (stmt);
249 else if (is_gimple_assign (stmt))
250- nops = gimple_num_ops (stmt) - 1;
251+ {
252+ nops = gimple_num_ops (stmt) - 1;
253+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
254+ nops = 4;
255+ }
256 else
257 return NULL;
258
259@@ -190,20 +194,51 @@
260 bool different_types = false;
261 bool pattern = false;
262 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
263+ int op_idx = 1;
264+ tree compare_rhs = NULL_TREE, rhs = NULL_TREE;
265+ int cond_idx = -1;
266
267 if (loop_vinfo)
268 loop = LOOP_VINFO_LOOP (loop_vinfo);
269
270 if (is_gimple_call (stmt))
271 number_of_oprnds = gimple_call_num_args (stmt);
272+ else if (is_gimple_assign (stmt))
273+ {
274+ number_of_oprnds = gimple_num_ops (stmt) - 1;
275+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
276+ {
277+ number_of_oprnds = 4;
278+ cond_idx = 0;
279+ rhs = gimple_assign_rhs1 (stmt);
280+ }
281+ }
282 else
283- number_of_oprnds = gimple_num_ops (stmt) - 1;
284+ return false;
285
286 for (i = 0; i < number_of_oprnds; i++)
287 {
288- oprnd = gimple_op (stmt, i + 1);
289+ if (compare_rhs)
290+ oprnd = compare_rhs;
291+ else
292+ oprnd = gimple_op (stmt, op_idx++);
293+
294 oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
295
296+ if (-1 < cond_idx && cond_idx < 4)
297+ {
298+ if (compare_rhs)
299+ compare_rhs = NULL_TREE;
300+ else
301+ oprnd = TREE_OPERAND (rhs, cond_idx++);
302+ }
303+
304+ if (COMPARISON_CLASS_P (oprnd))
305+ {
306+ compare_rhs = TREE_OPERAND (oprnd, 1);
307+ oprnd = TREE_OPERAND (oprnd, 0);
308+ }
309+
310 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
311 &dt)
312 || (!def_stmt && dt != vect_constant_def))
313@@ -243,8 +278,7 @@
314 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
315 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
316
317- if (dt == vect_unknown_def_type
318- || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
319+ if (dt == vect_unknown_def_type)
320 {
321 if (vect_print_dump_info (REPORT_DETAILS))
322 fprintf (vect_dump, "Unsupported pattern.");
323@@ -423,6 +457,7 @@
324 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
325 gimple stmt = VEC_index (gimple, stmts, 0);
326 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
327+ enum tree_code first_cond_code = ERROR_MARK;
328 tree lhs;
329 bool stop_recursion = false, need_same_oprnds = false;
330 tree vectype, scalar_type, first_op1 = NULL_TREE;
331@@ -439,11 +474,18 @@
332 VEC (slp_oprnd_info, heap) *oprnds_info;
333 unsigned int nops;
334 slp_oprnd_info oprnd_info;
335+ tree cond;
336
337 if (is_gimple_call (stmt))
338 nops = gimple_call_num_args (stmt);
339+ else if (is_gimple_assign (stmt))
340+ {
341+ nops = gimple_num_ops (stmt) - 1;
342+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
343+ nops = 4;
344+ }
345 else
346- nops = gimple_num_ops (stmt) - 1;
347+ return false;
348
349 oprnds_info = vect_create_oprnd_info (nops, group_size);
350
351@@ -484,6 +526,22 @@
352 return false;
353 }
354
355+ if (is_gimple_assign (stmt)
356+ && gimple_assign_rhs_code (stmt) == COND_EXPR
357+ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0))
358+ && !COMPARISON_CLASS_P (cond))
359+ {
360+ if (vect_print_dump_info (REPORT_SLP))
361+ {
362+ fprintf (vect_dump,
363+ "Build SLP failed: condition is not comparison ");
364+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
365+ }
366+
367+ vect_free_oprnd_info (&oprnds_info);
368+ return false;
369+ }
370+
371 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
372 vectype = get_vectype_for_scalar_type (scalar_type);
373 if (!vectype)
374@@ -737,7 +795,8 @@
375
376 /* Not memory operation. */
377 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
378- && TREE_CODE_CLASS (rhs_code) != tcc_unary)
379+ && TREE_CODE_CLASS (rhs_code) != tcc_unary
380+ && rhs_code != COND_EXPR)
381 {
382 if (vect_print_dump_info (REPORT_SLP))
383 {
384@@ -750,6 +809,26 @@
385 return false;
386 }
387
388+ if (rhs_code == COND_EXPR)
389+ {
390+ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
391+
392+ if (i == 0)
393+ first_cond_code = TREE_CODE (cond_expr);
394+ else if (first_cond_code != TREE_CODE (cond_expr))
395+ {
396+ if (vect_print_dump_info (REPORT_SLP))
397+ {
398+ fprintf (vect_dump, "Build SLP failed: different"
399+ " operation");
400+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
401+ }
402+
403+ vect_free_oprnd_info (&oprnds_info);
404+ return false;
405+ }
406+ }
407+
408 /* Find the def-stmts. */
409 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
410 ncopies_for_cost, (i == 0),
411@@ -1395,7 +1474,12 @@
412 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
413 while (next)
414 {
415- VEC_safe_push (gimple, heap, scalar_stmts, next);
416+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
417+ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
418+ VEC_safe_push (gimple, heap, scalar_stmts,
419+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
420+ else
421+ VEC_safe_push (gimple, heap, scalar_stmts, next);
422 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
423 }
424 }
425@@ -1404,7 +1488,7 @@
426 /* Collect reduction statements. */
427 VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
428 for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
429- VEC_safe_push (gimple, heap, scalar_stmts, next);
430+ VEC_safe_push (gimple, heap, scalar_stmts, next);
431 }
432
433 node = vect_create_new_slp_node (scalar_stmts);
434@@ -2160,15 +2244,15 @@
435
436 For example, we have two scalar operands, s1 and s2 (e.g., group of
437 strided accesses of size two), while NUNITS is four (i.e., four scalars
438- of this type can be packed in a vector). The output vector will contain
439- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
440+ of this type can be packed in a vector). The output vector will contain
441+ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
442 will be 2).
443
444 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
445 containing the operands.
446
447 For example, NUNITS is four as before, and the group size is 8
448- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
449+ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
450 {s5, s6, s7, s8}. */
451
452 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
453@@ -2180,8 +2264,18 @@
454 {
455 if (is_store)
456 op = gimple_assign_rhs1 (stmt);
457- else
458+ else if (gimple_assign_rhs_code (stmt) != COND_EXPR)
459 op = gimple_op (stmt, op_num + 1);
460+ else
461+ {
462+ if (op_num == 0 || op_num == 1)
463+ {
464+ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
465+ op = TREE_OPERAND (cond, op_num);
466+ }
467+ else
468+ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1);
469+ }
470
471 if (reduc_index != -1)
472 {
473
474=== modified file 'gcc/tree-vect-stmts.c'
475--- old/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000
476+++ new/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000
477@@ -4816,7 +4816,7 @@
478 condition operands are supportable using vec_is_simple_use. */
479
480 static bool
481-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
482+vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
483 {
484 tree lhs, rhs;
485 tree def;
486@@ -4831,7 +4831,7 @@
487 if (TREE_CODE (lhs) == SSA_NAME)
488 {
489 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
490- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
491+ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
492 &dt))
493 return false;
494 }
495@@ -4842,7 +4842,7 @@
496 if (TREE_CODE (rhs) == SSA_NAME)
497 {
498 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
499- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
500+ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
501 &dt))
502 return false;
503 }
504@@ -4868,7 +4868,8 @@
505
506 bool
507 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
508- gimple *vec_stmt, tree reduc_def, int reduc_index)
509+ gimple *vec_stmt, tree reduc_def, int reduc_index,
510+ slp_tree slp_node)
511 {
512 tree scalar_dest = NULL_TREE;
513 tree vec_dest = NULL_TREE;
514@@ -4885,19 +4886,24 @@
515 tree def;
516 enum vect_def_type dt, dts[4];
517 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
518- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
519+ int ncopies;
520 enum tree_code code;
521 stmt_vec_info prev_stmt_info = NULL;
522- int j;
523+ int i, j;
524+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
525+ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
526+ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
527
528- /* FORNOW: unsupported in basic block SLP. */
529- gcc_assert (loop_vinfo);
530+ if (slp_node || PURE_SLP_STMT (stmt_info))
531+ ncopies = 1;
532+ else
533+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
534
535 gcc_assert (ncopies >= 1);
536- if (reduc_index && ncopies > 1)
537+ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info)))
538 return false; /* FORNOW */
539
540- if (!STMT_VINFO_RELEVANT_P (stmt_info))
541+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
542 return false;
543
544 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
545@@ -4905,10 +4911,6 @@
546 && reduc_def))
547 return false;
548
549- /* FORNOW: SLP not supported. */
550- if (STMT_SLP_TYPE (stmt_info))
551- return false;
552-
553 /* FORNOW: not yet supported. */
554 if (STMT_VINFO_LIVE_P (stmt_info))
555 {
556@@ -4932,7 +4934,7 @@
557 then_clause = TREE_OPERAND (op, 1);
558 else_clause = TREE_OPERAND (op, 2);
559
560- if (!vect_is_simple_cond (cond_expr, loop_vinfo))
561+ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo))
562 return false;
563
564 /* We do not handle two different vector types for the condition
565@@ -4944,7 +4946,7 @@
566 if (TREE_CODE (then_clause) == SSA_NAME)
567 {
568 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
569- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
570+ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
571 &then_def_stmt, &def, &dt))
572 return false;
573 }
574@@ -4956,7 +4958,7 @@
575 if (TREE_CODE (else_clause) == SSA_NAME)
576 {
577 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
578- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
579+ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
580 &else_def_stmt, &def, &dt))
581 return false;
582 }
583@@ -4974,7 +4976,15 @@
584 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
585 }
586
587- /* Transform */
588+ /* Transform. */
589+
590+ if (!slp_node)
591+ {
592+ vec_oprnds0 = VEC_alloc (tree, heap, 1);
593+ vec_oprnds1 = VEC_alloc (tree, heap, 1);
594+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
595+ vec_oprnds3 = VEC_alloc (tree, heap, 1);
596+ }
597
598 /* Handle def. */
599 scalar_dest = gimple_assign_lhs (stmt);
600@@ -4983,67 +4993,118 @@
601 /* Handle cond expr. */
602 for (j = 0; j < ncopies; j++)
603 {
604- gimple new_stmt;
605+ gimple new_stmt = NULL;
606 if (j == 0)
607 {
608- gimple gtemp;
609- vec_cond_lhs =
610+ if (slp_node)
611+ {
612+ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
613+ VEC (slp_void_p, heap) *vec_defs;
614+
615+ vec_defs = VEC_alloc (slp_void_p, heap, 4);
616+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
617+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
618+ VEC_safe_push (tree, heap, ops, then_clause);
619+ VEC_safe_push (tree, heap, ops, else_clause);
620+ vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
621+ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
622+ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
623+ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
624+ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
625+
626+ VEC_free (tree, heap, ops);
627+ VEC_free (slp_void_p, heap, vec_defs);
628+ }
629+ else
630+ {
631+ gimple gtemp;
632+ vec_cond_lhs =
633 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
634 stmt, NULL);
635- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
636+ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
637 NULL, &gtemp, &def, &dts[0]);
638- vec_cond_rhs =
639- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
640- stmt, NULL);
641- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
642- NULL, &gtemp, &def, &dts[1]);
643- if (reduc_index == 1)
644- vec_then_clause = reduc_def;
645- else
646- {
647- vec_then_clause = vect_get_vec_def_for_operand (then_clause,
648- stmt, NULL);
649- vect_is_simple_use (then_clause, loop_vinfo,
650- NULL, &gtemp, &def, &dts[2]);
651- }
652- if (reduc_index == 2)
653- vec_else_clause = reduc_def;
654- else
655- {
656- vec_else_clause = vect_get_vec_def_for_operand (else_clause,
657- stmt, NULL);
658- vect_is_simple_use (else_clause, loop_vinfo,
659+
660+ vec_cond_rhs =
661+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
662+ stmt, NULL);
663+ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
664+ NULL, &gtemp, &def, &dts[1]);
665+ if (reduc_index == 1)
666+ vec_then_clause = reduc_def;
667+ else
668+ {
669+ vec_then_clause = vect_get_vec_def_for_operand (then_clause,
670+ stmt, NULL);
671+ vect_is_simple_use (then_clause, loop_vinfo,
672+ NULL, &gtemp, &def, &dts[2]);
673+ }
674+ if (reduc_index == 2)
675+ vec_else_clause = reduc_def;
676+ else
677+ {
678+ vec_else_clause = vect_get_vec_def_for_operand (else_clause,
679+ stmt, NULL);
680+ vect_is_simple_use (else_clause, loop_vinfo,
681 NULL, &gtemp, &def, &dts[3]);
682+ }
683 }
684 }
685 else
686 {
687- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
688- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
689+ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
690+ VEC_pop (tree, vec_oprnds0));
691+ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
692+ VEC_pop (tree, vec_oprnds1));
693 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
694- vec_then_clause);
695+ VEC_pop (tree, vec_oprnds2));
696 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
697- vec_else_clause);
698+ VEC_pop (tree, vec_oprnds3));
699+ }
700+
701+ if (!slp_node)
702+ {
703+ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
704+ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
705+ VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
706+ VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
707 }
708
709 /* Arguments are ready. Create the new vector stmt. */
710- vec_compare = build2 (TREE_CODE (cond_expr), vectype,
711- vec_cond_lhs, vec_cond_rhs);
712- vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
713- vec_compare, vec_then_clause, vec_else_clause);
714-
715- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
716- new_temp = make_ssa_name (vec_dest, new_stmt);
717- gimple_assign_set_lhs (new_stmt, new_temp);
718- vect_finish_stmt_generation (stmt, new_stmt, gsi);
719- if (j == 0)
720- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
721- else
722- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
723-
724- prev_stmt_info = vinfo_for_stmt (new_stmt);
725+ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
726+ {
727+ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
728+ vec_then_clause = VEC_index (tree, vec_oprnds2, i);
729+ vec_else_clause = VEC_index (tree, vec_oprnds3, i);
730+
731+ vec_compare = build2 (TREE_CODE (cond_expr), vectype,
732+ vec_cond_lhs, vec_cond_rhs);
733+ vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
734+ vec_compare, vec_then_clause, vec_else_clause);
735+
736+ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
737+ new_temp = make_ssa_name (vec_dest, new_stmt);
738+ gimple_assign_set_lhs (new_stmt, new_temp);
739+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
740+ if (slp_node)
741+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
742+ }
743+
744+ if (slp_node)
745+ continue;
746+
747+ if (j == 0)
748+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
749+ else
750+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
751+
752+ prev_stmt_info = vinfo_for_stmt (new_stmt);
753 }
754
755+ VEC_free (tree, heap, vec_oprnds0);
756+ VEC_free (tree, heap, vec_oprnds1);
757+ VEC_free (tree, heap, vec_oprnds2);
758+ VEC_free (tree, heap, vec_oprnds3);
759+
760 return true;
761 }
762
763@@ -5217,7 +5278,7 @@
764 || vectorizable_call (stmt, NULL, NULL)
765 || vectorizable_store (stmt, NULL, NULL, NULL)
766 || vectorizable_reduction (stmt, NULL, NULL, NULL)
767- || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
768+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
769 else
770 {
771 if (bb_vinfo)
772@@ -5227,7 +5288,8 @@
773 || vectorizable_operation (stmt, NULL, NULL, node)
774 || vectorizable_assignment (stmt, NULL, NULL, node)
775 || vectorizable_load (stmt, NULL, NULL, node, NULL)
776- || vectorizable_store (stmt, NULL, NULL, node));
777+ || vectorizable_store (stmt, NULL, NULL, node)
778+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
779 }
780
781 if (!ok)
782@@ -5343,8 +5405,7 @@
783 break;
784
785 case condition_vec_info_type:
786- gcc_assert (!slp_node);
787- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
788+ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
789 gcc_assert (done);
790 break;
791
792
793=== modified file 'gcc/tree-vectorizer.h'
794--- old/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000
795+++ new/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000
796@@ -832,7 +832,7 @@
797 extern void vect_remove_stores (gimple);
798 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
799 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
800- tree, int);
801+ tree, int, slp_tree);
802 extern void vect_get_load_cost (struct data_reference *, int, bool,
803 unsigned int *, unsigned int *);
804 extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
805
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch
new file mode 100644
index 000000000..e501959c7
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch
@@ -0,0 +1,495 @@
12011-11-27 Ira Rosen <ira.rosen@linaro.org>
2
3 gcc/
4 * tree-vectorizer.h (vect_pattern_recog): Add new argument.
5 * tree-vect-loop.c (vect_analyze_loop_2): Update call to
6 vect_pattern_recog.
7 * tree-vect-patterns.c (widened_name_p): Pass basic block
8 info to vect_is_simple_use.
9 (vect_recog_dot_prod_pattern): Fail for basic blocks.
10 (vect_recog_widen_sum_pattern): Likewise.
11 (vect_handle_widen_op_by_const): Support basic blocks.
12 (vect_operation_fits_smaller_type,
13 vect_recog_over_widening_pattern): Likewise.
14 (vect_recog_mixed_size_cond_pattern): Support basic blocks.
15 Add printing.
16 (vect_mark_pattern_stmts): Update calls to new_stmt_vec_info.
17 (vect_pattern_recog_1): Check for reduction only in loops.
18 (vect_pattern_recog): Add new argument. Support basic blocks.
19 * tree-vect-stmts.c (vectorizable_conversion): Pass basic block
20 info to vect_is_simple_use_1.
21 * tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic
22 blocks.
23 (vect_slp_analyze_bb_1): Call vect_pattern_recog.
24
25 gcc/testsuite/
26 * gcc.dg/vect/bb-slp-pattern-1.c: New test.
27 * gcc.dg/vect/bb-slp-pattern-2.c: New test.
28
29=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c'
30--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000
31+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000
32@@ -0,0 +1,55 @@
33+/* { dg-require-effective-target vect_int } */
34+
35+#include <stdarg.h>
36+#include "tree-vect.h"
37+
38+#define N 8
39+
40+unsigned short X[N];
41+unsigned short Y[N];
42+unsigned int result[N];
43+
44+/* unsigned short->unsigned int widening-mult. */
45+__attribute__ ((noinline, noclone)) void
46+foo (void)
47+{
48+ result[0] = (unsigned int)(X[0] * Y[0]);
49+ result[1] = (unsigned int)(X[1] * Y[1]);
50+ result[2] = (unsigned int)(X[2] * Y[2]);
51+ result[3] = (unsigned int)(X[3] * Y[3]);
52+ result[4] = (unsigned int)(X[4] * Y[4]);
53+ result[5] = (unsigned int)(X[5] * Y[5]);
54+ result[6] = (unsigned int)(X[6] * Y[6]);
55+ result[7] = (unsigned int)(X[7] * Y[7]);
56+}
57+
58+int main (void)
59+{
60+ int i, tmp;
61+
62+ check_vect ();
63+
64+ for (i = 0; i < N; i++)
65+ {
66+ X[i] = i;
67+ Y[i] = 64-i;
68+ }
69+
70+ foo ();
71+
72+ for (i = 0; i < N; i++)
73+ {
74+ __asm__ volatile ("");
75+ tmp = X[i] * Y[i];
76+ if (result[i] != tmp)
77+ abort ();
78+ }
79+
80+ return 0;
81+}
82+
83+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
84+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
85+/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
86+/* { dg-final { cleanup-tree-dump "slp" } } */
87+
88
89=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c'
90--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000
91+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000
92@@ -0,0 +1,53 @@
93+/* { dg-require-effective-target vect_condition } */
94+
95+#include "tree-vect.h"
96+
97+#define N 128
98+
99+__attribute__((noinline, noclone)) void
100+foo (short * __restrict__ a, int * __restrict__ b, int stride)
101+{
102+ int i;
103+
104+ for (i = 0; i < N/stride; i++, a += stride, b += stride)
105+ {
106+ a[0] = b[0] ? 1 : 7;
107+ a[1] = b[1] ? 2 : 0;
108+ a[2] = b[2] ? 3 : 0;
109+ a[3] = b[3] ? 4 : 0;
110+ a[4] = b[4] ? 5 : 0;
111+ a[5] = b[5] ? 6 : 0;
112+ a[6] = b[6] ? 7 : 0;
113+ a[7] = b[7] ? 8 : 0;
114+ }
115+}
116+
117+short a[N];
118+int b[N];
119+int main ()
120+{
121+ int i;
122+
123+ check_vect ();
124+
125+ for (i = 0; i < N; i++)
126+ {
127+ a[i] = i;
128+ b[i] = -i;
129+ }
130+
131+ foo (a, b, 8);
132+
133+ for (i = 1; i < N; i++)
134+ if (a[i] != i%8 + 1)
135+ abort ();
136+
137+ if (a[0] != 7)
138+ abort ();
139+
140+ return 0;
141+}
142+
143+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */
144+/* { dg-final { cleanup-tree-dump "slp" } } */
145+
146
147=== modified file 'gcc/tree-vect-loop.c'
148--- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000
149+++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000
150@@ -1458,7 +1458,7 @@
151
152 vect_analyze_scalar_cycles (loop_vinfo);
153
154- vect_pattern_recog (loop_vinfo);
155+ vect_pattern_recog (loop_vinfo, NULL);
156
157 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
158
159
160=== modified file 'gcc/tree-vect-patterns.c'
161--- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000
162+++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000
163@@ -83,11 +83,13 @@
164 tree oprnd0;
165 enum vect_def_type dt;
166 tree def;
167+ bb_vec_info bb_vinfo;
168
169 stmt_vinfo = vinfo_for_stmt (use_stmt);
170 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
171+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
172
173- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
174+ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt))
175 return false;
176
177 if (dt != vect_internal_def
178@@ -111,7 +113,7 @@
179 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
180 return false;
181
182- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
183+ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy,
184 &dt))
185 return false;
186
187@@ -188,9 +190,14 @@
188 gimple pattern_stmt;
189 tree prod_type;
190 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
191- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
192+ struct loop *loop;
193 tree var, rhs;
194
195+ if (!loop_info)
196+ return NULL;
197+
198+ loop = LOOP_VINFO_LOOP (loop_info);
199+
200 if (!is_gimple_assign (last_stmt))
201 return NULL;
202
203@@ -358,8 +365,16 @@
204 {
205 tree new_type, new_oprnd, tmp;
206 gimple new_stmt;
207- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
208- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
209+ loop_vec_info loop_vinfo;
210+ struct loop *loop = NULL;
211+ bb_vec_info bb_vinfo;
212+ stmt_vec_info stmt_vinfo;
213+
214+ stmt_vinfo = vinfo_for_stmt (stmt);
215+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
216+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
217+ if (loop_vinfo)
218+ loop = LOOP_VINFO_LOOP (loop_vinfo);
219
220 if (code != MULT_EXPR && code != LSHIFT_EXPR)
221 return false;
222@@ -377,7 +392,9 @@
223
224 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
225 || !gimple_bb (def_stmt)
226- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
227+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
228+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo)
229+ && gimple_code (def_stmt) != GIMPLE_PHI)
230 || !vinfo_for_stmt (def_stmt))
231 return false;
232
233@@ -774,9 +791,14 @@
234 tree type, half_type;
235 gimple pattern_stmt;
236 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
237- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
238+ struct loop *loop;
239 tree var;
240
241+ if (!loop_info)
242+ return NULL;
243+
244+ loop = LOOP_VINFO_LOOP (loop_info);
245+
246 if (!is_gimple_assign (last_stmt))
247 return NULL;
248
249@@ -877,7 +899,11 @@
250 gimple def_stmt, new_stmt;
251 bool first = false;
252 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
253- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
254+ bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
255+ struct loop *loop = NULL;
256+
257+ if (loop_info)
258+ loop = LOOP_VINFO_LOOP (loop_info);
259
260 *new_def_stmt = NULL;
261
262@@ -909,7 +935,9 @@
263 first = true;
264 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
265 || !gimple_bb (def_stmt)
266- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
267+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
268+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
269+ && gimple_code (def_stmt) != GIMPLE_PHI)
270 || !vinfo_for_stmt (def_stmt))
271 return false;
272 }
273@@ -1087,7 +1115,16 @@
274 int nuses = 0;
275 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
276 bool first;
277- struct loop *loop = (gimple_bb (stmt))->loop_father;
278+ loop_vec_info loop_vinfo;
279+ struct loop *loop = NULL;
280+ bb_vec_info bb_vinfo;
281+ stmt_vec_info stmt_vinfo;
282+
283+ stmt_vinfo = vinfo_for_stmt (stmt);
284+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
285+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
286+ if (loop_vinfo)
287+ loop = LOOP_VINFO_LOOP (loop_vinfo);
288
289 first = true;
290 while (1)
291@@ -1120,7 +1157,8 @@
292
293 if (nuses != 1 || !is_gimple_assign (use_stmt)
294 || !gimple_bb (use_stmt)
295- || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
296+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
297+ || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo)))
298 return NULL;
299
300 /* Create pattern statement for STMT. */
301@@ -1485,6 +1523,7 @@
302 enum machine_mode cmpmode;
303 gimple pattern_stmt, def_stmt;
304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
305+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
306
307 if (!is_gimple_assign (last_stmt)
308 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
309@@ -1538,7 +1577,8 @@
310 tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
311 fold_convert (comp_type, then_clause),
312 fold_convert (comp_type, else_clause));
313- def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp);
314+ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL),
315+ tmp);
316
317 pattern_stmt
318 = gimple_build_assign_with_ops (NOP_EXPR,
319@@ -1546,12 +1586,15 @@
320 gimple_assign_lhs (def_stmt), NULL_TREE);
321
322 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
323- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
324+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
325 set_vinfo_for_stmt (def_stmt, def_stmt_info);
326 STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype;
327 *type_in = vectype;
328 *type_out = vectype;
329
330+ if (vect_print_dump_info (REPORT_DETAILS))
331+ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
332+
333 return pattern_stmt;
334 }
335
336@@ -1565,10 +1608,11 @@
337 stmt_vec_info pattern_stmt_info, def_stmt_info;
338 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
339 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
340+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
341 gimple def_stmt;
342
343 set_vinfo_for_stmt (pattern_stmt,
344- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
345+ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo));
346 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
347 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
348
349@@ -1586,7 +1630,7 @@
350 def_stmt_info = vinfo_for_stmt (def_stmt);
351 if (def_stmt_info == NULL)
352 {
353- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
354+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
355 set_vinfo_for_stmt (def_stmt, def_stmt_info);
356 }
357 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
358@@ -1697,9 +1741,10 @@
359
360 /* Patterns cannot be vectorized using SLP, because they change the order of
361 computation. */
362- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
363- if (next == stmt)
364- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
365+ if (loop_vinfo)
366+ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
367+ if (next == stmt)
368+ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
369
370 /* It is possible that additional pattern stmts are created and inserted in
371 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
372@@ -1799,26 +1844,46 @@
373 be recorded in S3. */
374
375 void
376-vect_pattern_recog (loop_vec_info loop_vinfo)
377+vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
378 {
379- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
380- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
381- unsigned int nbbs = loop->num_nodes;
382+ struct loop *loop;
383+ basic_block *bbs, bb;
384+ unsigned int nbbs;
385 gimple_stmt_iterator si;
386 unsigned int i, j;
387 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
388 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
389+ gimple stmt;
390
391 if (vect_print_dump_info (REPORT_DETAILS))
392 fprintf (vect_dump, "=== vect_pattern_recog ===");
393
394- /* Scan through the loop stmts, applying the pattern recognition
395+ if (loop_vinfo)
396+ {
397+ loop = LOOP_VINFO_LOOP (loop_vinfo);
398+ bbs = LOOP_VINFO_BBS (loop_vinfo);
399+ nbbs = loop->num_nodes;
400+ }
401+ else
402+ {
403+ bb = BB_VINFO_BB (bb_vinfo);
404+ nbbs = 1;
405+ bbs = XNEW (basic_block);
406+ bbs[0] = bb;
407+ }
408+
409+ /* Scan through the stmts, applying the pattern recognition
410 functions starting at each stmt visited: */
411 for (i = 0; i < nbbs; i++)
412 {
413 basic_block bb = bbs[i];
414 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
415 {
416+ if (bb_vinfo && (stmt = gsi_stmt (si))
417+ && vinfo_for_stmt (stmt)
418+ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
419+ continue;
420+
421 /* Scan over all generic vect_recog_xxx_pattern functions. */
422 for (j = 0; j < NUM_PATTERNS; j++)
423 {
424@@ -1830,4 +1895,6 @@
425 }
426
427 VEC_free (gimple, heap, stmts_to_replace);
428+ if (bb_vinfo)
429+ free (bbs);
430 }
431
432=== modified file 'gcc/tree-vect-slp.c'
433--- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000
434+++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000
435@@ -255,12 +255,14 @@
436 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
437 from the pattern. Check that all the stmts of the node are in the
438 pattern. */
439- if (loop && def_stmt && gimple_bb (def_stmt)
440- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
441+ if (def_stmt && gimple_bb (def_stmt)
442+ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
443+ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
444+ && gimple_code (def_stmt) != GIMPLE_PHI))
445 && vinfo_for_stmt (def_stmt)
446 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
447- && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
448- && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
449+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
450+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
451 {
452 pattern = true;
453 if (!first && !oprnd_info->first_pattern)
454@@ -1972,6 +1974,8 @@
455 return NULL;
456 }
457
458+ vect_pattern_recog (NULL, bb_vinfo);
459+
460 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
461 &data_dependence_in_bb)
462 || min_vf > max_vf
463
464=== modified file 'gcc/tree-vect-stmts.c'
465--- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000
466+++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000
467@@ -3451,11 +3451,11 @@
468 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
469 OP1. */
470 if (CONSTANT_CLASS_P (op0))
471- ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
472+ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo,
473 &def_stmt, &def, &dt[1], &vectype_in);
474 else
475- ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
476- &dt[1]);
477+ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt,
478+ &def, &dt[1]);
479
480 if (!ok)
481 {
482
483=== modified file 'gcc/tree-vectorizer.h'
484--- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000
485+++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000
486@@ -918,7 +918,7 @@
487 in the future. */
488 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
489 #define NUM_PATTERNS 7
490-void vect_pattern_recog (loop_vec_info);
491+void vect_pattern_recog (loop_vec_info, bb_vec_info);
492
493 /* In tree-vectorizer.c. */
494 unsigned vectorize_loops (void);
495
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch
new file mode 100644
index 000000000..17cfd1068
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch
@@ -0,0 +1,1818 @@
12011-11-28 David Alan Gilbert <david.gilbert@linaro.org>
2
3 Backport from mainline (svn r19983):
4
5 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org>
6
7 gcc/testsuite/
8 * gcc.dg/di-longlong64-sync-1.c: New test.
9 * gcc.dg/di-sync-multithread.c: New test.
10 * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test.
11 * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test.
12 * lib/target-supports.exp: (arm_arch_*_ok): Series of effective-target
13 tests for v5, v6, v6k, and v7-a, and add-options helpers.
14 (check_effective_target_arm_arm_ok): New helper.
15 (check_effective_target_sync_longlong): New helper.
16
172011-11-28 David Alan Gilbert <david.gilbert@linaro.org>
18
19 Backport from mainline (svn r19982):
20
21 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org>
22
23 gcc/
24 * config/arm/linux-atomic-64bit.c: New (based on linux-atomic.c).
25 * config/arm/linux-atomic.c: Change comment to point to 64bit version.
26 (SYNC_LOCK_RELEASE): Instantiate 64bit version.
27 * config/arm/t-linux-eabi: Pull in linux-atomic-64bit.c.
28
292011-11-28 David Alan Gilbert <david.gilbert@linaro.org>
30
31 Backport from mainline (svn r19981):
32
33 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org>
34
35 gcc/
36 * config/arm/arm.c (arm_output_ldrex): Support ldrexd.
37 (arm_output_strex): Support strexd.
38 (arm_output_it): New helper to output it in Thumb2 mode only.
39 (arm_output_sync_loop): Support DI mode. Change comment to
40 not support const_int.
41 (arm_expand_sync): Support DI mode.
42 * config/arm/arm.h (TARGET_HAVE_LDREXBHD): Split into LDREXBH
43 and LDREXD.
44 * config/arm/iterators.md (NARROW): Move from sync.md.
45 (QHSD): New iterator for all current ARM integer modes.
46 (SIDI): New iterator for SI and DI modes only.
47 * config/arm/sync.md (sync_predtab): New mode_attr.
48 (sync_compare_and_swapsi): Fold into sync_compare_and_swap<mode>.
49 (sync_lock_test_and_setsi): Fold into sync_lock_test_and_set<mode>.
50 (sync_<sync_optab>si): Fold into sync_<sync_optab><mode>.
51 (sync_nandsi): Fold into sync_nand<mode>.
52 (sync_new_<sync_optab>si): Fold into sync_new_<sync_optab><mode>.
53 (sync_new_nandsi): Fold into sync_new_nand<mode>.
54 (sync_old_<sync_optab>si): Fold into sync_old_<sync_optab><mode>.
55 (sync_old_nandsi): Fold into sync_old_nand<mode>.
56 (sync_compare_and_swap<mode>): Support SI & DI.
57 (sync_lock_test_and_set<mode>): Likewise.
58 (sync_<sync_optab><mode>): Likewise.
59 (sync_nand<mode>): Likewise.
60 (sync_new_<sync_optab><mode>): Likewise.
61 (sync_new_nand<mode>): Likewise.
62 (sync_old_<sync_optab><mode>): Likewise.
63 (sync_old_nand<mode>): Likewise.
64 (arm_sync_compare_and_swapsi): Turn into iterator on SI & DI.
65 (arm_sync_lock_test_and_setsi): Likewise.
66 (arm_sync_new_<sync_optab>si): Likewise.
67 (arm_sync_new_nandsi): Likewise.
68 (arm_sync_old_<sync_optab>si): Likewise.
69 (arm_sync_old_nandsi): Likewise.
70 (arm_sync_compare_and_swap<mode> NARROW): Use sync_predtab, fix indent.
71 (arm_sync_lock_test_and_set<mode> NARROW): Likewise.
72 (arm_sync_new_<sync_optab><mode> NARROW): Likewise.
73 (arm_sync_new_nand<mode> NARROW): Likewise.
74 (arm_sync_old_<sync_optab><mode> NARROW): Likewise.
75 (arm_sync_old_nand<mode> NARROW): Likewise.
76
772011-11-28 David Alan Gilbert <david.gilbert@linaro.org>
78
79 Backport from mainline (svn r19980):
80
81 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org>
82
83 PR target/48126
84
85 gcc/
86 * config/arm/arm.c (arm_output_sync_loop): Move label before barrier.
87
882011-11-28 David Alan Gilbert <david.gilbert@linaro.org>
89
90 Backport from mainline (svn r19979):
91
92 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org>
93
94 gcc/
95 * config/arm/arm.h (TARGET_HAVE_DMB_MCR): MCR Not available in Thumb1.
96
97=== modified file 'gcc/config/arm/arm.c'
98--- old/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000
99+++ new/gcc/config/arm/arm.c 2011-11-28 15:07:01 +0000
100@@ -24307,12 +24307,26 @@
101 rtx target,
102 rtx memory)
103 {
104- const char *suffix = arm_ldrex_suffix (mode);
105- rtx operands[2];
106+ rtx operands[3];
107
108 operands[0] = target;
109- operands[1] = memory;
110- arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
111+ if (mode != DImode)
112+ {
113+ const char *suffix = arm_ldrex_suffix (mode);
114+ operands[1] = memory;
115+ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
116+ }
117+ else
118+ {
119+ /* The restrictions on target registers in ARM mode are that the two
120+ registers are consecutive and the first one is even; Thumb is
121+ actually more flexible, but DI should give us this anyway.
122+ Note that the 1st register always gets the lowest word in memory. */
123+ gcc_assert ((REGNO (target) & 1) == 0);
124+ operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
125+ operands[2] = memory;
126+ arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
127+ }
128 }
129
130 /* Emit a strex{b,h,d, } instruction appropriate for the specified
131@@ -24325,14 +24339,41 @@
132 rtx value,
133 rtx memory)
134 {
135- const char *suffix = arm_ldrex_suffix (mode);
136- rtx operands[3];
137+ rtx operands[4];
138
139 operands[0] = result;
140 operands[1] = value;
141- operands[2] = memory;
142- arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
143- cc);
144+ if (mode != DImode)
145+ {
146+ const char *suffix = arm_ldrex_suffix (mode);
147+ operands[2] = memory;
148+ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
149+ suffix, cc);
150+ }
151+ else
152+ {
153+ /* The restrictions on target registers in ARM mode are that the two
154+ registers are consecutive and the first one is even; Thumb is
155+ actually more flexible, but DI should give us this anyway.
156+ Note that the 1st register always gets the lowest word in memory. */
157+ gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
158+ operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
159+ operands[3] = memory;
160+ arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
161+ cc);
162+ }
163+}
164+
165+/* Helper to emit an it instruction in Thumb2 mode only; although the assembler
166+ will ignore it in ARM mode, emitting it will mess up instruction counts we
167+ sometimes keep 'flags' are the extra t's and e's if it's more than one
168+ instruction that is conditional. */
169+static void
170+arm_output_it (emit_f emit, const char *flags, const char *cond)
171+{
172+ rtx operands[1]; /* Don't actually use the operand. */
173+ if (TARGET_THUMB2)
174+ arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
175 }
176
177 /* Helper to emit a two operand instruction. */
178@@ -24374,7 +24415,7 @@
179
180 required_value:
181
182- RTX register or const_int representing the required old_value for
183+ RTX register representing the required old_value for
184 the modify to continue, if NULL no comparsion is performed. */
185 static void
186 arm_output_sync_loop (emit_f emit,
187@@ -24388,7 +24429,13 @@
188 enum attr_sync_op sync_op,
189 int early_barrier_required)
190 {
191- rtx operands[1];
192+ rtx operands[2];
193+ /* We'll use the lo for the normal rtx in the none-DI case
194+ as well as the least-sig word in the DI case. */
195+ rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
196+ rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
197+
198+ bool is_di = mode == DImode;
199
200 gcc_assert (t1 != t2);
201
202@@ -24399,82 +24446,142 @@
203
204 arm_output_ldrex (emit, mode, old_value, memory);
205
206+ if (is_di)
207+ {
208+ old_value_lo = gen_lowpart (SImode, old_value);
209+ old_value_hi = gen_highpart (SImode, old_value);
210+ if (required_value)
211+ {
212+ required_value_lo = gen_lowpart (SImode, required_value);
213+ required_value_hi = gen_highpart (SImode, required_value);
214+ }
215+ else
216+ {
217+ /* Silence false potentially unused warning. */
218+ required_value_lo = NULL_RTX;
219+ required_value_hi = NULL_RTX;
220+ }
221+ new_value_lo = gen_lowpart (SImode, new_value);
222+ new_value_hi = gen_highpart (SImode, new_value);
223+ t1_lo = gen_lowpart (SImode, t1);
224+ t1_hi = gen_highpart (SImode, t1);
225+ }
226+ else
227+ {
228+ old_value_lo = old_value;
229+ new_value_lo = new_value;
230+ required_value_lo = required_value;
231+ t1_lo = t1;
232+
233+ /* Silence false potentially unused warning. */
234+ t1_hi = NULL_RTX;
235+ new_value_hi = NULL_RTX;
236+ required_value_hi = NULL_RTX;
237+ old_value_hi = NULL_RTX;
238+ }
239+
240 if (required_value)
241 {
242- rtx operands[2];
243+ operands[0] = old_value_lo;
244+ operands[1] = required_value_lo;
245
246- operands[0] = old_value;
247- operands[1] = required_value;
248 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
249+ if (is_di)
250+ {
251+ arm_output_it (emit, "", "eq");
252+ arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
253+ }
254 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
255 }
256
257 switch (sync_op)
258 {
259 case SYNC_OP_ADD:
260- arm_output_op3 (emit, "add", t1, old_value, new_value);
261+ arm_output_op3 (emit, is_di ? "adds" : "add",
262+ t1_lo, old_value_lo, new_value_lo);
263+ if (is_di)
264+ arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
265 break;
266
267 case SYNC_OP_SUB:
268- arm_output_op3 (emit, "sub", t1, old_value, new_value);
269+ arm_output_op3 (emit, is_di ? "subs" : "sub",
270+ t1_lo, old_value_lo, new_value_lo);
271+ if (is_di)
272+ arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
273 break;
274
275 case SYNC_OP_IOR:
276- arm_output_op3 (emit, "orr", t1, old_value, new_value);
277+ arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
278+ if (is_di)
279+ arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
280 break;
281
282 case SYNC_OP_XOR:
283- arm_output_op3 (emit, "eor", t1, old_value, new_value);
284+ arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
285+ if (is_di)
286+ arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
287 break;
288
289 case SYNC_OP_AND:
290- arm_output_op3 (emit,"and", t1, old_value, new_value);
291+ arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo);
292+ if (is_di)
293+ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
294 break;
295
296 case SYNC_OP_NAND:
297- arm_output_op3 (emit, "and", t1, old_value, new_value);
298- arm_output_op2 (emit, "mvn", t1, t1);
299+ arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
300+ if (is_di)
301+ arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
302+ arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
303+ if (is_di)
304+ arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
305 break;
306
307 case SYNC_OP_NONE:
308 t1 = new_value;
309+ t1_lo = new_value_lo;
310+ if (is_di)
311+ t1_hi = new_value_hi;
312 break;
313 }
314
315+ /* Note that the result of strex is a 0/1 flag that's always 1 register. */
316 if (t2)
317 {
318- arm_output_strex (emit, mode, "", t2, t1, memory);
319- operands[0] = t2;
320- arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
321- arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
322- LOCAL_LABEL_PREFIX);
323+ arm_output_strex (emit, mode, "", t2, t1, memory);
324+ operands[0] = t2;
325+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
326+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
327+ LOCAL_LABEL_PREFIX);
328 }
329 else
330 {
331 /* Use old_value for the return value because for some operations
332 the old_value can easily be restored. This saves one register. */
333- arm_output_strex (emit, mode, "", old_value, t1, memory);
334- operands[0] = old_value;
335+ arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
336+ operands[0] = old_value_lo;
337 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
338 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
339 LOCAL_LABEL_PREFIX);
340
341+ /* Note that we only used the _lo half of old_value as a temporary
342+ so in DI we don't have to restore the _hi part. */
343 switch (sync_op)
344 {
345 case SYNC_OP_ADD:
346- arm_output_op3 (emit, "sub", old_value, t1, new_value);
347+ arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
348 break;
349
350 case SYNC_OP_SUB:
351- arm_output_op3 (emit, "add", old_value, t1, new_value);
352+ arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
353 break;
354
355 case SYNC_OP_XOR:
356- arm_output_op3 (emit, "eor", old_value, t1, new_value);
357+ arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
358 break;
359
360 case SYNC_OP_NONE:
361- arm_output_op2 (emit, "mov", old_value, required_value);
362+ arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
363 break;
364
365 default:
366@@ -24482,8 +24589,11 @@
367 }
368 }
369
370+ /* Note: label is before barrier so that in cmp failure case we still get
371+ a barrier to stop subsequent loads floating upwards past the ldrex
372+ (PR target/48126). */
373+ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
374 arm_process_output_memory_barrier (emit, NULL);
375- arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
376 }
377
378 static rtx
379@@ -24577,7 +24687,7 @@
380 target = gen_reg_rtx (mode);
381
382 memory = arm_legitimize_sync_memory (memory);
383- if (mode != SImode)
384+ if (mode != SImode && mode != DImode)
385 {
386 rtx load_temp = gen_reg_rtx (SImode);
387
388
389=== modified file 'gcc/config/arm/arm.h'
390--- old/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000
391+++ new/gcc/config/arm/arm.h 2011-11-28 15:07:01 +0000
392@@ -300,7 +300,8 @@
393 #define TARGET_HAVE_DMB (arm_arch7)
394
395 /* Nonzero if this chip implements a memory barrier via CP15. */
396-#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB)
397+#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \
398+ && ! TARGET_THUMB1)
399
400 /* Nonzero if this chip implements a memory barrier instruction. */
401 #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
402@@ -308,8 +309,12 @@
403 /* Nonzero if this chip supports ldrex and strex */
404 #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7)
405
406-/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */
407-#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7)
408+/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */
409+#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7)
410+
411+/* Nonzero if this chip supports ldrexd and strexd. */
412+#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \
413+ && arm_arch_notm)
414
415 /* Nonzero if integer division instructions supported. */
416 #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
417
418=== modified file 'gcc/config/arm/iterators.md'
419--- old/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000
420+++ new/gcc/config/arm/iterators.md 2011-11-28 15:07:01 +0000
421@@ -33,6 +33,15 @@
422 ;; A list of integer modes that are up to one word long
423 (define_mode_iterator QHSI [QI HI SI])
424
425+;; A list of integer modes that are less than a word
426+(define_mode_iterator NARROW [QI HI])
427+
428+;; A list of all the integer modes up to 64bit
429+(define_mode_iterator QHSD [QI HI SI DI])
430+
431+;; A list of the 32bit and 64bit integer modes
432+(define_mode_iterator SIDI [SI DI])
433+
434 ;; Integer element sizes implemented by IWMMXT.
435 (define_mode_iterator VMMX [V2SI V4HI V8QI])
436
437
438=== added file 'gcc/config/arm/linux-atomic-64bit.c'
439--- old/gcc/config/arm/linux-atomic-64bit.c 1970-01-01 00:00:00 +0000
440+++ new/gcc/config/arm/linux-atomic-64bit.c 2011-10-14 15:50:44 +0000
441@@ -0,0 +1,166 @@
442+/* 64bit Linux-specific atomic operations for ARM EABI.
443+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
444+ Based on linux-atomic.c
445+
446+ 64 bit additions david.gilbert@linaro.org
447+
448+This file is part of GCC.
449+
450+GCC is free software; you can redistribute it and/or modify it under
451+the terms of the GNU General Public License as published by the Free
452+Software Foundation; either version 3, or (at your option) any later
453+version.
454+
455+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
456+WARRANTY; without even the implied warranty of MERCHANTABILITY or
457+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
458+for more details.
459+
460+Under Section 7 of GPL version 3, you are granted additional
461+permissions described in the GCC Runtime Library Exception, version
462+3.1, as published by the Free Software Foundation.
463+
464+You should have received a copy of the GNU General Public License and
465+a copy of the GCC Runtime Library Exception along with this program;
466+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
467+<http://www.gnu.org/licenses/>. */
468+
469+/* 64bit helper functions for atomic operations; the compiler will
470+ call these when the code is compiled for a CPU without ldrexd/strexd.
471+ (If the CPU had those then the compiler inlines the operation).
472+
473+ These helpers require a kernel helper that's only present on newer
474+ kernels; we check for that in an init section and bail out rather
475+ unceremoniously. */
476+
477+extern unsigned int __write (int fd, const void *buf, unsigned int count);
478+extern void abort (void);
479+
480+/* Kernel helper for compare-and-exchange. */
481+typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
482+ const long long* newval,
483+ long long *ptr);
484+#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60)
485+
486+/* Kernel helper page version number. */
487+#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
488+
489+/* Check that the kernel has a new enough version at load. */
490+static void __check_for_sync8_kernelhelper (void)
491+{
492+ if (__kernel_helper_version < 5)
493+ {
494+ const char err[] = "A newer kernel is required to run this binary. "
495+ "(__kernel_cmpxchg64 helper)\n";
496+ /* At this point we need a way to crash with some information
497+ for the user - I'm not sure I can rely on much else being
498+ available at this point, so do the same as generic-morestack.c
499+ write () and abort (). */
500+ __write (2 /* stderr. */, err, sizeof (err));
501+ abort ();
502+ }
503+};
504+
505+static void (*__sync8_kernelhelper_inithook[]) (void)
506+ __attribute__ ((used, section (".init_array"))) = {
507+ &__check_for_sync8_kernelhelper
508+};
509+
510+#define HIDDEN __attribute__ ((visibility ("hidden")))
511+
512+#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP) \
513+ long long HIDDEN \
514+ __sync_fetch_and_##OP##_8 (long long *ptr, long long val) \
515+ { \
516+ int failure; \
517+ long long tmp,tmp2; \
518+ \
519+ do { \
520+ tmp = *ptr; \
521+ tmp2 = PFX_OP (tmp INF_OP val); \
522+ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \
523+ } while (failure != 0); \
524+ \
525+ return tmp; \
526+ }
527+
528+FETCH_AND_OP_WORD64 (add, , +)
529+FETCH_AND_OP_WORD64 (sub, , -)
530+FETCH_AND_OP_WORD64 (or, , |)
531+FETCH_AND_OP_WORD64 (and, , &)
532+FETCH_AND_OP_WORD64 (xor, , ^)
533+FETCH_AND_OP_WORD64 (nand, ~, &)
534+
535+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
536+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
537+
538+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
539+ 64-bit quantities. */
540+
541+#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP) \
542+ long long HIDDEN \
543+ __sync_##OP##_and_fetch_8 (long long *ptr, long long val) \
544+ { \
545+ int failure; \
546+ long long tmp,tmp2; \
547+ \
548+ do { \
549+ tmp = *ptr; \
550+ tmp2 = PFX_OP (tmp INF_OP val); \
551+ failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \
552+ } while (failure != 0); \
553+ \
554+ return tmp2; \
555+ }
556+
557+OP_AND_FETCH_WORD64 (add, , +)
558+OP_AND_FETCH_WORD64 (sub, , -)
559+OP_AND_FETCH_WORD64 (or, , |)
560+OP_AND_FETCH_WORD64 (and, , &)
561+OP_AND_FETCH_WORD64 (xor, , ^)
562+OP_AND_FETCH_WORD64 (nand, ~, &)
563+
564+long long HIDDEN
565+__sync_val_compare_and_swap_8 (long long *ptr, long long oldval,
566+ long long newval)
567+{
568+ int failure;
569+ long long actual_oldval;
570+
571+ while (1)
572+ {
573+ actual_oldval = *ptr;
574+
575+ if (__builtin_expect (oldval != actual_oldval, 0))
576+ return actual_oldval;
577+
578+ failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr);
579+
580+ if (__builtin_expect (!failure, 1))
581+ return oldval;
582+ }
583+}
584+
585+typedef unsigned char bool;
586+
587+bool HIDDEN
588+__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval,
589+ long long newval)
590+{
591+ int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr);
592+ return (failure == 0);
593+}
594+
595+long long HIDDEN
596+__sync_lock_test_and_set_8 (long long *ptr, long long val)
597+{
598+ int failure;
599+ long long oldval;
600+
601+ do {
602+ oldval = *ptr;
603+ failure = __kernel_cmpxchg64 (&oldval, &val, ptr);
604+ } while (failure != 0);
605+
606+ return oldval;
607+}
608
609=== modified file 'gcc/config/arm/linux-atomic.c'
610--- old/gcc/config/arm/linux-atomic.c 2011-01-03 20:52:22 +0000
611+++ new/gcc/config/arm/linux-atomic.c 2011-10-14 15:50:44 +0000
612@@ -32,8 +32,8 @@
613 #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0)
614
615 /* Note: we implement byte, short and int versions of atomic operations using
616- the above kernel helpers, but there is no support for "long long" (64-bit)
617- operations as yet. */
618+ the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit)
619+ operations. */
620
621 #define HIDDEN __attribute__ ((visibility ("hidden")))
622
623@@ -273,6 +273,7 @@
624 *ptr = 0; \
625 }
626
627+SYNC_LOCK_RELEASE (long long, 8)
628 SYNC_LOCK_RELEASE (int, 4)
629 SYNC_LOCK_RELEASE (short, 2)
630 SYNC_LOCK_RELEASE (char, 1)
631
632=== modified file 'gcc/config/arm/sync.md'
633--- old/gcc/config/arm/sync.md 2010-12-31 13:25:33 +0000
634+++ new/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000
635@@ -1,6 +1,7 @@
636 ;; Machine description for ARM processor synchronization primitives.
637 ;; Copyright (C) 2010 Free Software Foundation, Inc.
638 ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com)
639+;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org)
640 ;;
641 ;; This file is part of GCC.
642 ;;
643@@ -33,31 +34,24 @@
644 MEM_VOLATILE_P (operands[0]) = 1;
645 })
646
647-(define_expand "sync_compare_and_swapsi"
648- [(set (match_operand:SI 0 "s_register_operand")
649- (unspec_volatile:SI [(match_operand:SI 1 "memory_operand")
650- (match_operand:SI 2 "s_register_operand")
651- (match_operand:SI 3 "s_register_operand")]
652- VUNSPEC_SYNC_COMPARE_AND_SWAP))]
653- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
654- {
655- struct arm_sync_generator generator;
656- generator.op = arm_sync_generator_omrn;
657- generator.u.omrn = gen_arm_sync_compare_and_swapsi;
658- arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2],
659- operands[3]);
660- DONE;
661- })
662
663-(define_mode_iterator NARROW [QI HI])
664+(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX &&
665+ TARGET_HAVE_MEMORY_BARRIER")
666+ (QI "TARGET_HAVE_LDREXBH &&
667+ TARGET_HAVE_MEMORY_BARRIER")
668+ (HI "TARGET_HAVE_LDREXBH &&
669+ TARGET_HAVE_MEMORY_BARRIER")
670+ (DI "TARGET_HAVE_LDREXD &&
671+ ARM_DOUBLEWORD_ALIGN &&
672+ TARGET_HAVE_MEMORY_BARRIER")])
673
674 (define_expand "sync_compare_and_swap<mode>"
675- [(set (match_operand:NARROW 0 "s_register_operand")
676- (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand")
677- (match_operand:NARROW 2 "s_register_operand")
678- (match_operand:NARROW 3 "s_register_operand")]
679+ [(set (match_operand:QHSD 0 "s_register_operand")
680+ (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand")
681+ (match_operand:QHSD 2 "s_register_operand")
682+ (match_operand:QHSD 3 "s_register_operand")]
683 VUNSPEC_SYNC_COMPARE_AND_SWAP))]
684- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
685+ "<sync_predtab>"
686 {
687 struct arm_sync_generator generator;
688 generator.op = arm_sync_generator_omrn;
689@@ -67,25 +61,11 @@
690 DONE;
691 })
692
693-(define_expand "sync_lock_test_and_setsi"
694- [(match_operand:SI 0 "s_register_operand")
695- (match_operand:SI 1 "memory_operand")
696- (match_operand:SI 2 "s_register_operand")]
697- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
698- {
699- struct arm_sync_generator generator;
700- generator.op = arm_sync_generator_omn;
701- generator.u.omn = gen_arm_sync_lock_test_and_setsi;
702- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
703- operands[2]);
704- DONE;
705- })
706-
707 (define_expand "sync_lock_test_and_set<mode>"
708- [(match_operand:NARROW 0 "s_register_operand")
709- (match_operand:NARROW 1 "memory_operand")
710- (match_operand:NARROW 2 "s_register_operand")]
711- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
712+ [(match_operand:QHSD 0 "s_register_operand")
713+ (match_operand:QHSD 1 "memory_operand")
714+ (match_operand:QHSD 2 "s_register_operand")]
715+ "<sync_predtab>"
716 {
717 struct arm_sync_generator generator;
718 generator.op = arm_sync_generator_omn;
719@@ -115,51 +95,25 @@
720 (plus "*")
721 (minus "*")])
722
723-(define_expand "sync_<sync_optab>si"
724- [(match_operand:SI 0 "memory_operand")
725- (match_operand:SI 1 "s_register_operand")
726- (syncop:SI (match_dup 0) (match_dup 1))]
727- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
728- {
729- struct arm_sync_generator generator;
730- generator.op = arm_sync_generator_omn;
731- generator.u.omn = gen_arm_sync_new_<sync_optab>si;
732- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
733- DONE;
734- })
735-
736-(define_expand "sync_nandsi"
737- [(match_operand:SI 0 "memory_operand")
738- (match_operand:SI 1 "s_register_operand")
739- (not:SI (and:SI (match_dup 0) (match_dup 1)))]
740- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
741- {
742- struct arm_sync_generator generator;
743- generator.op = arm_sync_generator_omn;
744- generator.u.omn = gen_arm_sync_new_nandsi;
745- arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
746- DONE;
747- })
748-
749 (define_expand "sync_<sync_optab><mode>"
750- [(match_operand:NARROW 0 "memory_operand")
751- (match_operand:NARROW 1 "s_register_operand")
752- (syncop:NARROW (match_dup 0) (match_dup 1))]
753- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
754+ [(match_operand:QHSD 0 "memory_operand")
755+ (match_operand:QHSD 1 "s_register_operand")
756+ (syncop:QHSD (match_dup 0) (match_dup 1))]
757+ "<sync_predtab>"
758 {
759 struct arm_sync_generator generator;
760 generator.op = arm_sync_generator_omn;
761 generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
762 arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
763- operands[1]);
764+ operands[1]);
765 DONE;
766 })
767
768 (define_expand "sync_nand<mode>"
769- [(match_operand:NARROW 0 "memory_operand")
770- (match_operand:NARROW 1 "s_register_operand")
771- (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))]
772- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
773+ [(match_operand:QHSD 0 "memory_operand")
774+ (match_operand:QHSD 1 "s_register_operand")
775+ (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))]
776+ "<sync_predtab>"
777 {
778 struct arm_sync_generator generator;
779 generator.op = arm_sync_generator_omn;
780@@ -169,57 +123,27 @@
781 DONE;
782 })
783
784-(define_expand "sync_new_<sync_optab>si"
785- [(match_operand:SI 0 "s_register_operand")
786- (match_operand:SI 1 "memory_operand")
787- (match_operand:SI 2 "s_register_operand")
788- (syncop:SI (match_dup 1) (match_dup 2))]
789- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
790- {
791- struct arm_sync_generator generator;
792- generator.op = arm_sync_generator_omn;
793- generator.u.omn = gen_arm_sync_new_<sync_optab>si;
794- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
795- operands[2]);
796- DONE;
797- })
798-
799-(define_expand "sync_new_nandsi"
800- [(match_operand:SI 0 "s_register_operand")
801- (match_operand:SI 1 "memory_operand")
802- (match_operand:SI 2 "s_register_operand")
803- (not:SI (and:SI (match_dup 1) (match_dup 2)))]
804- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
805- {
806- struct arm_sync_generator generator;
807- generator.op = arm_sync_generator_omn;
808- generator.u.omn = gen_arm_sync_new_nandsi;
809- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
810- operands[2]);
811- DONE;
812- })
813-
814 (define_expand "sync_new_<sync_optab><mode>"
815- [(match_operand:NARROW 0 "s_register_operand")
816- (match_operand:NARROW 1 "memory_operand")
817- (match_operand:NARROW 2 "s_register_operand")
818- (syncop:NARROW (match_dup 1) (match_dup 2))]
819- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
820+ [(match_operand:QHSD 0 "s_register_operand")
821+ (match_operand:QHSD 1 "memory_operand")
822+ (match_operand:QHSD 2 "s_register_operand")
823+ (syncop:QHSD (match_dup 1) (match_dup 2))]
824+ "<sync_predtab>"
825 {
826 struct arm_sync_generator generator;
827 generator.op = arm_sync_generator_omn;
828 generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
829 arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
830- NULL, operands[2]);
831+ NULL, operands[2]);
832 DONE;
833 })
834
835 (define_expand "sync_new_nand<mode>"
836- [(match_operand:NARROW 0 "s_register_operand")
837- (match_operand:NARROW 1 "memory_operand")
838- (match_operand:NARROW 2 "s_register_operand")
839- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
840- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
841+ [(match_operand:QHSD 0 "s_register_operand")
842+ (match_operand:QHSD 1 "memory_operand")
843+ (match_operand:QHSD 2 "s_register_operand")
844+ (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
845+ "<sync_predtab>"
846 {
847 struct arm_sync_generator generator;
848 generator.op = arm_sync_generator_omn;
849@@ -229,57 +153,27 @@
850 DONE;
851 });
852
853-(define_expand "sync_old_<sync_optab>si"
854- [(match_operand:SI 0 "s_register_operand")
855- (match_operand:SI 1 "memory_operand")
856- (match_operand:SI 2 "s_register_operand")
857- (syncop:SI (match_dup 1) (match_dup 2))]
858- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
859- {
860- struct arm_sync_generator generator;
861- generator.op = arm_sync_generator_omn;
862- generator.u.omn = gen_arm_sync_old_<sync_optab>si;
863- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
864- operands[2]);
865- DONE;
866- })
867-
868-(define_expand "sync_old_nandsi"
869- [(match_operand:SI 0 "s_register_operand")
870- (match_operand:SI 1 "memory_operand")
871- (match_operand:SI 2 "s_register_operand")
872- (not:SI (and:SI (match_dup 1) (match_dup 2)))]
873- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
874- {
875- struct arm_sync_generator generator;
876- generator.op = arm_sync_generator_omn;
877- generator.u.omn = gen_arm_sync_old_nandsi;
878- arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
879- operands[2]);
880- DONE;
881- })
882-
883 (define_expand "sync_old_<sync_optab><mode>"
884- [(match_operand:NARROW 0 "s_register_operand")
885- (match_operand:NARROW 1 "memory_operand")
886- (match_operand:NARROW 2 "s_register_operand")
887- (syncop:NARROW (match_dup 1) (match_dup 2))]
888- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
889+ [(match_operand:QHSD 0 "s_register_operand")
890+ (match_operand:QHSD 1 "memory_operand")
891+ (match_operand:QHSD 2 "s_register_operand")
892+ (syncop:QHSD (match_dup 1) (match_dup 2))]
893+ "<sync_predtab>"
894 {
895 struct arm_sync_generator generator;
896 generator.op = arm_sync_generator_omn;
897 generator.u.omn = gen_arm_sync_old_<sync_optab><mode>;
898 arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
899- NULL, operands[2]);
900+ NULL, operands[2]);
901 DONE;
902 })
903
904 (define_expand "sync_old_nand<mode>"
905- [(match_operand:NARROW 0 "s_register_operand")
906- (match_operand:NARROW 1 "memory_operand")
907- (match_operand:NARROW 2 "s_register_operand")
908- (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
909- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
910+ [(match_operand:QHSD 0 "s_register_operand")
911+ (match_operand:QHSD 1 "memory_operand")
912+ (match_operand:QHSD 2 "s_register_operand")
913+ (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
914+ "<sync_predtab>"
915 {
916 struct arm_sync_generator generator;
917 generator.op = arm_sync_generator_omn;
918@@ -289,22 +183,22 @@
919 DONE;
920 })
921
922-(define_insn "arm_sync_compare_and_swapsi"
923- [(set (match_operand:SI 0 "s_register_operand" "=&r")
924- (unspec_volatile:SI
925- [(match_operand:SI 1 "arm_sync_memory_operand" "+Q")
926- (match_operand:SI 2 "s_register_operand" "r")
927- (match_operand:SI 3 "s_register_operand" "r")]
928- VUNSPEC_SYNC_COMPARE_AND_SWAP))
929- (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)]
930+(define_insn "arm_sync_compare_and_swap<mode>"
931+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
932+ (unspec_volatile:SIDI
933+ [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
934+ (match_operand:SIDI 2 "s_register_operand" "r")
935+ (match_operand:SIDI 3 "s_register_operand" "r")]
936+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
937+ (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)]
938 VUNSPEC_SYNC_COMPARE_AND_SWAP))
939 (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
940 VUNSPEC_SYNC_COMPARE_AND_SWAP))
941 ]
942- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
943+ "<sync_predtab>"
944 {
945 return arm_output_sync_insn (insn, operands);
946- }
947+ }
948 [(set_attr "sync_result" "0")
949 (set_attr "sync_memory" "1")
950 (set_attr "sync_required_value" "2")
951@@ -318,7 +212,7 @@
952 (zero_extend:SI
953 (unspec_volatile:NARROW
954 [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")
955- (match_operand:SI 2 "s_register_operand" "r")
956+ (match_operand:SI 2 "s_register_operand" "r")
957 (match_operand:SI 3 "s_register_operand" "r")]
958 VUNSPEC_SYNC_COMPARE_AND_SWAP)))
959 (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
960@@ -326,10 +220,10 @@
961 (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
962 VUNSPEC_SYNC_COMPARE_AND_SWAP))
963 ]
964- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
965+ "<sync_predtab>"
966 {
967 return arm_output_sync_insn (insn, operands);
968- }
969+ }
970 [(set_attr "sync_result" "0")
971 (set_attr "sync_memory" "1")
972 (set_attr "sync_required_value" "2")
973@@ -338,18 +232,18 @@
974 (set_attr "conds" "clob")
975 (set_attr "predicable" "no")])
976
977-(define_insn "arm_sync_lock_test_and_setsi"
978- [(set (match_operand:SI 0 "s_register_operand" "=&r")
979- (match_operand:SI 1 "arm_sync_memory_operand" "+Q"))
980+(define_insn "arm_sync_lock_test_and_set<mode>"
981+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
982+ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q"))
983 (set (match_dup 1)
984- (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")]
985- VUNSPEC_SYNC_LOCK))
986+ (unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")]
987+ VUNSPEC_SYNC_LOCK))
988 (clobber (reg:CC CC_REGNUM))
989 (clobber (match_scratch:SI 3 "=&r"))]
990- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
991+ "<sync_predtab>"
992 {
993 return arm_output_sync_insn (insn, operands);
994- }
995+ }
996 [(set_attr "sync_release_barrier" "no")
997 (set_attr "sync_result" "0")
998 (set_attr "sync_memory" "1")
999@@ -364,10 +258,10 @@
1000 (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")))
1001 (set (match_dup 1)
1002 (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")]
1003- VUNSPEC_SYNC_LOCK))
1004+ VUNSPEC_SYNC_LOCK))
1005 (clobber (reg:CC CC_REGNUM))
1006 (clobber (match_scratch:SI 3 "=&r"))]
1007- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1008+ "<sync_predtab>"
1009 {
1010 return arm_output_sync_insn (insn, operands);
1011 }
1012@@ -380,22 +274,48 @@
1013 (set_attr "conds" "clob")
1014 (set_attr "predicable" "no")])
1015
1016-(define_insn "arm_sync_new_<sync_optab>si"
1017+(define_insn "arm_sync_new_<sync_optab><mode>"
1018+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
1019+ (unspec_volatile:SIDI [(syncop:SIDI
1020+ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
1021+ (match_operand:SIDI 2 "s_register_operand" "r"))
1022+ ]
1023+ VUNSPEC_SYNC_NEW_OP))
1024+ (set (match_dup 1)
1025+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
1026+ VUNSPEC_SYNC_NEW_OP))
1027+ (clobber (reg:CC CC_REGNUM))
1028+ (clobber (match_scratch:SI 3 "=&r"))]
1029+ "<sync_predtab>"
1030+ {
1031+ return arm_output_sync_insn (insn, operands);
1032+ }
1033+ [(set_attr "sync_result" "0")
1034+ (set_attr "sync_memory" "1")
1035+ (set_attr "sync_new_value" "2")
1036+ (set_attr "sync_t1" "0")
1037+ (set_attr "sync_t2" "3")
1038+ (set_attr "sync_op" "<sync_optab>")
1039+ (set_attr "conds" "clob")
1040+ (set_attr "predicable" "no")])
1041+
1042+(define_insn "arm_sync_new_<sync_optab><mode>"
1043 [(set (match_operand:SI 0 "s_register_operand" "=&r")
1044 (unspec_volatile:SI [(syncop:SI
1045- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
1046- (match_operand:SI 2 "s_register_operand" "r"))
1047- ]
1048- VUNSPEC_SYNC_NEW_OP))
1049+ (zero_extend:SI
1050+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1051+ (match_operand:SI 2 "s_register_operand" "r"))
1052+ ]
1053+ VUNSPEC_SYNC_NEW_OP))
1054 (set (match_dup 1)
1055- (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
1056- VUNSPEC_SYNC_NEW_OP))
1057+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1058+ VUNSPEC_SYNC_NEW_OP))
1059 (clobber (reg:CC CC_REGNUM))
1060 (clobber (match_scratch:SI 3 "=&r"))]
1061- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1062+ "<sync_predtab>"
1063 {
1064 return arm_output_sync_insn (insn, operands);
1065- }
1066+ }
1067 [(set_attr "sync_result" "0")
1068 (set_attr "sync_memory" "1")
1069 (set_attr "sync_new_value" "2")
1070@@ -405,22 +325,22 @@
1071 (set_attr "conds" "clob")
1072 (set_attr "predicable" "no")])
1073
1074-(define_insn "arm_sync_new_nandsi"
1075- [(set (match_operand:SI 0 "s_register_operand" "=&r")
1076- (unspec_volatile:SI [(not:SI (and:SI
1077- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
1078- (match_operand:SI 2 "s_register_operand" "r")))
1079- ]
1080- VUNSPEC_SYNC_NEW_OP))
1081+(define_insn "arm_sync_new_nand<mode>"
1082+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
1083+ (unspec_volatile:SIDI [(not:SIDI (and:SIDI
1084+ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
1085+ (match_operand:SIDI 2 "s_register_operand" "r")))
1086+ ]
1087+ VUNSPEC_SYNC_NEW_OP))
1088 (set (match_dup 1)
1089- (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
1090- VUNSPEC_SYNC_NEW_OP))
1091+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
1092+ VUNSPEC_SYNC_NEW_OP))
1093 (clobber (reg:CC CC_REGNUM))
1094 (clobber (match_scratch:SI 3 "=&r"))]
1095- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1096+ "<sync_predtab>"
1097 {
1098 return arm_output_sync_insn (insn, operands);
1099- }
1100+ }
1101 [(set_attr "sync_result" "0")
1102 (set_attr "sync_memory" "1")
1103 (set_attr "sync_new_value" "2")
1104@@ -430,50 +350,24 @@
1105 (set_attr "conds" "clob")
1106 (set_attr "predicable" "no")])
1107
1108-(define_insn "arm_sync_new_<sync_optab><mode>"
1109- [(set (match_operand:SI 0 "s_register_operand" "=&r")
1110- (unspec_volatile:SI [(syncop:SI
1111- (zero_extend:SI
1112- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1113- (match_operand:SI 2 "s_register_operand" "r"))
1114- ]
1115- VUNSPEC_SYNC_NEW_OP))
1116- (set (match_dup 1)
1117- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1118- VUNSPEC_SYNC_NEW_OP))
1119- (clobber (reg:CC CC_REGNUM))
1120- (clobber (match_scratch:SI 3 "=&r"))]
1121- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
1122- {
1123- return arm_output_sync_insn (insn, operands);
1124- }
1125- [(set_attr "sync_result" "0")
1126- (set_attr "sync_memory" "1")
1127- (set_attr "sync_new_value" "2")
1128- (set_attr "sync_t1" "0")
1129- (set_attr "sync_t2" "3")
1130- (set_attr "sync_op" "<sync_optab>")
1131- (set_attr "conds" "clob")
1132- (set_attr "predicable" "no")])
1133-
1134 (define_insn "arm_sync_new_nand<mode>"
1135 [(set (match_operand:SI 0 "s_register_operand" "=&r")
1136 (unspec_volatile:SI
1137 [(not:SI
1138 (and:SI
1139- (zero_extend:SI
1140- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1141- (match_operand:SI 2 "s_register_operand" "r")))
1142+ (zero_extend:SI
1143+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1144+ (match_operand:SI 2 "s_register_operand" "r")))
1145 ] VUNSPEC_SYNC_NEW_OP))
1146 (set (match_dup 1)
1147 (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1148- VUNSPEC_SYNC_NEW_OP))
1149+ VUNSPEC_SYNC_NEW_OP))
1150 (clobber (reg:CC CC_REGNUM))
1151 (clobber (match_scratch:SI 3 "=&r"))]
1152- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1153+ "<sync_predtab>"
1154 {
1155 return arm_output_sync_insn (insn, operands);
1156- }
1157+ }
1158 [(set_attr "sync_result" "0")
1159 (set_attr "sync_memory" "1")
1160 (set_attr "sync_new_value" "2")
1161@@ -483,20 +377,20 @@
1162 (set_attr "conds" "clob")
1163 (set_attr "predicable" "no")])
1164
1165-(define_insn "arm_sync_old_<sync_optab>si"
1166- [(set (match_operand:SI 0 "s_register_operand" "=&r")
1167- (unspec_volatile:SI [(syncop:SI
1168- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
1169- (match_operand:SI 2 "s_register_operand" "r"))
1170- ]
1171- VUNSPEC_SYNC_OLD_OP))
1172+(define_insn "arm_sync_old_<sync_optab><mode>"
1173+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
1174+ (unspec_volatile:SIDI [(syncop:SIDI
1175+ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
1176+ (match_operand:SIDI 2 "s_register_operand" "r"))
1177+ ]
1178+ VUNSPEC_SYNC_OLD_OP))
1179 (set (match_dup 1)
1180- (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
1181- VUNSPEC_SYNC_OLD_OP))
1182+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
1183+ VUNSPEC_SYNC_OLD_OP))
1184 (clobber (reg:CC CC_REGNUM))
1185- (clobber (match_scratch:SI 3 "=&r"))
1186+ (clobber (match_scratch:SIDI 3 "=&r"))
1187 (clobber (match_scratch:SI 4 "<sync_clobber>"))]
1188- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1189+ "<sync_predtab>"
1190 {
1191 return arm_output_sync_insn (insn, operands);
1192 }
1193@@ -509,47 +403,21 @@
1194 (set_attr "conds" "clob")
1195 (set_attr "predicable" "no")])
1196
1197-(define_insn "arm_sync_old_nandsi"
1198- [(set (match_operand:SI 0 "s_register_operand" "=&r")
1199- (unspec_volatile:SI [(not:SI (and:SI
1200- (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
1201- (match_operand:SI 2 "s_register_operand" "r")))
1202- ]
1203- VUNSPEC_SYNC_OLD_OP))
1204- (set (match_dup 1)
1205- (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
1206- VUNSPEC_SYNC_OLD_OP))
1207- (clobber (reg:CC CC_REGNUM))
1208- (clobber (match_scratch:SI 3 "=&r"))
1209- (clobber (match_scratch:SI 4 "=&r"))]
1210- "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
1211- {
1212- return arm_output_sync_insn (insn, operands);
1213- }
1214- [(set_attr "sync_result" "0")
1215- (set_attr "sync_memory" "1")
1216- (set_attr "sync_new_value" "2")
1217- (set_attr "sync_t1" "3")
1218- (set_attr "sync_t2" "4")
1219- (set_attr "sync_op" "nand")
1220- (set_attr "conds" "clob")
1221- (set_attr "predicable" "no")])
1222-
1223 (define_insn "arm_sync_old_<sync_optab><mode>"
1224 [(set (match_operand:SI 0 "s_register_operand" "=&r")
1225 (unspec_volatile:SI [(syncop:SI
1226- (zero_extend:SI
1227- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1228- (match_operand:SI 2 "s_register_operand" "r"))
1229- ]
1230- VUNSPEC_SYNC_OLD_OP))
1231+ (zero_extend:SI
1232+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1233+ (match_operand:SI 2 "s_register_operand" "r"))
1234+ ]
1235+ VUNSPEC_SYNC_OLD_OP))
1236 (set (match_dup 1)
1237- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1238- VUNSPEC_SYNC_OLD_OP))
1239+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1240+ VUNSPEC_SYNC_OLD_OP))
1241 (clobber (reg:CC CC_REGNUM))
1242 (clobber (match_scratch:SI 3 "=&r"))
1243 (clobber (match_scratch:SI 4 "<sync_clobber>"))]
1244- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
1245+ "<sync_predtab>"
1246 {
1247 return arm_output_sync_insn (insn, operands);
1248 }
1249@@ -563,20 +431,46 @@
1250 (set_attr "predicable" "no")])
1251
1252 (define_insn "arm_sync_old_nand<mode>"
1253+ [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
1254+ (unspec_volatile:SIDI [(not:SIDI (and:SIDI
1255+ (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
1256+ (match_operand:SIDI 2 "s_register_operand" "r")))
1257+ ]
1258+ VUNSPEC_SYNC_OLD_OP))
1259+ (set (match_dup 1)
1260+ (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
1261+ VUNSPEC_SYNC_OLD_OP))
1262+ (clobber (reg:CC CC_REGNUM))
1263+ (clobber (match_scratch:SIDI 3 "=&r"))
1264+ (clobber (match_scratch:SI 4 "=&r"))]
1265+ "<sync_predtab>"
1266+ {
1267+ return arm_output_sync_insn (insn, operands);
1268+ }
1269+ [(set_attr "sync_result" "0")
1270+ (set_attr "sync_memory" "1")
1271+ (set_attr "sync_new_value" "2")
1272+ (set_attr "sync_t1" "3")
1273+ (set_attr "sync_t2" "4")
1274+ (set_attr "sync_op" "nand")
1275+ (set_attr "conds" "clob")
1276+ (set_attr "predicable" "no")])
1277+
1278+(define_insn "arm_sync_old_nand<mode>"
1279 [(set (match_operand:SI 0 "s_register_operand" "=&r")
1280- (unspec_volatile:SI [(not:SI (and:SI
1281- (zero_extend:SI
1282- (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1283- (match_operand:SI 2 "s_register_operand" "r")))
1284- ]
1285- VUNSPEC_SYNC_OLD_OP))
1286+ (unspec_volatile:SI [(not:SI (and:SI
1287+ (zero_extend:SI
1288+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
1289+ (match_operand:SI 2 "s_register_operand" "r")))
1290+ ]
1291+ VUNSPEC_SYNC_OLD_OP))
1292 (set (match_dup 1)
1293- (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1294- VUNSPEC_SYNC_OLD_OP))
1295+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
1296+ VUNSPEC_SYNC_OLD_OP))
1297 (clobber (reg:CC CC_REGNUM))
1298 (clobber (match_scratch:SI 3 "=&r"))
1299 (clobber (match_scratch:SI 4 "=&r"))]
1300- "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
1301+ "<sync_predtab>"
1302 {
1303 return arm_output_sync_insn (insn, operands);
1304 }
1305
1306=== modified file 'gcc/config/arm/t-linux-eabi'
1307--- old/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22 +0000
1308+++ new/gcc/config/arm/t-linux-eabi 2011-10-14 15:50:44 +0000
1309@@ -36,3 +36,4 @@
1310 EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
1311
1312 LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c
1313+LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c
1314
1315=== added file 'gcc/testsuite/gcc.dg/di-longlong64-sync-1.c'
1316--- old/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 1970-01-01 00:00:00 +0000
1317+++ new/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 2011-10-14 15:56:32 +0000
1318@@ -0,0 +1,164 @@
1319+/* { dg-do run } */
1320+/* { dg-require-effective-target sync_longlong } */
1321+/* { dg-options "-std=gnu99" } */
1322+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1323+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1324+
1325+
1326+/* Test basic functionality of the intrinsics. The operations should
1327+ not be optimized away if no one checks the return values. */
1328+
1329+/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long
1330+ (an explicit 64bit type may be a better bet) and 2) Use values that cross
1331+ the 32bit boundary and cause carries since the actual maths are done as
1332+ pairs of 32 bit instructions. */
1333+
1334+/* Note: This file is #included by some of the ARM tests. */
1335+
1336+__extension__ typedef __SIZE_TYPE__ size_t;
1337+
1338+extern void abort (void);
1339+extern void *memcpy (void *, const void *, size_t);
1340+extern int memcmp (const void *, const void *, size_t);
1341+
1342+/* Temporary space where the work actually gets done. */
1343+static long long AL[24];
1344+/* Values copied into AL before we start. */
1345+static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1,
1346+
1347+ 0x100000002ll, 0x100000002ll,
1348+ 0x100000002ll, 0x100000002ll,
1349+
1350+ 0, 0x1000e0de0000ll,
1351+ 42 , 0xc001c0de0000ll,
1352+
1353+ -1ll, 0, 0xff00ff0000ll, -1ll,
1354+
1355+ 0, 0x1000e0de0000ll,
1356+ 42 , 0xc001c0de0000ll,
1357+
1358+ -1ll, 0, 0xff00ff0000ll, -1ll};
1359+/* This is what should be in AL at the end. */
1360+static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0,
1361+
1362+ 0x100000002ll, 0x100000002ll,
1363+ 0x100000002ll, 0x100000002ll,
1364+
1365+ 1, 0xc001c0de0000ll,
1366+ 20, 0x1000e0de0000ll,
1367+
1368+ 0x300000007ll , 0x500000009ll,
1369+ 0xf100ff0001ll, ~0xa00000007ll,
1370+
1371+ 1, 0xc001c0de0000ll,
1372+ 20, 0x1000e0de0000ll,
1373+
1374+ 0x300000007ll , 0x500000009ll,
1375+ 0xf100ff0001ll, ~0xa00000007ll };
1376+
1377+/* First check they work in terms of what they do to memory. */
1378+static void
1379+do_noret_di (void)
1380+{
1381+ __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll);
1382+ __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll);
1383+ __sync_lock_test_and_set (AL+2, 1);
1384+ __sync_lock_release (AL+3);
1385+
1386+ /* The following tests should not change the value since the
1387+ original does NOT match. */
1388+ __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll);
1389+ __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll);
1390+ __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll);
1391+ __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll);
1392+
1393+ __sync_fetch_and_add (AL+8, 1);
1394+ __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */
1395+ __sync_fetch_and_sub (AL+10, 22);
1396+ __sync_fetch_and_sub (AL+11, 0xb000e0000000ll);
1397+
1398+ __sync_fetch_and_and (AL+12, 0x300000007ll);
1399+ __sync_fetch_and_or (AL+13, 0x500000009ll);
1400+ __sync_fetch_and_xor (AL+14, 0xe00000001ll);
1401+ __sync_fetch_and_nand (AL+15, 0xa00000007ll);
1402+
1403+ /* These should be the same as the fetch_and_* cases except for
1404+ return value. */
1405+ __sync_add_and_fetch (AL+16, 1);
1406+ /* add to both halves & carry. */
1407+ __sync_add_and_fetch (AL+17, 0xb000e0000000ll);
1408+ __sync_sub_and_fetch (AL+18, 22);
1409+ __sync_sub_and_fetch (AL+19, 0xb000e0000000ll);
1410+
1411+ __sync_and_and_fetch (AL+20, 0x300000007ll);
1412+ __sync_or_and_fetch (AL+21, 0x500000009ll);
1413+ __sync_xor_and_fetch (AL+22, 0xe00000001ll);
1414+ __sync_nand_and_fetch (AL+23, 0xa00000007ll);
1415+}
1416+
1417+/* Now check return values. */
1418+static void
1419+do_ret_di (void)
1420+{
1421+ if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) !=
1422+ 0x100000002ll) abort ();
1423+ if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) !=
1424+ 1) abort ();
1425+ if (__sync_lock_test_and_set (AL+2, 1) != 0) abort ();
1426+ __sync_lock_release (AL+3); /* no return value, but keep to match results. */
1427+
1428+ /* The following tests should not change the value since the
1429+ original does NOT match. */
1430+ if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) !=
1431+ 0x100000002ll) abort ();
1432+ if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) !=
1433+ 0x100000002ll) abort ();
1434+ if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) !=
1435+ 0) abort ();
1436+ if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) !=
1437+ 0) abort ();
1438+
1439+ if (__sync_fetch_and_add (AL+8, 1) != 0) abort ();
1440+ if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort ();
1441+ if (__sync_fetch_and_sub (AL+10, 22) != 42) abort ();
1442+ if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll)
1443+ abort ();
1444+
1445+ if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort ();
1446+ if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort ();
1447+ if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort ();
1448+ if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort ();
1449+
1450+ /* These should be the same as the fetch_and_* cases except for
1451+ return value. */
1452+ if (__sync_add_and_fetch (AL+16, 1) != 1) abort ();
1453+ if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll)
1454+ abort ();
1455+ if (__sync_sub_and_fetch (AL+18, 22) != 20) abort ();
1456+ if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll)
1457+ abort ();
1458+
1459+ if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort ();
1460+ if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort ();
1461+ if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort ();
1462+ if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort ();
1463+}
1464+
1465+int main ()
1466+{
1467+ memcpy (AL, init_di, sizeof (init_di));
1468+
1469+ do_noret_di ();
1470+
1471+ if (memcmp (AL, test_di, sizeof (test_di)))
1472+ abort ();
1473+
1474+ memcpy (AL, init_di, sizeof (init_di));
1475+
1476+ do_ret_di ();
1477+
1478+ if (memcmp (AL, test_di, sizeof (test_di)))
1479+ abort ();
1480+
1481+ return 0;
1482+}
1483
1484=== added file 'gcc/testsuite/gcc.dg/di-sync-multithread.c'
1485--- old/gcc/testsuite/gcc.dg/di-sync-multithread.c 1970-01-01 00:00:00 +0000
1486+++ new/gcc/testsuite/gcc.dg/di-sync-multithread.c 2011-10-14 15:56:32 +0000
1487@@ -0,0 +1,205 @@
1488+/* { dg-do run } */
1489+/* { dg-require-effective-target sync_longlong } */
1490+/* { dg-require-effective-target pthread_h } */
1491+/* { dg-require-effective-target pthread } */
1492+/* { dg-options "-pthread -std=gnu99" } */
1493+
1494+/* test of long long atomic ops performed in parallel in 3 pthreads
1495+ david.gilbert@linaro.org */
1496+
1497+#include <pthread.h>
1498+#include <unistd.h>
1499+
1500+/*#define DEBUGIT 1 */
1501+
1502+#ifdef DEBUGIT
1503+#include <stdio.h>
1504+
1505+#define DOABORT(x,...) {\
1506+ fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\
1507+ }
1508+
1509+#else
1510+
1511+#define DOABORT(x,...) abort ();
1512+
1513+#endif
1514+
1515+/* Passed to each thread to describe which bits it is going to work on. */
1516+struct threadwork {
1517+ unsigned long long count; /* incremented each time the worker loops. */
1518+ unsigned int thread; /* ID */
1519+ unsigned int addlsb; /* 8 bit */
1520+ unsigned int logic1lsb; /* 5 bit */
1521+ unsigned int logic2lsb; /* 8 bit */
1522+};
1523+
1524+/* The shared word where all the atomic work is done. */
1525+static volatile long long workspace;
1526+
1527+/* A shared word to tell the workers to quit when non-0. */
1528+static long long doquit;
1529+
1530+extern void abort (void);
1531+
1532+/* Note this test doesn't test the return values much. */
1533+void*
1534+worker (void* data)
1535+{
1536+ struct threadwork *tw = (struct threadwork*)data;
1537+ long long add1bit = 1ll << tw->addlsb;
1538+ long long logic1bit = 1ll << tw->logic1lsb;
1539+ long long logic2bit = 1ll << tw->logic2lsb;
1540+
1541+ /* Clear the bits we use. */
1542+ __sync_and_and_fetch (&workspace, ~(0xffll * add1bit));
1543+ __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit));
1544+ __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit));
1545+
1546+ do
1547+ {
1548+ long long tmp1, tmp2, tmp3;
1549+ /* OK, let's try and do some stuff to the workspace - by the end
1550+ of the main loop our area should be the same as it is now - i.e. 0. */
1551+
1552+ /* Push the arithmetic section up to 128 - one of the threads will
1553+ cause this to carry across the 32bit boundary. */
1554+ for (tmp2 = 0; tmp2 < 64; tmp2++)
1555+ {
1556+ /* Add 2 using the two different adds. */
1557+ tmp1 = __sync_add_and_fetch (&workspace, add1bit);
1558+ tmp3 = __sync_fetch_and_add (&workspace, add1bit);
1559+
1560+ /* The value should be the intermediate add value in both cases. */
1561+ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
1562+ DOABORT ("Mismatch of add intermediates on thread %d "
1563+ "workspace=0x%llx tmp1=0x%llx "
1564+ "tmp2=0x%llx tmp3=0x%llx\n",
1565+ tw->thread, workspace, tmp1, tmp2, tmp3);
1566+ }
1567+
1568+ /* Set the logic bits. */
1569+ tmp2=__sync_or_and_fetch (&workspace,
1570+ 0x1fll * logic1bit | 0xffll * logic2bit);
1571+
1572+ /* Check the logic bits are set and the arithmetic value is correct. */
1573+ if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit
1574+ | 0xffll * add1bit))
1575+ != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit))
1576+ DOABORT ("Midloop check failed on thread %d "
1577+ "workspace=0x%llx tmp2=0x%llx "
1578+ "masktmp2=0x%llx expected=0x%llx\n",
1579+ tw->thread, workspace, tmp2,
1580+ tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit |
1581+ 0xffll * add1bit),
1582+ (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit));
1583+
1584+ /* Pull the arithmetic set back down to 0 - again this should cause a
1585+ carry across the 32bit boundary in one thread. */
1586+
1587+ for (tmp2 = 0; tmp2 < 64; tmp2++)
1588+ {
1589+ /* Subtract 2 using the two different subs. */
1590+ tmp1=__sync_sub_and_fetch (&workspace, add1bit);
1591+ tmp3=__sync_fetch_and_sub (&workspace, add1bit);
1592+
1593+ /* The value should be the intermediate sub value in both cases. */
1594+ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
1595+ DOABORT ("Mismatch of sub intermediates on thread %d "
1596+ "workspace=0x%llx tmp1=0x%llx "
1597+ "tmp2=0x%llx tmp3=0x%llx\n",
1598+ tw->thread, workspace, tmp1, tmp2, tmp3);
1599+ }
1600+
1601+
1602+ /* Clear the logic bits. */
1603+ __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit);
1604+ tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit));
1605+
1606+ /* The logic bits and the arithmetic bits should be zero again. */
1607+ if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit))
1608+ DOABORT ("End of worker loop; bits none 0 on thread %d "
1609+ "workspace=0x%llx tmp3=0x%llx "
1610+ "mask=0x%llx maskedtmp3=0x%llx\n",
1611+ tw->thread, workspace, tmp3, (0x1fll * logic1bit |
1612+ 0xffll * logic2bit | 0xffll * add1bit),
1613+ tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit));
1614+
1615+ __sync_add_and_fetch (&tw->count, 1);
1616+ }
1617+ while (!__sync_bool_compare_and_swap (&doquit, 1, 1));
1618+
1619+ pthread_exit (0);
1620+}
1621+
1622+int
1623+main ()
1624+{
1625+ /* We have 3 threads doing three sets of operations, an 8 bit
1626+ arithmetic field, a 5 bit logic field and an 8 bit logic
1627+ field (just to pack them all in).
1628+
1629+ 6 5 4 4 3 2 1
1630+ 3 6 8 0 2 4 6 8 0
1631+ |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,...
1632+ - T0 -- T1 -- T2 --T2 -- T0 -*- T2-- T1-- T1 -***- T0-
1633+ logic2 logic2 arith log2 arith log1 log1 arith log1
1634+
1635+ */
1636+ unsigned int t;
1637+ long long tmp;
1638+ int err;
1639+
1640+ struct threadwork tw[3]={
1641+ { 0ll, 0, 27, 0, 56 },
1642+ { 0ll, 1, 8,16, 48 },
1643+ { 0ll, 2, 40,21, 35 }
1644+ };
1645+
1646+ pthread_t threads[3];
1647+
1648+ __sync_lock_release (&doquit);
1649+
1650+ /* Get the work space into a known value - All 1's. */
1651+ __sync_lock_release (&workspace); /* Now all 0. */
1652+ tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll);
1653+ if (tmp!=0)
1654+ DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx "
1655+ "tmp=0x%llx\n", workspace,tmp);
1656+
1657+ for (t = 0; t < 3; t++)
1658+ {
1659+ err=pthread_create (&threads[t], NULL , worker, &tw[t]);
1660+ if (err) DOABORT ("pthread_create failed on thread %d with error %d\n",
1661+ t, err);
1662+ };
1663+
1664+ sleep (5);
1665+
1666+ /* Stop please. */
1667+ __sync_lock_test_and_set (&doquit, 1ll);
1668+
1669+ for (t = 0; t < 3; t++)
1670+ {
1671+ err=pthread_join (threads[t], NULL);
1672+ if (err)
1673+ DOABORT ("pthread_join failed on thread %d with error %d\n", t, err);
1674+ };
1675+
1676+ __sync_synchronize ();
1677+
1678+ /* OK, so all the workers have finished -
1679+ the workers should have zero'd their workspace, the unused areas
1680+ should still be 1. */
1681+ if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0))
1682+ DOABORT ("End of run workspace mismatch, got %llx\n", workspace);
1683+
1684+ /* All the workers should have done some work. */
1685+ for (t = 0; t < 3; t++)
1686+ {
1687+ if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t);
1688+ };
1689+
1690+ return 0;
1691+}
1692+
1693
1694=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c'
1695--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 1970-01-01 00:00:00 +0000
1696+++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 2011-10-14 15:56:32 +0000
1697@@ -0,0 +1,14 @@
1698+/* { dg-do compile } */
1699+/* { dg-require-effective-target arm_arch_v5_ok } */
1700+/* { dg-options "-std=gnu99" } */
1701+/* { dg-add-options arm_arch_v5 } */
1702+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1703+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1704+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
1705+
1706+#include "../../gcc.dg/di-longlong64-sync-1.c"
1707+
1708+/* On an old ARM we have no ldrexd or strexd so we have to use helpers. */
1709+/* { dg-final { scan-assembler-not "ldrexd" } } */
1710+/* { dg-final { scan-assembler-not "strexd" } } */
1711+/* { dg-final { scan-assembler "__sync_" } } */
1712
1713=== added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c'
1714--- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 1970-01-01 00:00:00 +0000
1715+++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000
1716@@ -0,0 +1,17 @@
1717+/* { dg-do compile } */
1718+/* { dg-require-effective-target arm_arm_ok } */
1719+/* { dg-options "-marm -std=gnu99" } */
1720+/* { dg-require-effective-target arm_arch_v6k_ok } */
1721+/* { dg-add-options arm_arch_v6k } */
1722+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1723+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
1724+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
1725+
1726+#include "../../gcc.dg/di-longlong64-sync-1.c"
1727+
1728+/* We should be using ldrexd, strexd and no helpers or shorter ldrex. */
1729+/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */
1730+/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */
1731+/* { dg-final { scan-assembler-not "__sync_" } } */
1732+/* { dg-final { scan-assembler-not "ldrex\t" } } */
1733+/* { dg-final { scan-assembler-not "strex\t" } } */
1734
1735=== modified file 'gcc/testsuite/lib/target-supports.exp'
1736--- old/gcc/testsuite/lib/target-supports.exp 2011-11-22 17:10:17 +0000
1737+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-28 15:07:01 +0000
1738@@ -2000,6 +2000,47 @@
1739 check_effective_target_arm_fp16_ok_nocache]
1740 }
1741
1742+# Creates a series of routines that return 1 if the given architecture
1743+# can be selected and a routine to give the flags to select that architecture
1744+# Note: Extra flags may be added to disable options from newer compilers
1745+# (Thumb in particular - but others may be added in the future)
1746+# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
1747+# /* { dg-add-options arm_arch_v5 } */
1748+foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__
1749+ v6 "-march=armv6" __ARM_ARCH_6__
1750+ v6k "-march=armv6k" __ARM_ARCH_6K__
1751+ v7a "-march=armv7-a" __ARM_ARCH_7A__ } {
1752+ eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
1753+ proc check_effective_target_arm_arch_FUNC_ok { } {
1754+ if { [ string match "*-marm*" "FLAG" ] &&
1755+ ![check_effective_target_arm_arm_ok] } {
1756+ return 0
1757+ }
1758+ return [check_no_compiler_messages arm_arch_FUNC_ok assembly {
1759+ #if !defined (DEF)
1760+ #error FOO
1761+ #endif
1762+ } "FLAG" ]
1763+ }
1764+
1765+ proc add_options_for_arm_arch_FUNC { flags } {
1766+ return "$flags FLAG"
1767+ }
1768+ }]
1769+}
1770+
1771+# Return 1 if this is an ARM target where -marm causes ARM to be
1772+# used (not Thumb)
1773+
1774+proc check_effective_target_arm_arm_ok { } {
1775+ return [check_no_compiler_messages arm_arm_ok assembly {
1776+ #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__)
1777+ #error FOO
1778+ #endif
1779+ } "-marm"]
1780+}
1781+
1782+
1783 # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be
1784 # used.
1785
1786@@ -3384,6 +3425,31 @@
1787 return $et_sync_int_long_saved
1788 }
1789
1790+# Return 1 if the target supports atomic operations on "long long" and can
1791+# execute them
1792+# So far only put checks in for ARM, others may want to add their own
1793+proc check_effective_target_sync_longlong { } {
1794+ return [check_runtime sync_longlong_runtime {
1795+ #include <stdlib.h>
1796+ int main ()
1797+ {
1798+ long long l1;
1799+
1800+ if (sizeof (long long) != 8)
1801+ exit (1);
1802+
1803+ #ifdef __arm__
1804+ /* Just check for native; checking for kernel fallback is tricky. */
1805+ asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1");
1806+ #else
1807+ # error "Add other suitable archs here"
1808+ #endif
1809+
1810+ exit (0);
1811+ }
1812+ } "" ]
1813+}
1814+
1815 # Return 1 if the target supports atomic operations on "char" and "short".
1816
1817 proc check_effective_target_sync_char_short { } {
1818
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch
new file mode 100644
index 000000000..88d74c72f
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch
@@ -0,0 +1,487 @@
12011-11-27 Ira Rosen <ira.rosen@linaro.org>
2
3 Needs to be merged upstream:
4
5 gcc/
6 * tree-vect-patterns.c (widened_name_p): Rename to ...
7 (type_conversion_p): ... this. Add new argument to determine
8 if it's a promotion or demotion operation. Check for
9 CONVERT_EXPR_CODE_P instead of NOP_EXPR.
10 (vect_recog_dot_prod_pattern): Call type_conversion_p instead of
11 widened_name_p.
12 (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern,
13 vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern):
14 Likewise.
15 (vect_recog_mixed_size_cond_pattern): Likewise and allow
16 non-constant then and else clauses.
17
18 gcc/testsuite/
19 * gcc.dg/vect/bb-slp-cond-3.c: New test.
20 * gcc.dg/vect/bb-slp-cond-4.c: New test.
21
22=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c'
23--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 1970-01-01 00:00:00 +0000
24+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 2011-11-27 11:29:32 +0000
25@@ -0,0 +1,85 @@
26+/* { dg-require-effective-target vect_condition } */
27+
28+#include "tree-vect.h"
29+
30+#define N 64
31+
32+/* Comparison in int, then/else and result in unsigned char. */
33+
34+static inline unsigned char
35+foo (int x, int y, int a, int b)
36+{
37+ if (x >= y)
38+ return a;
39+ else
40+ return b;
41+}
42+
43+__attribute__((noinline, noclone)) void
44+bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b,
45+ unsigned char * __restrict__ c, unsigned char * __restrict__ d,
46+ unsigned char * __restrict__ e, int stride, int w)
47+{
48+ int i;
49+ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
50+ d += stride, e += stride)
51+ {
52+ e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
53+ e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
54+ e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
55+ e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
56+ e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
57+ e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
58+ e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
59+ e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
60+ e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
61+ e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
62+ e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
63+ e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
64+ e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
65+ e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
66+ e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
67+ e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
68+ }
69+}
70+
71+
72+unsigned char a[N], b[N], c[N], d[N], e[N];
73+
74+int main ()
75+{
76+ int i;
77+
78+ check_vect ();
79+
80+ for (i = 0; i < N; i++)
81+ {
82+ a[i] = i;
83+ b[i] = 5;
84+ e[i] = 0;
85+
86+ switch (i % 9)
87+ {
88+ case 0: asm (""); c[i] = i; d[i] = i + 1; break;
89+ case 1: c[i] = 0; d[i] = 0; break;
90+ case 2: c[i] = i + 1; d[i] = i - 1; break;
91+ case 3: c[i] = i; d[i] = i + 7; break;
92+ case 4: c[i] = i; d[i] = i; break;
93+ case 5: c[i] = i + 16; d[i] = i + 3; break;
94+ case 6: c[i] = i - 5; d[i] = i; break;
95+ case 7: c[i] = i; d[i] = i; break;
96+ case 8: c[i] = i; d[i] = i - 7; break;
97+ }
98+ }
99+
100+ bar (a, b, c, d, e, 16, 2);
101+ for (i = 0; i < N; i++)
102+ if (e[i] != ((i % 3) == 0 ? 10 : 2 * i))
103+ abort ();
104+
105+ return 0;
106+}
107+
108+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */
109+/* { dg-final { cleanup-tree-dump "slp" } } */
110+
111
112=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c'
113--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 1970-01-01 00:00:00 +0000
114+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 2011-11-27 11:29:32 +0000
115@@ -0,0 +1,85 @@
116+/* { dg-require-effective-target vect_condition } */
117+
118+#include "tree-vect.h"
119+
120+#define N 64
121+
122+/* Comparison in short, then/else and result in int. */
123+static inline int
124+foo (short x, short y, int a, int b)
125+{
126+ if (x >= y)
127+ return a;
128+ else
129+ return b;
130+}
131+
132+__attribute__((noinline, noclone)) void
133+bar (short * __restrict__ a, short * __restrict__ b,
134+ short * __restrict__ c, short * __restrict__ d,
135+ int * __restrict__ e, int stride, int w)
136+{
137+ int i;
138+ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
139+ d += stride, e += stride)
140+ {
141+ e[0] = foo (c[0], d[0], a[0], b[0]);
142+ e[1] = foo (c[1], d[1], a[1], b[1]);
143+ e[2] = foo (c[2], d[2], a[2], b[2]);
144+ e[3] = foo (c[3], d[3], a[3], b[3]);
145+ e[4] = foo (c[4], d[4], a[4], b[4]);
146+ e[5] = foo (c[5], d[5], a[5], b[5]);
147+ e[6] = foo (c[6], d[6], a[6], b[6]);
148+ e[7] = foo (c[7], d[7], a[7], b[7]);
149+ e[8] = foo (c[8], d[8], a[8], b[8]);
150+ e[9] = foo (c[9], d[9], a[9], b[9]);
151+ e[10] = foo (c[10], d[10], a[10], b[10]);
152+ e[11] = foo (c[11], d[11], a[11], b[11]);
153+ e[12] = foo (c[12], d[12], a[12], b[12]);
154+ e[13] = foo (c[13], d[13], a[13], b[13]);
155+ e[14] = foo (c[14], d[14], a[14], b[14]);
156+ e[15] = foo (c[15], d[15], a[15], b[15]);
157+ }
158+}
159+
160+
161+short a[N], b[N], c[N], d[N];
162+int e[N];
163+
164+int main ()
165+{
166+ int i;
167+
168+ check_vect ();
169+
170+ for (i = 0; i < N; i++)
171+ {
172+ a[i] = i;
173+ b[i] = 5;
174+ e[i] = 0;
175+
176+ switch (i % 9)
177+ {
178+ case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break;
179+ case 1: c[i] = 0; d[i] = 0; break;
180+ case 2: c[i] = i + 1; d[i] = - i - 1; break;
181+ case 3: c[i] = i; d[i] = i + 7; break;
182+ case 4: c[i] = i; d[i] = i; break;
183+ case 5: c[i] = i + 16; d[i] = i + 3; break;
184+ case 6: c[i] = - i - 5; d[i] = - i; break;
185+ case 7: c[i] = - i; d[i] = - i; break;
186+ case 8: c[i] = - i; d[i] = - i - 7; break;
187+ }
188+ }
189+
190+ bar (a, b, c, d, e, 16, 2);
191+ for (i = 0; i < N; i++)
192+ if (e[i] != ((i % 3) == 0 ? 5 : i))
193+ abort ();
194+
195+ return 0;
196+}
197+
198+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
199+/* { dg-final { cleanup-tree-dump "slp" } } */
200+
201
202=== modified file 'gcc/tree-vect-patterns.c'
203--- old/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000
204+++ new/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000
205@@ -62,18 +62,16 @@
206 vect_recog_mixed_size_cond_pattern};
207
208
209-/* Function widened_name_p
210-
211- Check whether NAME, an ssa-name used in USE_STMT,
212- is a result of a type-promotion, such that:
213- DEF_STMT: NAME = NOP (name0)
214- where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
215+/* Check whether NAME, an ssa-name used in USE_STMT,
216+ is a result of a type promotion or demotion, such that:
217+ DEF_STMT: NAME = NOP (name0)
218+ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
219 If CHECK_SIGN is TRUE, check that either both types are signed or both are
220 unsigned. */
221
222 static bool
223-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
224- bool check_sign)
225+type_conversion_p (tree name, gimple use_stmt, bool check_sign,
226+ tree *orig_type, gimple *def_stmt, bool *promotion)
227 {
228 tree dummy;
229 gimple dummy_gimple;
230@@ -96,21 +94,27 @@
231 && dt != vect_external_def && dt != vect_constant_def)
232 return false;
233
234- if (! *def_stmt)
235+ if (!*def_stmt)
236 return false;
237
238 if (!is_gimple_assign (*def_stmt))
239 return false;
240
241- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
242+ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
243 return false;
244
245 oprnd0 = gimple_assign_rhs1 (*def_stmt);
246
247- *half_type = TREE_TYPE (oprnd0);
248- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
249- || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
250- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
251+ *orig_type = TREE_TYPE (oprnd0);
252+ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
253+ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
254+ return false;
255+
256+ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
257+ *promotion = true;
258+ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
259+ *promotion = false;
260+ else
261 return false;
262
263 if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy,
264@@ -192,6 +196,7 @@
265 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
266 struct loop *loop;
267 tree var, rhs;
268+ bool promotion;
269
270 if (!loop_info)
271 return NULL;
272@@ -255,7 +260,9 @@
273 return NULL;
274 stmt = last_stmt;
275
276- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
277+ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
278+ &promotion)
279+ && promotion)
280 {
281 stmt = def_stmt;
282 oprnd0 = gimple_assign_rhs1 (stmt);
283@@ -310,10 +317,14 @@
284 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
285 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
286 return NULL;
287- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
288+ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
289+ &promotion)
290+ || !promotion)
291 return NULL;
292 oprnd00 = gimple_assign_rhs1 (def_stmt);
293- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
294+ if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
295+ &promotion)
296+ || !promotion)
297 return NULL;
298 oprnd01 = gimple_assign_rhs1 (def_stmt);
299 if (!types_compatible_p (half_type0, half_type1))
300@@ -526,7 +537,7 @@
301 enum tree_code dummy_code;
302 int dummy_int;
303 VEC (tree, heap) *dummy_vec;
304- bool op1_ok;
305+ bool op1_ok, promotion;
306
307 if (!is_gimple_assign (last_stmt))
308 return NULL;
309@@ -546,12 +557,14 @@
310 return NULL;
311
312 /* Check argument 0. */
313- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
314+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
315+ &promotion)
316+ || !promotion)
317 return NULL;
318- /* Check argument 1. */
319- op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
320-
321- if (op1_ok)
322+ /* Check argument 1. */
323+ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
324+ &def_stmt1, &promotion);
325+ if (op1_ok && promotion)
326 {
327 oprnd0 = gimple_assign_rhs1 (def_stmt0);
328 oprnd1 = gimple_assign_rhs1 (def_stmt1);
329@@ -793,6 +806,7 @@
330 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
331 struct loop *loop;
332 tree var;
333+ bool promotion;
334
335 if (!loop_info)
336 return NULL;
337@@ -832,8 +846,10 @@
338 Left to check that oprnd0 is defined by a cast from type 'type' to type
339 'TYPE'. */
340
341- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
342- return NULL;
343+ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
344+ &promotion)
345+ || !promotion)
346+ return NULL;
347
348 oprnd0 = gimple_assign_rhs1 (stmt);
349 *type_in = half_type;
350@@ -899,6 +915,7 @@
351 gimple def_stmt, new_stmt;
352 bool first = false;
353 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
354+ bool promotion;
355 bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
356 struct loop *loop = NULL;
357
358@@ -933,7 +950,9 @@
359 else
360 {
361 first = true;
362- if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
363+ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
364+ &promotion)
365+ || !promotion
366 || !gimple_bb (def_stmt)
367 || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
368 || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
369@@ -1327,6 +1346,7 @@
370 VEC (tree, heap) * dummy_vec;
371 gimple use_stmt = NULL;
372 bool over_widen = false;
373+ bool promotion;
374
375 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
376 return NULL;
377@@ -1381,8 +1401,10 @@
378 return NULL;
379
380 /* Check operand 0: it has to be defined by a type promotion. */
381- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
382- return NULL;
383+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
384+ &promotion)
385+ || !promotion)
386+ return NULL;
387
388 /* Check operand 1: has to be positive. We check that it fits the type
389 in vect_handle_widen_op_by_const (). */
390@@ -1492,9 +1514,9 @@
391 S1 a_T = x_t CMP y_t ? b_T : c_T;
392
393 where type 'TYPE' is an integral type which has different size
394- from 'type'. b_T and c_T are constants and if 'TYPE' is wider
395+ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
396 than 'type', the constants need to fit into an integer type
397- with the same width as 'type'.
398+ with the same width as 'type') or results of conversion from 'type'.
399
400 Input:
401
402@@ -1523,6 +1545,9 @@
403 enum machine_mode cmpmode;
404 gimple pattern_stmt, def_stmt;
405 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
406+ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
407+ gimple def_stmt0 = NULL, def_stmt1 = NULL;
408+ bool promotion;
409 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
410
411 if (!is_gimple_assign (last_stmt)
412@@ -1535,25 +1560,40 @@
413 then_clause = TREE_OPERAND (op, 1);
414 else_clause = TREE_OPERAND (op, 2);
415
416- if (TREE_CODE (then_clause) != INTEGER_CST
417- || TREE_CODE (else_clause) != INTEGER_CST)
418- return NULL;
419-
420 if (!COMPARISON_CLASS_P (cond_expr))
421 return NULL;
422
423 type = gimple_expr_type (last_stmt);
424 comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
425- if (!INTEGRAL_TYPE_P (comp_type)
426- || !INTEGRAL_TYPE_P (type))
427- return NULL;
428-
429 comp_vectype = get_vectype_for_scalar_type (comp_type);
430 if (comp_vectype == NULL_TREE)
431 return NULL;
432
433+ if (types_compatible_p (type, comp_type)
434+ || !INTEGRAL_TYPE_P (comp_type)
435+ || !INTEGRAL_TYPE_P (type))
436+ return NULL;
437+
438+ if ((TREE_CODE (then_clause) != INTEGER_CST
439+ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
440+ &def_stmt0, &promotion))
441+ || (TREE_CODE (else_clause) != INTEGER_CST
442+ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
443+ &def_stmt1, &promotion)))
444+ return NULL;
445+
446+ if (orig_type0 && orig_type1
447+ && (!types_compatible_p (orig_type0, orig_type1)
448+ || !types_compatible_p (orig_type0, comp_type)))
449+ return NULL;
450+
451+ if (orig_type0)
452+ then_clause = gimple_assign_rhs1 (def_stmt0);
453+
454+ if (orig_type1)
455+ else_clause = gimple_assign_rhs1 (def_stmt1);
456+
457 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
458-
459 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
460 return NULL;
461
462@@ -1561,18 +1601,15 @@
463 if (vectype == NULL_TREE)
464 return NULL;
465
466- if (types_compatible_p (vectype, comp_vectype))
467- return NULL;
468-
469 if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype)))
470 return NULL;
471
472- if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
473- {
474- if (!int_fits_type_p (then_clause, comp_type)
475- || !int_fits_type_p (else_clause, comp_type))
476- return NULL;
477- }
478+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)
479+ && ((TREE_CODE (then_clause) == INTEGER_CST
480+ && !int_fits_type_p (then_clause, comp_type))
481+ || (TREE_CODE (else_clause) == INTEGER_CST
482+ && !int_fits_type_p (else_clause, comp_type))))
483+ return NULL;
484
485 tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
486 fold_convert (comp_type, then_clause),
487
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch
new file mode 100644
index 000000000..43a2a4da9
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch
@@ -0,0 +1,276 @@
12011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
2
3 Backport from mainline -A15 tuning.
4 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
5
6 * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue.
7 * config/arm/arm.md (mul64): New attribute.
8 (generic_sched): Cortex-A15 is not scheduled generically.
9 (cortex-a15.md): Include.
10 * config/arm/cortex-a15.md: New machine description.
11 * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md.
12
13 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
14 * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed.
15
16=== modified file 'gcc/config/arm/arm.c'
17--- old/gcc/config/arm/arm.c 2011-12-05 10:55:48 +0000
18+++ new/gcc/config/arm/arm.c 2011-12-05 12:33:25 +0000
19@@ -24056,6 +24056,9 @@
20 {
21 switch (arm_tune)
22 {
23+ case cortexa15:
24+ return 3;
25+
26 case cortexr4:
27 case cortexr4f:
28 case cortexr5:
29
30=== modified file 'gcc/config/arm/arm.md'
31--- old/gcc/config/arm/arm.md 2011-10-26 11:38:30 +0000
32+++ new/gcc/config/arm/arm.md 2011-12-02 00:38:59 +0000
33@@ -345,6 +345,13 @@
34 (const_string "mult")
35 (const_string "alu")))
36
37+; Is this an (integer side) multiply with a 64-bit result?
38+(define_attr "mul64" "no,yes"
39+ (if_then_else
40+ (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
41+ (const_string "yes")
42+ (const_string "no")))
43+
44 ; Load scheduling, set from the arm_ld_sched variable
45 ; initialized by arm_option_override()
46 (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
47@@ -511,7 +518,7 @@
48
49 (define_attr "generic_sched" "yes,no"
50 (const (if_then_else
51- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
52+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
53 (eq_attr "tune_cortexr4" "yes"))
54 (const_string "no")
55 (const_string "yes"))))
56@@ -537,6 +544,7 @@
57 (include "cortex-a5.md")
58 (include "cortex-a8.md")
59 (include "cortex-a9.md")
60+(include "cortex-a15.md")
61 (include "cortex-r4.md")
62 (include "cortex-r4f.md")
63 (include "cortex-m4.md")
64
65=== added file 'gcc/config/arm/cortex-a15.md'
66--- old/gcc/config/arm/cortex-a15.md 1970-01-01 00:00:00 +0000
67+++ new/gcc/config/arm/cortex-a15.md 2011-12-02 00:38:59 +0000
68@@ -0,0 +1,186 @@
69+;; ARM Cortex-A15 pipeline description
70+;; Copyright (C) 2011 Free Software Foundation, Inc.
71+;;
72+;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
73+
74+;; This file is part of GCC.
75+;;
76+;; GCC is free software; you can redistribute it and/or modify it
77+;; under the terms of the GNU General Public License as published by
78+;; the Free Software Foundation; either version 3, or (at your option)
79+;; any later version.
80+;;
81+;; GCC is distributed in the hope that it will be useful, but
82+;; WITHOUT ANY WARRANTY; without even the implied warranty of
83+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
84+;; General Public License for more details.
85+;;
86+;; You should have received a copy of the GNU General Public License
87+;; along with GCC; see the file COPYING3. If not see
88+;; <http://www.gnu.org/licenses/>.
89+
90+(define_automaton "cortex_a15")
91+
92+;; The Cortex-A15 core is modelled as a triple issue pipeline that has
93+;; the following dispatch units.
94+;; 1. Two pipelines for simple integer operations: SX1, SX2
95+;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2
96+;; 3. One pipeline for branch operations: BX
97+;; 4. One pipeline for integer multiply and divide operations: MX
98+;; 5. Two pipelines for load and store operations: LS1, LS2
99+;;
100+;; We can issue into three pipelines per-cycle.
101+;;
102+;; We assume that where we have unit pairs xx1 is always filled before xx2.
103+
104+;; The three issue units
105+(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15")
106+
107+(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)")
108+(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))")
109+(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)")
110+(final_presence_set "ca15_i1" "ca15_i0")
111+(final_presence_set "ca15_i2" "ca15_i1")
112+
113+;; The main dispatch units
114+(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15")
115+(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15")
116+(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15")
117+(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15")
118+
119+(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)")
120+
121+;; The extended load-store pipeline
122+(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15")
123+
124+;; The extended ALU pipeline
125+(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15")
126+(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15")
127+
128+;; Simple Execution Unit:
129+;;
130+;; Simple ALU without shift
131+(define_insn_reservation "cortex_a15_alu" 2
132+ (and (eq_attr "tune" "cortexa15")
133+ (and (eq_attr "type" "alu")
134+ (eq_attr "neon_type" "none")))
135+ "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)")
136+
137+;; ALU ops with immediate shift
138+(define_insn_reservation "cortex_a15_alu_shift" 3
139+ (and (eq_attr "tune" "cortexa15")
140+ (and (eq_attr "type" "alu_shift")
141+ (eq_attr "neon_type" "none")))
142+ "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
143+ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
144+
145+;; ALU ops with register controlled shift
146+(define_insn_reservation "cortex_a15_alu_shift_reg" 3
147+ (and (eq_attr "tune" "cortexa15")
148+ (and (eq_attr "type" "alu_shift_reg")
149+ (eq_attr "neon_type" "none")))
150+ "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\
151+ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\
152+ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)")
153+
154+;; Multiply Execution Unit:
155+;;
156+;; 32-bit multiplies
157+(define_insn_reservation "cortex_a15_mult32" 3
158+ (and (eq_attr "tune" "cortexa15")
159+ (and (eq_attr "type" "mult")
160+ (and (eq_attr "neon_type" "none")
161+ (eq_attr "mul64" "no"))))
162+ "ca15_issue1,ca15_mx")
163+
164+;; 64-bit multiplies
165+(define_insn_reservation "cortex_a15_mult64" 4
166+ (and (eq_attr "tune" "cortexa15")
167+ (and (eq_attr "type" "mult")
168+ (and (eq_attr "neon_type" "none")
169+ (eq_attr "mul64" "yes"))))
170+ "ca15_issue1,ca15_mx*2")
171+
172+;; Integer divide
173+(define_insn_reservation "cortex_a15_udiv" 9
174+ (and (eq_attr "tune" "cortexa15")
175+ (eq_attr "insn" "udiv"))
176+ "ca15_issue1,ca15_mx")
177+
178+(define_insn_reservation "cortex_a15_sdiv" 10
179+ (and (eq_attr "tune" "cortexa15")
180+ (eq_attr "insn" "sdiv"))
181+ "ca15_issue1,ca15_mx")
182+
183+;; Block all issue pipes for a cycle
184+(define_insn_reservation "cortex_a15_block" 1
185+ (and (eq_attr "tune" "cortexa15")
186+ (and (eq_attr "type" "block")
187+ (eq_attr "neon_type" "none")))
188+ "ca15_issue3")
189+
190+;; Branch execution Unit
191+;;
192+;; Branches take one issue slot.
193+;; No latency as there is no result
194+(define_insn_reservation "cortex_a15_branch" 0
195+ (and (eq_attr "tune" "cortexa15")
196+ (and (eq_attr "type" "branch")
197+ (eq_attr "neon_type" "none")))
198+ "ca15_issue1,ca15_bx")
199+
200+
201+;; We lie with calls. They take up all issue slots, and form a block in the
202+;; pipeline. The result however is available the next cycle.
203+;;
204+;; Addition of new units requires this to be updated.
205+(define_insn_reservation "cortex_a15_call" 1
206+ (and (eq_attr "tune" "cortexa15")
207+ (and (eq_attr "type" "call")
208+ (eq_attr "neon_type" "none")))
209+ "ca15_issue3,\
210+ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\
211+ ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\
212+ +ca15_sx2_sat+ca15_ldr+ca15_str")
213+
214+;; Load-store execution Unit
215+;;
216+;; Loads of up to two words.
217+(define_insn_reservation "cortex_a15_load1" 4
218+ (and (eq_attr "tune" "cortexa15")
219+ (and (eq_attr "type" "load_byte,load1,load2")
220+ (eq_attr "neon_type" "none")))
221+ "ca15_issue1,ca15_ls,ca15_ldr,nothing")
222+
223+;; Loads of three or four words.
224+(define_insn_reservation "cortex_a15_load3" 5
225+ (and (eq_attr "tune" "cortexa15")
226+ (and (eq_attr "type" "load3,load4")
227+ (eq_attr "neon_type" "none")))
228+ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing")
229+
230+;; Stores of up to two words.
231+(define_insn_reservation "cortex_a15_store1" 0
232+ (and (eq_attr "tune" "cortexa15")
233+ (and (eq_attr "type" "store1,store2")
234+ (eq_attr "neon_type" "none")))
235+ "ca15_issue1,ca15_ls,ca15_str")
236+
237+;; Stores of three or four words.
238+(define_insn_reservation "cortex_a15_store3" 0
239+ (and (eq_attr "tune" "cortexa15")
240+ (and (eq_attr "type" "store3,store4")
241+ (eq_attr "neon_type" "none")))
242+ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str")
243+
244+;; Simple execution unit bypasses
245+(define_bypass 1 "cortex_a15_alu"
246+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
247+(define_bypass 2 "cortex_a15_alu_shift"
248+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
249+(define_bypass 2 "cortex_a15_alu_shift_reg"
250+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
251+(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3")
252+(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3")
253+(define_bypass 2 "cortex_a15_alu_shift_reg"
254+ "cortex_a15_load1,cortex_a15_load3")
255
256=== modified file 'gcc/config/arm/t-arm'
257--- old/gcc/config/arm/t-arm 2011-01-03 20:52:22 +0000
258+++ new/gcc/config/arm/t-arm 2011-12-02 00:38:59 +0000
259@@ -31,6 +31,16 @@
260 $(srcdir)/config/arm/fmp626.md \
261 $(srcdir)/config/arm/fa726te.md \
262 $(srcdir)/config/arm/arm926ejs.md \
263+ $(srcdir)/config/arm/cortex-a15.md \
264+ $(srcdir)/config/arm/cortex-a5.md \
265+ $(srcdir)/config/arm/cortex-a8.md \
266+ $(srcdir)/config/arm/cortex-a8-neon.md \
267+ $(srcdir)/config/arm/cortex-a9.md \
268+ $(srcdir)/config/arm/cortex-a9-neon.md \
269+ $(srcdir)/config/arm/cortex-m4-fpu.md \
270+ $(srcdir)/config/arm/cortex-m4.md \
271+ $(srcdir)/config/arm/cortex-r4f.md \
272+ $(srcdir)/config/arm/cortex-r4.md \
273 $(srcdir)/config/arm/cirrus.md \
274 $(srcdir)/config/arm/fpa.md \
275 $(srcdir)/config/arm/vec-common.md \
276
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch
new file mode 100644
index 000000000..8c51c1d1f
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch
@@ -0,0 +1,69 @@
12011-12-20 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-11-29 Ira Rosen <ira.rosen@linaro.org>
6
7 PR tree-optimization/51301
8 gcc/
9 * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that
10 the last statement doesn't convert to a bigger type than the original
11 type of the computation.
12
13 gcc/testsuite/
14 * gcc.dg/vect/pr51301.c: New test.
15
16=== added file 'gcc/testsuite/gcc.dg/vect/pr51301.c'
17--- old/gcc/testsuite/gcc.dg/vect/pr51301.c 1970-01-01 00:00:00 +0000
18+++ new/gcc/testsuite/gcc.dg/vect/pr51301.c 2011-11-30 17:54:51 +0000
19@@ -0,0 +1,15 @@
20+/* { dg-do compile } */
21+
22+typedef signed char int8_t;
23+typedef signed long long int64_t;
24+int64_t
25+f0a (int8_t * __restrict__ arg1)
26+{
27+ int idx;
28+ int64_t result = 0;
29+ for (idx = 0; idx < 416; idx += 1)
30+ result += arg1[idx] << (arg1[idx] == arg1[idx]);
31+ return result;
32+}
33+
34+/* { dg-final { cleanup-tree-dump "vect" } } */
35
36=== modified file 'gcc/tree-vect-patterns.c'
37--- old/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000
38+++ new/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000
39@@ -1138,6 +1138,7 @@
40 struct loop *loop = NULL;
41 bb_vec_info bb_vinfo;
42 stmt_vec_info stmt_vinfo;
43+ tree type = NULL;
44
45 stmt_vinfo = vinfo_for_stmt (stmt);
46 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
47@@ -1207,6 +1208,7 @@
48 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
49 }
50
51+ type = gimple_expr_type (stmt);
52 prev_stmt = stmt;
53 stmt = use_stmt;
54
55@@ -1222,9 +1224,11 @@
56 {
57 use_lhs = gimple_assign_lhs (use_stmt);
58 use_type = TREE_TYPE (use_lhs);
59- /* Support only type promotion or signedess change. */
60+ /* Support only type promotion or signedess change. Check that USE_TYPE
61+ is not bigger than the original type. */
62 if (!INTEGRAL_TYPE_P (use_type)
63- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
64+ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
65+ || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
66 return NULL;
67
68 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
69
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch
new file mode 100644
index 000000000..208ba0e78
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch
@@ -0,0 +1,354 @@
12011-12-30 Richard Sandiford <richard.sandiford@linaro.org>
2
3 gcc/
4 Backport from mainline:
5
6 2011-10-12 Richard Sandiford <richard.sandiford@linaro.org>
7
8 * expr.h (copy_blkmode_to_reg): Declare.
9 * expr.c (copy_blkmode_to_reg): New function.
10 (expand_assignment): Don't expand register RESULT_DECLs before
11 the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a
12 RESULT_DECL register.
13 (expand_expr_real_1): Handle BLKmode decls when looking for promotion.
14 * stmt.c (expand_return): Move BLKmode-to-register code into
15 copy_blkmode_to_reg.
16
17=== modified file 'gcc/expr.c'
18--- old/gcc/expr.c 2011-10-23 13:33:07 +0000
19+++ new/gcc/expr.c 2011-12-30 09:41:30 +0000
20@@ -2180,6 +2180,111 @@
21 return tgtblk;
22 }
23
24+/* Copy BLKmode value SRC into a register of mode MODE. Return the
25+ register if it contains any data, otherwise return null.
26+
27+ This is used on targets that return BLKmode values in registers. */
28+
29+rtx
30+copy_blkmode_to_reg (enum machine_mode mode, tree src)
31+{
32+ int i, n_regs;
33+ unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
34+ unsigned int bitsize;
35+ rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
36+ enum machine_mode dst_mode;
37+
38+ gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode);
39+
40+ x = expand_normal (src);
41+
42+ bytes = int_size_in_bytes (TREE_TYPE (src));
43+ if (bytes == 0)
44+ return NULL_RTX;
45+
46+ /* If the structure doesn't take up a whole number of words, see
47+ whether the register value should be padded on the left or on
48+ the right. Set PADDING_CORRECTION to the number of padding
49+ bits needed on the left side.
50+
51+ In most ABIs, the structure will be returned at the least
52+ significant end of the register, which translates to right padding
53+ on little-endian targets and left padding on big-endian targets.
54+ The opposite holds if the structure is returned at the most
55+ significant end of the register. */
56+ if (bytes % UNITS_PER_WORD != 0
57+ && (targetm.calls.return_in_msb (TREE_TYPE (src))
58+ ? !BYTES_BIG_ENDIAN
59+ : BYTES_BIG_ENDIAN))
60+ padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
61+ * BITS_PER_UNIT));
62+
63+ n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
64+ dst_words = XALLOCAVEC (rtx, n_regs);
65+ bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
66+
67+ /* Copy the structure BITSIZE bits at a time. */
68+ for (bitpos = 0, xbitpos = padding_correction;
69+ bitpos < bytes * BITS_PER_UNIT;
70+ bitpos += bitsize, xbitpos += bitsize)
71+ {
72+ /* We need a new destination pseudo each time xbitpos is
73+ on a word boundary and when xbitpos == padding_correction
74+ (the first time through). */
75+ if (xbitpos % BITS_PER_WORD == 0
76+ || xbitpos == padding_correction)
77+ {
78+ /* Generate an appropriate register. */
79+ dst_word = gen_reg_rtx (word_mode);
80+ dst_words[xbitpos / BITS_PER_WORD] = dst_word;
81+
82+ /* Clear the destination before we move anything into it. */
83+ emit_move_insn (dst_word, CONST0_RTX (word_mode));
84+ }
85+
86+ /* We need a new source operand each time bitpos is on a word
87+ boundary. */
88+ if (bitpos % BITS_PER_WORD == 0)
89+ src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode);
90+
91+ /* Use bitpos for the source extraction (left justified) and
92+ xbitpos for the destination store (right justified). */
93+ store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, word_mode,
94+ extract_bit_field (src_word, bitsize,
95+ bitpos % BITS_PER_WORD, 1, false,
96+ NULL_RTX, word_mode, word_mode));
97+ }
98+
99+ if (mode == BLKmode)
100+ {
101+ /* Find the smallest integer mode large enough to hold the
102+ entire structure. */
103+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
104+ mode != VOIDmode;
105+ mode = GET_MODE_WIDER_MODE (mode))
106+ /* Have we found a large enough mode? */
107+ if (GET_MODE_SIZE (mode) >= bytes)
108+ break;
109+
110+ /* A suitable mode should have been found. */
111+ gcc_assert (mode != VOIDmode);
112+ }
113+
114+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode))
115+ dst_mode = word_mode;
116+ else
117+ dst_mode = mode;
118+ dst = gen_reg_rtx (dst_mode);
119+
120+ for (i = 0; i < n_regs; i++)
121+ emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]);
122+
123+ if (mode != dst_mode)
124+ dst = gen_lowpart (mode, dst);
125+
126+ return dst;
127+}
128+
129 /* Add a USE expression for REG to the (possibly empty) list pointed
130 to by CALL_FUSAGE. REG must denote a hard register. */
131
132@@ -4382,7 +4487,9 @@
133 if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from)
134 && COMPLETE_TYPE_P (TREE_TYPE (from))
135 && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST
136- && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL)
137+ && ! (((TREE_CODE (to) == VAR_DECL
138+ || TREE_CODE (to) == PARM_DECL
139+ || TREE_CODE (to) == RESULT_DECL)
140 && REG_P (DECL_RTL (to)))
141 || TREE_CODE (to) == SSA_NAME))
142 {
143@@ -4428,12 +4535,15 @@
144 rtx temp;
145
146 push_temp_slots ();
147- temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
148+ if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode)
149+ temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from);
150+ else
151+ temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
152
153 if (GET_CODE (to_rtx) == PARALLEL)
154 emit_group_load (to_rtx, temp, TREE_TYPE (from),
155 int_size_in_bytes (TREE_TYPE (from)));
156- else
157+ else if (temp)
158 emit_move_insn (to_rtx, temp);
159
160 preserve_temp_slots (to_rtx);
161@@ -8567,10 +8677,15 @@
162 return temp;
163 }
164
165- /* If the mode of DECL_RTL does not match that of the decl, it
166- must be a promoted value. We return a SUBREG of the wanted mode,
167- but mark it so that we know that it was already extended. */
168- if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
169+ /* If the mode of DECL_RTL does not match that of the decl,
170+ there are two cases: we are dealing with a BLKmode value
171+ that is returned in a register, or we are dealing with
172+ a promoted value. In the latter case, return a SUBREG
173+ of the wanted mode, but mark it so that we know that it
174+ was already extended. */
175+ if (REG_P (decl_rtl)
176+ && DECL_MODE (exp) != BLKmode
177+ && GET_MODE (decl_rtl) != DECL_MODE (exp))
178 {
179 enum machine_mode pmode;
180
181
182=== modified file 'gcc/expr.h'
183--- old/gcc/expr.h 2011-03-03 21:56:58 +0000
184+++ new/gcc/expr.h 2011-10-12 08:01:43 +0000
185@@ -324,6 +324,8 @@
186 /* Copy BLKmode object from a set of registers. */
187 extern rtx copy_blkmode_from_reg (rtx, rtx, tree);
188
189+extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
190+
191 /* Mark REG as holding a parameter for the next CALL_INSN. */
192 extern void use_reg (rtx *, rtx);
193
194
195=== modified file 'gcc/stmt.c'
196--- old/gcc/stmt.c 2011-03-03 21:56:58 +0000
197+++ new/gcc/stmt.c 2011-10-12 08:01:43 +0000
198@@ -1684,119 +1684,21 @@
199 expand_value_return (result_rtl);
200
201 /* If the result is an aggregate that is being returned in one (or more)
202- registers, load the registers here. The compiler currently can't handle
203- copying a BLKmode value into registers. We could put this code in a
204- more general area (for use by everyone instead of just function
205- call/return), but until this feature is generally usable it is kept here
206- (and in expand_call). */
207+ registers, load the registers here. */
208
209 else if (retval_rhs != 0
210 && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode
211 && REG_P (result_rtl))
212 {
213- int i;
214- unsigned HOST_WIDE_INT bitpos, xbitpos;
215- unsigned HOST_WIDE_INT padding_correction = 0;
216- unsigned HOST_WIDE_INT bytes
217- = int_size_in_bytes (TREE_TYPE (retval_rhs));
218- int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
219- unsigned int bitsize
220- = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD);
221- rtx *result_pseudos = XALLOCAVEC (rtx, n_regs);
222- rtx result_reg, src = NULL_RTX, dst = NULL_RTX;
223- rtx result_val = expand_normal (retval_rhs);
224- enum machine_mode tmpmode, result_reg_mode;
225-
226- if (bytes == 0)
227- {
228- expand_null_return ();
229- return;
230- }
231-
232- /* If the structure doesn't take up a whole number of words, see
233- whether the register value should be padded on the left or on
234- the right. Set PADDING_CORRECTION to the number of padding
235- bits needed on the left side.
236-
237- In most ABIs, the structure will be returned at the least end of
238- the register, which translates to right padding on little-endian
239- targets and left padding on big-endian targets. The opposite
240- holds if the structure is returned at the most significant
241- end of the register. */
242- if (bytes % UNITS_PER_WORD != 0
243- && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs))
244- ? !BYTES_BIG_ENDIAN
245- : BYTES_BIG_ENDIAN))
246- padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
247- * BITS_PER_UNIT));
248-
249- /* Copy the structure BITSIZE bits at a time. */
250- for (bitpos = 0, xbitpos = padding_correction;
251- bitpos < bytes * BITS_PER_UNIT;
252- bitpos += bitsize, xbitpos += bitsize)
253- {
254- /* We need a new destination pseudo each time xbitpos is
255- on a word boundary and when xbitpos == padding_correction
256- (the first time through). */
257- if (xbitpos % BITS_PER_WORD == 0
258- || xbitpos == padding_correction)
259- {
260- /* Generate an appropriate register. */
261- dst = gen_reg_rtx (word_mode);
262- result_pseudos[xbitpos / BITS_PER_WORD] = dst;
263-
264- /* Clear the destination before we move anything into it. */
265- emit_move_insn (dst, CONST0_RTX (GET_MODE (dst)));
266- }
267-
268- /* We need a new source operand each time bitpos is on a word
269- boundary. */
270- if (bitpos % BITS_PER_WORD == 0)
271- src = operand_subword_force (result_val,
272- bitpos / BITS_PER_WORD,
273- BLKmode);
274-
275- /* Use bitpos for the source extraction (left justified) and
276- xbitpos for the destination store (right justified). */
277- store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode,
278- extract_bit_field (src, bitsize,
279- bitpos % BITS_PER_WORD, 1, false,
280- NULL_RTX, word_mode, word_mode));
281- }
282-
283- tmpmode = GET_MODE (result_rtl);
284- if (tmpmode == BLKmode)
285- {
286- /* Find the smallest integer mode large enough to hold the
287- entire structure and use that mode instead of BLKmode
288- on the USE insn for the return register. */
289- for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
290- tmpmode != VOIDmode;
291- tmpmode = GET_MODE_WIDER_MODE (tmpmode))
292- /* Have we found a large enough mode? */
293- if (GET_MODE_SIZE (tmpmode) >= bytes)
294- break;
295-
296- /* A suitable mode should have been found. */
297- gcc_assert (tmpmode != VOIDmode);
298-
299- PUT_MODE (result_rtl, tmpmode);
300- }
301-
302- if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode))
303- result_reg_mode = word_mode;
304+ val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs);
305+ if (val)
306+ {
307+ /* Use the mode of the result value on the return register. */
308+ PUT_MODE (result_rtl, GET_MODE (val));
309+ expand_value_return (val);
310+ }
311 else
312- result_reg_mode = tmpmode;
313- result_reg = gen_reg_rtx (result_reg_mode);
314-
315- for (i = 0; i < n_regs; i++)
316- emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode),
317- result_pseudos[i]);
318-
319- if (tmpmode != result_reg_mode)
320- result_reg = gen_lowpart (tmpmode, result_reg);
321-
322- expand_value_return (result_reg);
323+ expand_null_return ();
324 }
325 else if (retval_rhs != 0
326 && !VOID_TYPE_P (TREE_TYPE (retval_rhs))
327
328=== added file 'gcc/testsuite/g++.dg/pr48660.C'
329--- old/gcc/testsuite/g++.dg/pr48660.C 1970-01-01 00:00:00 +0000
330+++ new/gcc/testsuite/g++.dg/pr48660.C 2011-10-12 08:01:43 +0000
331@@ -0,0 +1,22 @@
332+template<int N> struct val { char a[N]; };
333+
334+class Base
335+{
336+public:
337+ virtual val<1> get1() const = 0;
338+ virtual val<2> get2() const = 0;
339+ virtual val<3> get3() const = 0;
340+ virtual val<4> get4() const = 0;
341+};
342+
343+class Derived : public virtual Base
344+{
345+public:
346+ virtual val<1> get1() const { return foo->get1(); }
347+ virtual val<2> get2() const { return foo->get2(); }
348+ virtual val<3> get3() const { return foo->get3(); }
349+ virtual val<4> get4() const { return foo->get4(); }
350+ Base *foo;
351+};
352+
353+Base* make() { return new Derived; }
354
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch
new file mode 100644
index 000000000..c433fc73f
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch
@@ -0,0 +1,22 @@
12012-01-05 Michael Hope <michael.hope@linaro.org>
2
3 Backport from mainline r182271:
4
5 2011-12-13 Revital Eres <revital.eres@linaro.org>
6
7 gcc/
8 * modulo-sched.c (mark_loop_unsched): Free bbs.
9
10=== modified file 'gcc/modulo-sched.c'
11--- old/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000
12+++ new/gcc/modulo-sched.c 2012-01-05 02:45:23 +0000
13@@ -1204,6 +1204,8 @@
14
15 for (i = 0; i < loop->num_nodes; i++)
16 bbs[i]->flags |= BB_DISABLE_SCHEDULE;
17+
18+ free (bbs);
19 }
20
21 /* Return true if all the BBs of the loop are empty except the
22
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
index 882876ecc..e42aeeaad 100644
--- a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
+++ b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
@@ -74,4 +74,13 @@ file://linaro/gcc-4.6-linaro-r106836.patch \
74file://linaro/gcc-4.6-linaro-r106839.patch \ 74file://linaro/gcc-4.6-linaro-r106839.patch \
75file://linaro/gcc-4.6-linaro-r106840.patch \ 75file://linaro/gcc-4.6-linaro-r106840.patch \
76file://linaro/gcc-4.6-linaro-r106841.patch \ 76file://linaro/gcc-4.6-linaro-r106841.patch \
77file://linaro/gcc-4.6-linaro-r106842.patch \
78file://linaro/gcc-4.6-linaro-r106843.patch \
79file://linaro/gcc-4.6-linaro-r106844.patch \
80file://linaro/gcc-4.6-linaro-r106845.patch \
81file://linaro/gcc-4.6-linaro-r106846.patch \
82file://linaro/gcc-4.6-linaro-r106848.patch \
83file://linaro/gcc-4.6-linaro-r106853.patch \
84file://linaro/gcc-4.6-linaro-r106854.patch \
85file://linaro/gcc-4.6-linaro-r106855.patch \
77" 86"
diff --git a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
index 6b0151b5b..695079772 100644
--- a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
+++ b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
@@ -1,4 +1,4 @@
1# this will prepend this layer to FILESPATH 1# this will prepend this layer to FILESPATH
2FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" 2FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
3PRINC = "4" 3PRINC = "5"
4ARM_INSTRUCTION_SET = "arm" 4ARM_INSTRUCTION_SET = "arm"