author     Khem Raj <raj.khem@gmail.com>  2011-11-22 07:29:59 -0800
committer  Khem Raj <raj.khem@gmail.com>  2011-12-03 10:59:33 -0800
commit     2378ee8f21741abd23e434260a88c62cb0e151f1 (patch)
tree       036eef0d3a08a5cc79de7c4233bda78031b7560e /meta-oe
parent     a2fbb83f951a522f3a1eb2b627847f549a0c245a (diff)
download   meta-openembedded-2378ee8f21741abd23e434260a88c62cb0e151f1.tar.gz
gcc-4.6: Bring in latest Linaro patches
I have tested this on Angstrom by successfully building console-image and systemd-gnome-image for all supported QEMU targets.

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Diffstat (limited to 'meta-oe')
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch |   80
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch |  528
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch |  387
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch |  290
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch |  105
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch |  436
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch |  378
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch |  240
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch |  124
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch |  362
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch |  628
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch | 1951
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch |  147
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch |  304
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch |  123
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch |   24
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch |  453
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch | 1505
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch |   61
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch |   23
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch | 1400
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch |  515
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc         |   22
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc                          |    2
24 files changed, 10087 insertions(+), 1 deletion(-)
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch
new file mode 100644
index 000000000..91b2191cb
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch
@@ -0,0 +1,80 @@
2011-09-22  Revital Eres  <revital.eres@linaro.org>

 gcc/
 Backport from trunk -r178804:
 modulo-sched.c (remove_node_from_ps): Return void
 instead of bool.
 (optimize_sc): Adjust call to remove_node_from_ps.
 (sms_schedule): Add print info.

=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000
+++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
@@ -211,7 +211,7 @@
 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
 int, int, sbitmap, int *, sbitmap,
 sbitmap);
-static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);

 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
@@ -834,8 +834,7 @@
 if (next_ps_i->node->cuid == g->closing_branch->cuid)
 break;

- gcc_assert (next_ps_i);
- gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ remove_node_from_ps (ps, next_ps_i);
 success =
 try_scheduling_node_in_cycle (ps, g->closing_branch,
 g->closing_branch->cuid, c,
@@ -1485,8 +1484,8 @@
 if (dump_file)
 {
 fprintf (dump_file,
- "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
- stage_count);
+ "%s:%d SMS succeeded %d %d (with ii, sc)\n",
+ insn_file (tail), insn_line (tail), ps->ii, stage_count);
 print_partial_schedule (ps, dump_file);
 }

@@ -2810,22 +2809,18 @@
 }


-/* Removes the given PS_INSN from the partial schedule. Returns false if the
- node is not found in the partial schedule, else returns true. */
-static bool
+/* Removes the given PS_INSN from the partial schedule. */
+static void
 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
 {
 int row;

- if (!ps || !ps_i)
- return false;
-
+ gcc_assert (ps && ps_i);
+
 row = SMODULO (ps_i->cycle, ps->ii);
 if (! ps_i->prev_in_row)
 {
- if (ps_i != ps->rows[row])
- return false;
-
+ gcc_assert (ps_i == ps->rows[row]);
 ps->rows[row] = ps_i->next_in_row;
 if (ps->rows[row])
 ps->rows[row]->prev_in_row = NULL;
@@ -2839,7 +2834,7 @@

 ps->rows_length[row] -= 1;
 free (ps_i);
- return true;
+ return;
 }

 /* Unlike what literature describes for modulo scheduling (which focuses
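
The refactoring above turns a "report failure via bool" helper into a void function that asserts the conditions its callers already guaranteed. A minimal standalone sketch of the same pattern, with a hypothetical node type rather than GCC's partial-schedule structures:

/* Sketch only: a remove helper that used to return false on bad input
   now asserts its preconditions instead, since every caller relied on
   success anyway.  Hypothetical types; not GCC code. */
#include <assert.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };

static void
remove_node (struct node **head, struct node *n)
{
  assert (head && n);          /* was: if (!head || !n) return false; */
  if (!n->prev)
    {
      assert (n == *head);     /* was: if (n != *head) return false;  */
      *head = n->next;
      if (*head)
        (*head)->prev = NULL;
    }
  else
    {
      n->prev->next = n->next;
      if (n->next)
        n->next->prev = n->prev;
    }
  free (n);
}
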
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch
new file mode 100644
index 000000000..16779bbf1
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch
@@ -0,0 +1,528 @@
2011-09-25  Ira Rosen  <ira.rosen@linaro.org>

 gcc/testsuite/
 * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
 Replace check_effective_target_arm_neon with
 check_effective_target_arm_neon_ok.

 Backport from mainline:

 2011-09-06  Ira Rosen  <ira.rosen@linaro.org>

 gcc/
 * config/arm/arm.c (arm_preferred_simd_mode): Check
 TARGET_NEON_VECTORIZE_DOUBLE instead of
 TARGET_NEON_VECTORIZE_QUAD.
 (arm_autovectorize_vector_sizes): Likewise.
 * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
 mask of mvectorize-with-neon-double. Add RejectNegative.
 (mvectorize-with-neon-double): New.

 gcc/testsuite/
 * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
 New procedure.
 (add_options_for_quad_vectors): Replace with ...
 (add_options_for_double_vectors): ... this.
 * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
 support multiple vector sizes since the vectorizer attempts to
 vectorize with both vector sizes.
 * gcc.dg/vect/no-vfa-vect-79.c,
 gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
 gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
 gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
 gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
 gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
 gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
 * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
 * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
 gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
 gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
 gcc.dg/vect/vect-40.c: Likewise.
 * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
 redundant.
 * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
 gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
 gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
 gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
 Likewise.
 * gcc.dg/vect/vect-peel-4.c: Make ia global.

=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000
+++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
@@ -22974,7 +22974,7 @@
 return false;
 }

-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+/* Use the option -mvectorize-with-neon-double to override the use of quardword
 registers when autovectorizing for Neon, at least until multiple vector
 widths are supported properly by the middle-end. */

@@ -22985,15 +22985,15 @@
 switch (mode)
 {
 case SFmode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
 case SImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
 case HImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
 case QImode:
- return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
 case DImode:
- if (TARGET_NEON_VECTORIZE_QUAD)
+ if (!TARGET_NEON_VECTORIZE_DOUBLE)
 return V2DImode;
 break;

@@ -24226,7 +24226,7 @@
 static unsigned int
 arm_autovectorize_vector_sizes (void)
 {
- return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+ return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
 }

 static bool

=== modified file 'gcc/config/arm/arm.opt'
--- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000
+++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
@@ -158,9 +158,13 @@
 Assume big endian bytes, little endian words

 mvectorize-with-neon-quad
-Target Report Mask(NEON_VECTORIZE_QUAD)
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
 Use Neon quad-word (rather than double-word) registers for vectorization

+mvectorize-with-neon-double
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
+Use Neon double-word (rather than quad-word) registers for vectorization
+
 mword-relocations
 Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
 Only generate absolute relocations on word sized values.

=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000
@@ -45,6 +45,7 @@
 }

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
 }

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000
@@ -53,6 +53,7 @@
 }

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000
@@ -58,5 +58,6 @@
 If/when the aliasing problems are resolved, unalignment may
 prevent vectorization on some targets. */
 /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000
@@ -46,5 +46,6 @@
 If/when the aliasing problems are resolved, unalignment may
 prevent vectorization on some targets. */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000
@@ -64,6 +64,7 @@
 }

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


=== modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
 }

 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000
@@ -20,5 +20,6 @@
 }

 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000
@@ -22,5 +22,6 @@
 }

 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000
@@ -37,5 +37,6 @@
 return 0;
 }

-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
 }

 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000
@@ -49,5 +49,6 @@
 }

 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_float } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include <signal.h>

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */

 #include <stdarg.h>
 #include "tree-vect.h"

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000
@@ -6,12 +6,12 @@
 #define N 128

 int ib[N+7];
+int ia[N+1];

 __attribute__ ((noinline))
 int main1 ()
 {
 int i;
- int ia[N+1];

 /* Don't peel keeping one load and the store aligned. */
 for (i = 0; i <= N; i++)

=== modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000
@@ -58,7 +58,8 @@
 }

 /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */


=== modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90'
--- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000
+++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000
@@ -19,6 +19,7 @@
 end

 ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
 ! { dg-final { cleanup-tree-dump "vect" } }


=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
@@ -3265,6 +3265,24 @@
 }]
 }

+# Return 1 if the target supports multiple vector sizes
+
+proc check_effective_target_vect_multiple_sizes { } {
+ global et_vect_multiple_sizes
+
+ if [info exists et_vect_multiple_sizes_saved] {
+ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
+ } else {
+ set et_vect_multiple_sizes_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+ set et_vect_multiple_sizes_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
+ return $et_vect_multiple_sizes_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {
@@ -3648,11 +3666,11 @@
 return $flags
 }

-# Add to FLAGS the flags needed to enable 128-bit vectors.
+# Add to FLAGS the flags needed to enable 64-bit vectors.

-proc add_options_for_quad_vectors { flags } {
+proc add_options_for_double_vectors { flags } {
 if [is-effective-target arm_neon_ok] {
- return "$flags -mvectorize-with-neon-quad"
+ return "$flags -mvectorize-with-neon-double"
 }

 return $flags
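
With this backport, quad-word (128-bit) NEON vectorization becomes the default and the new -mvectorize-with-neon-double flag selects 64-bit vectors instead. A small sketch of a loop one might compile to observe the difference; the compile line is illustrative and assumes a patched gcc-4.6 cross toolchain:

/* Sketch: a trivially vectorizable loop.  Hypothetical compile line:
     arm-linux-gnueabi-gcc -O2 -mfpu=neon -mfloat-abi=softfp \
         -ftree-vectorize -mvectorize-with-neon-double -S vecadd.c
   With the flag the loop should use V2SF (d registers); without it,
   V4SF (q registers) is now the default. */
#define N 1024
float a[N], b[N], c[N];

void
vecadd (void)
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] + b[i];
}
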
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch
new file mode 100644
index 000000000..2f70b1b9c
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch
@@ -0,0 +1,387 @@
2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>

 gcc/
 Backport from mainline:

 2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>

 * config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
 (neon_move_hi_quad_<mode>): Likewise.
 (move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.

2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>

 gcc/
 Backport from mainline:

 2011-09-27  Richard Sandiford  <richard.sandiford@linaro.org>

 * config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
 (neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
 (neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
 (neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
 that produce subreg moves. Define using VQX iterators.

2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>

 gcc/
 Backport from mainline:

 2011-09-14  Richard Sandiford  <richard.sandiford@linaro.org>

 * simplify-rtx.c (simplify_subreg): Check that the inner mode is
 a scalar integer before applying integer-only optimisations to
 inner arithmetic.

=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000
+++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
@@ -1235,66 +1235,14 @@
 (const_string "neon_int_1") (const_string "neon_int_5")))]
 )

-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (match_operand:<V_HALF> 1 "s_register_operand" "w")
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%e0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
- [(set (match_operand:ANY128 0 "s_register_operand" "+w")
- (vec_concat:ANY128
- (vec_select:<V_HALF>
- (match_dup 0)
- (match_operand:ANY128 2 "vect_par_constant_low" ""))
- (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%f0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
 (define_expand "move_hi_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand" "")
 (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
 {
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT (i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)),
+ operands[1]);
 DONE;
 })

@@ -1303,16 +1251,9 @@
 (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
 {
- rtvec v = rtvec_alloc (<V_mode_nunits>/2);
- rtx t1;
- int i;
-
- for (i=0; i < (<V_mode_nunits>/2); i++)
- RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
- t1 = gen_rtx_PARALLEL (<MODE>mode, v);
- emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+ emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+ <MODE>mode, 0),
+ operands[1]);
 DONE;
 })

@@ -2950,183 +2891,27 @@
 (set_attr "neon_type" "neon_bp_simple")]
 )

-(define_insn "neon_vget_highv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src + 2)
- return "vmov\t%P0, %f1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv16qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=w")
- (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)
- (const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv8hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=w")
- (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4si"
- [(set (match_operand:V2SI 0 "s_register_operand" "=w")
- (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4sf"
- [(set (match_operand:V2SF 0 "s_register_operand" "=w")
- (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv2di"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
- (parallel [(const_int 0)])))]
- "TARGET_NEON"
-{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
-
- if (dest != src)
- return "vmov\t%P0, %e1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+(define_expand "neon_vget_high<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+ GET_MODE_SIZE (<V_HALF>mode)));
+ DONE;
+})
+
+(define_expand "neon_vget_low<mode>"
+ [(match_operand:<V_HALF> 0 "s_register_operand")
+ (match_operand:VQX 1 "s_register_operand")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0],
+ simplify_gen_subreg (<V_HALF>mode, operands[1],
+ <MODE>mode, 0));
+ DONE;
+})

 (define_insn "neon_vcvt<mode>"
 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")

=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000
+++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000
@@ -5567,6 +5567,7 @@
 /* Optimize SUBREG truncations of zero and sign extended values. */
 if ((GET_CODE (op) == ZERO_EXTEND
 || GET_CODE (op) == SIGN_EXTEND)
+ && SCALAR_INT_MODE_P (innermode)
 && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode))
 {
 unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5605,6 +5606,7 @@
 if ((GET_CODE (op) == LSHIFTRT
 || GET_CODE (op) == ASHIFTRT)
 && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
 /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
 to avoid the possibility that an outer LSHIFTRT shifts by more
 than the sign extension's sign_bit_copies and introduces zeros
@@ -5624,6 +5626,7 @@
 if ((GET_CODE (op) == LSHIFTRT
 || GET_CODE (op) == ASHIFTRT)
 && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
 && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
 && CONST_INT_P (XEXP (op, 1))
 && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5638,6 +5641,7 @@
 the outer subreg is effectively a truncation to the original mode. */
 if (GET_CODE (op) == ASHIFT
 && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
 && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
 && CONST_INT_P (XEXP (op, 1))
 && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5651,7 +5655,7 @@
 /* Recognize a word extraction from a multi-word subreg. */
 if ((GET_CODE (op) == LSHIFTRT
 || GET_CODE (op) == ASHIFTRT)
- && SCALAR_INT_MODE_P (outermode)
+ && SCALAR_INT_MODE_P (innermode)
 && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD
 && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode))
 && CONST_INT_P (XEXP (op, 1))
@@ -5673,6 +5677,7 @@

 if ((GET_CODE (op) == LSHIFTRT
 || GET_CODE (op) == ASHIFTRT)
+ && SCALAR_INT_MODE_P (innermode)
 && MEM_P (XEXP (op, 0))
 && CONST_INT_P (XEXP (op, 1))
 && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))
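
At the source level, the patterns rewritten above back the vget_high/vget_low NEON intrinsics: extracting a half of a quad register now expands to a subreg move, which the register allocator can often eliminate entirely instead of emitting a vmov. A hedged sketch using intrinsics that map onto these expanders (assumes a NEON-enabled compiler, -mfpu=neon):

/* Sketch: with the backport, the two extractions below are plain
   subreg accesses of the q register holding V, so ideally no vmov
   instructions are generated for them. */
#include <arm_neon.h>

int16x4_t
sum_halves (int16x8_t v)
{
  int16x4_t lo = vget_low_s16 (v);   /* low 64 bits, subreg at offset 0 */
  int16x4_t hi = vget_high_s16 (v);  /* high 64 bits                    */
  return vadd_s16 (lo, hi);
}
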
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch
new file mode 100644
index 000000000..d44f8cf1a
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch
@@ -0,0 +1,290 @@
2011-10-01  Revital Eres  <revital.eres@linaro.org>

 gcc/
 Backport from mainline -r179380 and -r179381

 * ddg.c (autoinc_var_is_used_p): New function.
 (create_ddg_dep_from_intra_loop_link,
 add_cross_iteration_register_deps): Call it.
 * ddg.h (autoinc_var_is_used_p): Declare.
 * modulo-sched.c (sms_schedule): Handle instructions with REG_INC.
 (generate_reg_moves): Call autoinc_var_is_used_p. Skip
 instructions that do not set a register and verify no regmoves
 are created for !single_set instructions.

 gcc/testsuite/

 * gcc.dg/sms-10.c: New file

=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-07-31 11:29:10 +0000
+++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000
@@ -145,6 +145,27 @@
 return rtx_mem_access_p (PATTERN (insn));
 }

+/* Return true if DEF_INSN contains address being auto-inc or auto-dec
+ which is used in USE_INSN. Otherwise return false. The result is
+ being used to decide whether to remove the edge between def_insn and
+ use_insn when -fmodulo-sched-allow-regmoves is set. This function
+ doesn't need to consider the specific address register; no reg_moves
+ will be allowed for any life range defined by def_insn and used
+ by use_insn, if use_insn uses an address register auto-inc'ed by
+ def_insn. */
+bool
+autoinc_var_is_used_p (rtx def_insn, rtx use_insn)
+{
+ rtx note;
+
+ for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
+ if (REG_NOTE_KIND (note) == REG_INC
+ && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn)))
+ return true;
+
+ return false;
+}
+
 /* Computes the dependence parameters (latency, distance etc.), creates
 a ddg_edge and adds it to the given DDG. */
 static void
@@ -173,10 +194,15 @@
 compensate for that by generating reg-moves based on the life-range
 analysis. The anti-deps that will be deleted are the ones which
 have true-deps edges in the opposite direction (in other words
- the kernel has only one def of the relevant register). TODO:
- support the removal of all anti-deps edges, i.e. including those
+ the kernel has only one def of the relevant register).
+ If the address that is being auto-inc or auto-dec in DEST_NODE
+ is used in SRC_NODE then do not remove the edge to make sure
+ reg-moves will not be created for this address.
+ TODO: support the removal of all anti-deps edges, i.e. including those
 whose register has multiple defs in the loop. */
- if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
+ if (flag_modulo_sched_allow_regmoves
+ && (t == ANTI_DEP && dt == REG_DEP)
+ && !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
 {
 rtx set;

@@ -302,10 +328,14 @@
 gcc_assert (first_def_node);

 /* Always create the edge if the use node is a branch in
- order to prevent the creation of reg-moves. */
+ order to prevent the creation of reg-moves.
+ If the address that is being auto-inc or auto-dec in LAST_DEF
+ is used in USE_INSN then do not remove the edge to make sure
+ reg-moves will not be created for that address. */
 if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
 || !flag_modulo_sched_allow_regmoves
- || JUMP_P (use_node->insn))
+ || JUMP_P (use_node->insn)
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn))
 create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
 REG_DEP, 1);


=== modified file 'gcc/ddg.h'
--- old/gcc/ddg.h 2009-11-25 10:55:54 +0000
+++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000
@@ -186,4 +186,6 @@
 int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
 int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);

+bool autoinc_var_is_used_p (rtx, rtx);
+
 #endif /* GCC_DDG_H */

=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000
+++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
@@ -477,7 +477,12 @@
 sbitmap *uses_of_defs;
 rtx last_reg_move;
 rtx prev_reg, old_reg;
-
+ rtx set = single_set (u->insn);
+
+ /* Skip instructions that do not set a register. */
+ if ((set && !REG_P (SET_DEST (set))))
+ continue;
+
 /* Compute the number of reg_moves needed for u, by looking at life
 ranges started at u (excluding self-loops). */
 for (e = u->out; e; e = e->next_out)
@@ -494,6 +499,20 @@
 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
 nreg_moves4e--;

+ if (nreg_moves4e >= 1)
+ {
+ /* !single_set instructions are not supported yet and
+ thus we do not except to encounter them in the loop
+ except from the doloop part. For the latter case
+ we assume no regmoves are generated as the doloop
+ instructions are tied to the branch with an edge. */
+ gcc_assert (set);
+ /* If the instruction contains auto-inc register then
+ validate that the regmov is being generated for the
+ target regsiter rather then the inc'ed register. */
+ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
+ }
+
 nreg_moves = MAX (nreg_moves, nreg_moves4e);
 }

@@ -1266,12 +1285,10 @@
 continue;
 }

- /* Don't handle BBs with calls or barriers or auto-increment insns
- (to avoid creating invalid reg-moves for the auto-increment insns),
+ /* Don't handle BBs with calls or barriers
 or !single_set with the exception of instructions that include
 count_reg---these instructions are part of the control part
 that do-loop recognizes.
- ??? Should handle auto-increment insns.
 ??? Should handle insns defining subregs. */
 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
 {
@@ -1282,7 +1299,6 @@
 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
 && !reg_mentioned_p (count_reg, insn))
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
 || (INSN_P (insn) && (set = single_set (insn))
 && GET_CODE (SET_DEST (set)) == SUBREG))
 break;
@@ -1296,8 +1312,6 @@
 fprintf (dump_file, "SMS loop-with-call\n");
 else if (BARRIER_P (insn))
 fprintf (dump_file, "SMS loop-with-barrier\n");
- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
- fprintf (dump_file, "SMS reg inc\n");
 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
 fprintf (dump_file, "SMS loop-with-not-single-set\n");

=== added file 'gcc/testsuite/gcc.dg/sms-10.c'
--- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000
@@ -0,0 +1,118 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t);
+extern void free (void *);
+extern void abort (void);
+
+struct regstat_n_sets_and_refs_t
+{
+ int sets;
+ int refs;
+};
+
+struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs;
+
+struct df_reg_info
+{
+ unsigned int n_refs;
+};
+
+struct df_d
+{
+ struct df_reg_info **def_regs;
+ struct df_reg_info **use_regs;
+};
+struct df_d *df;
+
+static inline int
+REG_N_SETS (int regno)
+{
+ return regstat_n_sets_and_refs[regno].sets;
+}
+
+__attribute__ ((noinline))
+ int max_reg_num (void)
+{
+ return 100;
+}
+
+__attribute__ ((noinline))
+ void regstat_init_n_sets_and_refs (void)
+{
+ unsigned int i;
+ unsigned int max_regno = max_reg_num ();
+
+ for (i = 0; i < max_regno; i++)
+ {
+ (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs));
+ (regstat_n_sets_and_refs[i].refs =
+ (df->use_regs[(i)]->n_refs) + REG_N_SETS (i));
+ }
+}
+
+int a_sets[100] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+};
+
+int a_refs[100] =
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
+ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+ 78, 80, 82,
+ 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116,
+ 118, 120,
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 154, 156,
+ 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
+ 188, 190, 192,
+ 194, 196, 198
+};
+
+int
+main ()
+{
+ struct df_reg_info *b[100], *c[100];
+ struct df_d df1;
+ size_t s = sizeof (struct df_reg_info);
+ struct regstat_n_sets_and_refs_t a[100];
+
+ df = &df1;
+ regstat_n_sets_and_refs = a;
+ int i;
+
+ for (i = 0; i < 100; i++)
+ {
+ b[i] = (struct df_reg_info *) malloc (s);
+ b[i]->n_refs = i;
+ c[i] = (struct df_reg_info *) malloc (s);
+ c[i]->n_refs = i;
+ }
+
+ df1.def_regs = b;
+ df1.use_regs = c;
+ regstat_init_n_sets_and_refs ();
+
+ for (i = 0; i < 100; i++)
+ if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i]))
+ abort ();
+
+ for (i = 0; i < 100; i++)
+ {
+ free (b[i]);
+ free (c[i]);
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "sms" } } */
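
Before this backport, sms_schedule rejected any loop containing an insn with a REG_INC note (the removed "SMS reg inc" bail-out above); now such loops can be modulo-scheduled, with autoinc_var_is_used_p only blocking the reg-moves that would be invalid for an auto-incremented address register. A hedged sketch of the kind of loop this affects, not taken from the patch's testsuite:

/* Sketch: on targets with auto-increment addressing (e.g. ARM,
   PowerPC), the post-incremented pointer walks below typically
   become REG_INC insns, which SMS previously refused to schedule. */
void
scale (const float *src, float *dst, int n, float k)
{
  int i;
  for (i = 0; i < n; i++)
    *dst++ = k * *src++;
}
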
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch
new file mode 100644
index 000000000..ef98142bc
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch
@@ -0,0 +1,105 @@
2011-10-03  Michael Hope  <michael.hope@linaro.org>

 Backport from mainline:

 2011-09-13  Sevak Sargsyan  <sevak.sargsyan@ispras.ru>

 gcc/
 * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New
 define_insn patterns for combine.

 gcc/testsuite/
 * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.

=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000
+++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
@@ -5428,3 +5428,32 @@
 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
 DONE;
 })
+
+(define_insn "neon_vabd<mode>_2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")]
+ UNSPEC_VSUB)))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)

=== added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c'
--- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32()
+{
+ float32x2_t val1 = vdup_n_f32 (10);
+ float32x2_t val2 = vdup_n_f32 (30);
+ float32x2_t sres = vsub_f32(val1, val2);
+ float32x2_t res = vabs_f32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8()
+{
+ int8x8_t val1 = vdup_n_s8 (10);
+ int8x8_t val2 = vdup_n_s8 (30);
+ int8x8_t sres = vsub_s8(val1, val2);
+ int8x8_t res = vabs_s8 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16()
+{
+ int16x4_t val1 = vdup_n_s16 (10);
+ int16x4_t val2 = vdup_n_s16 (30);
+ int16x4_t sres = vsub_s16(val1, val2);
+ int16x4_t res = vabs_s16 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32()
+{
+ int32x2_t val1 = vdup_n_s32 (10);
+ int32x2_t val2 = vdup_n_s32 (30);
+ int32x2_t sres = vsub_s32(val1, val2);
+ int32x2_t res = vabs_s32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/
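
The new combine patterns fuse a vector subtract feeding an absolute value into a single vabd. Besides the intrinsics test above, the same idiom can arise from plain scalar code once the vectorizer has done its work; an illustrative sketch only, under the same flags the test uses (-O2 -funsafe-math-optimizations with NEON enabled):

/* Sketch: abs(a - b) per element; after vectorization, combine can
   replace the vsub + vabs pair with one vabd.f32. */
void
absdiff (const float *a, const float *b, float *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __builtin_fabsf (a[i] - b[i]);
}
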
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch
new file mode 100644
index 000000000..e097ec27f
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch
@@ -0,0 +1,436 @@
12011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
2
3 gcc/
4 Backport from mainline:
5
6 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
7
8 * config/arm/predicates.md (expandable_comparison_operator): New
9 predicate, extracted from...
10 (arm_comparison_operator): ...here.
11 * config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4)
12 (cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc)
13 (movdfcc): Use expandable_comparison_operator.
14
15 gcc/testsuite/
16 Backport from mainline:
17
18 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org>
19
20 * gcc.target/arm/cmp-1.c: New test.
21 * gcc.target/arm/cmp-2.c: Likewise.
22
232011-10-03 Richard Sandiford <richard.sandiford@linaro.org>
24
25 gcc/
26 Backport from mainline:
27
28 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
29
30 PR target/49030
31 * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
32 * config/arm/arm.c (maybe_get_arm_condition_code): New function,
33 reusing the old code from get_arm_condition_code. Return ARM_NV
34 for invalid comparison codes.
35 (get_arm_condition_code): Redefine in terms of
36 maybe_get_arm_condition_code.
37 * config/arm/predicates.md (arm_comparison_operator): Use
38 maybe_get_arm_condition_code.
39
40 gcc/testsuite/
41 Backport from mainline:
42
43 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org>
44
45 PR target/49030
46 * gcc.dg/torture/pr49030.c: New test.
47
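
The core refactoring above follows a common pattern: replace an asserting mapper with a sentinel-returning "maybe_" variant so that a predicate can query validity without crashing. A self-contained C sketch of the pattern, with invented names (illustrative only, not GCC code):

    #include <assert.h>

    enum cond { COND_EQ, COND_NE, COND_INVALID };

    /* Report failure through a sentinel instead of asserting.  */
    static enum cond
    maybe_get_cond (int op)
    {
      switch (op)
        {
        case 0:  return COND_EQ;
        case 1:  return COND_NE;
        default: return COND_INVALID;  /* previously unreachable */
        }
    }

    /* The original entry point keeps its old contract.  */
    static enum cond
    get_cond (int op)
    {
      enum cond c = maybe_get_cond (op);
      assert (c != COND_INVALID);
      return c;
    }

This mirrors how arm_comparison_operator can now reject comparisons with no ARM condition code (ARM_NV) while get_arm_condition_code still asserts.
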
48=== modified file 'gcc/config/arm/arm-protos.h'
49--- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000
50+++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
51@@ -180,6 +180,7 @@
52 #endif
53 extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
54 #ifdef RTX_CODE
55+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
56 extern void thumb1_final_prescan_insn (rtx);
57 extern void thumb2_final_prescan_insn (rtx);
58 extern const char *thumb_load_double_from_address (rtx *);
59
60=== modified file 'gcc/config/arm/arm.c'
61--- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000
62+++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
63@@ -17494,10 +17494,10 @@
64 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
65
66 /* Returns the index of the ARM condition code string in
67- `arm_condition_codes'. COMPARISON should be an rtx like
68- `(eq (...) (...))'. */
69-static enum arm_cond_code
70-get_arm_condition_code (rtx comparison)
71+ `arm_condition_codes', or ARM_NV if the comparison is invalid.
72+ COMPARISON should be an rtx like `(eq (...) (...))'. */
73+enum arm_cond_code
74+maybe_get_arm_condition_code (rtx comparison)
75 {
76 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
77 enum arm_cond_code code;
78@@ -17521,11 +17521,11 @@
79 case CC_DLTUmode: code = ARM_CC;
80
81 dominance:
82- gcc_assert (comp_code == EQ || comp_code == NE);
83-
84 if (comp_code == EQ)
85 return ARM_INVERSE_CONDITION_CODE (code);
86- return code;
87+ if (comp_code == NE)
88+ return code;
89+ return ARM_NV;
90
91 case CC_NOOVmode:
92 switch (comp_code)
93@@ -17534,7 +17534,7 @@
94 case EQ: return ARM_EQ;
95 case GE: return ARM_PL;
96 case LT: return ARM_MI;
97- default: gcc_unreachable ();
98+ default: return ARM_NV;
99 }
100
101 case CC_Zmode:
102@@ -17542,7 +17542,7 @@
103 {
104 case NE: return ARM_NE;
105 case EQ: return ARM_EQ;
106- default: gcc_unreachable ();
107+ default: return ARM_NV;
108 }
109
110 case CC_Nmode:
111@@ -17550,7 +17550,7 @@
112 {
113 case NE: return ARM_MI;
114 case EQ: return ARM_PL;
115- default: gcc_unreachable ();
116+ default: return ARM_NV;
117 }
118
119 case CCFPEmode:
120@@ -17575,7 +17575,7 @@
121 /* UNEQ and LTGT do not have a representation. */
122 case UNEQ: /* Fall through. */
123 case LTGT: /* Fall through. */
124- default: gcc_unreachable ();
125+ default: return ARM_NV;
126 }
127
128 case CC_SWPmode:
129@@ -17591,7 +17591,7 @@
130 case GTU: return ARM_CC;
131 case LEU: return ARM_CS;
132 case LTU: return ARM_HI;
133- default: gcc_unreachable ();
134+ default: return ARM_NV;
135 }
136
137 case CC_Cmode:
138@@ -17599,7 +17599,7 @@
139 {
140 case LTU: return ARM_CS;
141 case GEU: return ARM_CC;
142- default: gcc_unreachable ();
143+ default: return ARM_NV;
144 }
145
146 case CC_CZmode:
147@@ -17611,7 +17611,7 @@
148 case GTU: return ARM_HI;
149 case LEU: return ARM_LS;
150 case LTU: return ARM_CC;
151- default: gcc_unreachable ();
152+ default: return ARM_NV;
153 }
154
155 case CC_NCVmode:
156@@ -17621,7 +17621,7 @@
157 case LT: return ARM_LT;
158 case GEU: return ARM_CS;
159 case LTU: return ARM_CC;
160- default: gcc_unreachable ();
161+ default: return ARM_NV;
162 }
163
164 case CCmode:
165@@ -17637,13 +17637,22 @@
166 case GTU: return ARM_HI;
167 case LEU: return ARM_LS;
168 case LTU: return ARM_CC;
169- default: gcc_unreachable ();
170+ default: return ARM_NV;
171 }
172
173 default: gcc_unreachable ();
174 }
175 }
176
177+/* Like maybe_get_arm_condition_code, but never return ARM_NV. */
178+static enum arm_cond_code
179+get_arm_condition_code (rtx comparison)
180+{
181+ enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
182+ gcc_assert (code != ARM_NV);
183+ return code;
184+}
185+
186 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
187 instructions. */
188 void
189
190=== modified file 'gcc/config/arm/arm.md'
191--- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000
192+++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
193@@ -6543,7 +6543,7 @@
194
195 (define_expand "cbranchsi4"
196 [(set (pc) (if_then_else
197- (match_operator 0 "arm_comparison_operator"
198+ (match_operator 0 "expandable_comparison_operator"
199 [(match_operand:SI 1 "s_register_operand" "")
200 (match_operand:SI 2 "nonmemory_operand" "")])
201 (label_ref (match_operand 3 "" ""))
202@@ -6594,7 +6594,7 @@
203
204 (define_expand "cbranchsf4"
205 [(set (pc) (if_then_else
206- (match_operator 0 "arm_comparison_operator"
207+ (match_operator 0 "expandable_comparison_operator"
208 [(match_operand:SF 1 "s_register_operand" "")
209 (match_operand:SF 2 "arm_float_compare_operand" "")])
210 (label_ref (match_operand 3 "" ""))
211@@ -6606,7 +6606,7 @@
212
213 (define_expand "cbranchdf4"
214 [(set (pc) (if_then_else
215- (match_operator 0 "arm_comparison_operator"
216+ (match_operator 0 "expandable_comparison_operator"
217 [(match_operand:DF 1 "s_register_operand" "")
218 (match_operand:DF 2 "arm_float_compare_operand" "")])
219 (label_ref (match_operand 3 "" ""))
220@@ -6618,7 +6618,7 @@
221
222 (define_expand "cbranchdi4"
223 [(set (pc) (if_then_else
224- (match_operator 0 "arm_comparison_operator"
225+ (match_operator 0 "expandable_comparison_operator"
226 [(match_operand:DI 1 "cmpdi_operand" "")
227 (match_operand:DI 2 "cmpdi_operand" "")])
228 (label_ref (match_operand 3 "" ""))
229@@ -7473,7 +7473,7 @@
230
231 (define_expand "cstoresi4"
232 [(set (match_operand:SI 0 "s_register_operand" "")
233- (match_operator:SI 1 "arm_comparison_operator"
234+ (match_operator:SI 1 "expandable_comparison_operator"
235 [(match_operand:SI 2 "s_register_operand" "")
236 (match_operand:SI 3 "reg_or_int_operand" "")]))]
237 "TARGET_32BIT || TARGET_THUMB1"
238@@ -7609,7 +7609,7 @@
239
240 (define_expand "cstoresf4"
241 [(set (match_operand:SI 0 "s_register_operand" "")
242- (match_operator:SI 1 "arm_comparison_operator"
243+ (match_operator:SI 1 "expandable_comparison_operator"
244 [(match_operand:SF 2 "s_register_operand" "")
245 (match_operand:SF 3 "arm_float_compare_operand" "")]))]
246 "TARGET_32BIT && TARGET_HARD_FLOAT"
247@@ -7619,7 +7619,7 @@
248
249 (define_expand "cstoredf4"
250 [(set (match_operand:SI 0 "s_register_operand" "")
251- (match_operator:SI 1 "arm_comparison_operator"
252+ (match_operator:SI 1 "expandable_comparison_operator"
253 [(match_operand:DF 2 "s_register_operand" "")
254 (match_operand:DF 3 "arm_float_compare_operand" "")]))]
255 "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
256@@ -7629,7 +7629,7 @@
257
258 (define_expand "cstoredi4"
259 [(set (match_operand:SI 0 "s_register_operand" "")
260- (match_operator:SI 1 "arm_comparison_operator"
261+ (match_operator:SI 1 "expandable_comparison_operator"
262 [(match_operand:DI 2 "cmpdi_operand" "")
263 (match_operand:DI 3 "cmpdi_operand" "")]))]
264 "TARGET_32BIT"
265@@ -7749,7 +7749,7 @@
266
267 (define_expand "movsicc"
268 [(set (match_operand:SI 0 "s_register_operand" "")
269- (if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
270+ (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "")
271 (match_operand:SI 2 "arm_not_operand" "")
272 (match_operand:SI 3 "arm_not_operand" "")))]
273 "TARGET_32BIT"
274@@ -7769,7 +7769,7 @@
275
276 (define_expand "movsfcc"
277 [(set (match_operand:SF 0 "s_register_operand" "")
278- (if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
279+ (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
280 (match_operand:SF 2 "s_register_operand" "")
281 (match_operand:SF 3 "nonmemory_operand" "")))]
282 "TARGET_32BIT && TARGET_HARD_FLOAT"
283@@ -7795,7 +7795,7 @@
284
285 (define_expand "movdfcc"
286 [(set (match_operand:DF 0 "s_register_operand" "")
287- (if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
288+ (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
289 (match_operand:DF 2 "s_register_operand" "")
290 (match_operand:DF 3 "arm_float_add_operand" "")))]
291 "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
292
293=== modified file 'gcc/config/arm/predicates.md'
294--- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000
295+++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
296@@ -242,11 +242,15 @@
297
298 ;; True for integer comparisons and, if FP is active, for comparisons
299 ;; other than LTGT or UNEQ.
300+(define_special_predicate "expandable_comparison_operator"
301+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
302+ unordered,ordered,unlt,unle,unge,ungt"))
303+
304+;; Likewise, but only accept comparisons that are directly supported
305+;; by ARM condition codes.
306 (define_special_predicate "arm_comparison_operator"
307- (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
308- (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
309- && (TARGET_FPA || TARGET_VFP)")
310- (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
311+ (and (match_operand 0 "expandable_comparison_operator")
312+ (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
313
314 (define_special_predicate "lt_ge_comparison_operator"
315 (match_code "lt,ge"))
316
317=== added file 'gcc/testsuite/gcc.dg/torture/pr49030.c'
318--- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000
319+++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000
320@@ -0,0 +1,19 @@
321+void
322+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
323+ unsigned long dst_skip)
324+{
325+ long long y;
326+ while (nsamples--)
327+ {
328+ y = (long long) (*src * 8388608.0f) << 8;
329+ if (y > 2147483647) {
330+ *(int *) dst = 2147483647;
331+ } else if (y < -2147483647 - 1) {
332+ *(int *) dst = -2147483647 - 1;
333+ } else {
334+ *(int *) dst = (int) y;
335+ }
336+ dst += dst_skip;
337+ src++;
338+ }
339+}
340
341=== added file 'gcc/testsuite/gcc.target/arm/cmp-1.c'
342--- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000
343+++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000
344@@ -0,0 +1,37 @@
345+/* { dg-do compile } */
346+/* { dg-options "-O" } */
347+/* { dg-final { scan-assembler-not "\tbl\t" } } */
348+/* { dg-final { scan-assembler-not "__aeabi" } } */
349+int x, y;
350+
351+#define TEST_EXPR(NAME, ARGS, EXPR) \
352+ int NAME##1 ARGS { return (EXPR); } \
353+ int NAME##2 ARGS { return !(EXPR); } \
354+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
355+ void NAME##4 ARGS { if (EXPR) x++; } \
356+ void NAME##5 ARGS { if (!(EXPR)) x++; }
357+
358+#define TEST(NAME, TYPE, OPERATOR) \
359+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \
360+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \
361+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \
362+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \
363+ TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \
364+ TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1)
365+
366+#define TEST_OP(NAME, OPERATOR) \
367+ TEST (sc_##NAME, signed char, OPERATOR) \
368+ TEST (uc_##NAME, unsigned char, OPERATOR) \
369+ TEST (ss_##NAME, short, OPERATOR) \
370+ TEST (us_##NAME, unsigned short, OPERATOR) \
371+ TEST (si_##NAME, int, OPERATOR) \
372+ TEST (ui_##NAME, unsigned int, OPERATOR) \
373+ TEST (sll_##NAME, long long, OPERATOR) \
374+ TEST (ull_##NAME, unsigned long long, OPERATOR)
375+
376+TEST_OP (eq, ==)
377+TEST_OP (ne, !=)
378+TEST_OP (lt, <)
379+TEST_OP (gt, >)
380+TEST_OP (le, <=)
381+TEST_OP (ge, >=)
382
383=== added file 'gcc/testsuite/gcc.target/arm/cmp-2.c'
384--- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000
385+++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000
386@@ -0,0 +1,49 @@
387+/* { dg-do compile } */
388+/* { dg-require-effective-target arm_vfp_ok } */
389+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
390+/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */
391+/* { dg-final { scan-assembler-not "\tbl\t" } } */
392+/* { dg-final { scan-assembler-not "__aeabi" } } */
393+int x, y;
394+
395+#define EQ(X, Y) ((X) == (Y))
396+#define NE(X, Y) ((X) != (Y))
397+#define LT(X, Y) ((X) < (Y))
398+#define GT(X, Y) ((X) > (Y))
399+#define LE(X, Y) ((X) <= (Y))
400+#define GE(X, Y) ((X) >= (Y))
401+
402+#define TEST_EXPR(NAME, ARGS, EXPR) \
403+ int NAME##1 ARGS { return (EXPR); } \
404+ int NAME##2 ARGS { return !(EXPR); } \
405+ int NAME##3 ARGS { return (EXPR) ? x : y; } \
406+ void NAME##4 ARGS { if (EXPR) x++; } \
407+ void NAME##5 ARGS { if (!(EXPR)) x++; }
408+
409+#define TEST(NAME, TYPE, OPERATOR) \
410+ TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \
411+ TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \
412+ TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \
413+ TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \
414+ TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \
415+ TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1))
416+
417+#define TEST_OP(NAME, OPERATOR) \
418+ TEST (f_##NAME, float, OPERATOR) \
419+ TEST (d_##NAME, double, OPERATOR) \
420+ TEST (ld_##NAME, long double, OPERATOR)
421+
422+TEST_OP (eq, EQ)
423+TEST_OP (ne, NE)
424+TEST_OP (lt, LT)
425+TEST_OP (gt, GT)
426+TEST_OP (le, LE)
427+TEST_OP (ge, GE)
428+TEST_OP (blt, __builtin_isless)
429+TEST_OP (bgt, __builtin_isgreater)
430+TEST_OP (ble, __builtin_islessequal)
431+TEST_OP (bge, __builtin_isgreaterequal)
432+/* This one should be expanded into separate ordered and equality
433+ comparisons. */
434+TEST_OP (blg, __builtin_islessgreater)
435+TEST_OP (bun, __builtin_isunordered)
436
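
As the final comment in cmp-2.c notes, __builtin_islessgreater has no single ARM condition code, so it is expanded into an ordered test plus an inequality. A hedged illustration of that equivalence in plain C (ignoring floating-point exception subtleties):

    /* islessgreater (a, b) is true iff a and b are ordered and unequal.  */
    int islessgreater_open_coded (double a, double b)
    {
      return !__builtin_isunordered (a, b) && a != b;
    }
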
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch
new file mode 100644
index 000000000..4a886ce56
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch
@@ -0,0 +1,378 @@
12011-10-06 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-09-25 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-data-ref.c (dr_analyze_innermost): Add new argument.
9 Allow not simple iv if analyzing basic block.
10 (create_data_ref): Update call to dr_analyze_innermost.
11 (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
12 * tree-loop-distribution.c (generate_memset_zero): Likewise.
13 * tree-predcom.c (find_looparound_phi): Likewise.
14 * tree-data-ref.h (dr_analyze_innermost): Add new argument.
15
16 gcc/testsuite/
17 * gcc.dg/vect/bb-slp-24.c: New.
18
19
20 2011-09-15 Ira Rosen <ira.rosen@linaro.org>
21
22 gcc/
23 * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
24 read-after-read dependencies in basic block SLP.
25
26 gcc/testsuite/
27 * gcc.dg/vect/bb-slp-25.c: New.
28
29
30 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org>
31
32 gcc/
33 * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use
34 operand_equal_p to compare DR_BASE_ADDRESSes.
35 (vect_check_interleaving): Likewise.
36
37 gcc/testsuite/
38 * gcc.dg/vect/vect-119.c: New test.
39
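
A minimal straight-line example (not from the patch) of the read-after-read case the vect_analyze_data_ref_dependence change now tolerates: two loads of the same location can never conflict, so they must not block basic-block SLP.

    void sums (int *restrict out, const int *in)
    {
      out[0] = in[0] + in[1];
      out[1] = in[1] + in[2];   /* in[1] re-read: read-after-read only */
    }
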
40=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c'
41--- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000
42+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000
43@@ -0,0 +1,59 @@
44+/* { dg-require-effective-target vect_int } */
45+
46+#include <stdarg.h>
47+#include "tree-vect.h"
48+
49+#define A 3
50+#define N 256
51+
52+short src[N], dst[N];
53+
54+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
55+ int stride, int dummy)
56+{
57+ int i;
58+ h /= 8;
59+ for (i = 0; i < h; i++)
60+ {
61+ dst[0] += A*src[0];
62+ dst[1] += A*src[1];
63+ dst[2] += A*src[2];
64+ dst[3] += A*src[3];
65+ dst[4] += A*src[4];
66+ dst[5] += A*src[5];
67+ dst[6] += A*src[6];
68+ dst[7] += A*src[7];
69+ dst += stride;
70+ src += stride;
71+ if (dummy == 32)
72+ abort ();
73+ }
74+}
75+
76+
77+int main (void)
78+{
79+ int i;
80+
81+ check_vect ();
82+
83+ for (i = 0; i < N; i++)
84+ {
85+ dst[i] = 0;
86+ src[i] = i;
87+ }
88+
89+ foo (dst, src, N, 8, 0);
90+
91+ for (i = 0; i < N; i++)
92+ {
93+ if (dst[i] != A * i)
94+ abort ();
95+ }
96+
97+ return 0;
98+}
99+
100+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
101+/* { dg-final { cleanup-tree-dump "slp" } } */
102+
103
104=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c'
105--- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000
106+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000
107@@ -0,0 +1,59 @@
108+/* { dg-require-effective-target vect_int } */
109+
110+#include <stdarg.h>
111+#include "tree-vect.h"
112+
113+#define A 3
114+#define B 4
115+#define N 256
116+
117+short src[N], dst[N];
118+
119+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
120+{
121+ int i;
122+ h /= 16;
123+ for (i = 0; i < h; i++)
124+ {
125+ dst[0] += A*src[0] + src[stride];
126+ dst[1] += A*src[1] + src[1+stride];
127+ dst[2] += A*src[2] + src[2+stride];
128+ dst[3] += A*src[3] + src[3+stride];
129+ dst[4] += A*src[4] + src[4+stride];
130+ dst[5] += A*src[5] + src[5+stride];
131+ dst[6] += A*src[6] + src[6+stride];
132+ dst[7] += A*src[7] + src[7+stride];
133+ dst += 8;
134+ src += 8;
135+ if (dummy == 32)
136+ abort ();
137+ }
138+}
139+
140+
141+int main (void)
142+{
143+ int i;
144+
145+ check_vect ();
146+
147+ for (i = 0; i < N; i++)
148+ {
149+ dst[i] = 0;
150+ src[i] = i;
151+ }
152+
153+ foo (dst, src, N, 8, 0);
154+
155+ for (i = 0; i < N/2; i++)
156+ {
157+ if (dst[i] != A * i + i + 8)
158+ abort ();
159+ }
160+
161+ return 0;
162+}
163+
164+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
165+/* { dg-final { cleanup-tree-dump "slp" } } */
166+
167
168=== added file 'gcc/testsuite/gcc.dg/vect/vect-119.c'
169--- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000
170+++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000
171@@ -0,0 +1,28 @@
172+/* { dg-do compile } */
173+
174+#define OUTER 32
175+#define INNER 40
176+
177+static unsigned int
178+bar (const unsigned int x[INNER][2], unsigned int sum)
179+{
180+ int i;
181+
182+ for (i = 0; i < INNER; i++)
183+ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
184+ return sum;
185+}
186+
187+unsigned int foo (const unsigned int x[OUTER][INNER][2])
188+{
189+ int i;
190+ unsigned int sum;
191+
 192+ sum = 0;
193+ for (i = 0; i < OUTER; i++)
194+ sum = bar (x[i], sum);
195+ return sum;
196+}
197+
198+/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */
199+/* { dg-final { cleanup-tree-dump "vect" } } */
200
201=== modified file 'gcc/tree-data-ref.c'
202--- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000
203+++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000
204@@ -721,11 +721,11 @@
205 }
206
207 /* Analyzes the behavior of the memory reference DR in the innermost loop or
208- basic block that contains it. Returns true if analysis succeed or false
 209+ basic block that contains it. Returns true if analysis succeeds or false
210 otherwise. */
211
212 bool
213-dr_analyze_innermost (struct data_reference *dr)
214+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
215 {
216 gimple stmt = DR_STMT (dr);
217 struct loop *loop = loop_containing_stmt (stmt);
218@@ -768,14 +768,25 @@
219 }
220 else
221 base = build_fold_addr_expr (base);
222+
223 if (in_loop)
224 {
225 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
226 false))
227 {
228- if (dump_file && (dump_flags & TDF_DETAILS))
229- fprintf (dump_file, "failed: evolution of base is not affine.\n");
230- return false;
231+ if (nest)
232+ {
233+ if (dump_file && (dump_flags & TDF_DETAILS))
234+ fprintf (dump_file, "failed: evolution of base is not"
235+ " affine.\n");
236+ return false;
237+ }
238+ else
239+ {
240+ base_iv.base = base;
241+ base_iv.step = ssize_int (0);
242+ base_iv.no_overflow = true;
243+ }
244 }
245 }
246 else
247@@ -800,10 +811,18 @@
248 else if (!simple_iv (loop, loop_containing_stmt (stmt),
249 poffset, &offset_iv, false))
250 {
251- if (dump_file && (dump_flags & TDF_DETAILS))
252- fprintf (dump_file, "failed: evolution of offset is not"
253- " affine.\n");
254- return false;
255+ if (nest)
256+ {
257+ if (dump_file && (dump_flags & TDF_DETAILS))
258+ fprintf (dump_file, "failed: evolution of offset is not"
259+ " affine.\n");
260+ return false;
261+ }
262+ else
263+ {
264+ offset_iv.base = poffset;
265+ offset_iv.step = ssize_int (0);
266+ }
267 }
268 }
269
270@@ -967,7 +986,7 @@
271 DR_REF (dr) = memref;
272 DR_IS_READ (dr) = is_read;
273
274- dr_analyze_innermost (dr);
275+ dr_analyze_innermost (dr, nest);
276 dr_analyze_indices (dr, nest, loop);
277 dr_analyze_alias (dr);
278
279@@ -5185,7 +5204,7 @@
280 DR_STMT (dr) = stmt;
281 DR_REF (dr) = op0;
282
283- res = dr_analyze_innermost (dr)
284+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
285 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
286
287 free_data_ref (dr);
288@@ -5225,7 +5244,7 @@
289
290 DR_STMT (dr) = stmt;
291 DR_REF (dr) = *ref->pos;
292- dr_analyze_innermost (dr);
293+ dr_analyze_innermost (dr, loop_containing_stmt (stmt));
294 base_address = DR_BASE_ADDRESS (dr);
295
296 if (!base_address)
297
298=== modified file 'gcc/tree-data-ref.h'
299--- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
300+++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000
301@@ -386,7 +386,7 @@
302 DEF_VEC_ALLOC_O (data_ref_loc, heap);
303
304 bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **);
305-bool dr_analyze_innermost (struct data_reference *);
306+bool dr_analyze_innermost (struct data_reference *, struct loop *);
307 extern bool compute_data_dependences_for_loop (struct loop *, bool,
308 VEC (loop_p, heap) **,
309 VEC (data_reference_p, heap) **,
310
311=== modified file 'gcc/tree-loop-distribution.c'
312--- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000
313+++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000
314@@ -267,7 +267,7 @@
315
316 DR_STMT (dr) = stmt;
317 DR_REF (dr) = op0;
318- res = dr_analyze_innermost (dr);
319+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
320 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
321
322 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
323
324=== modified file 'gcc/tree-predcom.c'
325--- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000
326+++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000
327@@ -1114,7 +1114,7 @@
328 memset (&init_dr, 0, sizeof (struct data_reference));
329 DR_REF (&init_dr) = init_ref;
330 DR_STMT (&init_dr) = phi;
331- if (!dr_analyze_innermost (&init_dr))
332+ if (!dr_analyze_innermost (&init_dr, loop))
333 return NULL;
334
335 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
336
337=== modified file 'gcc/tree-vect-data-refs.c'
338--- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000
339+++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000
340@@ -353,11 +353,7 @@
341
342 /* Check that the data-refs have same bases and offsets. If not, we can't
343 determine if they are dependent. */
344- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
345- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
346- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
347- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
348- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
349+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
350 || !dr_equal_offsets_p (dra, drb))
351 return true;
352
353@@ -403,11 +399,7 @@
354
355 /* Check that the data-refs have same first location (except init) and they
356 are both either store or load (not load and store). */
357- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
358- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
359- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
360- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
361- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
362+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
363 || !dr_equal_offsets_p (dra, drb)
364 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
365 || DR_IS_READ (dra) != DR_IS_READ (drb))
366@@ -615,6 +607,11 @@
367 if (vect_check_interleaving (dra, drb))
368 return false;
369
370+ /* Read-read is OK (we need this check here, after checking for
371+ interleaving). */
372+ if (DR_IS_READ (dra) && DR_IS_READ (drb))
373+ return false;
374+
375 if (vect_print_dump_info (REPORT_DR_DETAILS))
376 {
377 fprintf (vect_dump, "can't determine dependence between ");
378
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch
new file mode 100644
index 000000000..f25a37858
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch
@@ -0,0 +1,240 @@
12011-10-06 Ira Rosen <ira.rosen@linaro.org>
2
3 gcc/testsuite/
4 * gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block
5 vectorizable.
6
7 Backport from mainline:
8
9 2011-09-25 Ira Rosen <ira.rosen@linaro.org>
10
11 gcc/
12 * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
13 of vect_analyze_bb here.
14 (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
15
16 gcc/testsuite/
17 * lib/target-supports.exp (check_effective_target_vect64): New.
18 * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
19 of multiple vector sizes.
20 * gcc.dg/vect/bb-slp-26.c: New.
21
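
The new driver retries the analysis at successively smaller vector sizes. A self-contained C sketch of that control flow, with invented stand-ins for the real hooks:

    #include <stdio.h>

    /* Hypothetical: pretend only the 8-byte size succeeds.  */
    static int analyze_bb_at_size (unsigned size) { return size == 8; }

    static unsigned floor_log2_u (unsigned x)
    { unsigned n = 0; while (x >>= 1) n++; return n; }

    int main (void)
    {
      unsigned sizes = 16 | 8;   /* bitmask of supported sizes (assumed) */
      unsigned current = 16;     /* first size tried */

      for (;;)
        {
          if (analyze_bb_at_size (current))
            { printf ("vectorized at size %u\n", current); return 0; }
          sizes &= ~current;                      /* drop the failed size */
          if (sizes == 0)
            break;
          current = 1u << floor_log2_u (sizes);   /* next biggest size */
        }
      printf ("not vectorized\n");
      return 1;
    }
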
22=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
23--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000
24+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
25@@ -49,6 +49,7 @@
26 }
27
28 /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
29-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
30+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
31+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
32 /* { dg-final { cleanup-tree-dump "slp" } } */
33
34
35=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c'
36--- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000
37+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000
38@@ -0,0 +1,60 @@
39+/* { dg-require-effective-target vect_int } */
40+
41+#include <stdarg.h>
42+#include "tree-vect.h"
43+
44+#define A 3
45+#define B 4
46+#define N 256
47+
48+char src[N], dst[N];
49+
50+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
51+ int stride, int dummy)
52+{
53+ int i;
54+ h /= 16;
55+ for (i = 0; i < h; i++)
56+ {
57+ dst[0] += A*src[0];
58+ dst[1] += A*src[1];
59+ dst[2] += A*src[2];
60+ dst[3] += A*src[3];
61+ dst[4] += A*src[4];
62+ dst[5] += A*src[5];
63+ dst[6] += A*src[6];
64+ dst[7] += A*src[7];
65+ dst += 8;
66+ src += 8;
67+ if (dummy == 32)
68+ abort ();
69+ }
70+}
71+
72+
73+int main (void)
74+{
75+ int i;
76+
77+ check_vect ();
78+
79+ for (i = 0; i < N; i++)
80+ {
81+ dst[i] = 0;
82+ src[i] = i/8;
83+ }
84+
85+ foo (dst, src, N, 8, 0);
86+
87+ for (i = 0; i < N/2; i++)
88+ {
89+ if (dst[i] != A * src[i])
90+ abort ();
91+ }
92+
93+ return 0;
94+}
95+
96+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
97+/* { dg-final { cleanup-tree-dump "slp" } } */
98+
99
100=== modified file 'gcc/testsuite/lib/target-supports.exp'
101--- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000
102+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
103@@ -3283,6 +3283,24 @@
104 return $et_vect_multiple_sizes_saved
105 }
106
107+# Return 1 if the target supports vectors of 64 bits.
108+
109+proc check_effective_target_vect64 { } {
110+ global et_vect64
111+
112+ if [info exists et_vect64_saved] {
113+ verbose "check_effective_target_vect64: using cached result" 2
114+ } else {
115+ set et_vect64_saved 0
116+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
117+ set et_vect64_saved 1
118+ }
119+ }
120+
121+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
122+ return $et_vect64_saved
123+}
124+
125 # Return 1 if the target supports section-anchors
126
127 proc check_effective_target_section_anchors { } {
128
129=== modified file 'gcc/tree-vect-slp.c'
130--- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000
131+++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
132@@ -1664,42 +1664,18 @@
133
134 /* Check if the basic block can be vectorized. */
135
136-bb_vec_info
137-vect_slp_analyze_bb (basic_block bb)
138+static bb_vec_info
139+vect_slp_analyze_bb_1 (basic_block bb)
140 {
141 bb_vec_info bb_vinfo;
142 VEC (ddr_p, heap) *ddrs;
143 VEC (slp_instance, heap) *slp_instances;
144 slp_instance instance;
145- int i, insns = 0;
146- gimple_stmt_iterator gsi;
147+ int i;
148 int min_vf = 2;
149 int max_vf = MAX_VECTORIZATION_FACTOR;
150 bool data_dependence_in_bb = false;
151
152- current_vector_size = 0;
153-
154- if (vect_print_dump_info (REPORT_DETAILS))
155- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
156-
157- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
158- {
159- gimple stmt = gsi_stmt (gsi);
160- if (!is_gimple_debug (stmt)
161- && !gimple_nop_p (stmt)
162- && gimple_code (stmt) != GIMPLE_LABEL)
163- insns++;
164- }
165-
166- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
167- {
168- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
169- fprintf (vect_dump, "not vectorized: too many instructions in basic "
170- "block.\n");
171-
172- return NULL;
173- }
174-
175 bb_vinfo = new_bb_vec_info (bb);
176 if (!bb_vinfo)
177 return NULL;
178@@ -1819,6 +1795,61 @@
179 }
180
181
182+bb_vec_info
183+vect_slp_analyze_bb (basic_block bb)
184+{
185+ bb_vec_info bb_vinfo;
186+ int insns = 0;
187+ gimple_stmt_iterator gsi;
188+ unsigned int vector_sizes;
189+
190+ if (vect_print_dump_info (REPORT_DETAILS))
191+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
192+
193+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
194+ {
195+ gimple stmt = gsi_stmt (gsi);
196+ if (!is_gimple_debug (stmt)
197+ && !gimple_nop_p (stmt)
198+ && gimple_code (stmt) != GIMPLE_LABEL)
199+ insns++;
200+ }
201+
202+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
203+ {
204+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
205+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
206+ "block.\n");
207+
208+ return NULL;
209+ }
210+
211+ /* Autodetect first vector size we try. */
212+ current_vector_size = 0;
213+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
214+
215+ while (1)
216+ {
217+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
218+ if (bb_vinfo)
219+ return bb_vinfo;
220+
221+ destroy_bb_vec_info (bb_vinfo);
222+
223+ vector_sizes &= ~current_vector_size;
224+ if (vector_sizes == 0
225+ || current_vector_size == 0)
226+ return NULL;
227+
228+ /* Try the next biggest vector size. */
229+ current_vector_size = 1 << floor_log2 (vector_sizes);
230+ if (vect_print_dump_info (REPORT_DETAILS))
231+ fprintf (vect_dump, "***** Re-trying analysis with "
232+ "vector size %d\n", current_vector_size);
233+ }
234+}
235+
236+
237 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
238 the number of created vector stmts depends on the unrolling factor).
239 However, the actual number of vector stmts for every SLP node depends on
240
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch
new file mode 100644
index 000000000..13e6fd26e
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch
@@ -0,0 +1,124 @@
12011-10-13 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from mainline:
4
5 2011-10-07 Andrew Stubbs <ams@codesourcery.com>
6
7 gcc/
8 * config/arm/predicates.md (shift_amount_operand): Remove constant
9 range check.
10 (shift_operator): Check range of constants for all shift operators.
11
12 gcc/testsuite/
13 * gcc.dg/pr50193-1.c: New file.
14 * gcc.target/arm/shiftable.c: New file.
15
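
A reduced sketch (not the actual pr50193-1.c testcase) of the kind of input the relocated range check guards against: a constant shift amount outside 0..31 must not be matched into an ARM shift-and-ALU instruction.

    /* Deliberately out-of-range shift count; undefined behaviour in C,
       but it must not ICE or mis-match the shift_operator predicate.  */
    int f (int a, int b)
    {
      return a | (b << 32);
    }
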
16=== modified file 'gcc/config/arm/predicates.md'
17--- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000
18+++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
19@@ -129,11 +129,12 @@
20 (ior (match_operand 0 "arm_rhs_operand")
21 (match_operand 0 "memory_operand")))
22
23+;; This doesn't have to do much because the constant is already checked
24+;; in the shift_operator predicate.
25 (define_predicate "shift_amount_operand"
26 (ior (and (match_test "TARGET_ARM")
27 (match_operand 0 "s_register_operand"))
28- (and (match_operand 0 "const_int_operand")
29- (match_test "INTVAL (op) > 0"))))
30+ (match_operand 0 "const_int_operand")))
31
32 (define_predicate "arm_add_operand"
33 (ior (match_operand 0 "arm_rhs_operand")
34@@ -219,13 +220,20 @@
35 (match_test "mode == GET_MODE (op)")))
36
37 ;; True for shift operators.
38+;; Notes:
39+;; * mult is only permitted with a constant shift amount
40+;; * patterns that permit register shift amounts only in ARM mode use
41+;; shift_amount_operand, patterns that always allow registers do not,
42+;; so we don't have to worry about that sort of thing here.
43 (define_special_predicate "shift_operator"
44 (and (ior (ior (and (match_code "mult")
45 (match_test "power_of_two_operand (XEXP (op, 1), mode)"))
46 (and (match_code "rotate")
47 (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
48 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
49- (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
50+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
51+ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
52+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
53 (match_test "mode == GET_MODE (op)")))
54
55 ;; True for MULT, to identify which variant of shift_operator is in use.
56
57=== added file 'gcc/testsuite/gcc.target/arm/shiftable.c'
58--- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000
59+++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000
60@@ -0,0 +1,63 @@
61+/* { dg-do compile } */
62+/* { dg-options "-O2" } */
63+/* { dg-require-effective-target arm32 } */
64+
65+/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some
66+ of these as a left shift, others as a multiply. Check that we match the
67+ right one. */
68+
69+int
70+plus (int a, int b)
71+{
72+ return (a * 64) + b;
73+}
74+
75+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */
76+
77+int
78+minus (int a, int b)
79+{
80+ return a - (b * 64);
81+}
82+
83+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */
84+
85+int
86+ior (int a, int b)
87+{
88+ return (a * 64) | b;
89+}
90+
91+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */
92+
93+int
94+xor (int a, int b)
95+{
96+ return (a * 64) ^ b;
97+}
98+
99+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */
100+
101+int
102+and (int a, int b)
103+{
104+ return (a * 64) & b;
105+}
106+
107+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */
108+
109+int
110+rsb (int a, int b)
111+{
112+ return (a * 64) - b;
113+}
114+
115+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */
116+
117+int
118+mvn (int a, int b)
119+{
120+ return ~(a * 64);
121+}
122+
123+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */
124
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch
new file mode 100644
index 000000000..6642f8a66
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch
@@ -0,0 +1,362 @@
12011-10-16 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-09-27 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
9 vectorization.
10 (vectorizable_type_promotion): Likewise.
11 (vect_analyze_stmt): Call vectorizable_type_demotion and
12 vectorizable_type_promotion for basic blocks.
13 (supportable_widening_operation): Don't assume loop vectorization.
14 * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
15 basic blocks. Update vectorization factor for basic block
16 vectorization.
17 (vect_analyze_slp_instance): Allow multiple types for basic block
18 vectorization. Recheck unrolling factor after construction of SLP
19 instance.
20
21 gcc/testsuite/
22 * gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
23 vectors.
24 * gcc.dg/vect/bb-slp-27.c: New.
25 * gcc.dg/vect/bb-slp-28.c: New.
26
27
28 2011-10-04 Ira Rosen <ira.rosen@linaro.org>
29
30 gcc/testsuite/
31 * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
32 Make et_vect_multiple_sizes_saved global.
33 (check_effective_target_vect64): Make et_vect64_saved global.
34
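
A minimal basic-block example (modelled on the added bb-slp-28.c, trimmed here) of the mixed-width straight-line code that vectorizable_type_promotion/demotion can now handle outside loops:

    char  src[4];
    short dst[4];

    void scale (int a)
    {
      /* char -> int promotion, then int -> short demotion, repeated
         across one straight-line group: a BB SLP candidate.  */
      dst[0] = (short) (a * (int) src[0]);
      dst[1] = (short) (a * (int) src[1]);
      dst[2] = (short) (a * (int) src[2]);
      dst[3] = (short) (a * (int) src[3]);
    }
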
35=== modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c'
36--- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000
37+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000
38@@ -48,8 +48,6 @@
39 return 0;
40 }
41
42-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
43-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
44-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
45+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
46 /* { dg-final { cleanup-tree-dump "slp" } } */
47
48
49=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c'
50--- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000
51+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000
52@@ -0,0 +1,49 @@
53+/* { dg-require-effective-target vect_int } */
54+
55+#include <stdarg.h>
56+#include "tree-vect.h"
57+
58+#define A 3
59+#define N 16
60+
61+short src[N], dst[N];
62+
63+void foo (int a)
64+{
65+ dst[0] += a*src[0];
66+ dst[1] += a*src[1];
67+ dst[2] += a*src[2];
68+ dst[3] += a*src[3];
69+ dst[4] += a*src[4];
70+ dst[5] += a*src[5];
71+ dst[6] += a*src[6];
72+ dst[7] += a*src[7];
73+}
74+
75+
76+int main (void)
77+{
78+ int i;
79+
80+ check_vect ();
81+
82+ for (i = 0; i < N; i++)
83+ {
84+ dst[i] = 0;
85+ src[i] = i;
86+ }
87+
88+ foo (A);
89+
90+ for (i = 0; i < 8; i++)
91+ {
92+ if (dst[i] != A * i)
93+ abort ();
94+ }
95+
96+ return 0;
97+}
98+
99+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */
100+/* { dg-final { cleanup-tree-dump "slp" } } */
101+
102
103=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c'
104--- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000
105+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000
106@@ -0,0 +1,71 @@
107+/* { dg-require-effective-target vect_int } */
108+
109+#include <stdarg.h>
110+#include "tree-vect.h"
111+
112+#define A 300
113+#define N 16
114+
115+char src[N];
116+short dst[N];
117+short src1[N], dst1[N];
118+
119+void foo (int a)
120+{
121+ dst[0] = (short) (a * (int) src[0]);
122+ dst[1] = (short) (a * (int) src[1]);
123+ dst[2] = (short) (a * (int) src[2]);
124+ dst[3] = (short) (a * (int) src[3]);
125+ dst[4] = (short) (a * (int) src[4]);
126+ dst[5] = (short) (a * (int) src[5]);
127+ dst[6] = (short) (a * (int) src[6]);
128+ dst[7] = (short) (a * (int) src[7]);
129+ dst[8] = (short) (a * (int) src[8]);
130+ dst[9] = (short) (a * (int) src[9]);
131+ dst[10] = (short) (a * (int) src[10]);
132+ dst[11] = (short) (a * (int) src[11]);
133+ dst[12] = (short) (a * (int) src[12]);
134+ dst[13] = (short) (a * (int) src[13]);
135+ dst[14] = (short) (a * (int) src[14]);
136+ dst[15] = (short) (a * (int) src[15]);
137+
138+ dst1[0] += src1[0];
139+ dst1[1] += src1[1];
140+ dst1[2] += src1[2];
141+ dst1[3] += src1[3];
142+ dst1[4] += src1[4];
143+ dst1[5] += src1[5];
144+ dst1[6] += src1[6];
145+ dst1[7] += src1[7];
146+}
147+
148+
149+int main (void)
150+{
151+ int i;
152+
153+ check_vect ();
154+
155+ for (i = 0; i < N; i++)
156+ {
157+ dst[i] = 2;
158+ dst1[i] = 0;
159+ src[i] = i;
160+ src1[i] = i+2;
161+ }
162+
163+ foo (A);
164+
165+ for (i = 0; i < N; i++)
166+ {
167+ if (dst[i] != A * i
168+ || (i < N/2 && dst1[i] != i + 2))
169+ abort ();
170+ }
171+
172+ return 0;
173+}
174+
175+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */
176+/* { dg-final { cleanup-tree-dump "slp" } } */
177+
178
179=== modified file 'gcc/testsuite/lib/target-supports.exp'
180--- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000
181+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
182@@ -3268,7 +3268,7 @@
183 # Return 1 if the target supports multiple vector sizes
184
185 proc check_effective_target_vect_multiple_sizes { } {
186- global et_vect_multiple_sizes
187+ global et_vect_multiple_sizes_saved
188
189 if [info exists et_vect_multiple_sizes_saved] {
190 verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
191@@ -3286,7 +3286,7 @@
192 # Return 1 if the target supports vectors of 64 bits.
193
194 proc check_effective_target_vect64 { } {
195- global et_vect64
196+ global et_vect64_saved
197
198 if [info exists et_vect64_saved] {
199 verbose "check_effective_target_vect64: using cached result" 2
200
201=== modified file 'gcc/tree-vect-slp.c'
202--- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000
203+++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
204@@ -386,20 +386,15 @@
205 return false;
206 }
207
208- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
209- if (ncopies != 1)
210- {
211- if (vect_print_dump_info (REPORT_SLP))
212- fprintf (vect_dump, "SLP with multiple types ");
213-
214- /* FORNOW: multiple types are unsupported in BB SLP. */
215- if (bb_vinfo)
216- return false;
217- }
218-
219 /* In case of multiple types we need to detect the smallest type. */
220 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
221- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
222+ {
223+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
224+ if (bb_vinfo)
225+ vectorization_factor = *max_nunits;
226+ }
227+
228+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
229
230 if (is_gimple_call (stmt))
231 rhs_code = CALL_EXPR;
232@@ -1183,7 +1178,6 @@
233 if (loop_vinfo)
234 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
235 else
236- /* No multitypes in BB SLP. */
237 vectorization_factor = nunits;
238
239 /* Calculate the unrolling factor. */
240@@ -1246,16 +1240,23 @@
241 &max_nunits, &load_permutation, &loads,
242 vectorization_factor))
243 {
244+ /* Calculate the unrolling factor based on the smallest type. */
245+ if (max_nunits > nunits)
246+ unrolling_factor = least_common_multiple (max_nunits, group_size)
247+ / group_size;
248+
249+ if (unrolling_factor != 1 && !loop_vinfo)
250+ {
251+ if (vect_print_dump_info (REPORT_SLP))
252+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
253+ " block SLP");
254+ return false;
255+ }
256+
257 /* Create a new SLP instance. */
258 new_instance = XNEW (struct _slp_instance);
259 SLP_INSTANCE_TREE (new_instance) = node;
260 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
261- /* Calculate the unrolling factor based on the smallest type in the
262- loop. */
263- if (max_nunits > nunits)
264- unrolling_factor = least_common_multiple (max_nunits, group_size)
265- / group_size;
266-
267 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
268 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
269 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
270
271=== modified file 'gcc/tree-vect-stmts.c'
272--- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000
273+++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
274@@ -3081,11 +3081,9 @@
275 VEC (tree, heap) *vec_oprnds0 = NULL;
276 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
277 tree last_oprnd, intermediate_type;
278-
279- /* FORNOW: not supported by basic block SLP vectorization. */
280- gcc_assert (loop_vinfo);
281-
282- if (!STMT_VINFO_RELEVANT_P (stmt_info))
283+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
284+
285+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
286 return false;
287
288 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
289@@ -3113,7 +3111,7 @@
290 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
291 && CONVERT_EXPR_CODE_P (code))))
292 return false;
293- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
294+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
295 &def_stmt, &def, &dt[0], &vectype_in))
296 {
297 if (vect_print_dump_info (REPORT_DETAILS))
298@@ -3360,11 +3358,9 @@
299 int multi_step_cvt = 0;
300 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
301 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
302-
303- /* FORNOW: not supported by basic block SLP vectorization. */
304- gcc_assert (loop_vinfo);
305-
306- if (!STMT_VINFO_RELEVANT_P (stmt_info))
307+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
308+
309+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
310 return false;
311
312 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
313@@ -3393,7 +3389,7 @@
314 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
315 && CONVERT_EXPR_CODE_P (code))))
316 return false;
317- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
318+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
319 &def_stmt, &def, &dt[0], &vectype_in))
320 {
321 if (vect_print_dump_info (REPORT_DETAILS))
322@@ -5153,7 +5149,9 @@
323 else
324 {
325 if (bb_vinfo)
326- ok = (vectorizable_shift (stmt, NULL, NULL, node)
327+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
328+ || vectorizable_type_demotion (stmt, NULL, NULL, node)
329+ || vectorizable_shift (stmt, NULL, NULL, node)
330 || vectorizable_operation (stmt, NULL, NULL, node)
331 || vectorizable_assignment (stmt, NULL, NULL, node)
332 || vectorizable_load (stmt, NULL, NULL, node, NULL)
333@@ -5780,7 +5778,7 @@
334 {
335 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
336 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
337- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
338+ struct loop *vect_loop = NULL;
339 bool ordered_p;
340 enum machine_mode vec_mode;
341 enum insn_code icode1, icode2;
342@@ -5789,6 +5787,9 @@
343 tree wide_vectype = vectype_out;
344 enum tree_code c1, c2;
345
346+ if (loop_info)
347+ vect_loop = LOOP_VINFO_LOOP (loop_info);
348+
349 /* The result of a vectorized widening operation usually requires two vectors
350 (because the widened results do not fit int one vector). The generated
351 vector results would normally be expected to be generated in the same
352@@ -5809,7 +5810,8 @@
353 iterations in parallel). We therefore don't allow to change the order
354 of the computation in the inner-loop during outer-loop vectorization. */
355
356- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
357+ if (vect_loop
358+ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
359 && !nested_in_vect_loop_p (vect_loop, stmt))
360 ordered_p = false;
361 else
362
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch
new file mode 100644
index 000000000..d49ebab60
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch
@@ -0,0 +1,628 @@
12011-10-17 Michael Hope <michael.hope@linaro.org>
2
3 Backport from mainline r178852:
4
5 2011-09-14 Julian Brown <julian@codesourcery.com>
6
7 gcc/
8 * config/arm/arm.c (arm_override_options): Add unaligned_access
9 support.
10 (arm_file_start): Emit attribute for unaligned access as appropriate.
11 * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
12 (UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
13 (insv, extzv): Add unaligned-access support.
14 (extv): Change to expander. Likewise.
15 (extzv_t1, extv_regsi): Add helpers.
16 (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
17 (unaligned_storesi, unaligned_storehi): New.
18 (*extv_reg): New (previous extv implementation).
19 * config/arm/arm.opt (munaligned_access): Add option.
20 * config/arm/constraints.md (Uw): New constraint.
21 * expmed.c (store_bit_field_1): Adjust bitfield numbering according
22 to size of access, not size of unit, when BITS_BIG_ENDIAN !=
23 BYTES_BIG_ENDIAN. Don't use bitfield accesses for
24 volatile accesses when -fstrict-volatile-bitfields is in effect.
25 (extract_bit_field_1): Likewise.
26
27 Backport from mainline r172697:
28
29 2011-04-19 Wei Guozhi <carrot@google.com>
30
31 PR target/47855
32 gcc/
33 * config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype.
34 * config/arm/arm.c (thumb1_legitimate_address_p): Remove the static
35 linkage.
36 * config/arm/constraints.md (Uu): New constraint.
37 * config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
38
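
For orientation, an illustrative C fragment (not from the patch) showing the kind of access -munaligned-access affects. With the option enabled (the new default on ARMv6 and the ARMv7-A/R/M profiles, per the arm.c hunk below), the misaligned word load can be emitted as a single unaligned ldr rather than assembled byte by byte.

    struct __attribute__ ((packed)) rec
    {
      char tag;
      int  value;   /* sits at offset 1: misaligned for a word load */
    };

    int get_value (const struct rec *r)
    {
      return r->value;
    }
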
39=== modified file 'gcc/config/arm/arm-protos.h'
40--- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000
41+++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000
42@@ -59,6 +59,7 @@
43 int);
44 extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
45 int);
46+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
47 extern int arm_const_double_rtx (rtx);
48 extern int neg_const_double_rtx_ok_for_fpa (rtx);
49 extern int vfp3_const_double_rtx (rtx);
50
51=== modified file 'gcc/config/arm/arm.c'
52--- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000
53+++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
54@@ -2065,6 +2065,28 @@
55 fix_cm3_ldrd = 0;
56 }
57
58+ /* Enable -munaligned-access by default for
59+ - all ARMv6 architecture-based processors
60+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
61+
62+ Disable -munaligned-access by default for
63+ - all pre-ARMv6 architecture-based processors
64+ - ARMv6-M architecture-based processors. */
65+
66+ if (unaligned_access == 2)
67+ {
68+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
69+ unaligned_access = 1;
70+ else
71+ unaligned_access = 0;
72+ }
73+ else if (unaligned_access == 1
74+ && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
75+ {
76+ warning (0, "target CPU does not support unaligned accesses");
77+ unaligned_access = 0;
78+ }
79+
80 if (TARGET_THUMB1 && flag_schedule_insns)
81 {
82 /* Don't warn since it's on by default in -O2. */
83@@ -6106,7 +6128,7 @@
84 addresses based on the frame pointer or arg pointer until the
85 reload pass starts. This is so that eliminating such addresses
86 into stack based ones won't produce impossible code. */
87-static int
88+int
89 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
90 {
91 /* ??? Not clear if this is right. Experiment. */
92@@ -22226,6 +22248,10 @@
93 val = 6;
94 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
95
96+ /* Tag_CPU_unaligned_access. */
97+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
98+ unaligned_access);
99+
100 /* Tag_ABI_FP_16bit_format. */
101 if (arm_fp16_format)
102 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
103
104=== modified file 'gcc/config/arm/arm.md'
105--- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000
106+++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000
107@@ -113,6 +113,10 @@
108 (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
109 ; another symbolic address.
110 (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
111+ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access
112+ ; unaligned locations, on architectures which support
113+ ; that.
114+ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh.
115 ]
116 )
117
118@@ -2463,10 +2467,10 @@
119 ;;; this insv pattern, so this pattern needs to be reevalutated.
120
121 (define_expand "insv"
122- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
123- (match_operand:SI 1 "general_operand" "")
124- (match_operand:SI 2 "general_operand" ""))
125- (match_operand:SI 3 "reg_or_int_operand" ""))]
126+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
127+ (match_operand 1 "general_operand" "")
128+ (match_operand 2 "general_operand" ""))
129+ (match_operand 3 "reg_or_int_operand" ""))]
130 "TARGET_ARM || arm_arch_thumb2"
131 "
132 {
133@@ -2477,35 +2481,70 @@
134
135 if (arm_arch_thumb2)
136 {
137- bool use_bfi = TRUE;
138-
139- if (GET_CODE (operands[3]) == CONST_INT)
140- {
141- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
142-
143- if (val == 0)
144- {
145- emit_insn (gen_insv_zero (operands[0], operands[1],
146- operands[2]));
147+ if (unaligned_access && MEM_P (operands[0])
148+ && s_register_operand (operands[3], GET_MODE (operands[3]))
149+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
150+ {
151+ rtx base_addr;
152+
153+ if (BYTES_BIG_ENDIAN)
154+ start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
155+ - start_bit;
156+
157+ if (width == 32)
158+ {
159+ base_addr = adjust_address (operands[0], SImode,
160+ start_bit / BITS_PER_UNIT);
161+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
162+ }
163+ else
164+ {
165+ rtx tmp = gen_reg_rtx (HImode);
166+
167+ base_addr = adjust_address (operands[0], HImode,
168+ start_bit / BITS_PER_UNIT);
169+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
170+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
171+ }
172+ DONE;
173+ }
174+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
175+ {
176+ bool use_bfi = TRUE;
177+
178+ if (GET_CODE (operands[3]) == CONST_INT)
179+ {
180+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
181+
182+ if (val == 0)
183+ {
184+ emit_insn (gen_insv_zero (operands[0], operands[1],
185+ operands[2]));
186+ DONE;
187+ }
188+
189+ /* See if the set can be done with a single orr instruction. */
190+ if (val == mask && const_ok_for_arm (val << start_bit))
191+ use_bfi = FALSE;
192+ }
193+
194+ if (use_bfi)
195+ {
196+ if (GET_CODE (operands[3]) != REG)
197+ operands[3] = force_reg (SImode, operands[3]);
198+
199+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
200+ operands[3]));
201 DONE;
202 }
203-
204- /* See if the set can be done with a single orr instruction. */
205- if (val == mask && const_ok_for_arm (val << start_bit))
206- use_bfi = FALSE;
207- }
208-
209- if (use_bfi)
210- {
211- if (GET_CODE (operands[3]) != REG)
212- operands[3] = force_reg (SImode, operands[3]);
213-
214- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
215- operands[3]));
216- DONE;
217- }
218+ }
219+ else
220+ FAIL;
221 }
222
223+ if (!s_register_operand (operands[0], GET_MODE (operands[0])))
224+ FAIL;
225+
226 target = copy_rtx (operands[0]);
227 /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
228 subreg as the final target. */
229@@ -3697,12 +3736,10 @@
230 ;; to reduce register pressure later on.
231
232 (define_expand "extzv"
233- [(set (match_dup 4)
234- (ashift:SI (match_operand:SI 1 "register_operand" "")
235- (match_operand:SI 2 "const_int_operand" "")))
236- (set (match_operand:SI 0 "register_operand" "")
237- (lshiftrt:SI (match_dup 4)
238- (match_operand:SI 3 "const_int_operand" "")))]
239+ [(set (match_operand 0 "s_register_operand" "")
240+ (zero_extract (match_operand 1 "nonimmediate_operand" "")
241+ (match_operand 2 "const_int_operand" "")
242+ (match_operand 3 "const_int_operand" "")))]
243 "TARGET_THUMB1 || arm_arch_thumb2"
244 "
245 {
246@@ -3711,10 +3748,57 @@
247
248 if (arm_arch_thumb2)
249 {
250- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
251- operands[3]));
252- DONE;
253+ HOST_WIDE_INT width = INTVAL (operands[2]);
254+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
255+
256+ if (unaligned_access && MEM_P (operands[1])
257+ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
258+ {
259+ rtx base_addr;
260+
261+ if (BYTES_BIG_ENDIAN)
262+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width
263+ - bitpos;
264+
265+ if (width == 32)
266+ {
267+ base_addr = adjust_address (operands[1], SImode,
268+ bitpos / BITS_PER_UNIT);
269+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
270+ }
271+ else
272+ {
273+ rtx dest = operands[0];
274+ rtx tmp = gen_reg_rtx (SImode);
275+
276+ /* We may get a paradoxical subreg here. Strip it off. */
277+ if (GET_CODE (dest) == SUBREG
278+ && GET_MODE (dest) == SImode
279+ && GET_MODE (SUBREG_REG (dest)) == HImode)
280+ dest = SUBREG_REG (dest);
281+
282+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
283+ FAIL;
284+
285+ base_addr = adjust_address (operands[1], HImode,
286+ bitpos / BITS_PER_UNIT);
287+ emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
288+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
289+ }
290+ DONE;
291+ }
292+ else if (s_register_operand (operands[1], GET_MODE (operands[1])))
293+ {
294+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
295+ operands[3]));
296+ DONE;
297+ }
298+ else
299+ FAIL;
300 }
301+
302+ if (!s_register_operand (operands[1], GET_MODE (operands[1])))
303+ FAIL;
304
305 operands[3] = GEN_INT (rshift);
306
307@@ -3724,12 +3808,154 @@
308 DONE;
309 }
310
311- operands[2] = GEN_INT (lshift);
312- operands[4] = gen_reg_rtx (SImode);
313+ emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
314+ operands[3], gen_reg_rtx (SImode)));
315+ DONE;
316 }"
317 )
318
319-(define_insn "extv"
320+;; Helper for extzv, for the Thumb-1 register-shifts case.
321+
322+(define_expand "extzv_t1"
323+ [(set (match_operand:SI 4 "s_register_operand" "")
324+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
325+ (match_operand:SI 2 "const_int_operand" "")))
326+ (set (match_operand:SI 0 "s_register_operand" "")
327+ (lshiftrt:SI (match_dup 4)
328+ (match_operand:SI 3 "const_int_operand" "")))]
329+ "TARGET_THUMB1"
330+ "")
331+
332+(define_expand "extv"
333+ [(set (match_operand 0 "s_register_operand" "")
334+ (sign_extract (match_operand 1 "nonimmediate_operand" "")
335+ (match_operand 2 "const_int_operand" "")
336+ (match_operand 3 "const_int_operand" "")))]
337+ "arm_arch_thumb2"
338+{
339+ HOST_WIDE_INT width = INTVAL (operands[2]);
340+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
341+
342+ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
343+ && (bitpos % BITS_PER_UNIT) == 0)
344+ {
345+ rtx base_addr;
346+
347+ if (BYTES_BIG_ENDIAN)
348+ bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
349+
350+ if (width == 32)
351+ {
352+ base_addr = adjust_address (operands[1], SImode,
353+ bitpos / BITS_PER_UNIT);
354+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
355+ }
356+ else
357+ {
358+ rtx dest = operands[0];
359+ rtx tmp = gen_reg_rtx (SImode);
360+
361+ /* We may get a paradoxical subreg here. Strip it off. */
362+ if (GET_CODE (dest) == SUBREG
363+ && GET_MODE (dest) == SImode
364+ && GET_MODE (SUBREG_REG (dest)) == HImode)
365+ dest = SUBREG_REG (dest);
366+
367+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
368+ FAIL;
369+
370+ base_addr = adjust_address (operands[1], HImode,
371+ bitpos / BITS_PER_UNIT);
372+ emit_insn (gen_unaligned_loadhis (tmp, base_addr));
373+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
374+ }
375+
376+ DONE;
377+ }
378+ else if (!s_register_operand (operands[1], GET_MODE (operands[1])))
379+ FAIL;
380+ else if (GET_MODE (operands[0]) == SImode
381+ && GET_MODE (operands[1]) == SImode)
382+ {
383+ emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2],
384+ operands[3]));
385+ DONE;
386+ }
387+
388+ FAIL;
389+})
390+
391+; Helper to expand register forms of extv with the proper modes.
392+
393+(define_expand "extv_regsi"
394+ [(set (match_operand:SI 0 "s_register_operand" "")
395+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
396+ (match_operand 2 "const_int_operand" "")
397+ (match_operand 3 "const_int_operand" "")))]
398+ ""
399+{
400+})
401+
402+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
403+
404+(define_insn "unaligned_loadsi"
405+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
406+ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
407+ UNSPEC_UNALIGNED_LOAD))]
408+ "unaligned_access && TARGET_32BIT"
409+ "ldr%?\t%0, %1\t@ unaligned"
410+ [(set_attr "arch" "t2,any")
411+ (set_attr "length" "2,4")
412+ (set_attr "predicable" "yes")
413+ (set_attr "type" "load1")])
414+
415+(define_insn "unaligned_loadhis"
416+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
417+ (sign_extend:SI
418+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
419+ UNSPEC_UNALIGNED_LOAD)))]
420+ "unaligned_access && TARGET_32BIT"
421+ "ldr%(sh%)\t%0, %1\t@ unaligned"
422+ [(set_attr "arch" "t2,any")
423+ (set_attr "length" "2,4")
424+ (set_attr "predicable" "yes")
425+ (set_attr "type" "load_byte")])
426+
427+(define_insn "unaligned_loadhiu"
428+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
429+ (zero_extend:SI
430+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
431+ UNSPEC_UNALIGNED_LOAD)))]
432+ "unaligned_access && TARGET_32BIT"
433+ "ldr%(h%)\t%0, %1\t@ unaligned"
434+ [(set_attr "arch" "t2,any")
435+ (set_attr "length" "2,4")
436+ (set_attr "predicable" "yes")
437+ (set_attr "type" "load_byte")])
438+
439+(define_insn "unaligned_storesi"
440+ [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
441+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
442+ UNSPEC_UNALIGNED_STORE))]
443+ "unaligned_access && TARGET_32BIT"
444+ "str%?\t%1, %0\t@ unaligned"
445+ [(set_attr "arch" "t2,any")
446+ (set_attr "length" "2,4")
447+ (set_attr "predicable" "yes")
448+ (set_attr "type" "store1")])
449+
450+(define_insn "unaligned_storehi"
451+ [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
452+ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
453+ UNSPEC_UNALIGNED_STORE))]
454+ "unaligned_access && TARGET_32BIT"
455+ "str%(h%)\t%1, %0\t@ unaligned"
456+ [(set_attr "arch" "t2,any")
457+ (set_attr "length" "2,4")
458+ (set_attr "predicable" "yes")
459+ (set_attr "type" "store1")])
460+
461+(define_insn "*extv_reg"
462 [(set (match_operand:SI 0 "s_register_operand" "=r")
463 (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
464 (match_operand:SI 2 "const_int_operand" "M")
465@@ -6038,8 +6264,8 @@
466
467
468 (define_insn "*arm_movqi_insn"
469- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
470- (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
471+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m")
472+ (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))]
473 "TARGET_32BIT
474 && ( register_operand (operands[0], QImode)
475 || register_operand (operands[1], QImode))"
476@@ -6047,10 +6273,14 @@
477 mov%?\\t%0, %1
478 mvn%?\\t%0, #%B1
479 ldr%(b%)\\t%0, %1
480+ str%(b%)\\t%1, %0
481+ ldr%(b%)\\t%0, %1
482 str%(b%)\\t%1, %0"
483- [(set_attr "type" "*,*,load1,store1")
484- (set_attr "insn" "mov,mvn,*,*")
485- (set_attr "predicable" "yes")]
486+ [(set_attr "type" "*,*,load1,store1,load1,store1")
487+ (set_attr "insn" "mov,mvn,*,*,*,*")
488+ (set_attr "predicable" "yes")
489+ (set_attr "arch" "any,any,t2,t2,any,any")
490+ (set_attr "length" "4,4,2,2,4,4")]
491 )
492
493 (define_insn "*thumb1_movqi_insn"
494
495=== modified file 'gcc/config/arm/arm.opt'
496--- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000
497+++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
498@@ -173,3 +173,7 @@
499 Target Report Var(fix_cm3_ldrd) Init(2)
500 Avoid overlapping destination and address registers on LDRD instructions
501 that may trigger Cortex-M3 errata.
502+
503+munaligned-access
504+Target Report Var(unaligned_access) Init(2)
505+Enable unaligned word and halfword accesses to packed data.
506
507=== modified file 'gcc/config/arm/constraints.md'
508--- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000
509+++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000
510@@ -36,6 +36,7 @@
511 ;; The following memory constraints have been used:
512 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
513 ;; in ARM state: Uq
514+;; in Thumb state: Uu, Uw
515
516
517 (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
518@@ -344,6 +345,27 @@
519 (and (match_code "mem")
520 (match_test "REG_P (XEXP (op, 0))")))
521
522+(define_memory_constraint "Uu"
523+ "@internal
524+ In Thumb state an address that is valid in 16bit encoding."
525+ (and (match_code "mem")
526+ (match_test "TARGET_THUMB
527+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
528+ 0)")))
529+
530+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
531+; are actually LDM/STM instructions, so cannot be used to access unaligned
532+; data.
533+(define_memory_constraint "Uw"
534+ "@internal
535+ In Thumb state an address that is valid in 16bit encoding, and that can be
536+ used for unaligned accesses."
537+ (and (match_code "mem")
538+ (match_test "TARGET_THUMB
539+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
540+ 0)
541+ && GET_CODE (XEXP (op, 0)) != POST_INC")))
542+
543 ;; We used to have constraint letters for S and R in ARM state, but
544 ;; all uses of these now appear to have been removed.
545
546
547=== modified file 'gcc/expmed.c'
548--- old/gcc/expmed.c 2011-05-22 19:02:59 +0000
549+++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000
550@@ -657,6 +657,10 @@
551 && GET_MODE (value) != BLKmode
552 && bitsize > 0
553 && GET_MODE_BITSIZE (op_mode) >= bitsize
554+ /* Do not use insv for volatile bitfields when
555+ -fstrict-volatile-bitfields is in effect. */
556+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
557+ && flag_strict_volatile_bitfields > 0)
558 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
559 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
560 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
561@@ -700,19 +704,21 @@
562 copy_back = true;
563 }
564
565- /* On big-endian machines, we count bits from the most significant.
566- If the bit field insn does not, we must invert. */
567-
568- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
569- xbitpos = unit - bitsize - xbitpos;
570-
571 /* We have been counting XBITPOS within UNIT.
572 Count instead within the size of the register. */
573- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
574+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
575 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
576
577 unit = GET_MODE_BITSIZE (op_mode);
578
579+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
580+ "backwards" from the size of the unit we are inserting into.
581+ Otherwise, we count bits from the most significant on a
582+ BYTES/BITS_BIG_ENDIAN machine. */
583+
584+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
585+ xbitpos = unit - bitsize - xbitpos;
586+
587 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
588 value1 = value;
589 if (GET_MODE (value) != op_mode)
590@@ -1528,6 +1534,10 @@
591 if (ext_mode != MAX_MACHINE_MODE
592 && bitsize > 0
593 && GET_MODE_BITSIZE (ext_mode) >= bitsize
594+ /* Do not use extv/extzv for volatile bitfields when
595+ -fstrict-volatile-bitfields is in effect. */
596+ && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
597+ && flag_strict_volatile_bitfields > 0)
598 /* If op0 is a register, we need it in EXT_MODE to make it
599 acceptable to the format of ext(z)v. */
600 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
601@@ -1552,17 +1562,20 @@
602 /* Get ref to first byte containing part of the field. */
603 xop0 = adjust_address (xop0, byte_mode, xoffset);
604
605- /* On big-endian machines, we count bits from the most significant.
606- If the bit field insn does not, we must invert. */
607- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
608- xbitpos = unit - bitsize - xbitpos;
609-
610 /* Now convert from counting within UNIT to counting in EXT_MODE. */
611- if (BITS_BIG_ENDIAN && !MEM_P (xop0))
612+ if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
613 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
614
615 unit = GET_MODE_BITSIZE (ext_mode);
616
617+ /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
618+ "backwards" from the size of the unit we are extracting from.
619+ Otherwise, we count bits from the most significant on a
620+ BYTES/BITS_BIG_ENDIAN machine. */
621+
622+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
623+ xbitpos = unit - bitsize - xbitpos;
624+
625 if (xtarget == 0)
626 xtarget = xspec_target = gen_reg_rtx (tmode);
627
628
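For context, a minimal C sketch of the kind of source construct the r106827 patch above targets (illustrative only; the type and function names below are invented, not taken from the patch): with -munaligned-access enabled on ARMv6+, a misaligned field of a packed structure can be accessed through the new unaligned_loadsi/unaligned_storesi patterns as a single unaligned LDR/STR instead of a byte-by-byte sequence.

/* Illustrative sketch, not part of the patch.  The packed attribute
   forces 'value' to byte offset 1, so the accesses below are
   misaligned; with -munaligned-access the extv/extzv/insv expanders
   can emit a single unaligned LDR/STR for them on ARMv6+.  */
struct __attribute__ ((packed)) frame
{
  unsigned char tag;
  unsigned int value;   /* misaligned 32-bit field */
};

unsigned int
read_value (const struct frame *f)
{
  return f->value;      /* may expand via unaligned_loadsi */
}

void
write_value (struct frame *f, unsigned int v)
{
  f->value = v;         /* may expand via unaligned_storesi */
}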
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
new file mode 100644
index 000000000..3c0ff0085
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch
@@ -0,0 +1,1951 @@
12011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
2
3 gcc/
4 Backport from mainline:
5
6 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
7
8 * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages.
9 (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete.
10 (node_sched_params): Remove first_reg_move and nreg_moves.
11 (ps_num_consecutive_stages, extend_node_sched_params): New functions.
12 (update_node_sched_params): Move up file.
13 (print_node_sched_params): Print the stage. Don't dump info related
14 to first_reg_move and nreg_moves.
15 (set_columns_for_row): New function.
16 (set_columns_for_ps): Move up file and use set_columns_for_row.
17 (schedule_reg_move): New function.
18 (schedule_reg_moves): Call extend_node_sched_params and
19 schedule_reg_move. Extend size of uses bitmap. Initialize
20 num_consecutive_stages. Return false if a move could not be
21 scheduled.
22 (apply_reg_moves): Don't emit moves here.
23 (permute_partial_schedule): Handle register moves.
24 (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according
25 to the same stage-count test as ddg nodes.
26 (generate_prolog_epilog): Update calls accordingly.
27 (sms_schedule): Allow move-scheduling to add a new first stage.
28
292011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
30
31 gcc/
32 Backport from mainline:
33
34 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
35
36 * modulo-sched.c (ps_insn): Adjust comment.
37 (ps_reg_move_info): New structure.
38 (partial_schedule): Add reg_moves field.
39 (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params.
40 (node_sched_params): Turn first_reg_move into an identifier.
41 (ps_reg_move): New function.
42 (ps_rtl_insn): Cope with register moves.
43 (ps_first_note): Adjust comment and assert that the instruction
44 isn't a register move.
45 (node_sched_params): Replace with...
46 (node_sched_param_vec): ...this vector.
47 (set_node_sched_params): Adjust accordingly.
48 (print_node_sched_params): Take a partial schedule instead of a ddg.
49 Use ps_rtl_insn and ps_reg_move.
50 (generate_reg_moves): Rename to...
51 (schedule_reg_moves): ...this. Remove rescan parameter. Record each
52 move in the partial schedule, but don't emit it here. Don't perform
53 register substitutions here either.
54 (apply_reg_moves): New function.
55 (duplicate_insns_of_cycles): Use register indices directly,
56 rather than finding instructions using PREV_INSN. Use ps_reg_move.
57 (sms_schedule): Call schedule_reg_moves before committing to
58 a partial schedule. Try the next ii if the schedule fails.
59 Use apply_reg_moves instead of generate_reg_moves. Adjust
60 call to print_node_sched_params. Free node_sched_param_vec
61 instead of node_sched_params.
62 (create_partial_schedule): Initialize reg_moves.
63 (free_partial_schedule): Free reg_moves.
64
652011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
66
67 gcc/
68 Backport from mainline:
69
70 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
71
72 * modulo-sched.c (ps_insn): Replace node field with an identifier.
73 (SCHED_ASAP): Replace with..
74 (NODE_ASAP): ...this macro.
75 (SCHED_PARAMS): New macro.
76 (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW)
77 (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS.
78 (node_sched_params): Remove asap.
79 (ps_rtl_insn, ps_first_note): New functions.
80 (set_node_sched_params): Use XCNEWVEC. Don't copy across the
81 asap values.
82 (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP.
83 (generate_reg_moves): Pass ids to the SCHED_* macros.
84 (update_node_sched_params): Take a ps insn identifier rather than
85 a node as parameter. Use ps_rtl_insn.
86 (set_columns_for_ps): Update for above field and SCHED_* macro changes.
87 (permute_partial_schedule): Use ps_rtl_insn and ps_first_note.
88 (optimize_sc): Update for above field and SCHED_* macro changes.
89 Update calls to try_scheduling_node_in_cycle and
90 update_node_sched_params.
91 (duplicate_insns_of_cycles): Adjust for above field and SCHED_*
92 macro changes. Use ps_rtl_insn and ps_first_note.
93 (sms_schedule): Pass ids to the SCHED_* macros.
94 (get_sched_window): Adjust for above field and SCHED_* macro changes.
95 Use NODE_ASAP instead of SCHED_ASAP.
96 (try_scheduling_node_in_cycle): Remove node parameter. Update
97 call to ps_add_node_check_conflicts. Pass ids to the SCHED_*
98 macros.
99 (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle.
100 (ps_insert_empty_row): Adjust for above field changes.
101 (compute_split_row): Use ids rather than nodes.
102 (verify_partial_schedule): Adjust for above field changes.
103 (print_partial_schedule): Use ps_rtl_insn.
104 (create_ps_insn): Take an id rather than a node.
105 (ps_insn_find_column): Adjust for above field changes.
106 Use ps_rtl_insn.
107 (ps_insn_advance_column): Adjust for above field changes.
108 (add_node_to_ps): Remove node parameter. Update call to
109 create_ps_insn.
110 (ps_has_conflicts): Use ps_rtl_insn.
111 (ps_add_node_check_conflicts): Replace the node parameter with an id.
112
1132011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
114
115 gcc/
116 Backport from mainline:
117
118 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org>
119
120 * modulo-sched.c (undo_replace_buff_elem): Delete.
121 (generate_reg_moves): Don't build and return an undo list.
122 (free_undo_replace_buff): Delete.
123 (sms_schedule): Adjust call to generate_reg_moves.
124 Don't call free_undo_replace_buff.
125
1262011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
127
128 gcc/
129 Backport from mainline:
130
131 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org>
132
133 * modulo-sched.c (get_sched_window): Use a table for the debug output.
134 Print the current ii.
135 (sms_schedule_by_order): Reduce whitespace in dump line.
136
1372011-10-17 Richard Sandiford <richard.sandiford@linaro.org>
138
139 gcc/
140 Backport from mainline:
141
142 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org>
143
144 * modulo-sched.c (get_sched_window): Use just one loop for predecessors
145 and one loop for successors. Fix upper bound of memory range.
146
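As background for the register-move changes below, a small C sketch of why modulo scheduling needs these moves at all (illustrative only; the loop is invented, not taken from the patch). When software pipelining overlaps iterations, a use scheduled one or more full stages (multiples of ii cycles) after its definition would read a register that a later iteration has already clobbered; the scheduler therefore inserts roughly (SCHED_TIME (use) - SCHED_TIME (def)) / ii copy instructions per such value. This patch places those copies in the partial schedule itself rather than emitting them directly.

/* Illustrative sketch, not part of the patch.  If SMS overlaps
   iterations so that the use of 'prod' lands a full stage after its
   definition, iteration i+1 would overwrite 'prod' before iteration
   i's use reads it; one register move per overlapped stage preserves
   the older value.  */
void
example (int *restrict b, const int *restrict a,
         const int *restrict c, int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      int prod = a[i] * 3;   /* definition */
      b[i] = prod + c[i];    /* use, possibly a stage later */
    }
}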
147=== modified file 'gcc/modulo-sched.c'
148--- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000
149+++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000
150@@ -124,8 +124,10 @@
151 /* A single instruction in the partial schedule. */
152 struct ps_insn
153 {
154- /* The corresponding DDG_NODE. */
155- ddg_node_ptr node;
156+ /* Identifies the instruction to be scheduled. Values smaller than
157+ the ddg's num_nodes refer directly to ddg nodes. A value of
158+ X - num_nodes refers to register move X. */
159+ int id;
160
161 /* The (absolute) cycle in which the PS instruction is scheduled.
162 Same as SCHED_TIME (node). */
163@@ -137,6 +139,33 @@
164
165 };
166
167+/* Information about a register move that has been added to a partial
168+ schedule. */
169+struct ps_reg_move_info
170+{
171+ /* The source of the move is defined by the ps_insn with id DEF.
172+ The destination is used by the ps_insns with the ids in USES. */
173+ int def;
174+ sbitmap uses;
175+
176+ /* The original form of USES' instructions used OLD_REG, but they
177+ should now use NEW_REG. */
178+ rtx old_reg;
179+ rtx new_reg;
180+
181+ /* The number of consecutive stages that the move occupies. */
182+ int num_consecutive_stages;
183+
184+ /* An instruction that sets NEW_REG to the correct value. The first
185+ move associated with DEF will have an rhs of OLD_REG; later moves
186+ use the result of the previous move. */
187+ rtx insn;
188+};
189+
190+typedef struct ps_reg_move_info ps_reg_move_info;
191+DEF_VEC_O (ps_reg_move_info);
192+DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
193+
194 /* Holds the partial schedule as an array of II rows. Each entry of the
195 array points to a linked list of PS_INSNs, which represents the
196 instructions that are scheduled for that row. */
197@@ -148,6 +177,10 @@
198 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
199 ps_insn_ptr *rows;
200
201+ /* All the moves added for this partial schedule. Index X has
202+ a ps_insn id of X + g->num_nodes. */
203+ VEC (ps_reg_move_info, heap) *reg_moves;
204+
205 /* rows_length[i] holds the number of instructions in the row.
206 It is used only (as an optimization) to back off quickly from
207 trying to schedule a node in a full row; that is, to avoid running
208@@ -165,17 +198,6 @@
209 int stage_count; /* The stage count of the partial schedule. */
210 };
211
212-/* We use this to record all the register replacements we do in
213- the kernel so we can undo SMS if it is not profitable. */
214-struct undo_replace_buff_elem
215-{
216- rtx insn;
217- rtx orig_reg;
218- rtx new_reg;
219- struct undo_replace_buff_elem *next;
220-};
221-
222-
223
224 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
225 static void free_partial_schedule (partial_schedule_ptr);
226@@ -183,9 +205,7 @@
227 void print_partial_schedule (partial_schedule_ptr, FILE *);
228 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
229 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
230- ddg_node_ptr node, int cycle,
231- sbitmap must_precede,
232- sbitmap must_follow);
233+ int, int, sbitmap, sbitmap);
234 static void rotate_partial_schedule (partial_schedule_ptr, int);
235 void set_row_column_for_ps (partial_schedule_ptr);
236 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
237@@ -201,43 +221,27 @@
238 static void permute_partial_schedule (partial_schedule_ptr, rtx);
239 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
240 rtx, rtx);
241-static void duplicate_insns_of_cycles (partial_schedule_ptr,
242- int, int, int, rtx);
243 static int calculate_stage_count (partial_schedule_ptr, int);
244 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
245 int, int, sbitmap, sbitmap, sbitmap);
246 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
247 sbitmap, int, int *, int *, int *);
248-static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
249- int, int, sbitmap, int *, sbitmap,
250- sbitmap);
251+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
252+ sbitmap, int *, sbitmap, sbitmap);
253 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
254
255-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
256-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
257-#define SCHED_FIRST_REG_MOVE(x) \
258- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
259-#define SCHED_NREG_MOVES(x) \
260- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
261-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
262-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
263-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
264+#define NODE_ASAP(node) ((node)->aux.count)
265+
266+#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
267+#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
268+#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
269+#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
270+#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
271
272 /* The scheduling parameters held for each node. */
273 typedef struct node_sched_params
274 {
275- int asap; /* A lower-bound on the absolute scheduling cycle. */
276- int time; /* The absolute scheduling cycle (time >= asap). */
277-
278- /* The following field (first_reg_move) is a pointer to the first
279- register-move instruction added to handle the modulo-variable-expansion
280- of the register defined by this node. This register-move copies the
281- original register defined by the node. */
282- rtx first_reg_move;
283-
284- /* The number of register-move instructions added, immediately preceding
285- first_reg_move. */
286- int nreg_moves;
287+ int time; /* The absolute scheduling cycle. */
288
289 int row; /* Holds time % ii. */
290 int stage; /* Holds time / ii. */
291@@ -247,6 +251,9 @@
292 int column;
293 } *node_sched_params_ptr;
294
295+typedef struct node_sched_params node_sched_params;
296+DEF_VEC_O (node_sched_params);
297+DEF_VEC_ALLOC_O (node_sched_params, heap);
298
299 /* The following three functions are copied from the current scheduler
300 code in order to use sched_analyze() for computing the dependencies.
301@@ -296,6 +303,49 @@
302 0
303 };
304
305+/* Partial schedule instruction ID in PS is a register move. Return
306+ information about it. */
307+static struct ps_reg_move_info *
308+ps_reg_move (partial_schedule_ptr ps, int id)
309+{
310+ gcc_checking_assert (id >= ps->g->num_nodes);
311+ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
312+}
313+
314+/* Return the rtl instruction that is being scheduled by partial schedule
315+ instruction ID, which belongs to schedule PS. */
316+static rtx
317+ps_rtl_insn (partial_schedule_ptr ps, int id)
318+{
319+ if (id < ps->g->num_nodes)
320+ return ps->g->nodes[id].insn;
321+ else
322+ return ps_reg_move (ps, id)->insn;
323+}
324+
325+/* Partial schedule instruction ID, which belongs to PS, occurred in
326+ the original (unscheduled) loop. Return the first instruction
327+ in the loop that was associated with ps_rtl_insn (PS, ID).
328+ If the instruction had some notes before it, this is the first
329+ of those notes. */
330+static rtx
331+ps_first_note (partial_schedule_ptr ps, int id)
332+{
333+ gcc_assert (id < ps->g->num_nodes);
334+ return ps->g->nodes[id].first_note;
335+}
336+
337+/* Return the number of consecutive stages that are occupied by
338+ partial schedule instruction ID in PS. */
339+static int
340+ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
341+{
342+ if (id < ps->g->num_nodes)
343+ return 1;
344+ else
345+ return ps_reg_move (ps, id)->num_consecutive_stages;
346+}
347+
348 /* Given HEAD and TAIL which are the first and last insns in a loop;
349 return the register which controls the loop. Return zero if it has
350 more than one occurrence in the loop besides the control part or the
351@@ -396,35 +446,59 @@
352 }
353
354
355-/* Points to the array that contains the sched data for each node. */
356-static node_sched_params_ptr node_sched_params;
357+/* A vector that contains the sched data for each ps_insn. */
358+static VEC (node_sched_params, heap) *node_sched_param_vec;
359
360-/* Allocate sched_params for each node and initialize it. Assumes that
361- the aux field of each node contain the asap bound (computed earlier),
362- and copies it into the sched_params field. */
363+/* Allocate sched_params for each node and initialize it. */
364 static void
365 set_node_sched_params (ddg_ptr g)
366 {
367- int i;
368-
369- /* Allocate for each node in the DDG a place to hold the "sched_data". */
370- /* Initialize ASAP/ALAP/HIGHT to zero. */
371- node_sched_params = (node_sched_params_ptr)
372- xcalloc (g->num_nodes,
373- sizeof (struct node_sched_params));
374-
375- /* Set the pointer of the general data of the node to point to the
376- appropriate sched_params structure. */
377- for (i = 0; i < g->num_nodes; i++)
378- {
379- /* Watch out for aliasing problems? */
380- node_sched_params[i].asap = g->nodes[i].aux.count;
381- g->nodes[i].aux.info = &node_sched_params[i];
382- }
383-}
384-
385-static void
386-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
387+ VEC_truncate (node_sched_params, node_sched_param_vec, 0);
388+ VEC_safe_grow_cleared (node_sched_params, heap,
389+ node_sched_param_vec, g->num_nodes);
390+}
391+
392+/* Make sure that node_sched_param_vec has an entry for every move in PS. */
393+static void
394+extend_node_sched_params (partial_schedule_ptr ps)
395+{
396+ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec,
397+ ps->g->num_nodes + VEC_length (ps_reg_move_info,
398+ ps->reg_moves));
399+}
400+
401+/* Update the sched_params (time, row and stage) for node U using the II,
402+ the CYCLE of U and MIN_CYCLE.
403+ We're not simply taking the following
404+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
405+ because the stages may not be aligned on cycle 0. */
406+static void
407+update_node_sched_params (int u, int ii, int cycle, int min_cycle)
408+{
409+ int sc_until_cycle_zero;
410+ int stage;
411+
412+ SCHED_TIME (u) = cycle;
413+ SCHED_ROW (u) = SMODULO (cycle, ii);
414+
415+ /* The calculation of stage count is done by adding the number
416+ of stages before cycle zero and after cycle zero. */
417+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
418+
419+ if (SCHED_TIME (u) < 0)
420+ {
421+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
422+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
423+ }
424+ else
425+ {
426+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
427+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
428+ }
429+}
430+
431+static void
432+print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
433 {
434 int i;
435
436@@ -432,22 +506,170 @@
437 return;
438 for (i = 0; i < num_nodes; i++)
439 {
440- node_sched_params_ptr nsp = &node_sched_params[i];
441- rtx reg_move = nsp->first_reg_move;
442- int j;
443+ node_sched_params_ptr nsp = SCHED_PARAMS (i);
444
445 fprintf (file, "Node = %d; INSN = %d\n", i,
446- (INSN_UID (g->nodes[i].insn)));
447- fprintf (file, " asap = %d:\n", nsp->asap);
448+ INSN_UID (ps_rtl_insn (ps, i)));
449+ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
450 fprintf (file, " time = %d:\n", nsp->time);
451- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
452- for (j = 0; j < nsp->nreg_moves; j++)
453+ fprintf (file, " stage = %d:\n", nsp->stage);
454+ }
455+}
456+
457+/* Set SCHED_COLUMN for each instruction in row ROW of PS. */
458+static void
459+set_columns_for_row (partial_schedule_ptr ps, int row)
460+{
461+ ps_insn_ptr cur_insn;
462+ int column;
463+
464+ column = 0;
465+ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
466+ SCHED_COLUMN (cur_insn->id) = column++;
467+}
468+
469+/* Set SCHED_COLUMN for each instruction in PS. */
470+static void
471+set_columns_for_ps (partial_schedule_ptr ps)
472+{
473+ int row;
474+
475+ for (row = 0; row < ps->ii; row++)
476+ set_columns_for_row (ps, row);
477+}
478+
479+/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
480+ Its single predecessor has already been scheduled, as have its
481+ ddg node successors. (The move may also have another move as its
482+ successor, in which case that successor will be scheduled later.)
483+
484+ The move is part of a chain that satisfies register dependencies
485+ between a producing ddg node and various consuming ddg nodes.
486+ If some of these dependencies have a distance of 1 (meaning that
487+ the use is upward-exposed) then DISTANCE1_USES is nonnull and
488+ contains the set of uses with distance-1 dependencies.
489+ DISTANCE1_USES is null otherwise.
490+
491+ MUST_FOLLOW is a scratch bitmap that is big enough to hold
492+ all current ps_insn ids.
493+
494+ Return true on success. */
495+static bool
496+schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
497+ sbitmap distance1_uses, sbitmap must_follow)
498+{
499+ unsigned int u;
500+ int this_time, this_distance, this_start, this_end, this_latency;
501+ int start, end, c, ii;
502+ sbitmap_iterator sbi;
503+ ps_reg_move_info *move;
504+ rtx this_insn;
505+ ps_insn_ptr psi;
506+
507+ move = ps_reg_move (ps, i_reg_move);
508+ ii = ps->ii;
509+ if (dump_file)
510+ {
511+ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
512+ ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
513+ PS_MIN_CYCLE (ps));
514+ print_rtl_single (dump_file, move->insn);
515+ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
516+ fprintf (dump_file, "=========== =========== =====\n");
517+ }
518+
519+ start = INT_MIN;
520+ end = INT_MAX;
521+
522+ /* For dependencies of distance 1 between a producer ddg node A
523+ and consumer ddg node B, we have a chain of dependencies:
524+
525+ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
526+
527+ where Mi is the ith move. For dependencies of distance 0 between
528+ a producer ddg node A and consumer ddg node C, we have a chain of
529+ dependencies:
530+
531+ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
532+
533+ where Mi' occupies the same position as Mi but occurs a stage later.
534+ We can only schedule each move once, so if we have both types of
535+ chain, we model the second as:
536+
537+ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
538+
539+ First handle the dependencies between the previously-scheduled
540+ predecessor and the move. */
541+ this_insn = ps_rtl_insn (ps, move->def);
542+ this_latency = insn_latency (this_insn, move->insn);
543+ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
544+ this_time = SCHED_TIME (move->def) - this_distance * ii;
545+ this_start = this_time + this_latency;
546+ this_end = this_time + ii;
547+ if (dump_file)
548+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
549+ this_start, this_end, SCHED_TIME (move->def),
550+ INSN_UID (this_insn), this_latency, this_distance,
551+ INSN_UID (move->insn));
552+
553+ if (start < this_start)
554+ start = this_start;
555+ if (end > this_end)
556+ end = this_end;
557+
558+ /* Handle the dependencies between the move and previously-scheduled
559+ successors. */
560+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi)
561+ {
562+ this_insn = ps_rtl_insn (ps, u);
563+ this_latency = insn_latency (move->insn, this_insn);
564+ if (distance1_uses && !TEST_BIT (distance1_uses, u))
565+ this_distance = -1;
566+ else
567+ this_distance = 0;
568+ this_time = SCHED_TIME (u) + this_distance * ii;
569+ this_start = this_time - ii;
570+ this_end = this_time - this_latency;
571+ if (dump_file)
572+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
573+ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
574+ this_latency, this_distance, INSN_UID (this_insn));
575+
576+ if (start < this_start)
577+ start = this_start;
578+ if (end > this_end)
579+ end = this_end;
580+ }
581+
582+ if (dump_file)
583+ {
584+ fprintf (dump_file, "----------- ----------- -----\n");
585+ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
586+ }
587+
588+ sbitmap_zero (must_follow);
589+ SET_BIT (must_follow, move->def);
590+
591+ start = MAX (start, end - (ii - 1));
592+ for (c = end; c >= start; c--)
593+ {
594+ psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
595+ move->uses, must_follow);
596+ if (psi)
597 {
598- fprintf (file, " reg_move = ");
599- print_rtl_single (file, reg_move);
600- reg_move = PREV_INSN (reg_move);
601+ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
602+ if (dump_file)
603+ fprintf (dump_file, "\nScheduled register move INSN %d at"
604+ " time %d, row %d\n\n", INSN_UID (move->insn), c,
605+ SCHED_ROW (i_reg_move));
606+ return true;
607 }
608 }
609+
610+ if (dump_file)
611+ fprintf (dump_file, "\nNo available slot\n\n");
612+
613+ return false;
614 }
615
616 /*
617@@ -461,22 +683,23 @@
618 nreg_moves = ----------------------------------- + 1 - {   dependence.
619                              ii                          { 1 if not.
620 */
621-static struct undo_replace_buff_elem *
622-generate_reg_moves (partial_schedule_ptr ps, bool rescan)
623+static bool
624+schedule_reg_moves (partial_schedule_ptr ps)
625 {
626 ddg_ptr g = ps->g;
627 int ii = ps->ii;
628 int i;
629- struct undo_replace_buff_elem *reg_move_replaces = NULL;
630
631 for (i = 0; i < g->num_nodes; i++)
632 {
633 ddg_node_ptr u = &g->nodes[i];
634 ddg_edge_ptr e;
635 int nreg_moves = 0, i_reg_move;
636- sbitmap *uses_of_defs;
637- rtx last_reg_move;
638 rtx prev_reg, old_reg;
639+ int first_move;
640+ int distances[2];
641+ sbitmap must_follow;
642+ sbitmap distance1_uses;
643 rtx set = single_set (u->insn);
644
645 /* Skip instructions that do not set a register. */
646@@ -485,18 +708,21 @@
647
648 /* Compute the number of reg_moves needed for u, by looking at life
649 ranges started at u (excluding self-loops). */
650+ distances[0] = distances[1] = false;
651 for (e = u->out; e; e = e->next_out)
652 if (e->type == TRUE_DEP && e->dest != e->src)
653 {
654- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
655+ int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
656+ - SCHED_TIME (e->src->cuid)) / ii;
657
658 if (e->distance == 1)
659- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
660+ nreg_moves4e = (SCHED_TIME (e->dest->cuid)
661+ - SCHED_TIME (e->src->cuid) + ii) / ii;
662
663 /* If dest precedes src in the schedule of the kernel, then dest
664 will read before src writes and we can save one reg_copy. */
665- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
666- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
667+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
668+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
669 nreg_moves4e--;
670
671 if (nreg_moves4e >= 1)
672@@ -513,125 +739,105 @@
673 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
674 }
675
676+ if (nreg_moves4e)
677+ {
678+ gcc_assert (e->distance < 2);
679+ distances[e->distance] = true;
680+ }
681 nreg_moves = MAX (nreg_moves, nreg_moves4e);
682 }
683
684 if (nreg_moves == 0)
685 continue;
686
687+ /* Create NREG_MOVES register moves. */
688+ first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
689+ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
690+ first_move + nreg_moves);
691+ extend_node_sched_params (ps);
692+
693+ /* Record the moves associated with this node. */
694+ first_move += ps->g->num_nodes;
695+
696+ /* Generate each move. */
697+ old_reg = prev_reg = SET_DEST (single_set (u->insn));
698+ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
699+ {
700+ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
701+
702+ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
703+ move->uses = sbitmap_alloc (first_move + nreg_moves);
704+ move->old_reg = old_reg;
705+ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
706+ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
707+ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
708+ sbitmap_zero (move->uses);
709+
710+ prev_reg = move->new_reg;
711+ }
712+
713+ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
714+
715 /* Every use of the register defined by node may require a different
716 copy of this register, depending on the time the use is scheduled.
717- Set a bitmap vector, telling which nodes use each copy of this
718- register. */
719- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
720- sbitmap_vector_zero (uses_of_defs, nreg_moves);
721+ Record which uses require which move results. */
722 for (e = u->out; e; e = e->next_out)
723 if (e->type == TRUE_DEP && e->dest != e->src)
724 {
725- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
726+ int dest_copy = (SCHED_TIME (e->dest->cuid)
727+ - SCHED_TIME (e->src->cuid)) / ii;
728
729 if (e->distance == 1)
730- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
731+ dest_copy = (SCHED_TIME (e->dest->cuid)
732+ - SCHED_TIME (e->src->cuid) + ii) / ii;
733
734- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
735- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
736+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
737+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
738 dest_copy--;
739
740 if (dest_copy)
741- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
742+ {
743+ ps_reg_move_info *move;
744+
745+ move = ps_reg_move (ps, first_move + dest_copy - 1);
746+ SET_BIT (move->uses, e->dest->cuid);
747+ if (e->distance == 1)
748+ SET_BIT (distance1_uses, e->dest->cuid);
749+ }
750 }
751
752- /* Now generate the reg_moves, attaching relevant uses to them. */
753- SCHED_NREG_MOVES (u) = nreg_moves;
754- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
755- /* Insert the reg-moves right before the notes which precede
756- the insn they relates to. */
757- last_reg_move = u->first_note;
758-
759+ must_follow = sbitmap_alloc (first_move + nreg_moves);
760 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
761+ if (!schedule_reg_move (ps, first_move + i_reg_move,
762+ distance1_uses, must_follow))
763+ break;
764+ sbitmap_free (must_follow);
765+ if (distance1_uses)
766+ sbitmap_free (distance1_uses);
767+ if (i_reg_move < nreg_moves)
768+ return false;
769+ }
770+ return true;
771+}
772+
773+/* Emit the moves associated with PS. Apply the substitutions
774+ associated with them. */
775+static void
776+apply_reg_moves (partial_schedule_ptr ps)
777+{
778+ ps_reg_move_info *move;
779+ int i;
780+
781+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
782+ {
783+ unsigned int i_use;
784+ sbitmap_iterator sbi;
785+
786+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
787 {
788- unsigned int i_use = 0;
789- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
790- rtx reg_move = gen_move_insn (new_reg, prev_reg);
791- sbitmap_iterator sbi;
792-
793- add_insn_before (reg_move, last_reg_move, NULL);
794- last_reg_move = reg_move;
795-
796- if (!SCHED_FIRST_REG_MOVE (u))
797- SCHED_FIRST_REG_MOVE (u) = reg_move;
798-
799- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
800- {
801- struct undo_replace_buff_elem *rep;
802-
803- rep = (struct undo_replace_buff_elem *)
804- xcalloc (1, sizeof (struct undo_replace_buff_elem));
805- rep->insn = g->nodes[i_use].insn;
806- rep->orig_reg = old_reg;
807- rep->new_reg = new_reg;
808-
809- if (! reg_move_replaces)
810- reg_move_replaces = rep;
811- else
812- {
813- rep->next = reg_move_replaces;
814- reg_move_replaces = rep;
815- }
816-
817- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
818- if (rescan)
819- df_insn_rescan (g->nodes[i_use].insn);
820- }
821-
822- prev_reg = new_reg;
823+ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
824+ df_insn_rescan (ps->g->nodes[i_use].insn);
825 }
826- sbitmap_vector_free (uses_of_defs);
827- }
828- return reg_move_replaces;
829-}
830-
831-/* Free memory allocated for the undo buffer. */
832-static void
833-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
834-{
835-
836- while (reg_move_replaces)
837- {
838- struct undo_replace_buff_elem *rep = reg_move_replaces;
839-
840- reg_move_replaces = reg_move_replaces->next;
841- free (rep);
842- }
843-}
844-
845-/* Update the sched_params (time, row and stage) for node U using the II,
846- the CYCLE of U and MIN_CYCLE.
847- We're not simply taking the following
848- SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
849- because the stages may not be aligned on cycle 0. */
850-static void
851-update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
852-{
853- int sc_until_cycle_zero;
854- int stage;
855-
856- SCHED_TIME (u) = cycle;
857- SCHED_ROW (u) = SMODULO (cycle, ii);
858-
859- /* The calculation of stage count is done adding the number
860- of stages before cycle zero and after cycle zero. */
861- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
862-
863- if (SCHED_TIME (u) < 0)
864- {
865- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
866- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
867- }
868- else
869- {
870- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
871- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
872 }
873 }
874
875@@ -647,18 +853,19 @@
876 for (row = 0; row < ii; row++)
877 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
878 {
879- ddg_node_ptr u = crr_insn->node;
880+ int u = crr_insn->id;
881 int normalized_time = SCHED_TIME (u) - amount;
882 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
883
884 if (dump_file)
885 {
886 /* Print the scheduling times after the rotation. */
887+ rtx insn = ps_rtl_insn (ps, u);
888+
889 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
890- "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
891- INSN_UID (crr_insn->node->insn), normalized_time,
892- new_min_cycle);
893- if (JUMP_P (crr_insn->node->insn))
894+ "crr_insn->cycle=%d, min_cycle=%d", u,
895+ INSN_UID (insn), normalized_time, new_min_cycle);
896+ if (JUMP_P (insn))
897 fprintf (dump_file, " (branch)");
898 fprintf (dump_file, "\n");
899 }
900@@ -671,22 +878,6 @@
901 }
902 }
903
904-/* Set SCHED_COLUMN of each node according to its position in PS. */
905-static void
906-set_columns_for_ps (partial_schedule_ptr ps)
907-{
908- int row;
909-
910- for (row = 0; row < ps->ii; row++)
911- {
912- ps_insn_ptr cur_insn = ps->rows[row];
913- int column = 0;
914-
915- for (; cur_insn; cur_insn = cur_insn->next_in_row)
916- SCHED_COLUMN (cur_insn->node) = column++;
917- }
918-}
919-
920 /* Permute the insns according to their order in PS, from row 0 to
921 row ii-1, and position them right before LAST. This schedules
922 the insns of the loop kernel. */
923@@ -699,9 +890,18 @@
924
925 for (row = 0; row < ii ; row++)
926 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
927- if (PREV_INSN (last) != ps_ij->node->insn)
928- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
929- PREV_INSN (last));
930+ {
931+ rtx insn = ps_rtl_insn (ps, ps_ij->id);
932+
933+ if (PREV_INSN (last) != insn)
934+ {
935+ if (ps_ij->id < ps->g->num_nodes)
936+ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
937+ PREV_INSN (last));
938+ else
939+ add_insn_before (insn, last, NULL);
940+ }
941+ }
942 }
943
944 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
945@@ -750,7 +950,7 @@
946 to row ii-1. If they are equal just bail out. */
947 stage_count = calculate_stage_count (ps, amount);
948 stage_count_curr =
949- calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
950+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
951
952 if (stage_count == stage_count_curr)
953 {
954@@ -779,7 +979,7 @@
955 print_partial_schedule (ps, dump_file);
956 }
957
958- if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
959+ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
960 {
961 ok = true;
962 goto clear;
963@@ -794,7 +994,7 @@
964 {
965 bool success;
966 ps_insn_ptr next_ps_i;
967- int branch_cycle = SCHED_TIME (g->closing_branch);
968+ int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
969 int row = SMODULO (branch_cycle, ps->ii);
970 int num_splits = 0;
971 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
972@@ -850,13 +1050,12 @@
973 branch so we can remove it from its current cycle. */
974 for (next_ps_i = ps->rows[row];
975 next_ps_i; next_ps_i = next_ps_i->next_in_row)
976- if (next_ps_i->node->cuid == g->closing_branch->cuid)
977+ if (next_ps_i->id == g->closing_branch->cuid)
978 break;
979
980 remove_node_from_ps (ps, next_ps_i);
981 success =
982- try_scheduling_node_in_cycle (ps, g->closing_branch,
983- g->closing_branch->cuid, c,
984+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
985 sched_nodes, &num_splits,
986 tmp_precede, tmp_follow);
987 gcc_assert (num_splits == 0);
988@@ -874,8 +1073,7 @@
989 must_precede, branch_cycle, start, end,
990 step);
991 success =
992- try_scheduling_node_in_cycle (ps, g->closing_branch,
993- g->closing_branch->cuid,
994+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
995 branch_cycle, sched_nodes,
996 &num_splits, tmp_precede,
997 tmp_follow);
998@@ -889,7 +1087,7 @@
999 fprintf (dump_file,
1000 "SMS success in moving branch to cycle %d\n", c);
1001
1002- update_node_sched_params (g->closing_branch, ii, c,
1003+ update_node_sched_params (g->closing_branch->cuid, ii, c,
1004 PS_MIN_CYCLE (ps));
1005 ok = true;
1006 }
1007@@ -905,7 +1103,7 @@
1008
1009 static void
1010 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1011- int to_stage, int for_prolog, rtx count_reg)
1012+ int to_stage, rtx count_reg)
1013 {
1014 int row;
1015 ps_insn_ptr ps_ij;
1016@@ -913,9 +1111,9 @@
1017 for (row = 0; row < ps->ii; row++)
1018 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1019 {
1020- ddg_node_ptr u_node = ps_ij->node;
1021- int j, i_reg_moves;
1022- rtx reg_move = NULL_RTX;
1023+ int u = ps_ij->id;
1024+ int first_u, last_u;
1025+ rtx u_insn;
1026
1027 /* Do not duplicate any insn which refers to count_reg as it
1028 belongs to the control part. The closing branch is scheduled
1029 as well and thus should be ignored.
1030 be ignored.
1031 TODO: This should be done by analyzing the control part of
1032 the loop. */
1033- if (reg_mentioned_p (count_reg, u_node->insn)
1034- || JUMP_P (ps_ij->node->insn))
1035+ u_insn = ps_rtl_insn (ps, u);
1036+ if (reg_mentioned_p (count_reg, u_insn)
1037+ || JUMP_P (u_insn))
1038 continue;
1039
1040- if (for_prolog)
1041- {
1042- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
1043- number of reg_moves starting with the second occurrence of
1044- u_node, which is generated if its SCHED_STAGE <= to_stage. */
1045- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
1046- i_reg_moves = MAX (i_reg_moves, 0);
1047- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
1048-
1049- /* The reg_moves start from the *first* reg_move backwards. */
1050- if (i_reg_moves)
1051- {
1052- reg_move = SCHED_FIRST_REG_MOVE (u_node);
1053- for (j = 1; j < i_reg_moves; j++)
1054- reg_move = PREV_INSN (reg_move);
1055- }
1056- }
1057- else /* It's for the epilog. */
1058- {
1059- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
1060- starting to decrease one stage after u_node no longer occurs;
1061- that is, generate all reg_moves until
1062- SCHED_STAGE (u_node) == from_stage - 1. */
1063- i_reg_moves = SCHED_NREG_MOVES (u_node)
1064- - (from_stage - SCHED_STAGE (u_node) - 1);
1065- i_reg_moves = MAX (i_reg_moves, 0);
1066- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
1067-
1068- /* The reg_moves start from the *last* reg_move forwards. */
1069- if (i_reg_moves)
1070- {
1071- reg_move = SCHED_FIRST_REG_MOVE (u_node);
1072- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
1073- reg_move = PREV_INSN (reg_move);
1074- }
1075- }
1076-
1077- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
1078- emit_insn (copy_rtx (PATTERN (reg_move)));
1079- if (SCHED_STAGE (u_node) >= from_stage
1080- && SCHED_STAGE (u_node) <= to_stage)
1081- duplicate_insn_chain (u_node->first_note, u_node->insn);
1082+ first_u = SCHED_STAGE (u);
1083+ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1084+ if (from_stage <= last_u && to_stage >= first_u)
1085+ {
1086+ if (u < ps->g->num_nodes)
1087+ duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1088+ else
1089+ emit_insn (copy_rtx (PATTERN (u_insn)));
1090+ }
1091 }
1092 }
1093
1094@@ -1002,7 +1168,7 @@
1095 }
1096
1097 for (i = 0; i < last_stage; i++)
1098- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
1099+ duplicate_insns_of_cycles (ps, 0, i, count_reg);
1100
1101 /* Put the prolog on the entry edge. */
1102 e = loop_preheader_edge (loop);
1103@@ -1014,7 +1180,7 @@
1104 start_sequence ();
1105
1106 for (i = 0; i < last_stage; i++)
1107- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
1108+ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1109
1110 /* Put the epilogue on the exit edge. */
1111 gcc_assert (single_exit (loop));
1112@@ -1350,10 +1516,9 @@
1113 {
1114 rtx head, tail;
1115 rtx count_reg, count_init;
1116- int mii, rec_mii;
1117- unsigned stage_count = 0;
1118+ int mii, rec_mii, stage_count, min_cycle;
1119 HOST_WIDEST_INT loop_count = 0;
1120- bool opt_sc_p = false;
1121+ bool opt_sc_p;
1122
1123 if (! (g = g_arr[loop->num]))
1124 continue;
1125@@ -1430,62 +1595,63 @@
1126 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1127 rec_mii, mii, maxii);
1128
1129- /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1130- ASAP. */
1131- set_node_sched_params (g);
1132-
1133- ps = sms_schedule_by_order (g, mii, maxii, node_order);
1134-
1135- if (ps)
1136+ for (;;)
1137 {
1138- /* Try to achieve optimized SC by normalizing the partial
1139- schedule (having the cycles start from cycle zero).
1140- The branch location must be placed in row ii-1 in the
1141- final scheduling. If failed, shift all instructions to
1142- position the branch in row ii-1. */
1143- opt_sc_p = optimize_sc (ps, g);
1144- if (opt_sc_p)
1145- stage_count = calculate_stage_count (ps, 0);
1146- else
1147+ set_node_sched_params (g);
1148+
1149+ stage_count = 0;
1150+ opt_sc_p = false;
1151+ ps = sms_schedule_by_order (g, mii, maxii, node_order);
1152+
1153+ if (ps)
1154 {
1155- /* Bring the branch to cycle ii-1. */
1156- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
1157+ /* Try to achieve optimized SC by normalizing the partial
1158+ schedule (having the cycles start from cycle zero).
1159+ The branch location must be placed in row ii-1 in the
1160+ final scheduling. If failed, shift all instructions to
1161+ position the branch in row ii-1. */
1162+ opt_sc_p = optimize_sc (ps, g);
1163+ if (opt_sc_p)
1164+ stage_count = calculate_stage_count (ps, 0);
1165+ else
1166+ {
1167+ /* Bring the branch to cycle ii-1. */
1168+ int amount = (SCHED_TIME (g->closing_branch->cuid)
1169+ - (ps->ii - 1));
1170
1171+ if (dump_file)
1172+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1173+
1174+ stage_count = calculate_stage_count (ps, amount);
1175+ }
1176+
1177+ gcc_assert (stage_count >= 1);
1178+ }
1179+
1180+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1181+ 1 means that there is no interleaving between iterations thus
1182+ we let the scheduling passes do the job in this case. */
1183+ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1184+ || (count_init && (loop_count <= stage_count))
1185+ || (flag_branch_probabilities && (trip_count <= stage_count)))
1186+ {
1187 if (dump_file)
1188- fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1189-
1190- stage_count = calculate_stage_count (ps, amount);
1191- }
1192-
1193- gcc_assert (stage_count >= 1);
1194- PS_STAGE_COUNT (ps) = stage_count;
1195- }
1196-
1197- /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1198- 1 means that there is no interleaving between iterations thus
1199- we let the scheduling passes do the job in this case. */
1200- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1201- || (count_init && (loop_count <= stage_count))
1202- || (flag_branch_probabilities && (trip_count <= stage_count)))
1203- {
1204- if (dump_file)
1205- {
1206- fprintf (dump_file, "SMS failed... \n");
1207- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1208- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1209- fprintf (dump_file, ", trip-count=");
1210- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1211- fprintf (dump_file, ")\n");
1212- }
1213- }
1214- else
1215- {
1216- struct undo_replace_buff_elem *reg_move_replaces;
1217+ {
1218+ fprintf (dump_file, "SMS failed... \n");
1219+ fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1220+ " loop-count=", stage_count);
1221+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1222+ fprintf (dump_file, ", trip-count=");
1223+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1224+ fprintf (dump_file, ")\n");
1225+ }
1226+ break;
1227+ }
1228
1229 if (!opt_sc_p)
1230 {
1231 /* Rotate the partial schedule to have the branch in row ii-1. */
1232- int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
1233+ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1234
1235 reset_sched_times (ps, amount);
1236 rotate_partial_schedule (ps, amount);
1237@@ -1493,6 +1659,29 @@
1238
1239 set_columns_for_ps (ps);
1240
1241+ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1242+ if (!schedule_reg_moves (ps))
1243+ {
1244+ mii = ps->ii + 1;
1245+ free_partial_schedule (ps);
1246+ continue;
1247+ }
1248+
1249+ /* Moves that handle incoming values might have been added
1250+ to a new first stage. Bump the stage count if so.
1251+
1252+ ??? Perhaps we could consider rotating the schedule here
1253+ instead? */
1254+ if (PS_MIN_CYCLE (ps) < min_cycle)
1255+ {
1256+ reset_sched_times (ps, 0);
1257+ stage_count++;
1258+ }
1259+
1260+ /* The stage count should now be correct without rotation. */
1261+ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1262+ PS_STAGE_COUNT (ps) = stage_count;
1263+
1264 canon_loop (loop);
1265
1266 if (dump_file)
1267@@ -1531,17 +1720,16 @@
1268 /* The life-info is not valid any more. */
1269 df_set_bb_dirty (g->bb);
1270
1271- reg_move_replaces = generate_reg_moves (ps, true);
1272+ apply_reg_moves (ps);
1273 if (dump_file)
1274- print_node_sched_params (dump_file, g->num_nodes, g);
1275+ print_node_sched_params (dump_file, g->num_nodes, ps);
1276 /* Generate prolog and epilog. */
1277 generate_prolog_epilog (ps, loop, count_reg, count_init);
1278-
1279- free_undo_replace_buff (reg_move_replaces);
1280+ break;
1281 }
1282
1283 free_partial_schedule (ps);
1284- free (node_sched_params);
1285+ VEC_free (node_sched_params, heap, node_sched_param_vec);
1286 free (node_order);
1287 free_ddg (g);
1288 }
1289@@ -1643,9 +1831,11 @@
1290
1291 static int
1292 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1293- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1294+ sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1295+ int *end_p)
1296 {
1297 int start, step, end;
1298+ int early_start, late_start;
1299 ddg_edge_ptr e;
1300 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1301 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1302@@ -1653,6 +1843,8 @@
1303 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1304 int psp_not_empty;
1305 int pss_not_empty;
1306+ int count_preds;
1307+ int count_succs;
1308
1309 /* 1. compute sched window for u (start, end, step). */
1310 sbitmap_zero (psp);
1311@@ -1660,214 +1852,119 @@
1312 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1313 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1314
1315- if (psp_not_empty && !pss_not_empty)
1316- {
1317- int early_start = INT_MIN;
1318-
1319- end = INT_MAX;
1320- for (e = u_node->in; e != 0; e = e->next_in)
1321- {
1322- ddg_node_ptr v_node = e->src;
1323-
1324- if (dump_file)
1325- {
1326- fprintf (dump_file, "\nProcessing edge: ");
1327- print_ddg_edge (dump_file, e);
1328- fprintf (dump_file,
1329- "\nScheduling %d (%d) in psp_not_empty,"
1330- " checking p %d (%d): ", u_node->cuid,
1331- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1332- (v_node->insn));
1333- }
1334-
1335- if (TEST_BIT (sched_nodes, v_node->cuid))
1336- {
1337- int p_st = SCHED_TIME (v_node);
1338-
1339- early_start =
1340- MAX (early_start, p_st + e->latency - (e->distance * ii));
1341-
1342- if (dump_file)
1343- fprintf (dump_file,
1344- "pred st = %d; early_start = %d; latency: %d",
1345- p_st, early_start, e->latency);
1346-
1347- if (e->data_type == MEM_DEP)
1348- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1349- }
1350- else if (dump_file)
1351- fprintf (dump_file, "the node is not scheduled\n");
1352- }
1353- start = early_start;
1354- end = MIN (end, early_start + ii);
1355- /* Schedule the node close to it's predecessors. */
1356- step = 1;
1357-
1358- if (dump_file)
1359- fprintf (dump_file,
1360- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1361- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1362- }
1363-
1364- else if (!psp_not_empty && pss_not_empty)
1365- {
1366- int late_start = INT_MAX;
1367-
1368- end = INT_MIN;
1369- for (e = u_node->out; e != 0; e = e->next_out)
1370- {
1371- ddg_node_ptr v_node = e->dest;
1372-
1373- if (dump_file)
1374- {
1375- fprintf (dump_file, "\nProcessing edge:");
1376- print_ddg_edge (dump_file, e);
1377- fprintf (dump_file,
1378- "\nScheduling %d (%d) in pss_not_empty,"
1379- " checking s %d (%d): ", u_node->cuid,
1380- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1381- (v_node->insn));
1382- }
1383-
1384- if (TEST_BIT (sched_nodes, v_node->cuid))
1385- {
1386- int s_st = SCHED_TIME (v_node);
1387-
1388- late_start = MIN (late_start,
1389- s_st - e->latency + (e->distance * ii));
1390-
1391- if (dump_file)
1392- fprintf (dump_file,
1393- "succ st = %d; late_start = %d; latency = %d",
1394- s_st, late_start, e->latency);
1395-
1396- if (e->data_type == MEM_DEP)
1397- end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1398- if (dump_file)
1399- fprintf (dump_file, "end = %d\n", end);
1400-
1401- }
1402- else if (dump_file)
1403- fprintf (dump_file, "the node is not scheduled\n");
1404-
1405- }
1406- start = late_start;
1407- end = MAX (end, late_start - ii);
1408- /* Schedule the node close to it's successors. */
1409+ /* We first compute a forward range (start <= end), then decide whether
1410+ to reverse it. */
1411+ early_start = INT_MIN;
1412+ late_start = INT_MAX;
1413+ start = INT_MIN;
1414+ end = INT_MAX;
1415+ step = 1;
1416+
1417+ count_preds = 0;
1418+ count_succs = 0;
1419+
1420+ if (dump_file && (psp_not_empty || pss_not_empty))
1421+ {
1422+ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1423+ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1424+ fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1425+ "start", "early start", "late start", "end", "time");
1426+ fprintf (dump_file, "=========== =========== =========== ==========="
1427+ " =====\n");
1428+ }
1429+ /* Calculate early_start and limit end. Both bounds are inclusive. */
1430+ if (psp_not_empty)
1431+ for (e = u_node->in; e != 0; e = e->next_in)
1432+ {
1433+ int v = e->src->cuid;
1434+
1435+ if (TEST_BIT (sched_nodes, v))
1436+ {
1437+ int p_st = SCHED_TIME (v);
1438+ int earliest = p_st + e->latency - (e->distance * ii);
1439+ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1440+
1441+ if (dump_file)
1442+ {
1443+ fprintf (dump_file, "%11s %11d %11s %11d %5d",
1444+ "", earliest, "", latest, p_st);
1445+ print_ddg_edge (dump_file, e);
1446+ fprintf (dump_file, "\n");
1447+ }
1448+
1449+ early_start = MAX (early_start, earliest);
1450+ end = MIN (end, latest);
1451+
1452+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1453+ count_preds++;
1454+ }
1455+ }
1456+
1457+ /* Calculate late_start and limit start. Both bounds are inclusive. */
1458+ if (pss_not_empty)
1459+ for (e = u_node->out; e != 0; e = e->next_out)
1460+ {
1461+ int v = e->dest->cuid;
1462+
1463+ if (TEST_BIT (sched_nodes, v))
1464+ {
1465+ int s_st = SCHED_TIME (v);
1466+ int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1467+ int latest = s_st - e->latency + (e->distance * ii);
1468+
1469+ if (dump_file)
1470+ {
1471+ fprintf (dump_file, "%11d %11s %11d %11s %5d",
1472+ earliest, "", latest, "", s_st);
1473+ print_ddg_edge (dump_file, e);
1474+ fprintf (dump_file, "\n");
1475+ }
1476+
1477+ start = MAX (start, earliest);
1478+ late_start = MIN (late_start, latest);
1479+
1480+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1481+ count_succs++;
1482+ }
1483+ }
1484+
1485+ if (dump_file && (psp_not_empty || pss_not_empty))
1486+ {
1487+ fprintf (dump_file, "----------- ----------- ----------- -----------"
1488+ " -----\n");
1489+ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1490+ start, early_start, late_start, end, "",
1491+ "(max, max, min, min)");
1492+ }
1493+
1494+ /* Get a target scheduling window no bigger than ii. */
1495+ if (early_start == INT_MIN && late_start == INT_MAX)
1496+ early_start = NODE_ASAP (u_node);
1497+ else if (early_start == INT_MIN)
1498+ early_start = late_start - (ii - 1);
1499+ late_start = MIN (late_start, early_start + (ii - 1));
1500+
1501+ /* Apply memory dependence limits. */
1502+ start = MAX (start, early_start);
1503+ end = MIN (end, late_start);
1504+
1505+ if (dump_file && (psp_not_empty || pss_not_empty))
1506+ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1507+ "", start, end, "", "");
1508+
1509+ /* If there are at least as many successors as predecessors, schedule the
1510+ node close to its successors. */
1511+ if (pss_not_empty && count_succs >= count_preds)
1512+ {
1513+ int tmp = end;
1514+ end = start;
1515+ start = tmp;
1516 step = -1;
1517-
1518- if (dump_file)
1519- fprintf (dump_file,
1520- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1521- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1522-
1523- }
1524-
1525- else if (psp_not_empty && pss_not_empty)
1526- {
1527- int early_start = INT_MIN;
1528- int late_start = INT_MAX;
1529- int count_preds = 0;
1530- int count_succs = 0;
1531-
1532- start = INT_MIN;
1533- end = INT_MAX;
1534- for (e = u_node->in; e != 0; e = e->next_in)
1535- {
1536- ddg_node_ptr v_node = e->src;
1537-
1538- if (dump_file)
1539- {
1540- fprintf (dump_file, "\nProcessing edge:");
1541- print_ddg_edge (dump_file, e);
1542- fprintf (dump_file,
1543- "\nScheduling %d (%d) in psp_pss_not_empty,"
1544- " checking p %d (%d): ", u_node->cuid, INSN_UID
1545- (u_node->insn), v_node->cuid, INSN_UID
1546- (v_node->insn));
1547- }
1548-
1549- if (TEST_BIT (sched_nodes, v_node->cuid))
1550- {
1551- int p_st = SCHED_TIME (v_node);
1552-
1553- early_start = MAX (early_start,
1554- p_st + e->latency
1555- - (e->distance * ii));
1556-
1557- if (dump_file)
1558- fprintf (dump_file,
1559- "pred st = %d; early_start = %d; latency = %d",
1560- p_st, early_start, e->latency);
1561-
1562- if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1563- count_preds++;
1564-
1565- if (e->data_type == MEM_DEP)
1566- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1567- }
1568- else if (dump_file)
1569- fprintf (dump_file, "the node is not scheduled\n");
1570-
1571- }
1572- for (e = u_node->out; e != 0; e = e->next_out)
1573- {
1574- ddg_node_ptr v_node = e->dest;
1575-
1576- if (dump_file)
1577- {
1578- fprintf (dump_file, "\nProcessing edge:");
1579- print_ddg_edge (dump_file, e);
1580- fprintf (dump_file,
1581- "\nScheduling %d (%d) in psp_pss_not_empty,"
1582- " checking s %d (%d): ", u_node->cuid, INSN_UID
1583- (u_node->insn), v_node->cuid, INSN_UID
1584- (v_node->insn));
1585- }
1586-
1587- if (TEST_BIT (sched_nodes, v_node->cuid))
1588- {
1589- int s_st = SCHED_TIME (v_node);
1590-
1591- late_start = MIN (late_start,
1592- s_st - e->latency
1593- + (e->distance * ii));
1594-
1595- if (dump_file)
1596- fprintf (dump_file,
1597- "succ st = %d; late_start = %d; latency = %d",
1598- s_st, late_start, e->latency);
1599-
1600- if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1601- count_succs++;
1602-
1603- if (e->data_type == MEM_DEP)
1604- start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1605- }
1606- else if (dump_file)
1607- fprintf (dump_file, "the node is not scheduled\n");
1608-
1609- }
1610- start = MAX (start, early_start);
1611- end = MIN (end, MIN (early_start + ii, late_start + 1));
1612- step = 1;
1613- /* If there are more successors than predecessors schedule the
1614- node close to it's successors. */
1615- if (count_succs >= count_preds)
1616- {
1617- int old_start = start;
1618-
1619- start = end - 1;
1620- end = old_start - 1;
1621- step = -1;
1622- }
1623- }
1624- else /* psp is empty && pss is empty. */
1625- {
1626- start = SCHED_ASAP (u_node);
1627- end = start + ii;
1628- step = 1;
1629- }
1630+ }
1631+
1632+ /* Now that we've finalized the window, make END an exclusive rather
1633+ than an inclusive bound. */
1634+ end += step;
1635
1636 *start_p = start;
1637 *step_p = step;
1638@@ -1880,10 +1977,10 @@
1639 if (dump_file)
1640 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1641 start, end, step);
1642- return -1;
1643+ return -1;
1644 }
1645
1646- return 0;
1647+ return 0;
1648 }
1649
1650 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1651@@ -1939,7 +2036,7 @@
1652 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1653 for (e = u_node->in; e != 0; e = e->next_in)
1654 if (TEST_BIT (sched_nodes, e->src->cuid)
1655- && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1656+ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
1657 first_cycle_in_window))
1658 {
1659 if (dump_file)
1660@@ -1964,7 +2061,7 @@
1661 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1662 for (e = u_node->out; e != 0; e = e->next_out)
1663 if (TEST_BIT (sched_nodes, e->dest->cuid)
1664- && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1665+ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
1666 last_cycle_in_window))
1667 {
1668 if (dump_file)
1669@@ -1988,7 +2085,7 @@
1670 last row of the scheduling window) */
1671
1672 static bool
1673-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1674+try_scheduling_node_in_cycle (partial_schedule_ptr ps,
1675 int u, int cycle, sbitmap sched_nodes,
1676 int *num_splits, sbitmap must_precede,
1677 sbitmap must_follow)
1678@@ -1997,11 +2094,10 @@
1679 bool success = 0;
1680
1681 verify_partial_schedule (ps, sched_nodes);
1682- psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1683- must_precede, must_follow);
1684+ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
1685 if (psi)
1686 {
1687- SCHED_TIME (u_node) = cycle;
1688+ SCHED_TIME (u) = cycle;
1689 SET_BIT (sched_nodes, u);
1690 success = 1;
1691 *num_splits = 0;
1692@@ -2062,8 +2158,8 @@
1693 &step, &end) == 0)
1694 {
1695 if (dump_file)
1696- fprintf (dump_file, "\nTrying to schedule node %d \
1697- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1698+ fprintf (dump_file, "\nTrying to schedule node %d "
1699+ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1700 (g->nodes[u].insn)), start, end, step);
1701
1702 gcc_assert ((step > 0 && start < end)
1703@@ -2081,7 +2177,7 @@
1704 &tmp_precede, must_precede,
1705 c, start, end, step);
1706 success =
1707- try_scheduling_node_in_cycle (ps, u_node, u, c,
1708+ try_scheduling_node_in_cycle (ps, u, c,
1709 sched_nodes,
1710 &num_splits, tmp_precede,
1711 tmp_follow);
1712@@ -2181,7 +2277,7 @@
1713 for (crr_insn = rows_new[row];
1714 crr_insn; crr_insn = crr_insn->next_in_row)
1715 {
1716- ddg_node_ptr u = crr_insn->node;
1717+ int u = crr_insn->id;
1718 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1719
1720 SCHED_TIME (u) = new_time;
1721@@ -2202,7 +2298,7 @@
1722 for (crr_insn = rows_new[row + 1];
1723 crr_insn; crr_insn = crr_insn->next_in_row)
1724 {
1725- ddg_node_ptr u = crr_insn->node;
1726+ int u = crr_insn->id;
1727 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1728
1729 SCHED_TIME (u) = new_time;
1730@@ -2242,24 +2338,24 @@
1731 {
1732 ddg_edge_ptr e;
1733 int lower = INT_MIN, upper = INT_MAX;
1734- ddg_node_ptr crit_pred = NULL;
1735- ddg_node_ptr crit_succ = NULL;
1736+ int crit_pred = -1;
1737+ int crit_succ = -1;
1738 int crit_cycle;
1739
1740 for (e = u_node->in; e != 0; e = e->next_in)
1741 {
1742- ddg_node_ptr v_node = e->src;
1743+ int v = e->src->cuid;
1744
1745- if (TEST_BIT (sched_nodes, v_node->cuid)
1746- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1747- if (SCHED_TIME (v_node) > lower)
1748+ if (TEST_BIT (sched_nodes, v)
1749+ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
1750+ if (SCHED_TIME (v) > lower)
1751 {
1752- crit_pred = v_node;
1753- lower = SCHED_TIME (v_node);
1754+ crit_pred = v;
1755+ lower = SCHED_TIME (v);
1756 }
1757 }
1758
1759- if (crit_pred != NULL)
1760+ if (crit_pred >= 0)
1761 {
1762 crit_cycle = SCHED_TIME (crit_pred) + 1;
1763 return SMODULO (crit_cycle, ii);
1764@@ -2267,17 +2363,18 @@
1765
1766 for (e = u_node->out; e != 0; e = e->next_out)
1767 {
1768- ddg_node_ptr v_node = e->dest;
1769- if (TEST_BIT (sched_nodes, v_node->cuid)
1770- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1771- if (SCHED_TIME (v_node) < upper)
1772+ int v = e->dest->cuid;
1773+
1774+ if (TEST_BIT (sched_nodes, v)
1775+ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
1776+ if (SCHED_TIME (v) < upper)
1777 {
1778- crit_succ = v_node;
1779- upper = SCHED_TIME (v_node);
1780+ crit_succ = v;
1781+ upper = SCHED_TIME (v);
1782 }
1783 }
1784
1785- if (crit_succ != NULL)
1786+ if (crit_succ >= 0)
1787 {
1788 crit_cycle = SCHED_TIME (crit_succ);
1789 return SMODULO (crit_cycle, ii);
1790@@ -2301,10 +2398,10 @@
1791
1792 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1793 {
1794- ddg_node_ptr u = crr_insn->node;
1795+ int u = crr_insn->id;
1796
1797 length++;
1798- gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1799+ gcc_assert (TEST_BIT (sched_nodes, u));
1800 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1801 popcount (sched_nodes) == number of insns in ps. */
1802 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1803@@ -2719,6 +2816,7 @@
1804 partial_schedule_ptr ps = XNEW (struct partial_schedule);
1805 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1806 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
1807+ ps->reg_moves = NULL;
1808 ps->ii = ii;
1809 ps->history = history;
1810 ps->min_cycle = INT_MAX;
1811@@ -2753,8 +2851,16 @@
1812 static void
1813 free_partial_schedule (partial_schedule_ptr ps)
1814 {
1815+ ps_reg_move_info *move;
1816+ unsigned int i;
1817+
1818 if (!ps)
1819 return;
1820+
1821+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
1822+ sbitmap_free (move->uses);
1823+ VEC_free (ps_reg_move_info, heap, ps->reg_moves);
1824+
1825 free_ps_insns (ps);
1826 free (ps->rows);
1827 free (ps->rows_length);
1828@@ -2796,12 +2902,12 @@
1829 fprintf (dump, "\n[ROW %d ]: ", i);
1830 while (ps_i)
1831 {
1832- if (JUMP_P (ps_i->node->insn))
1833- fprintf (dump, "%d (branch), ",
1834- INSN_UID (ps_i->node->insn));
1835+ rtx insn = ps_rtl_insn (ps, ps_i->id);
1836+
1837+ if (JUMP_P (insn))
1838+ fprintf (dump, "%d (branch), ", INSN_UID (insn));
1839 else
1840- fprintf (dump, "%d, ",
1841- INSN_UID (ps_i->node->insn));
1842+ fprintf (dump, "%d, ", INSN_UID (insn));
1843
1844 ps_i = ps_i->next_in_row;
1845 }
1846@@ -2810,11 +2916,11 @@
1847
1848 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1849 static ps_insn_ptr
1850-create_ps_insn (ddg_node_ptr node, int cycle)
1851+create_ps_insn (int id, int cycle)
1852 {
1853 ps_insn_ptr ps_i = XNEW (struct ps_insn);
1854
1855- ps_i->node = node;
1856+ ps_i->id = id;
1857 ps_i->next_in_row = NULL;
1858 ps_i->prev_in_row = NULL;
1859 ps_i->cycle = cycle;
1860@@ -2879,10 +2985,11 @@
1861 next_ps_i;
1862 next_ps_i = next_ps_i->next_in_row)
1863 {
1864- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
1865+ if (must_follow
1866+ && TEST_BIT (must_follow, next_ps_i->id)
1867 && ! first_must_follow)
1868 first_must_follow = next_ps_i;
1869- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
1870+ if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
1871 {
1872 /* If we have already met a node that must follow, then
1873 there is no possible column. */
1874@@ -2893,8 +3000,8 @@
1875 }
1876 /* The closing branch must be the last in the row. */
1877 if (must_precede
1878- && TEST_BIT (must_precede, next_ps_i->node->cuid)
1879- && JUMP_P (next_ps_i->node->insn))
1880+ && TEST_BIT (must_precede, next_ps_i->id)
1881+ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
1882 return false;
1883
1884 last_in_row = next_ps_i;
1885@@ -2903,7 +3010,7 @@
1886 /* The closing branch is scheduled as well. Make sure there is no
1887 dependent instruction after it as the branch should be the last
1888 instruction in the row. */
1889- if (JUMP_P (ps_i->node->insn))
1890+ if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
1891 {
1892 if (first_must_follow)
1893 return false;
1894@@ -2954,7 +3061,6 @@
1895 {
1896 ps_insn_ptr prev, next;
1897 int row;
1898- ddg_node_ptr next_node;
1899
1900 if (!ps || !ps_i)
1901 return false;
1902@@ -2964,11 +3070,9 @@
1903 if (! ps_i->next_in_row)
1904 return false;
1905
1906- next_node = ps_i->next_in_row->node;
1907-
1908 /* Check if next_in_row is dependent on ps_i, both having same sched
1909 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
1910- if (must_follow && TEST_BIT (must_follow, next_node->cuid))
1911+ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
1912 return false;
1913
1914 /* Advance PS_I over its next_in_row in the doubly linked list. */
1915@@ -2999,7 +3103,7 @@
1916 before/after (respectively) the node pointed to by PS_I when scheduled
1917 in the same cycle. */
1918 static ps_insn_ptr
1919-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
1920+add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
1921 sbitmap must_precede, sbitmap must_follow)
1922 {
1923 ps_insn_ptr ps_i;
1924@@ -3008,7 +3112,7 @@
1925 if (ps->rows_length[row] >= issue_rate)
1926 return NULL;
1927
1928- ps_i = create_ps_insn (node, cycle);
1929+ ps_i = create_ps_insn (id, cycle);
1930
1931 /* Finds and inserts PS_I according to MUST_FOLLOW and
1932 MUST_PRECEDE. */
1933@@ -3060,7 +3164,7 @@
1934 crr_insn;
1935 crr_insn = crr_insn->next_in_row)
1936 {
1937- rtx insn = crr_insn->node->insn;
1938+ rtx insn = ps_rtl_insn (ps, crr_insn->id);
1939
1940 if (!NONDEBUG_INSN_P (insn))
1941 continue;
1942@@ -3097,7 +3201,7 @@
1943 cuid N must come before/after (respectively) the node pointed to by
1944 PS_I when scheduled in the same cycle. */
1945 ps_insn_ptr
1946-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
1947+ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
1948 int c, sbitmap must_precede,
1949 sbitmap must_follow)
1950 {
1951
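
The rewritten get_sched_window above folds the three earlier cases (only
predecessors scheduled, only successors, or both) into one pass: scheduled
predecessors raise early_start while their memory dependences cap END,
scheduled successors lower late_start while their memory dependences raise
START, and the window is then clamped to at most II cycles and reversed when
the node should sit close to its successors. The C sketch below shows that
shape only; it is not the GCC code, and the earliest/latest arrays stand in
for the per-edge SCHED_TIME +/- latency -/+ distance * ii values computed in
the hunk above.

#include <limits.h>
#include <stdbool.h>

struct window { int start, end, step; };

static struct window
sched_window (const int *pred_earliest, const int *pred_latest, int n_preds,
              const int *succ_earliest, const int *succ_latest, int n_succs,
              int asap, int ii, bool prefer_succs)
{
  int early_start = INT_MIN, late_start = INT_MAX;
  int start = INT_MIN, end = INT_MAX;
  struct window w;

  for (int i = 0; i < n_preds; i++)
    {
      if (pred_earliest[i] > early_start)
        early_start = pred_earliest[i];   /* predecessors raise early_start */
      if (pred_latest[i] < end)
        end = pred_latest[i];             /* MEM_DEP predecessors cap END */
    }
  for (int i = 0; i < n_succs; i++)
    {
      if (succ_latest[i] < late_start)
        late_start = succ_latest[i];      /* successors lower late_start */
      if (succ_earliest[i] > start)
        start = succ_earliest[i];         /* MEM_DEP successors raise START */
    }

  /* Clamp the window to at most II cycles. */
  if (early_start == INT_MIN && late_start == INT_MAX)
    early_start = asap;                   /* no scheduled neighbours at all */
  else if (early_start == INT_MIN)
    early_start = late_start - (ii - 1);
  if (late_start > early_start + (ii - 1))
    late_start = early_start + (ii - 1);

  if (start < early_start) start = early_start;
  if (end > late_start)    end = late_start;

  w.start = start; w.end = end; w.step = 1;
  if (prefer_succs)        /* count_succs >= count_preds in the patch */
    {
      w.start = end; w.end = start; w.step = -1;
    }
  w.end += w.step;         /* END becomes an exclusive bound */
  return w;
}

The result matches the assertion checked later in sms_schedule_by_order:
either step > 0 and start < end, or step < 0 and start > end; an empty
window instead makes get_sched_window return -1.
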
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch
new file mode 100644
index 000000000..02f8e5177
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch
@@ -0,0 +1,147 @@
12011-10-19 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from FSF:
4
5 2011-09-09 Andrew Stubbs <ams@codesourcery.com>
6
7 gcc/
8 * config/arm/arm-cores.def (generic-armv7-a): New architecture.
9 * config/arm/arm-tables.opt: Regenerate.
10 * config/arm/arm-tune.md: Regenerate.
11 * config/arm/arm.c (arm_file_start): Output .arch directive when
12 user passes -mcpu=generic-*.
13 (arm_issue_rate): Add genericv7a support.
14 * config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec.
15 (ASM_CPU_SPEC): New define.
16 * config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec).
17 * config/arm/semi.h (ASM_SPEC): Likewise.
18 * doc/invoke.texi (ARM Options): Document -mcpu=generic-*
19 and -mtune=generic-*.
20
21=== modified file 'gcc/config/arm/arm-cores.def'
22--- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000
23+++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000
24@@ -124,6 +124,7 @@
25 ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
26 ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2)
27 ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
28+ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex)
29 ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5)
30 ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex)
31 ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
32@@ -135,3 +136,4 @@
33 ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex)
34 ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex)
35 ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex)
36+
37
38=== modified file 'gcc/config/arm/arm-tune.md'
39--- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000
40+++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000
41@@ -1,5 +1,5 @@
42 ;; -*- buffer-read-only: t -*-
43 ;; Generated automatically by gentune.sh from arm-cores.def
44 (define_attr "tune"
45- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
46+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
47 (const (symbol_ref "((enum attr_tune) arm_tune)")))
48
49=== modified file 'gcc/config/arm/arm.c'
50--- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000
51+++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000
52@@ -22185,6 +22185,8 @@
53 const char *fpu_name;
54 if (arm_selected_arch)
55 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
56+ else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
57+ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
58 else
59 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
60
61@@ -23717,6 +23719,7 @@
62 case cortexr4:
63 case cortexr4f:
64 case cortexr5:
65+ case genericv7a:
66 case cortexa5:
67 case cortexa8:
68 case cortexa9:
69
70=== modified file 'gcc/config/arm/arm.h'
71--- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000
72+++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
73@@ -198,6 +198,7 @@
74 Do not define this macro if it does not need to do anything. */
75 #define EXTRA_SPECS \
76 { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
77+ { "asm_cpu_spec", ASM_CPU_SPEC }, \
78 SUBTARGET_EXTRA_SPECS
79
80 #ifndef SUBTARGET_EXTRA_SPECS
81@@ -2278,4 +2279,8 @@
82 instruction. */
83 #define MAX_LDM_STM_OPS 4
84
85+#define ASM_CPU_SPEC \
86+ " %{mcpu=generic-*:-march=%*;" \
87+ " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
88+
89 #endif /* ! GCC_ARM_H */
90
91=== modified file 'gcc/config/arm/elf.h'
92--- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000
93+++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000
94@@ -56,8 +56,7 @@
95 #define ASM_SPEC "\
96 %{mbig-endian:-EB} \
97 %{mlittle-endian:-EL} \
98-%{mcpu=*:-mcpu=%*} \
99-%{march=*:-march=%*} \
100+%(asm_cpu_spec) \
101 %{mapcs-*:-mapcs-%*} \
102 %(subtarget_asm_float_spec) \
103 %{mthumb-interwork:-mthumb-interwork} \
104
105=== modified file 'gcc/config/arm/semi.h'
106--- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000
107+++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000
108@@ -65,8 +65,7 @@
109 #define ASM_SPEC "\
110 %{fpic|fpie: -k} %{fPIC|fPIE: -k} \
111 %{mbig-endian:-EB} \
112-%{mcpu=*:-mcpu=%*} \
113-%{march=*:-march=%*} \
114+%(asm_cpu_spec) \
115 %{mapcs-float:-mfloat} \
116 %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
117 %{mfloat-abi=*} %{mfpu=*} \
118
119=== modified file 'gcc/doc/invoke.texi'
120--- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000
121+++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
122@@ -10215,6 +10215,10 @@
123 @samp{cortex-m0},
124 @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
125
126+@option{-mcpu=generic-@var{arch}} is also permissible, and is
127+equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
128+See @option{-mtune} for more information.
129+
130 @item -mtune=@var{name}
131 @opindex mtune
132 This option is very similar to the @option{-mcpu=} option, except that
133@@ -10226,6 +10230,13 @@
134 For some ARM implementations better performance can be obtained by using
135 this option.
136
137+@option{-mtune=generic-@var{arch}} specifies that GCC should tune the
138+performance for a blend of processors within architecture @var{arch}.
139+The aim is to generate code that runs well on the currently most popular
140+processors, balancing between optimizations that benefit some CPUs in the
141+range, and avoiding performance pitfalls of other CPUs. The effects of
142+this option may change in future GCC versions as CPU models come and go.
143+
144 @item -march=@var{name}
145 @opindex march
146 This specifies the name of the target ARM architecture. GCC uses this
147
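
Taken together with the new ASM_CPU_SPEC, the invoke.texi hunk above makes
the following two invocations equivalent (illustrative command lines; the
cross-compiler prefix and source file name are assumptions):

  arm-linux-gnueabi-gcc -mcpu=generic-armv7-a -c foo.c
  arm-linux-gnueabi-gcc -march=armv7-a -mtune=generic-armv7-a -c foo.c

In both cases the %{mcpu=generic-*:-march=%*} branch of the spec passes
-march=armv7-a to the assembler rather than a -mcpu value it would not
recognize, and arm_file_start emits a ".arch armv7-a" directive instead of
".cpu".
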
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch
new file mode 100644
index 000000000..695aa8559
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch
@@ -0,0 +1,304 @@
12011-10-19 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from FSF:
4
5 2011-10-18 Andrew Stubbs <ams@codesourcery.com>
6
7 * config/arm/driver-arm.c (host_detect_local_cpu): Close the file
8 before exiting.
9
10 2011-10-18 Andrew Stubbs <ams@codesourcery.com>
11
12 gcc/
13 * config.host (arm*-*-linux*): Add driver-arm.o and x-arm.
14 * config/arm/arm.opt: Add 'native' processor_type and
15 arm_arch enum values.
16 * config/arm/arm.h (host_detect_local_cpu): New prototype.
17 (EXTRA_SPEC_FUNCTIONS): New define.
18 (MCPU_MTUNE_NATIVE_SPECS): New define.
19 (DRIVER_SELF_SPECS): New define.
20 * config/arm/driver-arm.c: New file.
21 * config/arm/x-arm: New file.
22 * doc/invoke.texi (ARM Options): Document -mcpu=native,
23 -mtune=native and -march=native.
24
25=== modified file 'gcc/config.host'
26--- old/gcc/config.host 2011-02-15 09:49:14 +0000
27+++ new/gcc/config.host 2011-10-19 17:01:50 +0000
28@@ -100,6 +100,14 @@
29 esac
30
31 case ${host} in
32+ arm*-*-linux*)
33+ case ${target} in
34+ arm*-*-*)
35+ host_extra_gcc_objs="driver-arm.o"
36+ host_xmake_file="${host_xmake_file} arm/x-arm"
37+ ;;
38+ esac
39+ ;;
40 alpha*-*-linux*)
41 case ${target} in
42 alpha*-*-linux*)
43
44=== modified file 'gcc/config/arm/arm.h'
45--- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000
46+++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
47@@ -2283,4 +2283,21 @@
48 " %{mcpu=generic-*:-march=%*;" \
49 " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
50
51+/* -mcpu=native handling only makes sense with the compiler running on
52+ an ARM chip. */
53+#if defined(__arm__)
54+extern const char *host_detect_local_cpu (int argc, const char **argv);
55+# define EXTRA_SPEC_FUNCTIONS \
56+ { "local_cpu_detect", host_detect_local_cpu },
57+
58+# define MCPU_MTUNE_NATIVE_SPECS \
59+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
60+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
61+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
62+#else
63+# define MCPU_MTUNE_NATIVE_SPECS ""
64+#endif
65+
66+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
67+
68 #endif /* ! GCC_ARM_H */
69
70=== modified file 'gcc/config/arm/arm.opt'
71--- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000
72+++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000
73@@ -48,6 +48,11 @@
74 Target RejectNegative Joined
75 Specify the name of the target architecture
76
77+; Other arm_arch values are loaded from arm-tables.opt
78+; but that is a generated file and this is an odd-one-out.
79+EnumValue
80+Enum(arm_arch) String(native) Value(-1) DriverOnly
81+
82 marm
83 Target RejectNegative InverseMask(THUMB) Undocumented
84
85@@ -153,6 +158,11 @@
86 Target RejectNegative Joined
87 Tune code for the given processor
88
89+; Other processor_type values are loaded from arm-tables.opt
90+; but that is a generated file and this is an odd-one-out.
91+EnumValue
92+Enum(processor_type) String(native) Value(-1) DriverOnly
93+
94 mwords-little-endian
95 Target Report RejectNegative Mask(LITTLE_WORDS)
96 Assume big endian bytes, little endian words
97
98=== added file 'gcc/config/arm/driver-arm.c'
99--- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000
100+++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
101@@ -0,0 +1,149 @@
102+/* Subroutines for the gcc driver.
103+ Copyright (C) 2011 Free Software Foundation, Inc.
104+
105+This file is part of GCC.
106+
107+GCC is free software; you can redistribute it and/or modify
108+it under the terms of the GNU General Public License as published by
109+the Free Software Foundation; either version 3, or (at your option)
110+any later version.
111+
112+GCC is distributed in the hope that it will be useful,
113+but WITHOUT ANY WARRANTY; without even the implied warranty of
114+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
115+GNU General Public License for more details.
116+
117+You should have received a copy of the GNU General Public License
118+along with GCC; see the file COPYING3. If not see
119+<http://www.gnu.org/licenses/>. */
120+
121+#include "config.h"
122+#include "system.h"
123+#include "coretypes.h"
124+#include "tm.h"
125+#include "configargs.h"
126+
127+struct vendor_cpu {
128+ const char *part_no;
129+ const char *arch_name;
130+ const char *cpu_name;
131+};
132+
133+static struct vendor_cpu arm_cpu_table[] = {
134+ {"0x926", "armv5te", "arm926ej-s"},
135+ {"0xa26", "armv5te", "arm1026ej-s"},
136+ {"0xb02", "armv6k", "mpcore"},
137+ {"0xb36", "armv6j", "arm1136j-s"},
138+ {"0xb56", "armv6t2", "arm1156t2-s"},
139+ {"0xb76", "armv6zk", "arm1176jz-s"},
140+ {"0xc05", "armv7-a", "cortex-a5"},
141+ {"0xc08", "armv7-a", "cortex-a8"},
142+ {"0xc09", "armv7-a", "cortex-a9"},
143+ {"0xc0f", "armv7-a", "cortex-a15"},
144+ {"0xc14", "armv7-r", "cortex-r4"},
145+ {"0xc15", "armv7-r", "cortex-r5"},
146+ {"0xc20", "armv6-m", "cortex-m0"},
147+ {"0xc21", "armv6-m", "cortex-m1"},
148+ {"0xc23", "armv7-m", "cortex-m3"},
149+ {"0xc24", "armv7e-m", "cortex-m4"},
150+ {NULL, NULL, NULL}
151+};
152+
153+struct {
154+ const char *vendor_no;
155+ const struct vendor_cpu *vendor_parts;
156+} vendors[] = {
157+ {"0x41", arm_cpu_table},
158+ {NULL, NULL}
159+};
160+
161+/* This will be called by the spec parser in gcc.c when it sees
162+ a %:local_cpu_detect(args) construct. Currently it will be called
163+ with either "arch", "cpu" or "tune" as argument depending on if
164+ -march=native, -mcpu=native or -mtune=native is to be substituted.
165+
166+ It returns a string containing new command line parameters to be
167+ put at the place of the above two options, depending on what CPU
168+ this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for
169+ -march=native. If the routine can't detect a known processor,
170+ the -march or -mtune option is discarded.
171+
172+ ARGC and ARGV are set depending on the actual arguments given
173+ in the spec. */
174+const char *
175+host_detect_local_cpu (int argc, const char **argv)
176+{
177+ const char *val = NULL;
178+ char buf[128];
179+ FILE *f = NULL;
180+ bool arch;
181+ const struct vendor_cpu *cpu_table = NULL;
182+
183+ if (argc < 1)
184+ goto not_found;
185+
186+ arch = strcmp (argv[0], "arch") == 0;
187+ if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune"))
188+ goto not_found;
189+
190+ f = fopen ("/proc/cpuinfo", "r");
191+ if (f == NULL)
192+ goto not_found;
193+
194+ while (fgets (buf, sizeof (buf), f) != NULL)
195+ {
196+ /* Ensure that CPU implementer is ARM (0x41). */
197+ if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0)
198+ {
199+ int i;
200+ for (i = 0; vendors[i].vendor_no != NULL; i++)
201+ if (strstr (buf, vendors[i].vendor_no) != NULL)
202+ {
203+ cpu_table = vendors[i].vendor_parts;
204+ break;
205+ }
206+ }
207+
208+ /* Detect arch/cpu. */
209+ if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0)
210+ {
211+ int i;
212+
213+ if (cpu_table == NULL)
214+ goto not_found;
215+
216+ for (i = 0; cpu_table[i].part_no != NULL; i++)
217+ if (strstr (buf, cpu_table[i].part_no) != NULL)
218+ {
219+ val = arch ? cpu_table[i].arch_name : cpu_table[i].cpu_name;
220+ break;
221+ }
222+ break;
223+ }
224+ }
225+
226+ fclose (f);
227+
228+ if (val == NULL)
229+ goto not_found;
230+
231+ return concat ("-m", argv[0], "=", val, NULL);
232+
233+not_found:
234+ {
235+ unsigned int i;
236+ unsigned int opt;
237+ const char *search[] = {NULL, "arch"};
238+
239+ if (f)
240+ fclose (f);
241+
242+ search[0] = argv[0];
243+ for (opt = 0; opt < ARRAY_SIZE (search); opt++)
244+ for (i = 0; i < ARRAY_SIZE (configure_default_options); i++)
245+ if (strcmp (configure_default_options[i].name, search[opt]) == 0)
246+ return concat ("-m", search[opt], "=",
247+ configure_default_options[i].value, NULL);
248+ return NULL;
249+ }
250+}
251
252=== added file 'gcc/config/arm/x-arm'
253--- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000
254+++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000
255@@ -0,0 +1,3 @@
256+driver-arm.o: $(srcdir)/config/arm/driver-arm.c \
257+ $(CONFIG_H) $(SYSTEM_H)
258+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
259
260=== modified file 'gcc/doc/invoke.texi'
261--- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000
262+++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000
263@@ -10215,10 +10215,16 @@
264 @samp{cortex-m0},
265 @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
266
267+
268 @option{-mcpu=generic-@var{arch}} is also permissible, and is
269 equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}.
270 See @option{-mtune} for more information.
271
272+@option{-mcpu=native} causes the compiler to auto-detect the CPU
273+of the build computer. At present, this feature is only supported on
274+Linux, and not all architectures are recognised. If the auto-detect is
275+unsuccessful the option has no effect.
276+
277 @item -mtune=@var{name}
278 @opindex mtune
279 This option is very similar to the @option{-mcpu=} option, except that
280@@ -10237,6 +10243,11 @@
281 range, and avoiding performance pitfalls of other CPUs. The effects of
282 this option may change in future GCC versions as CPU models come and go.
283
284+@option{-mtune=native} causes the compiler to auto-detect the CPU
285+of the build computer. At present, this feature is only supported on
286+Linux, and not all architectures are recognised. If the auto-detect is
287+unsuccessful the option has no effect.
288+
289 @item -march=@var{name}
290 @opindex march
291 This specifies the name of the target ARM architecture. GCC uses this
292@@ -10250,6 +10261,11 @@
293 @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m},
294 @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
295
296+@option{-march=native} causes the compiler to auto-detect the architecture
297+of the build computer. At present, this feature is only supported on
298+Linux, and not all architectures are recognised. If the auto-detect is
299+unsuccessful the option has no effect.
300+
301 @item -mfpu=@var{name}
302 @itemx -mfpe=@var{number}
303 @itemx -mfp=@var{number}
304
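
With DRIVER_SELF_SPECS wired to host_detect_local_cpu, the -mcpu=native
family is rewritten by the driver before compilation proper, by matching the
"CPU implementer" line (0x41 for ARM) and the "CPU part" line of
/proc/cpuinfo against the table above. A hypothetical session on a
Cortex-A9 host, whose "CPU part" reads 0xc09 (an ARM-hosted compiler is
assumed, since MCPU_MTUNE_NATIVE_SPECS is empty otherwise):

  gcc -mcpu=native -c foo.c      # rewritten to: gcc -mcpu=cortex-a9 -c foo.c
  gcc -march=native -c foo.c     # rewritten to: gcc -march=armv7-a -c foo.c

If /proc/cpuinfo is unreadable or the part number is not in the table, the
not_found path falls back to the configure-time default for that option, or
drops it entirely, which is the "no effect" behaviour the documentation hunk
describes.
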
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch
new file mode 100644
index 000000000..ad91d7736
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch
@@ -0,0 +1,123 @@
12011-10-19 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from FSF:
4
5 2011-10-18 Andrew Stubbs <ams@codesourcery.com>
6
7 PR tree-optimization/50717
8
9 gcc/
10 * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type'
11 parameter. Calculate 'type' from stmt.
12 (convert_mult_to_widen): Update call to is_widening_mult_p.
13 (convert_plusminus_to_widen): Likewise.
14
15 gcc/testsuite/
16 * gcc.dg/pr50717-1.c: New file.
17 * gcc.target/arm/wmul-12.c: Correct types.
18 * gcc.target/arm/wmul-8.c: Correct types.
19
20=== added file 'gcc/testsuite/gcc.dg/pr50717-1.c'
21--- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000
22+++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000
23@@ -0,0 +1,26 @@
24+/* PR tree-optimization/50717 */
25+/* Ensure that widening multiply-and-accumulate is not used where integer
26+ type promotion or users' casts should prevent it. */
27+
28+/* { dg-options "-O2 -fdump-tree-widening_mul" } */
29+
30+long long
31+f (unsigned int a, char b, long long c)
32+{
33+ return (a * b) + c;
34+}
35+
36+int
37+g (short a, short b, int c)
38+{
39+ return (short)(a * b) + c;
40+}
41+
42+int
43+h (char a, char b, int c)
44+{
45+ return (char)(a * b) + c;
46+}
47+
48+/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */
49+/* { dg-final { cleanup-tree-dump "widening_mul" } } */
50
51=== modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c'
52--- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000
53+++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000
54@@ -4,8 +4,8 @@
55 long long
56 foo (int *b, int *c)
57 {
58- int tmp = *b * *c;
59- return 10 + (long long)tmp;
60+ long long tmp = (long long)*b * *c;
61+ return 10 + tmp;
62 }
63
64 /* { dg-final { scan-assembler "smlal" } } */
65
66=== modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c'
67--- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000
68+++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000
69@@ -4,7 +4,7 @@
70 long long
71 foo (long long a, int *b, int *c)
72 {
73- return a + *b * *c;
74+ return a + (long long)*b * *c;
75 }
76
77 /* { dg-final { scan-assembler "smlal" } } */
78
79=== modified file 'gcc/tree-ssa-math-opts.c'
80--- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000
81+++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000
82@@ -1351,10 +1351,12 @@
83 and *TYPE2_OUT would give the operands of the multiplication. */
84
85 static bool
86-is_widening_mult_p (tree type, gimple stmt,
87+is_widening_mult_p (gimple stmt,
88 tree *type1_out, tree *rhs1_out,
89 tree *type2_out, tree *rhs2_out)
90 {
91+ tree type = TREE_TYPE (gimple_assign_lhs (stmt));
92+
93 if (TREE_CODE (type) != INTEGER_TYPE
94 && TREE_CODE (type) != FIXED_POINT_TYPE)
95 return false;
96@@ -1416,7 +1418,7 @@
97 if (TREE_CODE (type) != INTEGER_TYPE)
98 return false;
99
100- if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2))
101+ if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
102 return false;
103
104 to_mode = TYPE_MODE (type);
105@@ -1592,7 +1594,7 @@
106 if (code == PLUS_EXPR
107 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
108 {
109- if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1,
110+ if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
111 &type2, &mult_rhs2))
112 return false;
113 add_rhs = rhs2;
114@@ -1600,7 +1602,7 @@
115 }
116 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
117 {
118- if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1,
119+ if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
120 &type2, &mult_rhs2))
121 return false;
122 add_rhs = rhs1;
123
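
The core of the PR50717 fix is that is_widening_mult_p now derives the
widening decision from the type of the multiplication statement's own lhs
instead of the type of the enclosing addition, so a user cast that truncates
the product can no longer be folded away. A worked example in the spirit of
h() from the new test, assuming the usual 8-bit char:

/* With a = b = 16, a * b promotes both operands to int and yields 256;
   the cast truncates that to (char) 0, so the function must return c.  */
int
h_example (char a, char b, int c)
{
  return (char) (a * b) + c;
}
/* Folding this into WIDEN_MULT_PLUS_EXPR, i.e. computing
   (int) a * (int) b + c directly, would skip the truncation and
   return c + 256 instead.  */
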
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch
new file mode 100644
index 000000000..843f1cff2
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch
@@ -0,0 +1,24 @@
12011-10-21 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from FSF mainline:
4
5 2011-10-21 Andrew Stubbs <ams@codesourcery.com>
6
7 PR target/50809
8
9 gcc/
10 * config/arm/driver-arm.c (vendors): Make static.
11
12=== modified file 'gcc/config/arm/driver-arm.c'
13--- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000
14+++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000
15@@ -49,7 +49,7 @@
16 {NULL, NULL, NULL}
17 };
18
19-struct {
20+static struct {
21 const char *vendor_no;
22 const struct vendor_cpu *vendor_parts;
23 } vendors[] = {
24
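
The fix itself is just internal linkage for the vendors table. A generic
illustration of why that matters for an initialized file-scope table (this
shows the general multiple-definition hazard, not necessarily the exact
failure mode reported in PR50809):

/* a.c */
static const char *vendors[] = { "0x41", 0 };   /* private to a.c */

/* b.c */
static const char *vendors[] = { "0x69", 0 };   /* private to b.c: fine */

Without the static, both files would contain an initialized global
definition of the same name, and linking a.o with b.o fails with a
multiple-definition error; with it, each table is also invisible outside its
own file, which is all driver-arm.c needs.
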
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch
new file mode 100644
index 000000000..1ad48e512
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch
@@ -0,0 +1,453 @@
12011-10-27 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-10-16 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-vect-stmts.c (vectorizable_load): For SLP without permutation
9 treat the first load of the node as the first element in its
10 interleaving chain.
11 * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
12 necessary and possible.
13 (vect_build_slp_tree): Add new argument. Allow load groups of any size
14 in basic blocks. Keep all the loads for further permutation check.
15 Use the new argument to determine if there is a permutation. Update
16 the recursive calls.
17 (vect_supported_load_permutation_p): Allow subchains of interleaving
18 chains in basic block vectorization.
19 (vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
20 Check load permutation based on the new parameter.
21 (vect_schedule_slp_instance): Don't start from the first element in
22 interleaving chain unless the loads are permuted.
23
24 gcc/testsuite/
25 * gcc.dg/vect/bb-slp-29.c: New test.
26
27=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c'
28--- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000
29+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000
30@@ -0,0 +1,59 @@
31+/* { dg-require-effective-target vect_int } */
32+
33+#include <stdarg.h>
34+#include "tree-vect.h"
35+
36+#define A 3
37+#define B 4
38+#define N 256
39+
40+short src[N], dst[N];
41+
42+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
43+{
44+ int i;
45+ h /= 16;
46+ for (i = 0; i < h; i++)
47+ {
48+ dst[0] = A*src[0] + B*src[1];
49+ dst[1] = A*src[1] + B*src[2];
50+ dst[2] = A*src[2] + B*src[3];
51+ dst[3] = A*src[3] + B*src[4];
52+ dst[4] = A*src[4] + B*src[5];
53+ dst[5] = A*src[5] + B*src[6];
54+ dst[6] = A*src[6] + B*src[7];
55+ dst[7] = A*src[7] + B*src[8];
56+ dst += stride;
57+ src += stride;
58+ if (dummy == 32)
59+ abort ();
60+ }
61+}
62+
63+
64+int main (void)
65+{
66+ int i;
67+
68+ check_vect ();
69+
70+ for (i = 0; i < N; i++)
71+ {
72+ dst[i] = 0;
73+ src[i] = i;
74+ }
75+
76+ foo (dst, src, N, 8, 0);
77+
78+ for (i = 0; i < N/2; i++)
79+ {
80+ if (dst[i] != A * src[i] + B * src[i+1])
81+ abort ();
82+ }
83+
84+ return 0;
85+}
86+
87+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */
88+/* { dg-final { cleanup-tree-dump "slp" } } */
89+
90
91=== modified file 'gcc/tree-vect-slp.c'
92--- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000
93+++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000
94@@ -115,13 +115,15 @@
95 {
96 tree oprnd;
97 unsigned int i, number_of_oprnds;
98- tree def;
99+ tree def[2];
100 gimple def_stmt;
101 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
102 stmt_vec_info stmt_info =
103 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
104 enum gimple_rhs_class rhs_class;
105 struct loop *loop = NULL;
106+ enum tree_code rhs_code;
107+ bool different_types = false;
108
109 if (loop_vinfo)
110 loop = LOOP_VINFO_LOOP (loop_vinfo);
111@@ -133,7 +135,7 @@
112 {
113 oprnd = gimple_op (stmt, i + 1);
114
115- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
116+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
117 &dt[i])
118 || (!def_stmt && dt[i] != vect_constant_def))
119 {
120@@ -188,11 +190,11 @@
121 switch (gimple_code (def_stmt))
122 {
123 case GIMPLE_PHI:
124- def = gimple_phi_result (def_stmt);
125+ def[i] = gimple_phi_result (def_stmt);
126 break;
127
128 case GIMPLE_ASSIGN:
129- def = gimple_assign_lhs (def_stmt);
130+ def[i] = gimple_assign_lhs (def_stmt);
131 break;
132
133 default:
134@@ -206,8 +208,8 @@
135 {
136 /* op0 of the first stmt of the group - store its info. */
137 *first_stmt_dt0 = dt[i];
138- if (def)
139- *first_stmt_def0_type = TREE_TYPE (def);
140+ if (def[i])
141+ *first_stmt_def0_type = TREE_TYPE (def[i]);
142 else
143 *first_stmt_const_oprnd = oprnd;
144
145@@ -227,8 +229,8 @@
146 {
147 /* op1 of the first stmt of the group - store its info. */
148 *first_stmt_dt1 = dt[i];
149- if (def)
150- *first_stmt_def1_type = TREE_TYPE (def);
151+ if (def[i])
152+ *first_stmt_def1_type = TREE_TYPE (def[i]);
153 else
154 {
155 /* We assume that the stmt contains only one constant
156@@ -249,22 +251,53 @@
157 the def-stmt/s of the first stmt. */
158 if ((i == 0
159 && (*first_stmt_dt0 != dt[i]
160- || (*first_stmt_def0_type && def
161+ || (*first_stmt_def0_type && def[0]
162 && !types_compatible_p (*first_stmt_def0_type,
163- TREE_TYPE (def)))))
164+ TREE_TYPE (def[0])))))
165 || (i == 1
166 && (*first_stmt_dt1 != dt[i]
167- || (*first_stmt_def1_type && def
168+ || (*first_stmt_def1_type && def[1]
169 && !types_compatible_p (*first_stmt_def1_type,
170- TREE_TYPE (def)))))
171- || (!def
172+ TREE_TYPE (def[1])))))
173+ || (!def[i]
174 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
175- TREE_TYPE (oprnd))))
176+ TREE_TYPE (oprnd)))
177+ || different_types)
178 {
179- if (vect_print_dump_info (REPORT_SLP))
180- fprintf (vect_dump, "Build SLP failed: different types ");
181+ if (i != number_of_oprnds - 1)
182+ different_types = true;
183+ else
184+ {
185+ if (is_gimple_assign (stmt)
186+ && (rhs_code = gimple_assign_rhs_code (stmt))
187+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
188+ && commutative_tree_code (rhs_code)
189+ && *first_stmt_dt0 == dt[1]
190+ && *first_stmt_dt1 == dt[0]
191+ && def[0] && def[1]
192+ && !(*first_stmt_def0_type
193+ && !types_compatible_p (*first_stmt_def0_type,
194+ TREE_TYPE (def[1])))
195+ && !(*first_stmt_def1_type
196+ && !types_compatible_p (*first_stmt_def1_type,
197+ TREE_TYPE (def[0]))))
198+ {
199+ if (vect_print_dump_info (REPORT_SLP))
200+ {
201+ fprintf (vect_dump, "Swapping operands of ");
202+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
203+ }
204+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
205+ gimple_assign_rhs2_ptr (stmt));
206+ }
207+ else
208+ {
209+ if (vect_print_dump_info (REPORT_SLP))
210+ fprintf (vect_dump, "Build SLP failed: different types ");
211
212- return false;
213+ return false;
214+ }
215+ }
216 }
217 }
218 }
219@@ -278,10 +311,10 @@
220
221 case vect_internal_def:
222 case vect_reduction_def:
223- if (i == 0)
224+ if ((i == 0 && !different_types) || (i == 1 && different_types))
225 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
226 else
227- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
228+ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
229 break;
230
231 default:
232@@ -289,7 +322,7 @@
233 if (vect_print_dump_info (REPORT_SLP))
234 {
235 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
236- print_generic_expr (vect_dump, def, TDF_SLIM);
237+ print_generic_expr (vect_dump, def[i], TDF_SLIM);
238 }
239
240 return false;
241@@ -312,7 +345,7 @@
242 int ncopies_for_cost, unsigned int *max_nunits,
243 VEC (int, heap) **load_permutation,
244 VEC (slp_tree, heap) **loads,
245- unsigned int vectorization_factor)
246+ unsigned int vectorization_factor, bool *loads_permuted)
247 {
248 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
249 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
250@@ -523,7 +556,9 @@
251
252 /* Check that the size of interleaved loads group is not
253 greater than the SLP group size. */
254- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
255+ if (loop_vinfo
256+ && DR_GROUP_SIZE (vinfo_for_stmt (stmt))
257+ > ncopies * group_size)
258 {
259 if (vect_print_dump_info (REPORT_SLP))
260 {
261@@ -644,19 +679,22 @@
262 /* Strided loads were reached - stop the recursion. */
263 if (stop_recursion)
264 {
265+ VEC_safe_push (slp_tree, heap, *loads, *node);
266 if (permutation)
267 {
268- VEC_safe_push (slp_tree, heap, *loads, *node);
269+
270+ *loads_permuted = true;
271 *inside_cost
272 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
273 * group_size;
274 }
275 else
276- {
277- /* We don't check here complex numbers chains, so we keep them in
278- LOADS for further check in vect_supported_load_permutation_p. */
279+ {
280+ /* We don't check here complex numbers chains, so we set
281+ LOADS_PERMUTED for further check in
282+ vect_supported_load_permutation_p. */
283 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
284- VEC_safe_push (slp_tree, heap, *loads, *node);
285+ *loads_permuted = true;
286 }
287
288 return true;
289@@ -675,7 +713,7 @@
290 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
291 inside_cost, outside_cost, ncopies_for_cost,
292 max_nunits, load_permutation, loads,
293- vectorization_factor))
294+ vectorization_factor, loads_permuted))
295 return false;
296
297 SLP_TREE_LEFT (*node) = left_node;
298@@ -693,7 +731,7 @@
299 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
300 inside_cost, outside_cost, ncopies_for_cost,
301 max_nunits, load_permutation, loads,
302- vectorization_factor))
303+ vectorization_factor, loads_permuted))
304 return false;
305
306 SLP_TREE_RIGHT (*node) = right_node;
307@@ -879,8 +917,10 @@
308 bool supported, bad_permutation = false;
309 sbitmap load_index;
310 slp_tree node, other_complex_node;
311- gimple stmt, first = NULL, other_node_first;
312+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
313 unsigned complex_numbers = 0;
314+ struct data_reference *dr;
315+ bb_vec_info bb_vinfo;
316
317 /* FORNOW: permutations are only supported in SLP. */
318 if (!slp_instn)
319@@ -1040,6 +1080,76 @@
320 }
321 }
322
323+ /* In basic block vectorization we allow any subchain of an interleaving
324+ chain.
325+ FORNOW: not supported in loop SLP because of realignment complications. */
326+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
327+ bad_permutation = false;
328+ /* Check that for every node in the instance the loads form a subchain. */
329+ if (bb_vinfo)
330+ {
331+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
332+ {
333+ next_load = NULL;
334+ first_load = NULL;
335+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
336+ {
337+ if (!first_load)
338+ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load));
339+ else if (first_load
340+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load)))
341+ {
342+ bad_permutation = true;
343+ break;
344+ }
345+
346+ if (j != 0 && next_load != load)
347+ {
348+ bad_permutation = true;
349+ break;
350+ }
351+
352+ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load));
353+ }
354+
355+ if (bad_permutation)
356+ break;
357+ }
358+
359+ /* Check that the alignment of the first load in every subchain, i.e.,
360+ the first statement in every load node, is supported. */
361+ if (!bad_permutation)
362+ {
363+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
364+ {
365+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
366+ if (first_load
367+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load)))
368+ {
369+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
370+ if (vect_supportable_dr_alignment (dr, false)
371+ == dr_unaligned_unsupported)
372+ {
373+ if (vect_print_dump_info (REPORT_SLP))
374+ {
375+ fprintf (vect_dump, "unsupported unaligned load ");
376+ print_gimple_stmt (vect_dump, first_load, 0,
377+ TDF_SLIM);
378+ }
379+ bad_permutation = true;
380+ break;
381+ }
382+ }
383+ }
384+
385+ if (!bad_permutation)
386+ {
387+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
388+ return true;
389+ }
390+ }
391+ }
392+
393 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
394 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
395 well (unless it's reduction). */
396@@ -1149,6 +1259,7 @@
397 VEC (int, heap) *load_permutation;
398 VEC (slp_tree, heap) *loads;
399 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
400+ bool loads_permuted = false;
401
402 if (dr)
403 {
404@@ -1238,7 +1349,7 @@
405 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
406 &inside_cost, &outside_cost, ncopies_for_cost,
407 &max_nunits, &load_permutation, &loads,
408- vectorization_factor))
409+ vectorization_factor, &loads_permuted))
410 {
411 /* Calculate the unrolling factor based on the smallest type. */
412 if (max_nunits > nunits)
413@@ -1263,7 +1374,8 @@
414 SLP_INSTANCE_LOADS (new_instance) = loads;
415 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
416 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
417- if (VEC_length (slp_tree, loads))
418+
419+ if (loads_permuted)
420 {
421 if (!vect_supported_load_permutation_p (new_instance, group_size,
422 load_permutation))
423@@ -2542,10 +2654,11 @@
424 /* Loads should be inserted before the first load. */
425 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
426 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
427- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
428+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
429+ && SLP_INSTANCE_LOAD_PERMUTATION (instance))
430 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
431 else if (is_pattern_stmt_p (stmt_info))
432- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
433+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
434 else
435 si = gsi_for_stmt (stmt);
436
437
438=== modified file 'gcc/tree-vect-stmts.c'
439--- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000
440+++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000
441@@ -4285,6 +4285,11 @@
442 if (strided_load)
443 {
444 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
445+ if (slp
446+ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
447+ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
448+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
449+
450 /* Check if the chain of loads is already vectorized. */
451 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
452 {
453
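As a minimal sketch of the kind of straight-line code the basic-block subchain support above is meant to handle (illustrative only, not from the patch; whether it actually vectorizes still depends on target support and alignment), the four loads below come from a single interleaving chain and are used in order, so the recorded load permutation degenerates to a subchain and can be discarded:

    /* Hypothetical basic-block SLP candidate.  */
    void
    bb_slp (int *restrict a, int *restrict b)
    {
      b[0] = a[0] + 1;
      b[1] = a[1] + 1;
      b[2] = a[2] + 1;
      b[3] = a[3] + 1;
    }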
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch
new file mode 100644
index 000000000..421a8fe3a
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch
@@ -0,0 +1,1505 @@
1 2011-10-18 Ira Rosen <ira.rosen@linaro.org>
2
3 gcc/
4 * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
5 vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document.
6 * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR,
7 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
8 (op_code_prio): Likewise.
9 (op_symbol_code): Handle WIDEN_LSHIFT_EXPR.
10 * optabs.c (optab_for_tree_code): Handle
11 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
12 (init_optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
13 * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
14 * genopinit.c (optabs): Initialize the new optabs.
15 * expr.c (expand_expr_real_2): Handle
16 VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
17 * gimple-pretty-print.c (dump_binary_rhs): Likewise.
18 * tree-vectorizer.h (NUM_PATTERNS): Increase to 8.
19 * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR,
20 VEC_WIDEN_LSHIFT_LO_EXPR): New.
21 * cfgexpand.c (expand_debug_expr): Handle new tree codes.
22 * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
23 vect_recog_widen_shift_pattern.
24 (vect_handle_widen_mult_by_const): Rename...
25 (vect_handle_widen_op_by_const): ...to this. Handle shifts.
26 Add a new argument, update documentation.
27 (vect_recog_widen_mult_pattern): Assume that only second
28 operand can be constant. Update call to
29 vect_handle_widen_op_by_const.
30 (vect_recog_over_widening_pattern): Fix typo.
31 (vect_recog_widen_shift_pattern): New.
32 * tree-vect-stmts.c (vectorizable_type_promotion): Handle
33 widening shifts.
34 (supportable_widening_operation): Likewise.
35 * tree-inline.c (estimate_operator_cost): Handle new tree codes.
36 * tree-vect-generic.c (expand_vector_operations_1): Likewise.
37 * tree-cfg.c (verify_gimple_assign_binary): Likewise.
38 * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New.
39 (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>,
40 vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>):
41 Likewise.
42 * config/arm/predicates.md (const_neon_scalar_shift_amount_operand):
43 New.
44 * config/arm/iterators.md (V_innermode): New.
45 * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand
46 for widening shift.
47
48 gcc/testsuite
49 * testsuite/lib/target-supports.exp
50 (check_effective_target_vect_widen_shift): New.
51 * gcc.dg/vect/vect-widen-shift-s16.c: New.
52 * gcc.dg/vect/vect-widen-shift-s8.c: New.
53 * gcc.dg/vect/vect-widen-shift-u16.c: New.
54 * gcc.dg/vect/vect-widen-shift-u8.c: New.
55
56 2011-10-06 Jakub Jelinek <jakub@redhat.com>
57
58 gcc/
59 * tree-vect-patterns.c (vect_pattern_recog_1): Use
60 vect_recog_func_ptr typedef for the first argument.
61 (vect_pattern_recog): Rename vect_recog_func_ptr variable
62 to vect_recog_func, use vect_recog_func_ptr typedef for it.
63
64 2011-10-16 Ira Rosen <ira.rosen@linaro.org>
65
66 gcc/
67 PR tree-optimization/50727
68 * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add
69 DEF_STMT to the list of statements to be replaced by the
70 pattern statements.
71
72 2011-10-09 Ira Rosen <ira.rosen@linaro.org>
73
74 gcc/
75 PR tree-optimization/50635
76 * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add
77 DEF_STMT to the list of statements to be replaced by the
78 pattern statements.
79 (vect_handle_widen_mult_by_const): Don't check TYPE_OUT.
80
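The scalar idiom this patch targets is a promotion followed by a left shift by a constant, as in the following condensed version of the new tests added below (a sketch, assuming a little-endian NEON target for the vshll mapping):

    void
    foo (short *src, int *dst, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        dst[i] = src[i] << 10;   /* (int) src[i] << 10  ->  src[i] w<< 10  */
    }

vect_recog_widen_shift_pattern rewrites the promote-then-shift pair into a single WIDEN_LSHIFT_EXPR, which the vectorizer then expands through the new vec_widen_<US>shiftl_hi/lo optabs.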
81=== modified file 'gcc/cfgexpand.c'
82--- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000
83+++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000
84@@ -3215,6 +3215,8 @@
85 case VEC_UNPACK_LO_EXPR:
86 case VEC_WIDEN_MULT_HI_EXPR:
87 case VEC_WIDEN_MULT_LO_EXPR:
88+ case VEC_WIDEN_LSHIFT_HI_EXPR:
89+ case VEC_WIDEN_LSHIFT_LO_EXPR:
90 return NULL;
91
92 /* Misc codes. */
93
94=== modified file 'gcc/config/arm/iterators.md'
95--- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000
96+++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000
97@@ -388,6 +388,9 @@
98 (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
99 (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
100
101+;; Mode attribute for vshll.
102+(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
103+
104 ;;----------------------------------------------------------------------------
105 ;; Code attributes
106 ;;----------------------------------------------------------------------------
107
108=== modified file 'gcc/config/arm/neon.md'
109--- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000
110+++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000
111@@ -5316,6 +5316,44 @@
112 }
113 )
114
115+(define_insn "neon_vec_<US>shiftl_<mode>"
116+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
117+ (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
118+ (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
119+ "TARGET_NEON"
120+{
121+ return "vshll.<US><V_sz_elem> %q0, %P1, %2";
122+}
123+ [(set_attr "neon_type" "neon_shift_1")]
124+)
125+
126+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
127+ [(match_operand:<V_unpack> 0 "register_operand" "")
128+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
129+ (match_operand:SI 2 "immediate_operand" "i")]
130+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
131+ {
132+ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
133+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
134+ operands[2]));
135+ DONE;
136+ }
137+)
138+
139+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
140+ [(match_operand:<V_unpack> 0 "register_operand" "")
141+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
142+ (match_operand:SI 2 "immediate_operand" "i")]
143+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
144+ {
145+ emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
146+ simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
147+ GET_MODE_SIZE (<V_HALF>mode)),
148+ operands[2]));
149+ DONE;
150+ }
151+)
152+
153 ;; Vectorize for non-neon-quad case
154 (define_insn "neon_unpack<US>_<mode>"
155 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
156@@ -5392,6 +5430,34 @@
157 }
158 )
159
160+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
161+ [(match_operand:<V_double_width> 0 "register_operand" "")
162+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
163+ (match_operand:SI 2 "immediate_operand" "i")]
164+ "TARGET_NEON"
165+ {
166+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
167+ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
168+ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
169+
170+ DONE;
171+ }
172+)
173+
174+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
175+ [(match_operand:<V_double_width> 0 "register_operand" "")
176+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
177+ (match_operand:SI 2 "immediate_operand" "i")]
178+ "TARGET_NEON"
179+ {
180+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
181+ emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
182+ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
183+
184+ DONE;
185+ }
186+)
187+
188 ;; The case when using all quad registers.
189 (define_insn "vec_pack_trunc_<mode>"
190 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
191
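At the source level the lo/hi expanders above behave roughly like the following NEON-intrinsic sketch (an approximation for the little-endian case these expanders are restricted to; the intrinsic names come from arm_neon.h, not from this patch):

    #include <arm_neon.h>

    int32x4_t
    widen_shiftl_lo (int16x8_t v)      /* ~ vec_widen_sshiftl_lo_v8hi */
    {
      return vshll_n_s16 (vget_low_s16 (v), 10);   /* vshll.s16 q, d, #10 */
    }

    int32x4_t
    widen_shiftl_hi (int16x8_t v)      /* ~ vec_widen_sshiftl_hi_v8hi */
    {
      return vshll_n_s16 (vget_high_s16 (v), 10);
    }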
192=== modified file 'gcc/config/arm/predicates.md'
193--- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000
194+++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000
195@@ -136,6 +136,11 @@
196 (match_operand 0 "s_register_operand"))
197 (match_operand 0 "const_int_operand")))
198
199+(define_predicate "const_neon_scalar_shift_amount_operand"
200+ (and (match_code "const_int")
201+ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode)
202+ && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0")))
203+
204 (define_predicate "arm_add_operand"
205 (ior (match_operand 0 "arm_rhs_operand")
206 (match_operand 0 "arm_neg_immediate_operand")))
207
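In practice the new predicate accepts shift amounts from 1 up to the element width in bits: 1-8 for V8QI (QImode inner mode), 1-16 for V4HI, and 1-32 for V2SI.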
208=== modified file 'gcc/doc/md.texi'
209--- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000
210+++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000
211@@ -4230,6 +4230,17 @@
212 elements of the two vectors, and put the N/2 products of size 2*S in the
213 output vector (operand 0).
214
215+@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern
216+@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern
217+@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern
218+@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern
219+@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}}
220+@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}}
221+Signed/Unsigned widening shift left. The first input (operand 1) is a vector
222+with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift
223+the high/low elements of operand 1, and put the N/2 results of size 2*S in the
224+output vector (operand 0).
225+
226 @cindex @code{mulhisi3} instruction pattern
227 @item @samp{mulhisi3}
228 Multiply operands 1 and 2, which have mode @code{HImode}, and store
229
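As a worked example of the documented semantics (not from the patch): with an 8-element vector of 16-bit values {1,2,3,4,5,6,7,8} and a shift count of 4, vec_widen_sshiftl_lo produces the four 32-bit results {16,32,48,64} from the low elements, and vec_widen_sshiftl_hi produces {80,96,112,128} from the high ones.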
230=== modified file 'gcc/expr.c'
231--- old/gcc/expr.c 2011-08-25 11:42:09 +0000
232+++ new/gcc/expr.c 2011-10-23 13:33:07 +0000
233@@ -8290,6 +8290,19 @@
234 return target;
235 }
236
237+ case VEC_WIDEN_LSHIFT_HI_EXPR:
238+ case VEC_WIDEN_LSHIFT_LO_EXPR:
239+ {
240+ tree oprnd0 = treeop0;
241+ tree oprnd1 = treeop1;
242+
243+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
244+ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
245+ target, unsignedp);
246+ gcc_assert (target);
247+ return target;
248+ }
249+
250 case VEC_PACK_TRUNC_EXPR:
251 case VEC_PACK_SAT_EXPR:
252 case VEC_PACK_FIX_TRUNC_EXPR:
253
254=== modified file 'gcc/genopinit.c'
255--- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000
256+++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000
257@@ -268,6 +268,10 @@
258 "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
259 "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
260 "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
261+ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
262+ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
263+ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
264+ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))",
265 "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))",
266 "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))",
267 "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))",
268
269=== modified file 'gcc/gimple-pretty-print.c'
270--- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000
271+++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000
272@@ -343,6 +343,8 @@
273 case VEC_EXTRACT_ODD_EXPR:
274 case VEC_INTERLEAVE_HIGH_EXPR:
275 case VEC_INTERLEAVE_LOW_EXPR:
276+ case VEC_WIDEN_LSHIFT_HI_EXPR:
277+ case VEC_WIDEN_LSHIFT_LO_EXPR:
278 for (p = tree_code_name [(int) code]; *p; p++)
279 pp_character (buffer, TOUPPER (*p));
280 pp_string (buffer, " <");
281
282=== modified file 'gcc/optabs.c'
283--- old/gcc/optabs.c 2011-08-11 15:46:01 +0000
284+++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000
285@@ -454,6 +454,14 @@
286 return TYPE_UNSIGNED (type) ?
287 vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
288
289+ case VEC_WIDEN_LSHIFT_HI_EXPR:
290+ return TYPE_UNSIGNED (type) ?
291+ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
292+
293+ case VEC_WIDEN_LSHIFT_LO_EXPR:
294+ return TYPE_UNSIGNED (type) ?
295+ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
296+
297 case VEC_UNPACK_HI_EXPR:
298 return TYPE_UNSIGNED (type) ?
299 vec_unpacku_hi_optab : vec_unpacks_hi_optab;
300@@ -6351,6 +6359,10 @@
301 init_optab (vec_widen_umult_lo_optab, UNKNOWN);
302 init_optab (vec_widen_smult_hi_optab, UNKNOWN);
303 init_optab (vec_widen_smult_lo_optab, UNKNOWN);
304+ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
305+ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
306+ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
307+ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
308 init_optab (vec_unpacks_hi_optab, UNKNOWN);
309 init_optab (vec_unpacks_lo_optab, UNKNOWN);
310 init_optab (vec_unpacku_hi_optab, UNKNOWN);
311
312=== modified file 'gcc/optabs.h'
313--- old/gcc/optabs.h 2011-07-27 14:12:45 +0000
314+++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000
315@@ -350,6 +350,12 @@
316 OTI_vec_widen_umult_lo,
317 OTI_vec_widen_smult_hi,
318 OTI_vec_widen_smult_lo,
319+ /* Widening shift left.
320+ The high/low part of the resulting vector is returned. */
321+ OTI_vec_widen_ushiftl_hi,
322+ OTI_vec_widen_ushiftl_lo,
323+ OTI_vec_widen_sshiftl_hi,
324+ OTI_vec_widen_sshiftl_lo,
325 /* Extract and widen the high/low part of a vector of signed or
326 floating point elements. */
327 OTI_vec_unpacks_hi,
328@@ -542,6 +548,10 @@
329 #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
330 #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
331 #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
332+#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
333+#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
334+#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
335+#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo])
336 #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi])
337 #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo])
338 #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi])
339
340=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c'
341--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000
342+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000
343@@ -0,0 +1,107 @@
344+/* { dg-require-effective-target vect_int } */
345+/* { dg-require-effective-target vect_shift } */
346+
347+#include <stdarg.h>
348+#include "tree-vect.h"
349+
350+#define N 64
351+#define C 16
352+
353+__attribute__ ((noinline)) void
354+foo (short *src, int *dst)
355+{
356+ int i;
357+ short b, b0, b1, b2, b3, *s = src;
358+ int *d = dst;
359+
360+ for (i = 0; i < N/4; i++)
361+ {
362+ b0 = *s++;
363+ b1 = *s++;
364+ b2 = *s++;
365+ b3 = *s++;
366+ *d = b0 << C;
367+ d++;
368+ *d = b1 << C;
369+ d++;
370+ *d = b2 << C;
371+ d++;
372+ *d = b3 << C;
373+ d++;
374+ }
375+
376+ s = src;
377+ d = dst;
378+ for (i = 0; i < N; i++)
379+ {
380+ b = *s++;
381+ if (*d != b << C)
382+ abort ();
383+ d++;
384+ }
385+
386+ s = src;
387+ d = dst;
388+ for (i = 0; i < N/4; i++)
389+ {
390+ b0 = *s++;
391+ b1 = *s++;
392+ b2 = *s++;
393+ b3 = *s++;
394+ *d = b0 << C;
395+ d++;
396+ *d = b1 << C;
397+ d++;
398+ *d = b2 << C;
399+ d++;
400+ *d = b3 << 6;
401+ d++;
402+ }
403+
404+ s = src;
405+ d = dst;
406+ for (i = 0; i < N/4; i++)
407+ {
408+ b = *s++;
409+ if (*d != b << C)
410+ abort ();
411+ d++;
412+ b = *s++;
413+ if (*d != b << C)
414+ abort ();
415+ d++;
416+ b = *s++;
417+ if (*d != b << C)
418+ abort ();
419+ d++;
420+ b = *s++;
421+ if (*d != b << 6)
422+ abort ();
423+ d++;
424+ }
425+}
426+
427+int main (void)
428+{
429+ int i;
430+ short in[N];
431+ int out[N];
432+
433+ check_vect ();
434+
435+ for (i = 0; i < N; i++)
436+ {
437+ in[i] = i;
438+ out[i] = 255;
439+ __asm__ volatile ("");
440+ }
441+
442+ foo (in, out);
443+
444+ return 0;
445+}
446+
447+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */
448+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
449+/* { dg-final { cleanup-tree-dump "vect" } } */
450+
451
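The lone "b3 << 6" in the second pair of loops above looks deliberate: it mixes shift amounts within one SLP group, which matters for the "require same shift operand" change to tree-vect-slp.c later in this patch.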
452=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c'
453--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000
454+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000
455@@ -0,0 +1,58 @@
456+/* { dg-require-effective-target vect_int } */
457+/* { dg-require-effective-target vect_shift } */
458+
459+#include <stdarg.h>
460+#include "tree-vect.h"
461+
462+#define N 64
463+#define C 12
464+
465+__attribute__ ((noinline)) void
466+foo (char *src, int *dst)
467+{
468+ int i;
469+ char b, *s = src;
470+ int *d = dst;
471+
472+ for (i = 0; i < N; i++)
473+ {
474+ b = *s++;
475+ *d = b << C;
476+ d++;
477+ }
478+
479+ s = src;
480+ d = dst;
481+ for (i = 0; i < N; i++)
482+ {
483+ b = *s++;
484+ if (*d != b << C)
485+ abort ();
486+ d++;
487+ }
488+}
489+
490+int main (void)
491+{
492+ int i;
493+ char in[N];
494+ int out[N];
495+
496+ check_vect ();
497+
498+ for (i = 0; i < N; i++)
499+ {
500+ in[i] = i;
501+ out[i] = 255;
502+ __asm__ volatile ("");
503+ }
504+
505+ foo (in, out);
506+
507+ return 0;
508+}
509+
510+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
511+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
512+/* { dg-final { cleanup-tree-dump "vect" } } */
513+
514
515=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c'
516--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000
517+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000
518@@ -0,0 +1,58 @@
519+/* { dg-require-effective-target vect_int } */
520+/* { dg-require-effective-target vect_shift } */
521+
522+#include <stdarg.h>
523+#include "tree-vect.h"
524+
525+#define N 64
526+#define C 7
527+
528+__attribute__ ((noinline)) void
529+foo (unsigned short *src, unsigned int *dst)
530+{
531+ int i;
532+ unsigned short b, *s = src;
533+ unsigned int *d = dst;
534+
535+ for (i = 0; i < N; i++)
536+ {
537+ b = *s++;
538+ *d = b << C;
539+ d++;
540+ }
541+
542+ s = src;
543+ d = dst;
544+ for (i = 0; i < N; i++)
545+ {
546+ b = *s++;
547+ if (*d != b << C)
548+ abort ();
549+ d++;
550+ }
551+}
552+
553+int main (void)
554+{
555+ int i;
556+ unsigned short in[N];
557+ unsigned int out[N];
558+
559+ check_vect ();
560+
561+ for (i = 0; i < N; i++)
562+ {
563+ in[i] = i;
564+ out[i] = 255;
565+ __asm__ volatile ("");
566+ }
567+
568+ foo (in, out);
569+
570+ return 0;
571+}
572+
573+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
574+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
575+/* { dg-final { cleanup-tree-dump "vect" } } */
576+
577
578=== added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c'
579--- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000
580+++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000
581@@ -0,0 +1,65 @@
582+/* { dg-require-effective-target vect_int } */
583+/* { dg-require-effective-target vect_shift } */
584+
585+#include <stdarg.h>
586+#include "tree-vect.h"
587+
588+#define N 64
589+#define C1 10
590+#define C2 5
591+
592+__attribute__ ((noinline)) void
593+foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2)
594+{
595+ int i;
596+ unsigned char b, *s = src;
597+ unsigned int *d1 = dst1, *d2 = dst2;
598+
599+ for (i = 0; i < N; i++)
600+ {
601+ b = *s++;
602+ *d1 = b << C1;
603+ d1++;
604+ *d2 = b << C2;
605+ d2++;
606+ }
607+
608+ s = src;
609+ d1 = dst1;
610+ d2 = dst2;
611+ for (i = 0; i < N; i++)
612+ {
613+ b = *s++;
614+ if (*d1 != b << C1 || *d2 != b << C2)
615+ abort ();
616+ d1++;
617+ d2++;
618+ }
619+}
620+
621+int main (void)
622+{
623+ int i;
624+ unsigned char in[N];
625+ unsigned int out1[N];
626+ unsigned int out2[N];
627+
628+ check_vect ();
629+
630+ for (i = 0; i < N; i++)
631+ {
632+ in[i] = i;
633+ out1[i] = 255;
634+ out2[i] = 255;
635+ __asm__ volatile ("");
636+ }
637+
638+ foo (in, out1, out2);
639+
640+ return 0;
641+}
642+
643+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
644+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
645+/* { dg-final { cleanup-tree-dump "vect" } } */
646+
647
648=== modified file 'gcc/testsuite/lib/target-supports.exp'
649--- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000
650+++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000
651@@ -2783,6 +2783,26 @@
652 }
653
654 # Return 1 if the target plus current options supports a vector
655+# widening shift, 0 otherwise.
656+#
657+# This won't change for different subtargets so cache the result.
658+
659+proc check_effective_target_vect_widen_shift { } {
660+ global et_vect_widen_shift_saved
661+
662+ if [info exists et_vect_widen_shift_saved] {
663+ verbose "check_effective_target_vect_widen_shift: using cached result" 2
664+ } else {
665+ set et_vect_widen_shift_saved 0
666+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
667+ set et_vect_widen_shift_saved 1
668+ }
669+ }
670+ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2
671+ return $et_vect_widen_shift_saved
672+}
673+
674+# Return 1 if the target plus current options supports a vector
675 # dot-product of signed chars, 0 otherwise.
676 #
677 # This won't change for different subtargets so cache the result.
678
679=== modified file 'gcc/tree-cfg.c'
680--- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000
681+++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000
682@@ -3473,6 +3473,44 @@
683 return false;
684 }
685
686+ case WIDEN_LSHIFT_EXPR:
687+ {
688+ if (!INTEGRAL_TYPE_P (lhs_type)
689+ || !INTEGRAL_TYPE_P (rhs1_type)
690+ || TREE_CODE (rhs2) != INTEGER_CST
691+ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)))
692+ {
693+ error ("type mismatch in widening vector shift expression");
694+ debug_generic_expr (lhs_type);
695+ debug_generic_expr (rhs1_type);
696+ debug_generic_expr (rhs2_type);
697+ return true;
698+ }
699+
700+ return false;
701+ }
702+
703+ case VEC_WIDEN_LSHIFT_HI_EXPR:
704+ case VEC_WIDEN_LSHIFT_LO_EXPR:
705+ {
706+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
707+ || TREE_CODE (lhs_type) != VECTOR_TYPE
708+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
709+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type))
710+ || TREE_CODE (rhs2) != INTEGER_CST
711+ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type))
712+ > TYPE_PRECISION (TREE_TYPE (lhs_type))))
713+ {
714+ error ("type mismatch in widening vector shift expression");
715+ debug_generic_expr (lhs_type);
716+ debug_generic_expr (rhs1_type);
717+ debug_generic_expr (rhs2_type);
718+ return true;
719+ }
720+
721+ return false;
722+ }
723+
724 case PLUS_EXPR:
725 case MINUS_EXPR:
726 {
727
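Concretely, the 2 * TYPE_PRECISION (rhs1_type) <= TYPE_PRECISION (lhs_type) requirement encoded above means a widening shift from 16-bit short to 32-bit int verifies, while one whose result type is less than double the input width is reported as a type mismatch.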
728=== modified file 'gcc/tree-inline.c'
729--- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000
730+++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000
731@@ -3343,6 +3343,7 @@
732 case DOT_PROD_EXPR:
733 case WIDEN_MULT_PLUS_EXPR:
734 case WIDEN_MULT_MINUS_EXPR:
735+ case WIDEN_LSHIFT_EXPR:
736
737 case VEC_WIDEN_MULT_HI_EXPR:
738 case VEC_WIDEN_MULT_LO_EXPR:
739@@ -3357,6 +3358,8 @@
740 case VEC_EXTRACT_ODD_EXPR:
741 case VEC_INTERLEAVE_HIGH_EXPR:
742 case VEC_INTERLEAVE_LOW_EXPR:
743+ case VEC_WIDEN_LSHIFT_HI_EXPR:
744+ case VEC_WIDEN_LSHIFT_LO_EXPR:
745
746 return 1;
747
748
749=== modified file 'gcc/tree-pretty-print.c'
750--- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000
751+++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000
752@@ -1539,6 +1539,7 @@
753 case RROTATE_EXPR:
754 case VEC_LSHIFT_EXPR:
755 case VEC_RSHIFT_EXPR:
756+ case WIDEN_LSHIFT_EXPR:
757 case BIT_IOR_EXPR:
758 case BIT_XOR_EXPR:
759 case BIT_AND_EXPR:
760@@ -2209,6 +2210,22 @@
761 pp_string (buffer, " > ");
762 break;
763
764+ case VEC_WIDEN_LSHIFT_HI_EXPR:
765+ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
766+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
767+ pp_string (buffer, ", ");
768+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
769+ pp_string (buffer, " > ");
770+ break;
771+
772+ case VEC_WIDEN_LSHIFT_LO_EXPR:
773+ pp_string (buffer, " VEC_WIDEN_LSHIFT_LO_EXPR < ");
774+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
775+ pp_string (buffer, ", ");
776+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
777+ pp_string (buffer, " > ");
778+ break;
779+
780 case VEC_UNPACK_HI_EXPR:
781 pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
782 dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
783@@ -2531,6 +2548,9 @@
784 case RSHIFT_EXPR:
785 case LROTATE_EXPR:
786 case RROTATE_EXPR:
787+ case VEC_WIDEN_LSHIFT_HI_EXPR:
788+ case VEC_WIDEN_LSHIFT_LO_EXPR:
789+ case WIDEN_LSHIFT_EXPR:
790 return 11;
791
792 case WIDEN_SUM_EXPR:
793@@ -2706,6 +2726,9 @@
794 case VEC_RSHIFT_EXPR:
795 return "v>>";
796
797+ case WIDEN_LSHIFT_EXPR:
798+ return "w<<";
799+
800 case POINTER_PLUS_EXPR:
801 return "+";
802
803
804=== modified file 'gcc/tree-vect-generic.c'
805--- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000
806+++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000
807@@ -552,7 +552,9 @@
808 || code == VEC_UNPACK_LO_EXPR
809 || code == VEC_PACK_TRUNC_EXPR
810 || code == VEC_PACK_SAT_EXPR
811- || code == VEC_PACK_FIX_TRUNC_EXPR)
812+ || code == VEC_PACK_FIX_TRUNC_EXPR
813+ || code == VEC_WIDEN_LSHIFT_HI_EXPR
814+ || code == VEC_WIDEN_LSHIFT_LO_EXPR)
815 type = TREE_TYPE (rhs1);
816
817 /* Optabs will try converting a negation into a subtraction, so
818
819=== modified file 'gcc/tree-vect-patterns.c'
820--- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000
821+++ new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000
822@@ -48,12 +48,15 @@
823 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
824 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
825 tree *);
826+static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
827+ tree *, tree *);
828 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
829 vect_recog_widen_mult_pattern,
830 vect_recog_widen_sum_pattern,
831 vect_recog_dot_prod_pattern,
832 vect_recog_pow_pattern,
833- vect_recog_over_widening_pattern};
834+ vect_recog_over_widening_pattern,
835+ vect_recog_widen_shift_pattern};
836
837
838 /* Function widened_name_p
839@@ -331,27 +334,38 @@
840 return pattern_stmt;
841 }
842
843-/* Handle two cases of multiplication by a constant. The first one is when
844- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
845- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
846- TYPE.
847+
848+/* Handle widening operation by a constant. At the moment we support MULT_EXPR
849+ and LSHIFT_EXPR.
850+
851+ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
852+ we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
853
854 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
855- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
856- TYPE), we can perform widen-mult from the intermediate type to TYPE and
857- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
858+ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
859+ that satisfies the above restrictions, we can perform a widening operation
860+ from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
861+ with a_it = (interm_type) a_t; */
862
863 static bool
864-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
865- VEC (gimple, heap) **stmts, tree type,
866- tree *half_type, gimple def_stmt)
867+vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
868+ tree const_oprnd, tree *oprnd,
869+ VEC (gimple, heap) **stmts, tree type,
870+ tree *half_type, gimple def_stmt)
871 {
872 tree new_type, new_oprnd, tmp;
873 gimple new_stmt;
874 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
875 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
876
877- if (int_fits_type_p (const_oprnd, *half_type))
878+ if (code != MULT_EXPR && code != LSHIFT_EXPR)
879+ return false;
880+
881+ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
882+ || (code == LSHIFT_EXPR
883+ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
884+ != 1))
885+ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
886 {
887 /* CONST_OPRND is a constant of HALF_TYPE. */
888 *oprnd = gimple_assign_rhs1 (def_stmt);
889@@ -364,14 +378,16 @@
890 || !vinfo_for_stmt (def_stmt))
891 return false;
892
893- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
894+ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
895 a type 2 times bigger than HALF_TYPE. */
896 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
897 TYPE_UNSIGNED (type));
898- if (!int_fits_type_p (const_oprnd, new_type))
899+ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
900+ || (code == LSHIFT_EXPR
901+ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
902 return false;
903
904- /* Use NEW_TYPE for widen_mult. */
905+ /* Use NEW_TYPE for widening operation. */
906 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
907 {
908 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
909@@ -381,6 +397,7 @@
910 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
911 return false;
912
913+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
914 *oprnd = gimple_assign_lhs (new_stmt);
915 }
916 else
917@@ -392,7 +409,6 @@
918 new_oprnd = make_ssa_name (tmp, NULL);
919 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
920 NULL_TREE);
921- SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
922 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
923 VEC_safe_push (gimple, heap, *stmts, def_stmt);
924 *oprnd = new_oprnd;
925@@ -402,7 +418,6 @@
926 return true;
927 }
928
929-
930 /* Function vect_recog_widen_mult_pattern
931
932 Try to find the following pattern:
933@@ -491,7 +506,7 @@
934 enum tree_code dummy_code;
935 int dummy_int;
936 VEC (tree, heap) *dummy_vec;
937- bool op0_ok, op1_ok;
938+ bool op1_ok;
939
940 if (!is_gimple_assign (last_stmt))
941 return NULL;
942@@ -511,38 +526,23 @@
943 return NULL;
944
945 /* Check argument 0. */
946- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
947+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
948+ return NULL;
949 /* Check argument 1. */
950 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
951
952- /* In case of multiplication by a constant one of the operands may not match
953- the pattern, but not both. */
954- if (!op0_ok && !op1_ok)
955- return NULL;
956-
957- if (op0_ok && op1_ok)
958+ if (op1_ok)
959 {
960 oprnd0 = gimple_assign_rhs1 (def_stmt0);
961 oprnd1 = gimple_assign_rhs1 (def_stmt1);
962 }
963- else if (!op0_ok)
964- {
965- if (TREE_CODE (oprnd0) == INTEGER_CST
966- && TREE_CODE (half_type1) == INTEGER_TYPE
967- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
968- stmts, type,
969- &half_type1, def_stmt1))
970- half_type0 = half_type1;
971- else
972- return NULL;
973- }
974- else if (!op1_ok)
975+ else
976 {
977 if (TREE_CODE (oprnd1) == INTEGER_CST
978 && TREE_CODE (half_type0) == INTEGER_TYPE
979- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
980- stmts, type,
981- &half_type0, def_stmt0))
982+ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
983+ &oprnd0, stmts, type,
984+ &half_type0, def_stmt0))
985 half_type1 = half_type0;
986 else
987 return NULL;
988@@ -998,6 +998,7 @@
989 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
990 return false;
991
992+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
993 oprnd = gimple_assign_lhs (new_stmt);
994 }
995 else
996@@ -1128,7 +1129,7 @@
997 statetments, except for the case when the last statement in the
998 sequence doesn't have a corresponding pattern statement. In such
999 case we associate the last pattern statement with the last statement
1000- in the sequence. Therefore, we only add an original statetement to
1001+ in the sequence. Therefore, we only add the original statement to
1002 the list if we know that it is not the last. */
1003 if (prev_stmt)
1004 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1005@@ -1215,6 +1216,231 @@
1006 }
1007
1008
1009+/* Detect widening shift pattern:
1010+
1011+ type a_t;
1012+ TYPE a_T, res_T;
1013+
1014+ S1 a_t = ;
1015+ S2 a_T = (TYPE) a_t;
1016+ S3 res_T = a_T << CONST;
1017+
1018+ where type 'TYPE' is at least double the size of type 'type'.
1019+
1020+ Also detect unsigned cases:
1021+
1022+ unsigned type a_t;
1023+ unsigned TYPE u_res_T;
1024+ TYPE a_T, res_T;
1025+
1026+ S1 a_t = ;
1027+ S2 a_T = (TYPE) a_t;
1028+ S3 res_T = a_T << CONST;
1029+ S4 u_res_T = (unsigned TYPE) res_T;
1030+
1031+ There is also a case when 'TYPE' is 4 times bigger than 'type'. In that
1032+ case we create an additional pattern stmt for S2 that produces a variable
1033+ of an intermediate type, and perform widen-shift on the intermediate type:
1034+
1035+ type a_t;
1036+ interm_type a_it;
1037+ TYPE a_T, res_T, res_T';
1038+
1039+ S1 a_t = ;
1040+ S2 a_T = (TYPE) a_t;
1041+ '--> a_it = (interm_type) a_t;
1042+ S3 res_T = a_T << CONST;
1043+ '--> res_T' = a_it <<* CONST;
1044+
1045+ Input/Output:
1046+
1047+ * STMTS: Contains a stmt from which the pattern search begins.
1048+ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1049+ in STMTS. When an intermediate type is used and a pattern statement is
1050+ created for S2, we also put S2 here (before S3).
1051+
1052+ Output:
1053+
1054+ * TYPE_IN: The type of the input arguments to the pattern.
1055+
1056+ * TYPE_OUT: The type of the output of this pattern.
1057+
1058+ * Return value: A new stmt that will be used to replace the sequence of
1059+ stmts that constitute the pattern. In this case it will be:
1060+ WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1061+
1062+static gimple
1063+vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1064+ tree *type_in, tree *type_out)
1065+{
1066+ gimple last_stmt = VEC_pop (gimple, *stmts);
1067+ gimple def_stmt0;
1068+ tree oprnd0, oprnd1;
1069+ tree type, half_type0;
1070+ gimple pattern_stmt, orig_stmt = NULL;
1071+ tree vectype, vectype_out = NULL_TREE;
1072+ tree dummy;
1073+ tree var;
1074+ enum tree_code dummy_code;
1075+ int dummy_int;
1076+ VEC (tree, heap) * dummy_vec;
1077+ gimple use_stmt = NULL;
1078+ bool over_widen = false;
1079+
1080+ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1081+ return NULL;
1082+
1083+ orig_stmt = last_stmt;
1084+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1085+ {
1086+ /* This statement was also detected as an over-widening operation (it can't
1087+ be any other pattern, because only over-widening detects shifts).
1088+ LAST_STMT is the final type demotion statement, but its related
1089+ statement is shift. We analyze the related statement to catch cases:
1090+
1091+ orig code:
1092+ type a_t;
1093+ itype res;
1094+ TYPE a_T, res_T;
1095+
1096+ S1 a_T = (TYPE) a_t;
1097+ S2 res_T = a_T << CONST;
1098+ S3 res = (itype)res_T;
1099+
1100+ (size of type * 2 <= size of itype
1101+ and size of itype * 2 <= size of TYPE)
1102+
1103+ code after over-widening pattern detection:
1104+
1105+ S1 a_T = (TYPE) a_t;
1106+ --> a_it = (itype) a_t;
1107+ S2 res_T = a_T << CONST;
1108+ S3 res = (itype)res_T; <--- LAST_STMT
1109+ --> res = a_it << CONST;
1110+
1111+ after widen_shift:
1112+
1113+ S1 a_T = (TYPE) a_t;
1114+ --> a_it = (itype) a_t; - redundant
1115+ S2 res_T = a_T << CONST;
1116+ S3 res = (itype)res_T;
1117+ --> res = a_t w<< CONST;
1118+
1119+ i.e., we replace the three statements with res = a_t w<< CONST. */
1120+ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1121+ over_widen = true;
1122+ }
1123+
1124+ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1125+ return NULL;
1126+
1127+ oprnd0 = gimple_assign_rhs1 (last_stmt);
1128+ oprnd1 = gimple_assign_rhs2 (last_stmt);
1129+ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1130+ return NULL;
1131+
1132+ /* Check operand 0: it has to be defined by a type promotion. */
1133+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1134+ return NULL;
1135+
1136+ /* Check operand 1: has to be positive. We check that it fits the type
1137+ in vect_handle_widen_op_by_const (). */
1138+ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1139+ return NULL;
1140+
1141+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
1142+ type = gimple_expr_type (last_stmt);
1143+
1144+ /* Check if this is a widening operation. */
1145+ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1146+ &oprnd0, stmts,
1147+ type, &half_type0, def_stmt0))
1148+ return NULL;
1149+
1150+ /* Handle unsigned case. Look for
1151+ S4 u_res_T = (unsigned TYPE) res_T;
1152+ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1153+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1154+ {
1155+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1156+ imm_use_iterator imm_iter;
1157+ use_operand_p use_p;
1158+ int nuses = 0;
1159+ tree use_type;
1160+
1161+ if (over_widen)
1162+ {
1163+ /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1164+ We check here that TYPE is the correct type for the operation,
1165+ i.e., it's the type of the original result. */
1166+ tree orig_type = gimple_expr_type (orig_stmt);
1167+ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1168+ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1169+ return NULL;
1170+ }
1171+ else
1172+ {
1173+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1174+ {
1175+ if (is_gimple_debug (USE_STMT (use_p)))
1176+ continue;
1177+ use_stmt = USE_STMT (use_p);
1178+ nuses++;
1179+ }
1180+
1181+ if (nuses != 1 || !is_gimple_assign (use_stmt)
1182+ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1183+ return NULL;
1184+
1185+ use_lhs = gimple_assign_lhs (use_stmt);
1186+ use_type = TREE_TYPE (use_lhs);
1187+
1188+ if (!INTEGRAL_TYPE_P (use_type)
1189+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1190+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1191+ return NULL;
1192+
1193+ type = use_type;
1194+ }
1195+ }
1196+
1197+ /* Pattern detected. */
1198+ if (vect_print_dump_info (REPORT_DETAILS))
1199+ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1200+
1201+ /* Check target support. */
1202+ vectype = get_vectype_for_scalar_type (half_type0);
1203+ vectype_out = get_vectype_for_scalar_type (type);
1204+
1205+ if (!vectype
1206+ || !vectype_out
1207+ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1208+ vectype_out, vectype,
1209+ &dummy, &dummy, &dummy_code,
1210+ &dummy_code, &dummy_int,
1211+ &dummy_vec))
1212+ return NULL;
1213+
1214+ *type_in = vectype;
1215+ *type_out = vectype_out;
1216+
1217+ /* Pattern supported. Create a stmt to be used to replace the pattern. */
1218+ var = vect_recog_temp_ssa_var (type, NULL);
1219+ pattern_stmt =
1220+ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1221+
1222+ if (vect_print_dump_info (REPORT_DETAILS))
1223+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1224+
1225+ if (use_stmt)
1226+ last_stmt = use_stmt;
1227+ else
1228+ last_stmt = orig_stmt;
1229+
1230+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
1231+ return pattern_stmt;
1232+}
1233+
1234 /* Mark statements that are involved in a pattern. */
1235
1236 static inline void
1237@@ -1278,7 +1504,8 @@
1238 static void
1239 vect_pattern_recog_1 (
1240 gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
1241- gimple_stmt_iterator si)
1242+ gimple_stmt_iterator si,
1243+ VEC (gimple, heap) **stmts_to_replace)
1244 {
1245 gimple stmt = gsi_stmt (si), pattern_stmt;
1246 stmt_vec_info stmt_info;
1247@@ -1288,14 +1515,14 @@
1248 enum tree_code code;
1249 int i;
1250 gimple next;
1251- VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1252
1253- VEC_quick_push (gimple, stmts_to_replace, stmt);
1254- pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
1255+ VEC_truncate (gimple, *stmts_to_replace, 0);
1256+ VEC_quick_push (gimple, *stmts_to_replace, stmt);
1257+ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
1258 if (!pattern_stmt)
1259 return;
1260
1261- stmt = VEC_last (gimple, stmts_to_replace);
1262+ stmt = VEC_last (gimple, *stmts_to_replace);
1263 stmt_info = vinfo_for_stmt (stmt);
1264 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1265
1266@@ -1303,8 +1530,6 @@
1267 {
1268 /* No need to check target support (already checked by the pattern
1269 recognition function). */
1270- if (type_out)
1271- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
1272 pattern_vectype = type_out ? type_out : type_in;
1273 }
1274 else
1275@@ -1360,8 +1585,8 @@
1276 /* It is possible that additional pattern stmts are created and inserted in
1277 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1278 relevant statements. */
1279- for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
1280- && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
1281+ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
1282+ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
1283 i++)
1284 {
1285 stmt_info = vinfo_for_stmt (stmt);
1286@@ -1374,8 +1599,6 @@
1287
1288 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1289 }
1290-
1291- VEC_free (gimple, heap, stmts_to_replace);
1292 }
1293
1294
1295@@ -1465,6 +1688,7 @@
1296 gimple_stmt_iterator si;
1297 unsigned int i, j;
1298 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1299+ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1300
1301 if (vect_print_dump_info (REPORT_DETAILS))
1302 fprintf (vect_dump, "=== vect_pattern_recog ===");
1303@@ -1480,8 +1704,11 @@
1304 for (j = 0; j < NUM_PATTERNS; j++)
1305 {
1306 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
1307- vect_pattern_recog_1 (vect_recog_func_ptr, si);
1308+ vect_pattern_recog_1 (vect_recog_func_ptr, si,
1309+ &stmts_to_replace);
1310 }
1311 }
1312 }
1313+
1314+ VEC_free (gimple, heap, stmts_to_replace);
1315 }
1316
1317=== modified file 'gcc/tree-vect-slp.c'
1318--- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000
1319+++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000
1320@@ -480,6 +480,11 @@
1321 }
1322 }
1323 }
1324+ else if (rhs_code == WIDEN_LSHIFT_EXPR)
1325+ {
1326+ need_same_oprnds = true;
1327+ first_op1 = gimple_assign_rhs2 (stmt);
1328+ }
1329 }
1330 else
1331 {
1332
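Requiring identical shift operands across the SLP group follows from the NEON patterns earlier in the patch: vshll encodes a single immediate for the whole vector, so a group mixing different shift amounts (like the "<< 16" and "<< 6" lanes in the s16 test) cannot be covered by one widening-shift instruction.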
1333=== modified file 'gcc/tree-vect-stmts.c'
1334--- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000
1335+++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000
1336@@ -3359,6 +3359,7 @@
1337 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1338 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
1339 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1340+ unsigned int k;
1341
1342 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1343 return false;
1344@@ -3375,7 +3376,8 @@
1345
1346 code = gimple_assign_rhs_code (stmt);
1347 if (!CONVERT_EXPR_CODE_P (code)
1348- && code != WIDEN_MULT_EXPR)
1349+ && code != WIDEN_MULT_EXPR
1350+ && code != WIDEN_LSHIFT_EXPR)
1351 return false;
1352
1353 scalar_dest = gimple_assign_lhs (stmt);
1354@@ -3403,7 +3405,7 @@
1355 bool ok;
1356
1357 op1 = gimple_assign_rhs2 (stmt);
1358- if (code == WIDEN_MULT_EXPR)
1359+ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
1360 {
1361 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
1362 OP1. */
1363@@ -3480,7 +3482,7 @@
1364 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
1365 ncopies);
1366
1367- if (code == WIDEN_MULT_EXPR)
1368+ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
1369 {
1370 if (CONSTANT_CLASS_P (op0))
1371 op0 = fold_convert (TREE_TYPE (op1), op0);
1372@@ -3521,6 +3523,8 @@
1373 if (op_type == binary_op)
1374 vec_oprnds1 = VEC_alloc (tree, heap, 1);
1375 }
1376+ else if (code == WIDEN_LSHIFT_EXPR)
1377+ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
1378
1379 /* In case the vectorization factor (VF) is bigger than the number
1380 of elements that we can fit in a vectype (nunits), we have to generate
1381@@ -3534,15 +3538,33 @@
1382 if (j == 0)
1383 {
1384 if (slp_node)
1385- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
1386- &vec_oprnds1, -1);
1387- else
1388+ {
1389+ if (code == WIDEN_LSHIFT_EXPR)
1390+ {
1391+ vec_oprnd1 = op1;
1392+ /* Store vec_oprnd1 for every vector stmt to be created
1393+ for SLP_NODE. We check during the analysis that all
1394+ the shift arguments are the same. */
1395+ for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
1396+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
1397+
1398+ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
1399+ -1);
1400+ }
1401+ else
1402+ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
1403+ &vec_oprnds1, -1);
1404+ }
1405+ else
1406 {
1407 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1408 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
1409 if (op_type == binary_op)
1410 {
1411- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1412+ if (code == WIDEN_LSHIFT_EXPR)
1413+ vec_oprnd1 = op1;
1414+ else
1415+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1416 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
1417 }
1418 }
1419@@ -3553,7 +3575,10 @@
1420 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
1421 if (op_type == binary_op)
1422 {
1423- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
1424+ if (code == WIDEN_LSHIFT_EXPR)
1425+ vec_oprnd1 = op1;
1426+ else
1427+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
1428 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
1429 }
1430 }
1431@@ -5853,6 +5878,19 @@
1432 }
1433 break;
1434
1435+ case WIDEN_LSHIFT_EXPR:
1436+ if (BYTES_BIG_ENDIAN)
1437+ {
1438+ c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
1439+ c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
1440+ }
1441+ else
1442+ {
1443+ c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
1444+ c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
1445+ }
1446+ break;
1447+
1448 CASE_CONVERT:
1449 if (BYTES_BIG_ENDIAN)
1450 {
1451
1452=== modified file 'gcc/tree-vectorizer.h'
1453--- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000
1454+++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000
1455@@ -896,7 +896,7 @@
1456 Additional pattern recognition functions can (and will) be added
1457 in the future. */
1458 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1459-#define NUM_PATTERNS 5
1460+#define NUM_PATTERNS 6
1461 void vect_pattern_recog (loop_vec_info);
1462
1463 /* In tree-vectorizer.c. */
1464
1465=== modified file 'gcc/tree.def'
1466--- old/gcc/tree.def 2011-01-21 14:14:12 +0000
1467+++ new/gcc/tree.def 2011-10-23 13:33:07 +0000
1468@@ -1092,6 +1092,19 @@
1469 is subtracted from t3. */
1470 DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
1471
1472+/* Widening shift left.
1473+ The first operand is of type t1.
1474+ The second operand is the number of bits to shift by; it need not be the
1475+ same type as the first operand and result.
1476+ Note that the result is undefined if the second operand is larger
1477+ than or equal to the first operand's type size.
1478+ The type of the entire expression is t2, such that t2 is at least twice
1479+ the size of t1.
1480+ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting)
1481+ the first argument from type t1 to type t2, and then shifting it
1482+ by the second argument. */
1483+DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
1484+
1485 /* Fused multiply-add.
1486 All operands and the result are of the same type. No intermediate
1487 rounding is performed after multiplying operand one with operand two
1488@@ -1147,6 +1160,16 @@
1489 DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2)
1490 DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2)
1491
1492+/* Widening vector shift left in bits.
1493+ Operand 0 is a vector to be shifted with N elements of size S.
1494+ Operand 1 is an integer shift amount in bits.
1495+ The result of the operation is N elements of size 2*S.
1496+ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results.
1497+ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results.
1498+ */
1499+DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
1500+DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
1501+
1502 /* PREDICT_EXPR. Specify hint for branch prediction. The
1503 PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the
1504 outcome (0 for not taken and 1 for taken). Once the profile is guessed
1505
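Spelled out with concrete types (an illustration of the equivalence documented above, with t1 = short and t2 = int):

    short a_t;
    int res_T;

    res_T = a_t w<< 10;          /* WIDEN_LSHIFT_EXPR  */
    /* behaves as */
    res_T = ((int) a_t) << 10;

with the result undefined once the shift amount reaches 16, the bit width of t1.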
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch
new file mode 100644
index 000000000..95b9ea9b8
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch
@@ -0,0 +1,61 @@
12011-11-04 Revital Eres <revital.eres@linaro.org>
2
3 Backport from mainline -r180673:
4
5 gcc/
6 * modulo-sched.c (generate_prolog_epilog): Mark prolog
7 and epilog as BB_DISABLE_SCHEDULE.
8 (mark_loop_unsched): New function.
9 (sms_schedule): Call it.
10
11=== modified file 'gcc/modulo-sched.c'
12--- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000
13+++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000
14@@ -1173,6 +1173,8 @@
15 /* Put the prolog on the entry edge. */
16 e = loop_preheader_edge (loop);
17 split_edge_and_insert (e, get_insns ());
18+ if (!flag_resched_modulo_sched)
19+ e->dest->flags |= BB_DISABLE_SCHEDULE;
20
21 end_sequence ();
22
23@@ -1186,9 +1188,24 @@
24 gcc_assert (single_exit (loop));
25 e = single_exit (loop);
26 split_edge_and_insert (e, get_insns ());
27+ if (!flag_resched_modulo_sched)
28+ e->dest->flags |= BB_DISABLE_SCHEDULE;
29+
30 end_sequence ();
31 }
32
33+/* Mark LOOP as software pipelined so the later
34+ scheduling passes don't touch it. */
35+static void
36+mark_loop_unsched (struct loop *loop)
37+{
38+ unsigned i;
39+ basic_block *bbs = get_loop_body (loop);
40+
41+ for (i = 0; i < loop->num_nodes; i++)
42+ bbs[i]->flags |= BB_DISABLE_SCHEDULE;
43+}
44+
45 /* Return true if all the BBs of the loop are empty except the
46 loop header. */
47 static bool
48@@ -1714,9 +1731,10 @@
49 permute_partial_schedule (ps, g->closing_branch->first_note);
50
51 /* Mark this loop as software pipelined so the later
52- scheduling passes doesn't touch it. */
53+ scheduling passes don't touch it. */
54 if (! flag_resched_modulo_sched)
55- g->bb->flags |= BB_DISABLE_SCHEDULE;
56+ mark_loop_unsched (loop);
57+
58 /* The life-info is not valid any more. */
59 df_set_bb_dirty (g->bb);
60
61
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch
new file mode 100644
index 000000000..dcffe1945
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch
@@ -0,0 +1,23 @@
12011-11-02 Andrew Stubbs <ams@codesourcery.com>
2
3 Backport from FSF mainline:
4
5 2011-11-01 Andrew Stubbs <ams@codesourcery.com>
6
7 gcc/
8 * config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning.
9
10=== modified file 'gcc/config/arm/bpabi.h'
11--- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000
12+++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000
13@@ -56,7 +56,8 @@
14 "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
15
16 #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
17- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
18+ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\
19+ ":%{!r:--be8}}}"
20
21 /* Tell the assembler to build BPABI binaries. */
22 #undef SUBTARGET_EXTRA_ASM_SPEC
23
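For illustration (a hypothetical invocation, not part of the patch): after this change a link such as

    arm-none-linux-gnueabi-gcc -mbig-endian -mcpu=generic-armv7-a -o app app.o

passes --be8 to the linker, as was already the case for the explicitly listed Cortex-A CPUs.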
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch
new file mode 100644
index 000000000..70a7bdfa2
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch
@@ -0,0 +1,1400 @@
12011-11-17 Ira Rosen <ira.rosen@linaro.org>
2
3 Backport from mainline:
4
5 2011-11-03 Ira Rosen <ira.rosen@linaro.org>
6
7 gcc/
8 * tree-vectorizer.h (slp_void_p): New.
9 (struct _slp_tree): Replace left and right with children. Update
10 documentation.
11 (struct _slp_oprnd_info): New.
12 (vect_get_vec_defs): Declare.
13 (vect_get_slp_defs): Update arguments.
14 * tree-vect-loop.c (vect_create_epilog_for_reduction): Call
15 vect_get_vec_defs instead of vect_get_slp_defs.
16 (vectorizable_reduction): Likewise.
17 * tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument.
18 Update call to vect_get_slp_defs.
19 (vectorizable_conversion): Update call to vect_get_vec_defs.
20 (vectorizable_assignment, vectorizable_shift,
21 vectorizable_operation): Likewise.
22 (vectorizable_type_demotion): Call vect_get_vec_defs instead of
23 vect_get_slp_defs.
24 (vectorizable_type_promotion, vectorizable_store): Likewise.
25 (vect_analyze_stmt): Fix typo.
26 * tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal.
27 (vect_print_slp_tree, vect_mark_slp_stmts,
28 vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts,
29 vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations,
30 vect_schedule_slp_instance): Likewise.
31 (vect_create_new_slp_node): New.
32 (vect_create_oprnd_info, vect_free_oprnd_info): Likewise.
33 (vect_get_and_check_slp_defs): Pass information about defs using
34 oprnds_info, allow any number of operands.
35 (vect_build_slp_tree): Likewise. Update calls to
36 vect_get_and_check_slp_defs. Fix comments.
37 (vect_analyze_slp_instance): Move node creation to
38 vect_create_new_slp_node.
39 (vect_get_slp_defs): Allow any number of operands.
40
41 2011-11-11 Jakub Jelinek <jakub@redhat.com>
42
43 gcc/
44 * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN
45 vector.
46 (vect_create_new_slp_node): Don't allocate node before checking stmt
47 type.
48 (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always
49 free def_stmts vectors and additionally free oprnd_info.
50 (vect_build_slp_tree): Adjust callers. Call it even if
51 stop_recursion. If vect_create_new_slp_node or
52 vect_build_slp_tree fails, properly handle freeing memory.
53 If it succeeded, clear def_stmts in oprnd_info.
54
55=== modified file 'gcc/tree-vect-loop.c'
56--- old/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000
57+++ new/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000
58@@ -3282,8 +3282,8 @@
59
60 /* Get the loop-entry arguments. */
61 if (slp_node)
62- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
63- NULL, reduc_index);
64+ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
65+ NULL, slp_node, reduc_index);
66 else
67 {
68 vec_initial_defs = VEC_alloc (tree, heap, 1);
69@@ -4451,8 +4451,8 @@
70 }
71
72 if (slp_node)
73- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
74- -1);
75+ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
76+ slp_node, -1);
77 else
78 {
79 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
80
81=== modified file 'gcc/tree-vect-slp.c'
82--- old/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000
83+++ new/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000
84@@ -67,15 +67,16 @@
85 static void
86 vect_free_slp_tree (slp_tree node)
87 {
88+ int i;
89+ slp_void_p child;
90+
91 if (!node)
92 return;
93
94- if (SLP_TREE_LEFT (node))
95- vect_free_slp_tree (SLP_TREE_LEFT (node));
96-
97- if (SLP_TREE_RIGHT (node))
98- vect_free_slp_tree (SLP_TREE_RIGHT (node));
99-
100+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
101+ vect_free_slp_tree ((slp_tree) child);
102+
103+ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
104 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
105
106 if (SLP_TREE_VEC_STMTS (node))
107@@ -96,48 +97,116 @@
108 }
109
110
111-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
112- they are of a legal type and that they match the defs of the first stmt of
113- the SLP group (stored in FIRST_STMT_...). */
114+/* Create an SLP node for SCALAR_STMTS. */
115+
116+static slp_tree
117+vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
118+{
119+ slp_tree node;
120+ gimple stmt = VEC_index (gimple, scalar_stmts, 0);
121+ unsigned int nops;
122+
123+ if (is_gimple_call (stmt))
124+ nops = gimple_call_num_args (stmt);
125+ else if (is_gimple_assign (stmt))
126+ nops = gimple_num_ops (stmt) - 1;
127+ else
128+ return NULL;
129+
130+ node = XNEW (struct _slp_tree);
131+ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
132+ SLP_TREE_VEC_STMTS (node) = NULL;
133+ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
134+ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
135+ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
136+
137+ return node;
138+}
139+
140+
141+/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
142+ operand. */
143+static VEC (slp_oprnd_info, heap) *
144+vect_create_oprnd_info (int nops, int group_size)
145+{
146+ int i;
147+ slp_oprnd_info oprnd_info;
148+ VEC (slp_oprnd_info, heap) *oprnds_info;
149+
150+ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
151+ for (i = 0; i < nops; i++)
152+ {
153+ oprnd_info = XNEW (struct _slp_oprnd_info);
154+ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
155+ oprnd_info->first_dt = vect_uninitialized_def;
156+ oprnd_info->first_def_type = NULL_TREE;
157+ oprnd_info->first_const_oprnd = NULL_TREE;
158+ oprnd_info->first_pattern = false;
159+ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
160+ }
161+
162+ return oprnds_info;
163+}
164+
165+
166+/* Free operands info. */
167+
168+static void
169+vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
170+{
171+ int i;
172+ slp_oprnd_info oprnd_info;
173+
174+ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
175+ {
176+ VEC_free (gimple, heap, oprnd_info->def_stmts);
177+ XDELETE (oprnd_info);
178+ }
179+
180+ VEC_free (slp_oprnd_info, heap, *oprnds_info);
181+}
182+
183+
184+/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
185+ they are of a valid type and that they match the defs of the first stmt of
186+ the SLP group (stored in OPRNDS_INFO). */
187
188 static bool
189 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
190 slp_tree slp_node, gimple stmt,
191- VEC (gimple, heap) **def_stmts0,
192- VEC (gimple, heap) **def_stmts1,
193- enum vect_def_type *first_stmt_dt0,
194- enum vect_def_type *first_stmt_dt1,
195- tree *first_stmt_def0_type,
196- tree *first_stmt_def1_type,
197- tree *first_stmt_const_oprnd,
198- int ncopies_for_cost,
199- bool *pattern0, bool *pattern1)
200+ int ncopies_for_cost, bool first,
201+ VEC (slp_oprnd_info, heap) **oprnds_info)
202 {
203 tree oprnd;
204 unsigned int i, number_of_oprnds;
205- tree def[2];
206+ tree def, def_op0 = NULL_TREE;
207 gimple def_stmt;
208- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
209- stmt_vec_info stmt_info =
210- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
211- enum gimple_rhs_class rhs_class;
212+ enum vect_def_type dt = vect_uninitialized_def;
213+ enum vect_def_type dt_op0 = vect_uninitialized_def;
214+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
215+ tree lhs = gimple_get_lhs (stmt);
216 struct loop *loop = NULL;
217 enum tree_code rhs_code;
218 bool different_types = false;
219+ bool pattern = false;
220+ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
221
222 if (loop_vinfo)
223 loop = LOOP_VINFO_LOOP (loop_vinfo);
224
225- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
226- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
227+ if (is_gimple_call (stmt))
228+ number_of_oprnds = gimple_call_num_args (stmt);
229+ else
230+ number_of_oprnds = gimple_num_ops (stmt) - 1;
231
232 for (i = 0; i < number_of_oprnds; i++)
233 {
234 oprnd = gimple_op (stmt, i + 1);
235+ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
236
237- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
238- &dt[i])
239- || (!def_stmt && dt[i] != vect_constant_def))
240+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
241+ &dt)
242+ || (!def_stmt && dt != vect_constant_def))
243 {
244 if (vect_print_dump_info (REPORT_SLP))
245 {
246@@ -158,29 +227,24 @@
247 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
248 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
249 {
250- if (!*first_stmt_dt0)
251- *pattern0 = true;
252- else
253- {
254- if (i == 1 && !*first_stmt_dt1)
255- *pattern1 = true;
256- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
257- {
258- if (vect_print_dump_info (REPORT_DETAILS))
259- {
260- fprintf (vect_dump, "Build SLP failed: some of the stmts"
261- " are in a pattern, and others are not ");
262- print_generic_expr (vect_dump, oprnd, TDF_SLIM);
263- }
264+ pattern = true;
265+ if (!first && !oprnd_info->first_pattern)
266+ {
267+ if (vect_print_dump_info (REPORT_DETAILS))
268+ {
269+ fprintf (vect_dump, "Build SLP failed: some of the stmts"
270+ " are in a pattern, and others are not ");
271+ print_generic_expr (vect_dump, oprnd, TDF_SLIM);
272+ }
273
274- return false;
275- }
276+ return false;
277 }
278
279 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
280- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
281+ dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
282
283- if (*dt == vect_unknown_def_type)
284+ if (dt == vect_unknown_def_type
285+ || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
286 {
287 if (vect_print_dump_info (REPORT_DETAILS))
288 fprintf (vect_dump, "Unsupported pattern.");
289@@ -190,11 +254,11 @@
290 switch (gimple_code (def_stmt))
291 {
292 case GIMPLE_PHI:
293- def[i] = gimple_phi_result (def_stmt);
294+ def = gimple_phi_result (def_stmt);
295 break;
296
297 case GIMPLE_ASSIGN:
298- def[i] = gimple_assign_lhs (def_stmt);
299+ def = gimple_assign_lhs (def_stmt);
300 break;
301
302 default:
303@@ -204,117 +268,125 @@
304 }
305 }
306
307- if (!*first_stmt_dt0)
308+ if (first)
309 {
310- /* op0 of the first stmt of the group - store its info. */
311- *first_stmt_dt0 = dt[i];
312- if (def[i])
313- *first_stmt_def0_type = TREE_TYPE (def[i]);
314- else
315- *first_stmt_const_oprnd = oprnd;
316+ oprnd_info->first_dt = dt;
317+ oprnd_info->first_pattern = pattern;
318+ if (def)
319+ {
320+ oprnd_info->first_def_type = TREE_TYPE (def);
321+ oprnd_info->first_const_oprnd = NULL_TREE;
322+ }
323+ else
324+ {
325+ oprnd_info->first_def_type = NULL_TREE;
326+ oprnd_info->first_const_oprnd = oprnd;
327+ }
328
329- /* Analyze costs (for the first stmt of the group only). */
330- if (rhs_class != GIMPLE_SINGLE_RHS)
331- /* Not memory operation (we don't call this functions for loads). */
332- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
333- else
334- /* Store. */
335- vect_model_store_cost (stmt_info, ncopies_for_cost, false,
336- dt[0], slp_node);
337+ if (i == 0)
338+ {
339+ def_op0 = def;
340+ dt_op0 = dt;
341+ /* Analyze costs (for the first stmt of the group only). */
342+ if (REFERENCE_CLASS_P (lhs))
343+ /* Store. */
344+ vect_model_store_cost (stmt_info, ncopies_for_cost, false,
345+ dt, slp_node);
346+ else
347+ /* Not memory operation (we don't call this function for
348+ loads). */
349+ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
350+ slp_node);
351+ }
352 }
353
354 else
355 {
356- if (!*first_stmt_dt1 && i == 1)
357- {
358- /* op1 of the first stmt of the group - store its info. */
359- *first_stmt_dt1 = dt[i];
360- if (def[i])
361- *first_stmt_def1_type = TREE_TYPE (def[i]);
362- else
363- {
364- /* We assume that the stmt contains only one constant
365- operand. We fail otherwise, to be on the safe side. */
366- if (*first_stmt_const_oprnd)
367- {
368- if (vect_print_dump_info (REPORT_SLP))
369- fprintf (vect_dump, "Build SLP failed: two constant "
370- "oprnds in stmt");
371- return false;
372- }
373- *first_stmt_const_oprnd = oprnd;
374- }
375- }
376- else
377- {
378- /* Not first stmt of the group, check that the def-stmt/s match
379- the def-stmt/s of the first stmt. */
380- if ((i == 0
381- && (*first_stmt_dt0 != dt[i]
382- || (*first_stmt_def0_type && def[0]
383- && !types_compatible_p (*first_stmt_def0_type,
384- TREE_TYPE (def[0])))))
385- || (i == 1
386- && (*first_stmt_dt1 != dt[i]
387- || (*first_stmt_def1_type && def[1]
388- && !types_compatible_p (*first_stmt_def1_type,
389- TREE_TYPE (def[1])))))
390- || (!def[i]
391- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
392- TREE_TYPE (oprnd)))
393- || different_types)
394- {
395- if (i != number_of_oprnds - 1)
396- different_types = true;
397+ /* Not first stmt of the group, check that the def-stmt/s match
398+ the def-stmt/s of the first stmt. Allow different definition
399+ types for reduction chains: the first stmt must be a
400+ vect_reduction_def (a phi node), and the rest
401+ vect_internal_def. */
402+ if (((oprnd_info->first_dt != dt
403+ && !(oprnd_info->first_dt == vect_reduction_def
404+ && dt == vect_internal_def))
405+ || (oprnd_info->first_def_type != NULL_TREE
406+ && def
407+ && !types_compatible_p (oprnd_info->first_def_type,
408+ TREE_TYPE (def))))
409+ || (!def
410+ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
411+ TREE_TYPE (oprnd)))
412+ || different_types)
413+ {
414+ if (number_of_oprnds != 2)
415+ {
416+ if (vect_print_dump_info (REPORT_SLP))
417+ fprintf (vect_dump, "Build SLP failed: different types ");
418+
419+ return false;
420+ }
421+
422+ /* Try to swap operands in case of binary operation. */
423+ if (i == 0)
424+ different_types = true;
425+ else
426+ {
427+ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
428+ if (is_gimple_assign (stmt)
429+ && (rhs_code = gimple_assign_rhs_code (stmt))
430+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
431+ && commutative_tree_code (rhs_code)
432+ && oprnd0_info->first_dt == dt
433+ && oprnd_info->first_dt == dt_op0
434+ && def_op0 && def
435+ && !(oprnd0_info->first_def_type
436+ && !types_compatible_p (oprnd0_info->first_def_type,
437+ TREE_TYPE (def)))
438+ && !(oprnd_info->first_def_type
439+ && !types_compatible_p (oprnd_info->first_def_type,
440+ TREE_TYPE (def_op0))))
441+ {
442+ if (vect_print_dump_info (REPORT_SLP))
443+ {
444+ fprintf (vect_dump, "Swapping operands of ");
445+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
446+ }
447+
448+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
449+ gimple_assign_rhs2_ptr (stmt));
450+ }
451 else
452- {
453- if (is_gimple_assign (stmt)
454- && (rhs_code = gimple_assign_rhs_code (stmt))
455- && TREE_CODE_CLASS (rhs_code) == tcc_binary
456- && commutative_tree_code (rhs_code)
457- && *first_stmt_dt0 == dt[1]
458- && *first_stmt_dt1 == dt[0]
459- && def[0] && def[1]
460- && !(*first_stmt_def0_type
461- && !types_compatible_p (*first_stmt_def0_type,
462- TREE_TYPE (def[1])))
463- && !(*first_stmt_def1_type
464- && !types_compatible_p (*first_stmt_def1_type,
465- TREE_TYPE (def[0]))))
466- {
467- if (vect_print_dump_info (REPORT_SLP))
468- {
469- fprintf (vect_dump, "Swapping operands of ");
470- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
471- }
472- swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
473- gimple_assign_rhs2_ptr (stmt));
474- }
475- else
476- {
477- if (vect_print_dump_info (REPORT_SLP))
478- fprintf (vect_dump, "Build SLP failed: different types ");
479-
480- return false;
481- }
482- }
483+ {
484+ if (vect_print_dump_info (REPORT_SLP))
485+ fprintf (vect_dump, "Build SLP failed: different types ");
486+
487+ return false;
488+ }
489 }
490 }
491 }
492
493 /* Check the types of the definitions. */
494- switch (dt[i])
495+ switch (dt)
496 {
497 case vect_constant_def:
498 case vect_external_def:
499+ case vect_reduction_def:
500 break;
501
502 case vect_internal_def:
503- case vect_reduction_def:
504- if ((i == 0 && !different_types) || (i == 1 && different_types))
505- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
506+ if (different_types)
507+ {
508+ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
509+ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
510+ if (i == 0)
511+ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
512+ else
513+ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
514+ }
515 else
516- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
517+ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
518 break;
519
520 default:
521@@ -322,7 +394,7 @@
522 if (vect_print_dump_info (REPORT_SLP))
523 {
524 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
525- print_generic_expr (vect_dump, def[i], TDF_SLIM);
526+ print_generic_expr (vect_dump, def, TDF_SLIM);
527 }
528
529 return false;
530@@ -347,15 +419,10 @@
531 VEC (slp_tree, heap) **loads,
532 unsigned int vectorization_factor, bool *loads_permuted)
533 {
534- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
535- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
536 unsigned int i;
537 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
538 gimple stmt = VEC_index (gimple, stmts, 0);
539- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
540- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
541 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
542- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
543 tree lhs;
544 bool stop_recursion = false, need_same_oprnds = false;
545 tree vectype, scalar_type, first_op1 = NULL_TREE;
546@@ -364,13 +431,21 @@
547 int icode;
548 enum machine_mode optab_op2_mode;
549 enum machine_mode vec_mode;
550- tree first_stmt_const_oprnd = NULL_TREE;
551 struct data_reference *first_dr;
552- bool pattern0 = false, pattern1 = false;
553 HOST_WIDE_INT dummy;
554 bool permutation = false;
555 unsigned int load_place;
556 gimple first_load, prev_first_load = NULL;
557+ VEC (slp_oprnd_info, heap) *oprnds_info;
558+ unsigned int nops;
559+ slp_oprnd_info oprnd_info;
560+
561+ if (is_gimple_call (stmt))
562+ nops = gimple_call_num_args (stmt);
563+ else
564+ nops = gimple_num_ops (stmt) - 1;
565+
566+ oprnds_info = vect_create_oprnd_info (nops, group_size);
567
568 /* For every stmt in NODE find its def stmt/s. */
569 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
570@@ -391,6 +466,7 @@
571 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 }
573
574+ vect_free_oprnd_info (&oprnds_info);
575 return false;
576 }
577
578@@ -400,10 +476,11 @@
579 if (vect_print_dump_info (REPORT_SLP))
580 {
581 fprintf (vect_dump,
582- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
583+ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
585 }
586
587+ vect_free_oprnd_info (&oprnds_info);
588 return false;
589 }
590
591@@ -416,6 +493,8 @@
592 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
593 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
594 }
595+
596+ vect_free_oprnd_info (&oprnds_info);
597 return false;
598 }
599
600@@ -462,6 +541,7 @@
601 {
602 if (vect_print_dump_info (REPORT_SLP))
603 fprintf (vect_dump, "Build SLP failed: no optab.");
604+ vect_free_oprnd_info (&oprnds_info);
605 return false;
606 }
607 icode = (int) optab_handler (optab, vec_mode);
608@@ -470,6 +550,7 @@
609 if (vect_print_dump_info (REPORT_SLP))
610 fprintf (vect_dump, "Build SLP failed: "
611 "op not supported by target.");
612+ vect_free_oprnd_info (&oprnds_info);
613 return false;
614 }
615 optab_op2_mode = insn_data[icode].operand[2].mode;
616@@ -506,6 +587,7 @@
617 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
618 }
619
620+ vect_free_oprnd_info (&oprnds_info);
621 return false;
622 }
623
624@@ -519,6 +601,7 @@
625 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
626 }
627
628+ vect_free_oprnd_info (&oprnds_info);
629 return false;
630 }
631 }
632@@ -530,15 +613,12 @@
633 {
634 /* Store. */
635 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
636- stmt, &def_stmts0, &def_stmts1,
637- &first_stmt_dt0,
638- &first_stmt_dt1,
639- &first_stmt_def0_type,
640- &first_stmt_def1_type,
641- &first_stmt_const_oprnd,
642- ncopies_for_cost,
643- &pattern0, &pattern1))
644- return false;
645+ stmt, ncopies_for_cost,
646+ (i == 0), &oprnds_info))
647+ {
648+ vect_free_oprnd_info (&oprnds_info);
649+ return false;
650+ }
651 }
652 else
653 {
654@@ -556,6 +636,7 @@
655 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
656 }
657
658+ vect_free_oprnd_info (&oprnds_info);
659 return false;
660 }
661
662@@ -573,6 +654,7 @@
663 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
664 }
665
666+ vect_free_oprnd_info (&oprnds_info);
667 return false;
668 }
669
670@@ -593,6 +675,7 @@
671 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
672 }
673
674+ vect_free_oprnd_info (&oprnds_info);
675 return false;
676 }
677 }
678@@ -612,6 +695,7 @@
679 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
680 }
681
682+ vect_free_oprnd_info (&oprnds_info);
683 return false;
684 }
685
686@@ -639,7 +723,7 @@
687 {
688 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
689 {
690- /* Not strided load. */
691+ /* Not strided load. */
692 if (vect_print_dump_info (REPORT_SLP))
693 {
694 fprintf (vect_dump, "Build SLP failed: not strided load ");
695@@ -647,6 +731,7 @@
696 }
697
698 /* FORNOW: Not strided loads are not supported. */
699+ vect_free_oprnd_info (&oprnds_info);
700 return false;
701 }
702
703@@ -661,19 +746,18 @@
704 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
705 }
706
707+ vect_free_oprnd_info (&oprnds_info);
708 return false;
709 }
710
711 /* Find the def-stmts. */
712 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
713- &def_stmts0, &def_stmts1,
714- &first_stmt_dt0, &first_stmt_dt1,
715- &first_stmt_def0_type,
716- &first_stmt_def1_type,
717- &first_stmt_const_oprnd,
718- ncopies_for_cost,
719- &pattern0, &pattern1))
720- return false;
721+ ncopies_for_cost, (i == 0),
722+ &oprnds_info))
723+ {
724+ vect_free_oprnd_info (&oprnds_info);
725+ return false;
726+ }
727 }
728 }
729
730@@ -702,46 +786,37 @@
731 *loads_permuted = true;
732 }
733
734+ vect_free_oprnd_info (&oprnds_info);
735 return true;
736 }
737
738 /* Create SLP_TREE nodes for the definition node/s. */
739- if (first_stmt_dt0 == vect_internal_def)
740- {
741- slp_tree left_node = XNEW (struct _slp_tree);
742- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
743- SLP_TREE_VEC_STMTS (left_node) = NULL;
744- SLP_TREE_LEFT (left_node) = NULL;
745- SLP_TREE_RIGHT (left_node) = NULL;
746- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
747- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
748- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
749- inside_cost, outside_cost, ncopies_for_cost,
750- max_nunits, load_permutation, loads,
751- vectorization_factor, loads_permuted))
752- return false;
753-
754- SLP_TREE_LEFT (*node) = left_node;
755- }
756-
757- if (first_stmt_dt1 == vect_internal_def)
758- {
759- slp_tree right_node = XNEW (struct _slp_tree);
760- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
761- SLP_TREE_VEC_STMTS (right_node) = NULL;
762- SLP_TREE_LEFT (right_node) = NULL;
763- SLP_TREE_RIGHT (right_node) = NULL;
764- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
765- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
766- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
767- inside_cost, outside_cost, ncopies_for_cost,
768- max_nunits, load_permutation, loads,
769- vectorization_factor, loads_permuted))
770- return false;
771-
772- SLP_TREE_RIGHT (*node) = right_node;
773- }
774-
775+ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
776+ {
777+ slp_tree child;
778+
779+ if (oprnd_info->first_dt != vect_internal_def)
780+ continue;
781+
782+ child = vect_create_new_slp_node (oprnd_info->def_stmts);
783+ if (!child
784+ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
785+ inside_cost, outside_cost, ncopies_for_cost,
786+ max_nunits, load_permutation, loads,
787+ vectorization_factor, loads_permuted))
788+ {
789+ if (child)
790+ oprnd_info->def_stmts = NULL;
791+ vect_free_slp_tree (child);
792+ vect_free_oprnd_info (&oprnds_info);
793+ return false;
794+ }
795+
796+ oprnd_info->def_stmts = NULL;
797+ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
798+ }
799+
800+ vect_free_oprnd_info (&oprnds_info);
801 return true;
802 }
803
804@@ -751,6 +826,7 @@
805 {
806 int i;
807 gimple stmt;
808+ slp_void_p child;
809
810 if (!node)
811 return;
812@@ -763,8 +839,8 @@
813 }
814 fprintf (vect_dump, "\n");
815
816- vect_print_slp_tree (SLP_TREE_LEFT (node));
817- vect_print_slp_tree (SLP_TREE_RIGHT (node));
818+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
819+ vect_print_slp_tree ((slp_tree) child);
820 }
821
822
823@@ -778,6 +854,7 @@
824 {
825 int i;
826 gimple stmt;
827+ slp_void_p child;
828
829 if (!node)
830 return;
831@@ -786,8 +863,8 @@
832 if (j < 0 || i == j)
833 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
834
835- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
836- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
837+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
838+ vect_mark_slp_stmts ((slp_tree) child, mark, j);
839 }
840
841
842@@ -799,6 +876,7 @@
843 int i;
844 gimple stmt;
845 stmt_vec_info stmt_info;
846+ slp_void_p child;
847
848 if (!node)
849 return;
850@@ -811,8 +889,8 @@
851 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
852 }
853
854- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
855- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
856+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
857+ vect_mark_slp_stmts_relevant ((slp_tree) child);
858 }
859
860
861@@ -885,12 +963,13 @@
862 gimple stmt;
863 VEC (gimple, heap) *tmp_stmts;
864 unsigned int index, i;
865+ slp_void_p child;
866
867 if (!node)
868 return;
869
870- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
871- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
872+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
873+ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
874
875 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
876 tmp_stmts = VEC_alloc (gimple, heap, group_size);
877@@ -1253,7 +1332,7 @@
878 gimple stmt)
879 {
880 slp_instance new_instance;
881- slp_tree node = XNEW (struct _slp_tree);
882+ slp_tree node;
883 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
884 unsigned int unrolling_factor = 1, nunits;
885 tree vectype, scalar_type = NULL_TREE;
886@@ -1265,6 +1344,7 @@
887 VEC (slp_tree, heap) *loads;
888 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
889 bool loads_permuted = false;
890+ VEC (gimple, heap) *scalar_stmts;
891
892 if (dr)
893 {
894@@ -1308,39 +1388,26 @@
895 }
896
897 /* Create a node (a root of the SLP tree) for the packed strided stores. */
898- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
899+ scalar_stmts = VEC_alloc (gimple, heap, group_size);
900 next = stmt;
901 if (dr)
902 {
903 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
904 while (next)
905 {
906- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
907+ VEC_safe_push (gimple, heap, scalar_stmts, next);
908 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
909 }
910 }
911 else
912 {
913 /* Collect reduction statements. */
914- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
915- next);
916- i++)
917- {
918- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
919- if (vect_print_dump_info (REPORT_DETAILS))
920- {
921- fprintf (vect_dump, "pushing reduction into node: ");
922- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
923- }
924- }
925+ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
926+ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
927+ VEC_safe_push (gimple, heap, scalar_stmts, next);
928 }
929
930- SLP_TREE_VEC_STMTS (node) = NULL;
931- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
932- SLP_TREE_LEFT (node) = NULL;
933- SLP_TREE_RIGHT (node) = NULL;
934- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
935- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
936+ node = vect_create_new_slp_node (scalar_stmts);
937
938 /* Calculate the number of vector stmts to create based on the unrolling
939 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
940@@ -1517,6 +1584,7 @@
941 imm_use_iterator imm_iter;
942 gimple use_stmt;
943 stmt_vec_info stmt_vinfo;
944+ slp_void_p child;
945
946 if (!node)
947 return;
948@@ -1534,8 +1602,8 @@
949 == vect_reduction_def))
950 vect_mark_slp_stmts (node, hybrid, i);
951
952- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
953- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
954+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
955+ vect_detect_hybrid_slp_stmts ((slp_tree) child);
956 }
957
958
959@@ -1625,13 +1693,14 @@
960 bool dummy;
961 int i;
962 gimple stmt;
963+ slp_void_p child;
964
965 if (!node)
966 return true;
967
968- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
969- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
970- return false;
971+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
972+ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
973+ return false;
974
975 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
976 {
977@@ -2207,88 +2276,102 @@
978 If the scalar definitions are loop invariants or constants, collect them and
979 call vect_get_constant_vectors() to create vector stmts.
980 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
981- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
982- vect_get_slp_vect_defs() to retrieve them.
983- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
984- the right node. This is used when the second operand must remain scalar. */
985+ must be stored in the corresponding child of SLP_NODE, and we call
986+ vect_get_slp_vect_defs () to retrieve them. */
987
988 void
989-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
990- VEC (tree,heap) **vec_oprnds0,
991- VEC (tree,heap) **vec_oprnds1, int reduc_index)
992+vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
993+ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
994 {
995- gimple first_stmt;
996- enum tree_code code;
997- int number_of_vects;
998+ gimple first_stmt, first_def;
999+ int number_of_vects = 0, i;
1000+ unsigned int child_index = 0;
1001 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1002+ slp_tree child = NULL;
1003+ VEC (tree, heap) *vec_defs;
1004+ tree oprnd, def_lhs;
1005+ bool vectorized_defs;
1006
1007 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1008- /* The number of vector defs is determined by the number of vector statements
1009- in the node from which we get those statements. */
1010- if (SLP_TREE_LEFT (slp_node))
1011- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1012- else
1013- {
1014- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1015- /* Number of vector stmts was calculated according to LHS in
1016- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1017- necessary. See vect_get_smallest_scalar_type () for details. */
1018- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1019- &rhs_size_unit);
1020- if (rhs_size_unit != lhs_size_unit)
1021- {
1022- number_of_vects *= rhs_size_unit;
1023- number_of_vects /= lhs_size_unit;
1024- }
1025+ FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
1026+ {
1027+ /* For each operand we check if it has vectorized definitions in a child
1028+ node or we need to create them (for invariants and constants). We
1029+ check if the LHS of the first stmt of the next child matches OPRND.
1030+ If it does, we found the correct child. Otherwise, we call
1031+ vect_get_constant_vectors (), and not advance CHILD_INDEX in order
1032+ to check this child node for the next operand. */
1033+ vectorized_defs = false;
1034+ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
1035+ {
1036+ child = (slp_tree) VEC_index (slp_void_p,
1037+ SLP_TREE_CHILDREN (slp_node),
1038+ child_index);
1039+ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
1040+
1041+ /* In the end of a pattern sequence we have a use of the original stmt,
1042+ so we need to compare OPRND with the original def. */
1043+ if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
1044+ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
1045+ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
1046+ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
1047+
1048+ if (is_gimple_call (first_def))
1049+ def_lhs = gimple_call_lhs (first_def);
1050+ else
1051+ def_lhs = gimple_assign_lhs (first_def);
1052+
1053+ if (operand_equal_p (oprnd, def_lhs, 0))
1054+ {
1055+ /* The number of vector defs is determined by the number of
1056+ vector statements in the node from which we get those
1057+ statements. */
1058+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
1059+ vectorized_defs = true;
1060+ child_index++;
1061+ }
1062+ }
1063+
1064+ if (!vectorized_defs)
1065+ {
1066+ if (i == 0)
1067+ {
1068+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1069+ /* Number of vector stmts was calculated according to LHS in
1070+ vect_schedule_slp_instance (), fix it by replacing LHS with
1071+ RHS, if necessary. See vect_get_smallest_scalar_type () for
1072+ details. */
1073+ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1074+ &rhs_size_unit);
1075+ if (rhs_size_unit != lhs_size_unit)
1076+ {
1077+ number_of_vects *= rhs_size_unit;
1078+ number_of_vects /= lhs_size_unit;
1079+ }
1080+ }
1081+ }
1082+
1083+ /* Allocate memory for vectorized defs. */
1084+ vec_defs = VEC_alloc (tree, heap, number_of_vects);
1085+
1086+ /* For reduction defs we call vect_get_constant_vectors (), since we are
1087+ looking for initial loop invariant values. */
1088+ if (vectorized_defs && reduc_index == -1)
1089+ /* The defs are already vectorized. */
1090+ vect_get_slp_vect_defs (child, &vec_defs);
1091+ else
1092+ /* Build vectors from scalar defs. */
1093+ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
1094+ number_of_vects, reduc_index);
1095+
1096+ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
1097+
1098+ /* For reductions, we only need initial values. */
1099+ if (reduc_index != -1)
1100+ return;
1101 }
1102-
1103- /* Allocate memory for vectorized defs. */
1104- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1105-
1106- /* SLP_NODE corresponds either to a group of stores or to a group of
1107- unary/binary operations. We don't call this function for loads.
1108- For reduction defs we call vect_get_constant_vectors(), since we are
1109- looking for initial loop invariant values. */
1110- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
1111- /* The defs are already vectorized. */
1112- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1113- else
1114- /* Build vectors from scalar defs. */
1115- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
1116- reduc_index);
1117-
1118- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1119- /* Since we don't call this function with loads, this is a group of
1120- stores. */
1121- return;
1122-
1123- /* For reductions, we only need initial values. */
1124- if (reduc_index != -1)
1125- return;
1126-
1127- code = gimple_assign_rhs_code (first_stmt);
1128- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1129- return;
1130-
1131- /* The number of vector defs is determined by the number of vector statements
1132- in the node from which we get those statements. */
1133- if (SLP_TREE_RIGHT (slp_node))
1134- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1135- else
1136- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1137-
1138- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1139-
1140- if (SLP_TREE_RIGHT (slp_node))
1141- /* The defs are already vectorized. */
1142- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1143- else
1144- /* Build vectors from scalar defs. */
1145- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
1146- -1);
1147 }
1148
1149-
1150 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1151 building a vector of type MASK_TYPE from it) and two input vectors placed in
1152 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1153@@ -2605,14 +2688,14 @@
1154 tree vectype;
1155 int i;
1156 slp_tree loads_node;
1157+ slp_void_p child;
1158
1159 if (!node)
1160 return false;
1161
1162- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1163- vectorization_factor);
1164- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1165- vectorization_factor);
1166+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1167+ vect_schedule_slp_instance ((slp_tree) child, instance,
1168+ vectorization_factor);
1169
1170 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1171 stmt_info = vinfo_for_stmt (stmt);
1172
1173=== modified file 'gcc/tree-vect-stmts.c'
1174--- old/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000
1175+++ new/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000
1176@@ -1419,16 +1419,35 @@
1177 }
1178
1179
1180-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
1181- NULL. */
1182+/* Get vectorized definitions for OP0 and OP1.
1183+ REDUC_INDEX is the index of reduction operand in case of reduction,
1184+ and -1 otherwise. */
1185
1186-static void
1187+void
1188 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1189- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1190- slp_tree slp_node)
1191+ VEC (tree, heap) **vec_oprnds0,
1192+ VEC (tree, heap) **vec_oprnds1,
1193+ slp_tree slp_node, int reduc_index)
1194 {
1195 if (slp_node)
1196- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1197+ {
1198+ int nops = (op1 == NULL_TREE) ? 1 : 2;
1199+ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1200+ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1201+
1202+ VEC_quick_push (tree, ops, op0);
1203+ if (op1)
1204+ VEC_quick_push (tree, ops, op1);
1205+
1206+ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1207+
1208+ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1209+ if (op1)
1210+ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1211+
1212+ VEC_free (tree, heap, ops);
1213+ VEC_free (slp_void_p, heap, vec_defs);
1214+ }
1215 else
1216 {
1217 tree vec_oprnd;
1218@@ -2016,7 +2035,8 @@
1219 for (j = 0; j < ncopies; j++)
1220 {
1221 if (j == 0)
1222- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1223+ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
1224+ -1);
1225 else
1226 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1227
1228@@ -2221,7 +2241,7 @@
1229 {
1230 /* Handle uses. */
1231 if (j == 0)
1232- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1233+ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
1234 else
1235 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
1236
1237@@ -2576,10 +2596,10 @@
1238 operand 1 should be of a vector type (the usual case). */
1239 if (vec_oprnd1)
1240 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
1241- slp_node);
1242+ slp_node, -1);
1243 else
1244 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
1245- slp_node);
1246+ slp_node, -1);
1247 }
1248 else
1249 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
1250@@ -2887,10 +2907,10 @@
1251 {
1252 if (op_type == binary_op || op_type == ternary_op)
1253 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
1254- slp_node);
1255+ slp_node, -1);
1256 else
1257 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
1258- slp_node);
1259+ slp_node, -1);
1260 if (op_type == ternary_op)
1261 {
1262 vec_oprnds2 = VEC_alloc (tree, heap, 1);
1263@@ -3202,7 +3222,8 @@
1264 {
1265 /* Handle uses. */
1266 if (slp_node)
1267- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
1268+ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
1269+ slp_node, -1);
1270 else
1271 {
1272 VEC_free (tree, heap, vec_oprnds0);
1273@@ -3548,12 +3569,12 @@
1274 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
1275 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
1276
1277- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
1278- -1);
1279+ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
1280+ slp_node, -1);
1281 }
1282 else
1283- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
1284- &vec_oprnds1, -1);
1285+ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
1286+ &vec_oprnds1, slp_node, -1);
1287 }
1288 else
1289 {
1290@@ -3796,6 +3817,7 @@
1291 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1292 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1293 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1294+ op = gimple_assign_rhs1 (first_stmt);
1295 }
1296 else
1297 /* VEC_NUM is the number of vect stmts to be created for this
1298@@ -3878,8 +3900,8 @@
1299 if (slp)
1300 {
1301 /* Get vectorized arguments for SLP_NODE. */
1302- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
1303- NULL, -1);
1304+ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
1305+ NULL, slp_node, -1);
1306
1307 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
1308 }
1309@@ -5040,7 +5062,7 @@
1310 In basic blocks we only analyze statements that are a part of some SLP
1311 instance, therefore, all the statements are relevant.
1312
1313- Pattern statement need to be analyzed instead of the original statement
1314+ Pattern statement needs to be analyzed instead of the original statement
1315 if the original statement is not relevant. Otherwise, we analyze both
1316 statements. */
1317
1318
1319=== modified file 'gcc/tree-vectorizer.h'
1320--- old/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000
1321+++ new/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000
1322@@ -73,15 +73,15 @@
1323 /************************************************************************
1324 SLP
1325 ************************************************************************/
1326+typedef void *slp_void_p;
1327+DEF_VEC_P (slp_void_p);
1328+DEF_VEC_ALLOC_P (slp_void_p, heap);
1329
1330-/* A computation tree of an SLP instance. Each node corresponds to a group of
1331+/* A computation tree of an SLP instance. Each node corresponds to a group of
1332 stmts to be packed in a SIMD stmt. */
1333 typedef struct _slp_tree {
1334- /* Only binary and unary operations are supported. LEFT child corresponds to
1335- the first operand and RIGHT child to the second if the operation is
1336- binary. */
1337- struct _slp_tree *left;
1338- struct _slp_tree *right;
1339+ /* Nodes that contain def-stmts of this node statements operands. */
1340+ VEC (slp_void_p, heap) *children;
1341 /* A group of scalar stmts to be vectorized together. */
1342 VEC (gimple, heap) *stmts;
1343 /* Vectorized stmt/s. */
1344@@ -146,14 +146,32 @@
1345 #define SLP_INSTANCE_LOADS(S) (S)->loads
1346 #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load
1347
1348-#define SLP_TREE_LEFT(S) (S)->left
1349-#define SLP_TREE_RIGHT(S) (S)->right
1350+#define SLP_TREE_CHILDREN(S) (S)->children
1351 #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
1352 #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
1353 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
1354 #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
1355 #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
1356
1357+/* This structure is used in creation of an SLP tree. Each instance
1358+ corresponds to the same operand in a group of scalar stmts in an SLP
1359+ node. */
1360+typedef struct _slp_oprnd_info
1361+{
1362+ /* Def-stmts for the operands. */
1363+ VEC (gimple, heap) *def_stmts;
1364+ /* Information about the first statement, its vector def-type, type, the
1365+ operand itself in case it's constant, and an indication if it's a pattern
1366+ stmt. */
1367+ enum vect_def_type first_dt;
1368+ tree first_def_type;
1369+ tree first_const_oprnd;
1370+ bool first_pattern;
1371+} *slp_oprnd_info;
1372+
1373+DEF_VEC_P(slp_oprnd_info);
1374+DEF_VEC_ALLOC_P(slp_oprnd_info, heap);
1375+
1376
1377 typedef struct _vect_peel_info
1378 {
1379@@ -819,6 +837,8 @@
1380 unsigned int *, unsigned int *);
1381 extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
1382 extern bool vect_supportable_shift (enum tree_code, tree);
1383+extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **,
1384+ VEC (tree, heap) **, slp_tree, int);
1385
1386 /* In tree-vect-data-refs.c. */
1387 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
1388@@ -885,8 +905,9 @@
1389 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
1390 extern void vect_make_slp_decision (loop_vec_info);
1391 extern void vect_detect_hybrid_slp (loop_vec_info);
1392-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
1393- VEC (tree,heap) **, int);
1394+extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree,
1395+ VEC (slp_void_p, heap) **, int);
1396+
1397 extern LOC find_bb_location (basic_block);
1398 extern bb_vec_info vect_slp_analyze_bb (basic_block);
1399 extern void vect_slp_transform_bb (basic_block);
1400
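The net effect of the rework above: an SLP node now carries an arbitrary-length children vector instead of the fixed LEFT/RIGHT pair, so statements with more than two operands (and calls) can be packed. A hypothetical kernel illustrating the point, not taken from the patch or its testsuite (compiled with, say, -O3 -std=c99 -ffp-contract=fast on an FMA-capable target):

    /* Under contraction this statement becomes a single ternary
       (fused multiply-add) GIMPLE statement; its SLP node needs three
       children, one per operand, which the old binary LEFT/RIGHT
       representation could not express.  */
    void
    fma4 (float *restrict a, const float *restrict b,
          const float *restrict c, const float *restrict d)
    {
      int i;
      for (i = 0; i < 4; i++)
        a[i] = b[i] * c[i] + d[i];
    }
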
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
new file mode 100644
index 000000000..d72446919
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch
@@ -0,0 +1,515 @@
12011-11-21 Michael Hope <michael.hope@linaro.org>
2
3 Backport from mainline r180131:
4
5 2011-10-18 Julian Brown <julian@codesourcery.com>
6
7 gcc/
8 * config/arm/arm.c (arm_block_move_unaligned_straight)
9 (arm_adjust_block_mem, arm_block_move_unaligned_loop)
10 (arm_movmemqi_unaligned): New.
11 (arm_gen_movmemqi): Support unaligned block copies.
12
13 gcc/testsuite/
14 * lib/target-supports.exp (check_effective_target_arm_unaligned): New.
15 * gcc.target/arm/unaligned-memcpy-1.c: New.
16 * gcc.target/arm/unaligned-memcpy-2.c: New.
17 * gcc.target/arm/unaligned-memcpy-3.c: New.
18 * gcc.target/arm/unaligned-memcpy-4.c: New.
19
20 2011-09-15 James Greenhalgh <james.greenhalgh@arm.com>
21
22 gcc/
23 * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.
24
25=== modified file 'gcc/config/arm/arm.c'
26--- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000
27+++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000
28@@ -10803,6 +10803,335 @@
29 return true;
30 }
31
32+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
33+ unaligned copies on processors which support unaligned semantics for those
34+ instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
35+ (using more registers) by doing e.g. load/load/store/store for a factor of 2.
36+ An interleave factor of 1 (the minimum) will perform no interleaving.
37+ Load/store multiple are used for aligned addresses where possible. */
38+
39+static void
40+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
41+ HOST_WIDE_INT length,
42+ unsigned int interleave_factor)
43+{
44+ rtx *regs = XALLOCAVEC (rtx, interleave_factor);
45+ int *regnos = XALLOCAVEC (int, interleave_factor);
46+ HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
47+ HOST_WIDE_INT i, j;
48+ HOST_WIDE_INT remaining = length, words;
49+ rtx halfword_tmp = NULL, byte_tmp = NULL;
50+ rtx dst, src;
51+ bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
52+ bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
53+ HOST_WIDE_INT srcoffset, dstoffset;
54+ HOST_WIDE_INT src_autoinc, dst_autoinc;
55+ rtx mem, addr;
56+
57+ gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
58+
59+ /* Use hard registers if we have aligned source or destination so we can use
60+ load/store multiple with contiguous registers. */
61+ if (dst_aligned || src_aligned)
62+ for (i = 0; i < interleave_factor; i++)
63+ regs[i] = gen_rtx_REG (SImode, i);
64+ else
65+ for (i = 0; i < interleave_factor; i++)
66+ regs[i] = gen_reg_rtx (SImode);
67+
68+ dst = copy_addr_to_reg (XEXP (dstbase, 0));
69+ src = copy_addr_to_reg (XEXP (srcbase, 0));
70+
71+ srcoffset = dstoffset = 0;
72+
73+ /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
74+ For copying the last bytes we want to subtract this offset again. */
75+ src_autoinc = dst_autoinc = 0;
76+
77+ for (i = 0; i < interleave_factor; i++)
78+ regnos[i] = i;
79+
80+ /* Copy BLOCK_SIZE_BYTES chunks. */
81+
82+ for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
83+ {
84+ /* Load words. */
85+ if (src_aligned && interleave_factor > 1)
86+ {
87+ emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
88+ TRUE, srcbase, &srcoffset));
89+ src_autoinc += UNITS_PER_WORD * interleave_factor;
90+ }
91+ else
92+ {
93+ for (j = 0; j < interleave_factor; j++)
94+ {
95+ addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
96+ - src_autoinc);
97+ mem = adjust_automodify_address (srcbase, SImode, addr,
98+ srcoffset + j * UNITS_PER_WORD);
99+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
100+ }
101+ srcoffset += block_size_bytes;
102+ }
103+
104+ /* Store words. */
105+ if (dst_aligned && interleave_factor > 1)
106+ {
107+ emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
108+ TRUE, dstbase, &dstoffset));
109+ dst_autoinc += UNITS_PER_WORD * interleave_factor;
110+ }
111+ else
112+ {
113+ for (j = 0; j < interleave_factor; j++)
114+ {
115+ addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
116+ - dst_autoinc);
117+ mem = adjust_automodify_address (dstbase, SImode, addr,
118+ dstoffset + j * UNITS_PER_WORD);
119+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
120+ }
121+ dstoffset += block_size_bytes;
122+ }
123+
124+ remaining -= block_size_bytes;
125+ }
126+
127+ /* Copy any whole words left (note these aren't interleaved with any
128+ subsequent halfword/byte load/stores in the interests of simplicity). */
129+
130+ words = remaining / UNITS_PER_WORD;
131+
132+ gcc_assert (words < interleave_factor);
133+
134+ if (src_aligned && words > 1)
135+ {
136+ emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
137+ &srcoffset));
138+ src_autoinc += UNITS_PER_WORD * words;
139+ }
140+ else
141+ {
142+ for (j = 0; j < words; j++)
143+ {
144+ addr = plus_constant (src,
145+ srcoffset + j * UNITS_PER_WORD - src_autoinc);
146+ mem = adjust_automodify_address (srcbase, SImode, addr,
147+ srcoffset + j * UNITS_PER_WORD);
148+ emit_insn (gen_unaligned_loadsi (regs[j], mem));
149+ }
150+ srcoffset += words * UNITS_PER_WORD;
151+ }
152+
153+ if (dst_aligned && words > 1)
154+ {
155+ emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
156+ &dstoffset));
157+ dst_autoinc += words * UNITS_PER_WORD;
158+ }
159+ else
160+ {
161+ for (j = 0; j < words; j++)
162+ {
163+ addr = plus_constant (dst,
164+ dstoffset + j * UNITS_PER_WORD - dst_autoinc);
165+ mem = adjust_automodify_address (dstbase, SImode, addr,
166+ dstoffset + j * UNITS_PER_WORD);
167+ emit_insn (gen_unaligned_storesi (mem, regs[j]));
168+ }
169+ dstoffset += words * UNITS_PER_WORD;
170+ }
171+
172+ remaining -= words * UNITS_PER_WORD;
173+
174+ gcc_assert (remaining < 4);
175+
176+ /* Copy a halfword if necessary. */
177+
178+ if (remaining >= 2)
179+ {
180+ halfword_tmp = gen_reg_rtx (SImode);
181+
182+ addr = plus_constant (src, srcoffset - src_autoinc);
183+ mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
184+ emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
185+
186+ /* Either write out immediately, or delay until we've loaded the last
187+ byte, depending on interleave factor. */
188+ if (interleave_factor == 1)
189+ {
190+ addr = plus_constant (dst, dstoffset - dst_autoinc);
191+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
192+ emit_insn (gen_unaligned_storehi (mem,
193+ gen_lowpart (HImode, halfword_tmp)));
194+ halfword_tmp = NULL;
195+ dstoffset += 2;
196+ }
197+
198+ remaining -= 2;
199+ srcoffset += 2;
200+ }
201+
202+ gcc_assert (remaining < 2);
203+
204+ /* Copy last byte. */
205+
206+ if ((remaining & 1) != 0)
207+ {
208+ byte_tmp = gen_reg_rtx (SImode);
209+
210+ addr = plus_constant (src, srcoffset - src_autoinc);
211+ mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
212+ emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
213+
214+ if (interleave_factor == 1)
215+ {
216+ addr = plus_constant (dst, dstoffset - dst_autoinc);
217+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
218+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
219+ byte_tmp = NULL;
220+ dstoffset++;
221+ }
222+
223+ remaining--;
224+ srcoffset++;
225+ }
226+
227+ /* Store last halfword if we haven't done so already. */
228+
229+ if (halfword_tmp)
230+ {
231+ addr = plus_constant (dst, dstoffset - dst_autoinc);
232+ mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
233+ emit_insn (gen_unaligned_storehi (mem,
234+ gen_lowpart (HImode, halfword_tmp)));
235+ dstoffset += 2;
236+ }
237+
238+ /* Likewise for last byte. */
239+
240+ if (byte_tmp)
241+ {
242+ addr = plus_constant (dst, dstoffset - dst_autoinc);
243+ mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
244+ emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
245+ dstoffset++;
246+ }
247+
248+ gcc_assert (remaining == 0 && srcoffset == dstoffset);
249+}
250+
251+/* From mips_adjust_block_mem:
252+
253+ Helper function for doing a loop-based block operation on memory
254+ reference MEM. Each iteration of the loop will operate on LENGTH
255+ bytes of MEM.
256+
257+ Create a new base register for use within the loop and point it to
258+ the start of MEM. Create a new memory reference that uses this
259+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
260+
261+static void
262+arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
263+ rtx *loop_mem)
264+{
265+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
266+
267+ /* Although the new mem does not refer to a known location,
268+ it does keep up to LENGTH bytes of alignment. */
269+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
270+ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
271+}
272+
273+/* From mips_block_move_loop:
274+
275+ Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
276+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
277+ the memory regions do not overlap. */
278+
279+static void
280+arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
281+ unsigned int interleave_factor,
282+ HOST_WIDE_INT bytes_per_iter)
283+{
284+ rtx label, src_reg, dest_reg, final_src, test;
285+ HOST_WIDE_INT leftover;
286+
287+ leftover = length % bytes_per_iter;
288+ length -= leftover;
289+
290+ /* Create registers and memory references for use within the loop. */
291+ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
292+ arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
293+
294+ /* Calculate the value that SRC_REG should have after the last iteration of
295+ the loop. */
296+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
297+ 0, 0, OPTAB_WIDEN);
298+
299+ /* Emit the start of the loop. */
300+ label = gen_label_rtx ();
301+ emit_label (label);
302+
303+ /* Emit the loop body. */
304+ arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
305+ interleave_factor);
306+
307+ /* Move on to the next block. */
308+ emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
309+ emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
310+
311+ /* Emit the loop condition. */
312+ test = gen_rtx_NE (VOIDmode, src_reg, final_src);
313+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
314+
315+ /* Mop up any left-over bytes. */
316+ if (leftover)
317+ arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
318+}
319+
320+/* Emit a block move when either the source or destination is unaligned (not
321+ aligned to a four-byte boundary). This may need further tuning depending on
322+ core type, optimize_size setting, etc. */
323+
324+static int
325+arm_movmemqi_unaligned (rtx *operands)
326+{
327+ HOST_WIDE_INT length = INTVAL (operands[2]);
328+
329+ if (optimize_size)
330+ {
331+ bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
332+ bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
333+ /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
334+ size of code if optimizing for size. We'll use ldm/stm if src_aligned
335+ or dst_aligned though: allow more interleaving in those cases since the
336+ resulting code can be smaller. */
337+ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
338+ HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
339+
340+ if (length > 12)
341+ arm_block_move_unaligned_loop (operands[0], operands[1], length,
342+ interleave_factor, bytes_per_iter);
343+ else
344+ arm_block_move_unaligned_straight (operands[0], operands[1], length,
345+ interleave_factor);
346+ }
347+ else
348+ {
349+ /* Note that the loop created by arm_block_move_unaligned_loop may be
350+ subject to loop unrolling, which makes tuning this condition a little
351+ redundant. */
352+ if (length > 32)
353+ arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
354+ else
355+ arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
356+ }
357+
358+ return 1;
359+}
360+
361 int
362 arm_gen_movmemqi (rtx *operands)
363 {
364@@ -10815,8 +11144,13 @@
365
366 if (GET_CODE (operands[2]) != CONST_INT
367 || GET_CODE (operands[3]) != CONST_INT
368- || INTVAL (operands[2]) > 64
369- || INTVAL (operands[3]) & 3)
370+ || INTVAL (operands[2]) > 64)
371+ return 0;
372+
373+ if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
374+ return arm_movmemqi_unaligned (operands);
375+
376+ if (INTVAL (operands[3]) & 3)
377 return 0;
378
379 dstbase = operands[0];
380
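With this change the gate reads in three steps: non-constant or over-64-byte
copies are rejected as before; a misaligned copy on a target with
unaligned_access enabled goes through the new arm_movmemqi_unaligned
expansion; and a misaligned copy without unaligned_access still returns 0,
which makes the movmemqi pattern FAIL and the middle end fall back to a
library memcpy call.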
381=== modified file 'gcc/config/arm/arm.h'
382--- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000
383+++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000
384@@ -47,6 +47,8 @@
385 { \
386 if (TARGET_DSP_MULTIPLY) \
387 builtin_define ("__ARM_FEATURE_DSP"); \
388+ if (unaligned_access) \
389+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \
390 /* Define __arm__ even when in thumb mode, for \
391 consistency with armcc. */ \
392 builtin_define ("__arm__"); \
393
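User code can key off the new macro directly. A small example (not part of
the patch; the function name is illustrative):

#include <stdint.h>
#include <string.h>

uint32_t
load_u32 (const unsigned char *p)
{
#ifdef __ARM_FEATURE_UNALIGNED
  /* Hardware handles misaligned ldr: let memcpy expand inline.  */
  uint32_t v;
  memcpy (&v, p, sizeof v);
  return v;
#else
  /* Assemble the value byte by byte (little-endian assumed).  */
  return (uint32_t) p[0] | ((uint32_t) p[1] << 8)
         | ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
#endif
}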
394=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c'
395--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000
396+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000
397@@ -0,0 +1,19 @@
398+/* { dg-do compile } */
399+/* { dg-require-effective-target arm_unaligned } */
400+/* { dg-options "-O2" } */
401+
402+#include <string.h>
403+
404+void unknown_alignment (char *dest, char *src)
405+{
406+ memcpy (dest, src, 15);
407+}
408+
409+/* We should see three unaligned word loads and store pairs, one unaligned
410+ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */
411+
412+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
413+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
414+/* { dg-final { scan-assembler-times "strh" 1 } } */
415+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
416+/* { dg-final { scan-assembler-times "strb" 1 } } */
417
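The expected counts follow from the arithmetic of a 15-byte copy: three
unaligned word load/store pairs move 12 bytes (six "@ unaligned"
annotations), the ldrh/strh pair moves two more (eight annotations in
total), and the final byte goes through ldrb/strb, which are always
alignment-safe and carry no annotation.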
418=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c'
419--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000
420+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000
421@@ -0,0 +1,21 @@
422+/* { dg-do compile } */
423+/* { dg-require-effective-target arm_unaligned } */
424+/* { dg-options "-O2" } */
425+
426+#include <string.h>
427+
428+char dest[16];
429+
430+void aligned_dest (char *src)
431+{
432+ memcpy (dest, src, 15);
433+}
434+
435+/* Expect a multi-word store for the main part of the copy, but subword
436+ loads/stores for the remainder. */
437+
438+/* { dg-final { scan-assembler-times "stmia" 1 } } */
439+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
440+/* { dg-final { scan-assembler-times "strh" 1 } } */
441+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
442+/* { dg-final { scan-assembler-times "strb" 1 } } */
443
444=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c'
445--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000
446+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000
447@@ -0,0 +1,21 @@
448+/* { dg-do compile } */
449+/* { dg-require-effective-target arm_unaligned } */
450+/* { dg-options "-O2" } */
451+
452+#include <string.h>
453+
454+char src[16];
455+
456+void aligned_src (char *dest)
457+{
458+ memcpy (dest, src, 15);
459+}
460+
461+/* Expect a multi-word load for the main part of the copy, but subword
462+ loads/stores for the remainder. */
463+
464+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
465+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
466+/* { dg-final { scan-assembler-times "strh" 1 } } */
467+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
468+/* { dg-final { scan-assembler-times "strb" 1 } } */
469
470=== added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c'
471--- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000
472+++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000
473@@ -0,0 +1,18 @@
474+/* { dg-do compile } */
475+/* { dg-require-effective-target arm_unaligned } */
476+/* { dg-options "-O2" } */
477+
478+#include <string.h>
479+
480+char src[16];
481+char dest[16];
482+
483+void aligned_both (void)
484+{
485+ memcpy (dest, src, 15);
486+}
487+
488+/* We know both src and dest to be aligned: expect multiword loads/stores. */
489+
490+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
491+/* { dg-final { scan-assembler-times "stmia" 1 } } */
492
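Together the four tests cover the alignment matrix for a 15-byte copy:
unknown/unknown, aligned destination, aligned source, and both aligned,
checking that multi-word ldmia/stmia is used exactly on the side whose
alignment is known at compile time.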
493=== modified file 'gcc/testsuite/lib/target-supports.exp'
494--- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000
495+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
496@@ -1894,6 +1894,18 @@
497 }]
498 }
499
500+# Return 1 if this is an ARM target that supports unaligned word/halfword
501+# load/store instructions.
502+
503+proc check_effective_target_arm_unaligned { } {
504+ return [check_no_compiler_messages arm_unaligned assembly {
505+ #ifndef __ARM_FEATURE_UNALIGNED
506+ #error no unaligned support
507+ #endif
508+ int i;
509+ }]
510+}
511+
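Tests opt in via { dg-require-effective-target arm_unaligned }, as the four
new tests above do.  The probe is compile-only: the #error trips whenever
__ARM_FEATURE_UNALIGNED is undefined, and the "int i;" presumably just
keeps the translation unit non-empty.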
512 # Add the options needed for NEON. We need either -mfloat-abi=softfp
513 # or -mfloat-abi=hard, but if one is already specified by the
514 # multilib, use it. Similarly, if a -mfpu option already enables
515
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
index 73fe5c8b2..fcdccf5d2 100644
--- a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
+++ b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
@@ -53,4 +53,26 @@ file://linaro/gcc-4.6-linaro-r106805.patch \
 file://linaro/gcc-4.6-linaro-r106806.patch \
 file://linaro/gcc-4.6-linaro-r106807.patch \
 file://linaro/gcc-4.6-linaro-r106811.patch \
+file://linaro/gcc-4.6-linaro-r106814.patch \
+file://linaro/gcc-4.6-linaro-r106815.patch \
+file://linaro/gcc-4.6-linaro-r106816.patch \
+file://linaro/gcc-4.6-linaro-r106817.patch \
+file://linaro/gcc-4.6-linaro-r106818.patch \
+file://linaro/gcc-4.6-linaro-r106819.patch \
+file://linaro/gcc-4.6-linaro-r106820.patch \
+file://linaro/gcc-4.6-linaro-r106821.patch \
+file://linaro/gcc-4.6-linaro-r106825.patch \
+file://linaro/gcc-4.6-linaro-r106826.patch \
+file://linaro/gcc-4.6-linaro-r106827.patch \
+file://linaro/gcc-4.6-linaro-r106828.patch \
+file://linaro/gcc-4.6-linaro-r106829.patch \
+file://linaro/gcc-4.6-linaro-r106830.patch \
+file://linaro/gcc-4.6-linaro-r106831.patch \
+file://linaro/gcc-4.6-linaro-r106832.patch \
+file://linaro/gcc-4.6-linaro-r106833.patch \
+file://linaro/gcc-4.6-linaro-r106834.patch \
+file://linaro/gcc-4.6-linaro-r106836.patch \
+file://linaro/gcc-4.6-linaro-r106839.patch \
+file://linaro/gcc-4.6-linaro-r106840.patch \
+file://linaro/gcc-4.6-linaro-r106841.patch \
 "
diff --git a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
index 0faf45e93..c12913d92 100644
--- a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
+++ b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
@@ -1,4 +1,4 @@
 # this will prepend this layer to FILESPATH
 FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
-PRINC = "2"
+PRINC = "3"
 ARM_INSTRUCTION_SET = "arm"