diff options
author | Khem Raj <raj.khem@gmail.com> | 2011-11-22 07:29:59 -0800 |
---|---|---|
committer | Khem Raj <raj.khem@gmail.com> | 2011-12-03 10:59:33 -0800 |
commit | 2378ee8f21741abd23e434260a88c62cb0e151f1 (patch) | |
tree | 036eef0d3a08a5cc79de7c4233bda78031b7560e /meta-oe | |
parent | a2fbb83f951a522f3a1eb2b627847f549a0c245a (diff) | |
download | meta-openembedded-2378ee8f21741abd23e434260a88c62cb0e151f1.tar.gz |
gcc-4.6: Bring in latest linaro patches
I have tested it on angstrom by successfully building console-image
and systemd-gnome-image for all supported qemu targets.
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Diffstat (limited to 'meta-oe')
24 files changed, 10087 insertions, 1 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch new file mode 100644 index 000000000..91b2191cb --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106814.patch | |||
@@ -0,0 +1,80 @@ | |||
1 | 2011-09-22 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from trunk -r178804: | ||
5 | modulo-sched.c (remove_node_from_ps): Return void | ||
6 | instead of bool. | ||
7 | (optimize_sc): Adjust call to remove_node_from_ps. | ||
8 | (sms_schedule): Add print info. | ||
9 | |||
10 | === modified file 'gcc/modulo-sched.c' | ||
11 | --- old/gcc/modulo-sched.c 2011-08-09 04:51:48 +0000 | ||
12 | +++ new/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 | ||
13 | @@ -211,7 +211,7 @@ | ||
14 | static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, | ||
15 | int, int, sbitmap, int *, sbitmap, | ||
16 | sbitmap); | ||
17 | -static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); | ||
18 | +static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); | ||
19 | |||
20 | #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) | ||
21 | #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) | ||
22 | @@ -834,8 +834,7 @@ | ||
23 | if (next_ps_i->node->cuid == g->closing_branch->cuid) | ||
24 | break; | ||
25 | |||
26 | - gcc_assert (next_ps_i); | ||
27 | - gcc_assert (remove_node_from_ps (ps, next_ps_i)); | ||
28 | + remove_node_from_ps (ps, next_ps_i); | ||
29 | success = | ||
30 | try_scheduling_node_in_cycle (ps, g->closing_branch, | ||
31 | g->closing_branch->cuid, c, | ||
32 | @@ -1485,8 +1484,8 @@ | ||
33 | if (dump_file) | ||
34 | { | ||
35 | fprintf (dump_file, | ||
36 | - "SMS succeeded %d %d (with ii, sc)\n", ps->ii, | ||
37 | - stage_count); | ||
38 | + "%s:%d SMS succeeded %d %d (with ii, sc)\n", | ||
39 | + insn_file (tail), insn_line (tail), ps->ii, stage_count); | ||
40 | print_partial_schedule (ps, dump_file); | ||
41 | } | ||
42 | |||
43 | @@ -2810,22 +2809,18 @@ | ||
44 | } | ||
45 | |||
46 | |||
47 | -/* Removes the given PS_INSN from the partial schedule. Returns false if the | ||
48 | - node is not found in the partial schedule, else returns true. */ | ||
49 | -static bool | ||
50 | +/* Removes the given PS_INSN from the partial schedule. */ | ||
51 | +static void | ||
52 | remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) | ||
53 | { | ||
54 | int row; | ||
55 | |||
56 | - if (!ps || !ps_i) | ||
57 | - return false; | ||
58 | - | ||
59 | + gcc_assert (ps && ps_i); | ||
60 | + | ||
61 | row = SMODULO (ps_i->cycle, ps->ii); | ||
62 | if (! ps_i->prev_in_row) | ||
63 | { | ||
64 | - if (ps_i != ps->rows[row]) | ||
65 | - return false; | ||
66 | - | ||
67 | + gcc_assert (ps_i == ps->rows[row]); | ||
68 | ps->rows[row] = ps_i->next_in_row; | ||
69 | if (ps->rows[row]) | ||
70 | ps->rows[row]->prev_in_row = NULL; | ||
71 | @@ -2839,7 +2834,7 @@ | ||
72 | |||
73 | ps->rows_length[row] -= 1; | ||
74 | free (ps_i); | ||
75 | - return true; | ||
76 | + return; | ||
77 | } | ||
78 | |||
79 | /* Unlike what literature describes for modulo scheduling (which focuses | ||
80 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch new file mode 100644 index 000000000..16779bbf1 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106815.patch | |||
@@ -0,0 +1,528 @@ | |||
1 | 2011-09-25 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | gcc/testsuite/ | ||
4 | * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): | ||
5 | Replace check_effective_target_arm_neon with | ||
6 | check_effective_target_arm_neon_ok. | ||
7 | |||
8 | Backport from mainline: | ||
9 | |||
10 | 2011-09-06 Ira Rosen <ira.rosen@linaro.org> | ||
11 | |||
12 | gcc/ | ||
13 | * config/arm/arm.c (arm_preferred_simd_mode): Check | ||
14 | TARGET_NEON_VECTORIZE_DOUBLE instead of | ||
15 | TARGET_NEON_VECTORIZE_QUAD. | ||
16 | (arm_autovectorize_vector_sizes): Likewise. | ||
17 | * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse | ||
18 | mask of mvectorize-with-neon-double. Add RejectNegative. | ||
19 | (mvectorize-with-neon-double): New. | ||
20 | |||
21 | gcc/testsuite/ | ||
22 | * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): | ||
23 | New procedure. | ||
24 | (add_options_for_quad_vectors): Replace with ... | ||
25 | (add_options_for_double_vectors): ... this. | ||
26 | * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that | ||
27 | support multiple vector sizes since the vectorizer attempts to | ||
28 | vectorize with both vector sizes. | ||
29 | * gcc.dg/vect/no-vfa-vect-79.c, | ||
30 | gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c, | ||
31 | gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c, | ||
32 | gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c, | ||
33 | gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c, | ||
34 | gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c, | ||
35 | gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise. | ||
36 | * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable. | ||
37 | * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c, | ||
38 | gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c, | ||
39 | gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c, | ||
40 | gcc.dg/vect/vect-40.c: Likewise. | ||
41 | * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as | ||
42 | redundant. | ||
43 | * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c, | ||
44 | gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c, | ||
45 | gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c, | ||
46 | gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c: | ||
47 | Likewise. | ||
48 | * gcc.dg/vect/vect-peel-4.c: Make ia global. | ||
49 | |||
50 | === modified file 'gcc/config/arm/arm.c' | ||
51 | --- old/gcc/config/arm/arm.c 2011-09-15 09:45:31 +0000 | ||
52 | +++ new/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 | ||
53 | @@ -22974,7 +22974,7 @@ | ||
54 | return false; | ||
55 | } | ||
56 | |||
57 | -/* Use the option -mvectorize-with-neon-quad to override the use of doubleword | ||
58 | +/* Use the option -mvectorize-with-neon-double to override the use of quadword | ||
59 | registers when autovectorizing for Neon, at least until multiple vector | ||
60 | widths are supported properly by the middle-end. */ | ||
61 | |||
62 | @@ -22985,15 +22985,15 @@ | ||
63 | switch (mode) | ||
64 | { | ||
65 | case SFmode: | ||
66 | - return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode; | ||
67 | + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; | ||
68 | case SImode: | ||
69 | - return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode; | ||
70 | + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; | ||
71 | case HImode: | ||
72 | - return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; | ||
73 | + return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; | ||
74 | case QImode: | ||
75 | - return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; | ||
76 | + return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; | ||
77 | case DImode: | ||
78 | - if (TARGET_NEON_VECTORIZE_QUAD) | ||
79 | + if (!TARGET_NEON_VECTORIZE_DOUBLE) | ||
80 | return V2DImode; | ||
81 | break; | ||
82 | |||
83 | @@ -24226,7 +24226,7 @@ | ||
84 | static unsigned int | ||
85 | arm_autovectorize_vector_sizes (void) | ||
86 | { | ||
87 | - return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0; | ||
88 | + return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); | ||
89 | } | ||
90 | |||
91 | static bool | ||
92 | |||
93 | === modified file 'gcc/config/arm/arm.opt' | ||
94 | --- old/gcc/config/arm/arm.opt 2009-06-18 11:24:10 +0000 | ||
95 | +++ new/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 | ||
96 | @@ -158,9 +158,13 @@ | ||
97 | Assume big endian bytes, little endian words | ||
98 | |||
99 | mvectorize-with-neon-quad | ||
100 | -Target Report Mask(NEON_VECTORIZE_QUAD) | ||
101 | +Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) | ||
102 | Use Neon quad-word (rather than double-word) registers for vectorization | ||
103 | |||
104 | +mvectorize-with-neon-double | ||
105 | +Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) | ||
106 | +Use Neon double-word (rather than quad-word) registers for vectorization | ||
107 | + | ||
108 | mword-relocations | ||
109 | Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) | ||
110 | Only generate absolute relocations on word sized values. | ||
111 | |||
112 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c' | ||
113 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000 | ||
114 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-09-19 07:44:24 +0000 | ||
115 | @@ -1,5 +1,4 @@ | ||
116 | /* { dg-require-effective-target vect_int } */ | ||
117 | -/* { dg-add-options quad_vectors } */ | ||
118 | |||
119 | #include <stdarg.h> | ||
120 | #include "tree-vect.h" | ||
121 | |||
122 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c' | ||
123 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2007-09-04 12:05:19 +0000 | ||
124 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c 2011-09-19 07:44:24 +0000 | ||
125 | @@ -45,6 +45,7 @@ | ||
126 | } | ||
127 | |||
128 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ | ||
129 | -/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */ | ||
130 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
131 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ | ||
132 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
133 | |||
134 | |||
135 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c' | ||
136 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2007-09-12 07:48:44 +0000 | ||
137 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c 2011-09-19 07:44:24 +0000 | ||
138 | @@ -53,6 +53,7 @@ | ||
139 | } | ||
140 | |||
141 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ | ||
142 | -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ | ||
143 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
144 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ | ||
145 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
146 | |||
147 | |||
148 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c' | ||
149 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2007-09-12 07:48:44 +0000 | ||
150 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c 2011-09-19 07:44:24 +0000 | ||
151 | @@ -53,6 +53,7 @@ | ||
152 | } | ||
153 | |||
154 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ | ||
155 | -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ | ||
156 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
157 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ | ||
158 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
159 | |||
160 | |||
161 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c' | ||
162 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2009-05-08 12:39:01 +0000 | ||
163 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c 2011-09-19 07:44:24 +0000 | ||
164 | @@ -58,5 +58,6 @@ | ||
165 | If/when the aliasing problems are resolved, unalignment may | ||
166 | prevent vectorization on some targets. */ | ||
167 | /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */ | ||
168 | -/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */ | ||
169 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */ | ||
170 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */ | ||
171 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
172 | |||
173 | === modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c' | ||
174 | --- old/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2009-05-08 12:39:01 +0000 | ||
175 | +++ new/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c 2011-09-19 07:44:24 +0000 | ||
176 | @@ -46,5 +46,6 @@ | ||
177 | If/when the aliasing problems are resolved, unalignment may | ||
178 | prevent vectorization on some targets. */ | ||
179 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ | ||
180 | -/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */ | ||
181 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
182 | +/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ | ||
183 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
184 | |||
185 | === modified file 'gcc/testsuite/gcc.dg/vect/slp-25.c' | ||
186 | --- old/gcc/testsuite/gcc.dg/vect/slp-25.c 2010-10-04 14:59:30 +0000 | ||
187 | +++ new/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000 | ||
188 | @@ -1,5 +1,4 @@ | ||
189 | /* { dg-require-effective-target vect_int } */ | ||
190 | -/* { dg-add-options quad_vectors } */ | ||
191 | |||
192 | #include <stdarg.h> | ||
193 | #include "tree-vect.h" | ||
194 | |||
195 | === modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c' | ||
196 | --- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000 | ||
197 | +++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-09-19 07:44:24 +0000 | ||
198 | @@ -1,5 +1,4 @@ | ||
199 | /* { dg-require-effective-target vect_int } */ | ||
200 | -/* { dg-add-options quad_vectors } */ | ||
201 | |||
202 | #include <stdarg.h> | ||
203 | #include "tree-vect.h" | ||
204 | |||
205 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-104.c' | ||
206 | --- old/gcc/testsuite/gcc.dg/vect/vect-104.c 2007-09-12 07:48:44 +0000 | ||
207 | +++ new/gcc/testsuite/gcc.dg/vect/vect-104.c 2011-09-19 07:44:24 +0000 | ||
208 | @@ -64,6 +64,7 @@ | ||
209 | } | ||
210 | |||
211 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ | ||
212 | -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */ | ||
213 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
214 | +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ | ||
215 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
216 | |||
217 | |||
218 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-109.c' | ||
219 | --- old/gcc/testsuite/gcc.dg/vect/vect-109.c 2010-10-04 14:59:30 +0000 | ||
220 | +++ new/gcc/testsuite/gcc.dg/vect/vect-109.c 2011-09-19 07:44:24 +0000 | ||
221 | @@ -1,5 +1,4 @@ | ||
222 | /* { dg-require-effective-target vect_int } */ | ||
223 | -/* { dg-add-options quad_vectors } */ | ||
224 | |||
225 | #include <stdarg.h> | ||
226 | #include "tree-vect.h" | ||
227 | |||
228 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-40.c' | ||
229 | --- old/gcc/testsuite/gcc.dg/vect/vect-40.c 2009-05-25 14:18:21 +0000 | ||
230 | +++ new/gcc/testsuite/gcc.dg/vect/vect-40.c 2011-09-19 07:44:24 +0000 | ||
231 | @@ -1,4 +1,5 @@ | ||
232 | /* { dg-require-effective-target vect_float } */ | ||
233 | +/* { dg-add-options double_vectors } */ | ||
234 | |||
235 | #include <stdarg.h> | ||
236 | #include "tree-vect.h" | ||
237 | |||
238 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-42.c' | ||
239 | --- old/gcc/testsuite/gcc.dg/vect/vect-42.c 2010-10-04 14:59:30 +0000 | ||
240 | +++ new/gcc/testsuite/gcc.dg/vect/vect-42.c 2011-09-19 07:44:24 +0000 | ||
241 | @@ -1,4 +1,5 @@ | ||
242 | /* { dg-require-effective-target vect_float } */ | ||
243 | +/* { dg-add-options double_vectors } */ | ||
244 | |||
245 | #include <stdarg.h> | ||
246 | #include "tree-vect.h" | ||
247 | |||
248 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-46.c' | ||
249 | --- old/gcc/testsuite/gcc.dg/vect/vect-46.c 2009-05-25 14:18:21 +0000 | ||
250 | +++ new/gcc/testsuite/gcc.dg/vect/vect-46.c 2011-09-19 07:44:24 +0000 | ||
251 | @@ -1,4 +1,5 @@ | ||
252 | /* { dg-require-effective-target vect_float } */ | ||
253 | +/* { dg-add-options double_vectors } */ | ||
254 | |||
255 | #include <stdarg.h> | ||
256 | #include "tree-vect.h" | ||
257 | |||
258 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-48.c' | ||
259 | --- old/gcc/testsuite/gcc.dg/vect/vect-48.c 2009-11-04 10:22:22 +0000 | ||
260 | +++ new/gcc/testsuite/gcc.dg/vect/vect-48.c 2011-09-19 07:44:24 +0000 | ||
261 | @@ -1,4 +1,5 @@ | ||
262 | /* { dg-require-effective-target vect_float } */ | ||
263 | +/* { dg-add-options double_vectors } */ | ||
264 | |||
265 | #include <stdarg.h> | ||
266 | #include "tree-vect.h" | ||
267 | |||
268 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-52.c' | ||
269 | --- old/gcc/testsuite/gcc.dg/vect/vect-52.c 2009-11-04 10:22:22 +0000 | ||
270 | +++ new/gcc/testsuite/gcc.dg/vect/vect-52.c 2011-09-19 07:44:24 +0000 | ||
271 | @@ -1,4 +1,5 @@ | ||
272 | /* { dg-require-effective-target vect_float } */ | ||
273 | +/* { dg-add-options double_vectors } */ | ||
274 | |||
275 | #include <stdarg.h> | ||
276 | #include "tree-vect.h" | ||
277 | |||
278 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-54.c' | ||
279 | --- old/gcc/testsuite/gcc.dg/vect/vect-54.c 2009-10-27 11:46:07 +0000 | ||
280 | +++ new/gcc/testsuite/gcc.dg/vect/vect-54.c 2011-09-19 07:44:24 +0000 | ||
281 | @@ -1,4 +1,5 @@ | ||
282 | /* { dg-require-effective-target vect_float } */ | ||
283 | +/* { dg-add-options double_vectors } */ | ||
284 | |||
285 | #include <stdarg.h> | ||
286 | #include "tree-vect.h" | ||
287 | |||
288 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-96.c' | ||
289 | --- old/gcc/testsuite/gcc.dg/vect/vect-96.c 2010-10-04 14:59:30 +0000 | ||
290 | +++ new/gcc/testsuite/gcc.dg/vect/vect-96.c 2011-09-19 07:44:24 +0000 | ||
291 | @@ -1,4 +1,5 @@ | ||
292 | /* { dg-require-effective-target vect_int } */ | ||
293 | +/* { dg-add-options double_vectors } */ | ||
294 | |||
295 | #include <stdarg.h> | ||
296 | #include "tree-vect.h" | ||
297 | |||
298 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c' | ||
299 | --- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2010-10-04 14:59:30 +0000 | ||
300 | +++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 2011-09-19 07:44:24 +0000 | ||
301 | @@ -1,5 +1,4 @@ | ||
302 | /* { dg-require-effective-target vect_int } */ | ||
303 | -/* { dg-add-options quad_vectors } */ | ||
304 | |||
305 | #include <stdarg.h> | ||
306 | #include "tree-vect.h" | ||
307 | |||
308 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c' | ||
309 | --- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2009-11-04 10:22:22 +0000 | ||
310 | +++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c 2011-09-19 07:44:24 +0000 | ||
311 | @@ -1,4 +1,5 @@ | ||
312 | /* { dg-require-effective-target vect_int } */ | ||
313 | +/* { dg-add-options double_vectors } */ | ||
314 | |||
315 | #include <stdarg.h> | ||
316 | #include "tree-vect.h" | ||
317 | |||
318 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c' | ||
319 | --- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2010-10-04 14:59:30 +0000 | ||
320 | +++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 2011-09-19 07:44:24 +0000 | ||
321 | @@ -1,5 +1,4 @@ | ||
322 | /* { dg-require-effective-target vect_int } */ | ||
323 | -/* { dg-add-options quad_vectors } */ | ||
324 | |||
325 | #include <stdarg.h> | ||
326 | #include "tree-vect.h" | ||
327 | |||
328 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c' | ||
329 | --- old/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2009-11-10 18:01:22 +0000 | ||
330 | +++ new/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c 2011-09-19 07:44:24 +0000 | ||
331 | @@ -1,4 +1,5 @@ | ||
332 | /* { dg-require-effective-target vect_int } */ | ||
333 | +/* { dg-add-options double_vectors } */ | ||
334 | |||
335 | #include <stdarg.h> | ||
336 | #include "tree-vect.h" | ||
337 | |||
338 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1.c' | ||
339 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2009-05-08 12:39:01 +0000 | ||
340 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1.c 2011-09-19 07:44:24 +0000 | ||
341 | @@ -22,5 +22,6 @@ | ||
342 | } | ||
343 | |||
344 | /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ | ||
345 | -/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ | ||
346 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
347 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ | ||
348 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
349 | |||
350 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1a.c' | ||
351 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2009-06-16 06:21:12 +0000 | ||
352 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c 2011-09-19 07:44:24 +0000 | ||
353 | @@ -20,5 +20,6 @@ | ||
354 | } | ||
355 | |||
356 | /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ | ||
357 | -/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ | ||
358 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
359 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ | ||
360 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
361 | |||
362 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-1b.c' | ||
363 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2007-08-19 11:02:48 +0000 | ||
364 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c 2011-09-19 07:44:24 +0000 | ||
365 | @@ -22,5 +22,6 @@ | ||
366 | } | ||
367 | |||
368 | /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ | ||
369 | -/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */ | ||
370 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
371 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ | ||
372 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
373 | |||
374 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-2b.c' | ||
375 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2009-05-08 12:39:01 +0000 | ||
376 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c 2011-09-19 07:44:24 +0000 | ||
377 | @@ -37,5 +37,6 @@ | ||
378 | return 0; | ||
379 | } | ||
380 | |||
381 | -/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */ | ||
382 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
383 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ | ||
384 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
385 | |||
386 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3a.c' | ||
387 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2009-05-08 12:39:01 +0000 | ||
388 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c 2011-09-19 07:44:24 +0000 | ||
389 | @@ -49,5 +49,6 @@ | ||
390 | } | ||
391 | |||
392 | /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */ | ||
393 | -/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */ | ||
394 | +/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */ | ||
395 | +/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */ | ||
396 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
397 | |||
398 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-3b.c' | ||
399 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2009-05-08 12:39:01 +0000 | ||
400 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c 2011-09-19 07:44:24 +0000 | ||
401 | @@ -49,5 +49,6 @@ | ||
402 | } | ||
403 | |||
404 | /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ | ||
405 | -/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */ | ||
406 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */ | ||
407 | +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */ | ||
408 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
409 | |||
410 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c' | ||
411 | --- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000 | ||
412 | +++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-09-19 07:44:24 +0000 | ||
413 | @@ -1,5 +1,4 @@ | ||
414 | /* { dg-require-effective-target vect_float } */ | ||
415 | -/* { dg-add-options quad_vectors } */ | ||
416 | |||
417 | #include <stdarg.h> | ||
418 | #include <signal.h> | ||
419 | |||
420 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-1.c' | ||
421 | --- old/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-01-10 12:41:40 +0000 | ||
422 | +++ new/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000 | ||
423 | @@ -1,5 +1,4 @@ | ||
424 | /* { dg-require-effective-target vect_int } */ | ||
425 | -/* { dg-add-options quad_vectors } */ | ||
426 | |||
427 | #include <stdarg.h> | ||
428 | #include "tree-vect.h" | ||
429 | |||
430 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-2.c' | ||
431 | --- old/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-01-10 12:41:40 +0000 | ||
432 | +++ new/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000 | ||
433 | @@ -1,5 +1,4 @@ | ||
434 | /* { dg-require-effective-target vect_int } */ | ||
435 | -/* { dg-add-options quad_vectors } */ | ||
436 | |||
437 | #include <stdarg.h> | ||
438 | #include "tree-vect.h" | ||
439 | |||
440 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-peel-4.c' | ||
441 | --- old/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-01-10 12:41:40 +0000 | ||
442 | +++ new/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000 | ||
443 | @@ -6,12 +6,12 @@ | ||
444 | #define N 128 | ||
445 | |||
446 | int ib[N+7]; | ||
447 | +int ia[N+1]; | ||
448 | |||
449 | __attribute__ ((noinline)) | ||
450 | int main1 () | ||
451 | { | ||
452 | int i; | ||
453 | - int ia[N+1]; | ||
454 | |||
455 | /* Don't peel keeping one load and the store aligned. */ | ||
456 | for (i = 0; i <= N; i++) | ||
457 | |||
458 | === modified file 'gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c' | ||
459 | --- old/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2010-05-27 12:23:45 +0000 | ||
460 | +++ new/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c 2011-09-19 07:44:24 +0000 | ||
461 | @@ -58,7 +58,8 @@ | ||
462 | } | ||
463 | |||
464 | /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ | ||
465 | -/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ | ||
466 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
467 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */ | ||
468 | |||
469 | /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ | ||
470 | |||
471 | |||
472 | === modified file 'gcc/testsuite/gfortran.dg/vect/pr19049.f90' | ||
473 | --- old/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2005-07-25 11:05:07 +0000 | ||
474 | +++ new/gcc/testsuite/gfortran.dg/vect/pr19049.f90 2011-09-19 07:44:24 +0000 | ||
475 | @@ -19,6 +19,7 @@ | ||
476 | end | ||
477 | |||
478 | ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } | ||
479 | -! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } | ||
480 | +! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } | ||
481 | +! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } | ||
482 | ! { dg-final { cleanup-tree-dump "vect" } } | ||
483 | |||
484 | |||
485 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
486 | --- old/gcc/testsuite/lib/target-supports.exp 2011-08-13 08:32:32 +0000 | ||
487 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 | ||
488 | @@ -3265,6 +3265,24 @@ | ||
489 | }] | ||
490 | } | ||
491 | |||
492 | +# Return 1 if the target supports multiple vector sizes | ||
493 | + | ||
494 | +proc check_effective_target_vect_multiple_sizes { } { | ||
495 | + global et_vect_multiple_sizes | ||
496 | + | ||
497 | + if [info exists et_vect_multiple_sizes_saved] { | ||
498 | + verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 | ||
499 | + } else { | ||
500 | + set et_vect_multiple_sizes_saved 0 | ||
501 | + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { | ||
502 | + set et_vect_multiple_sizes_saved 1 | ||
503 | + } | ||
504 | + } | ||
505 | + | ||
506 | + verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 | ||
507 | + return $et_vect_multiple_sizes_saved | ||
508 | +} | ||
509 | + | ||
510 | # Return 1 if the target supports section-anchors | ||
511 | |||
512 | proc check_effective_target_section_anchors { } { | ||
513 | @@ -3648,11 +3666,11 @@ | ||
514 | return $flags | ||
515 | } | ||
516 | |||
517 | -# Add to FLAGS the flags needed to enable 128-bit vectors. | ||
518 | +# Add to FLAGS the flags needed to enable 64-bit vectors. | ||
519 | |||
520 | -proc add_options_for_quad_vectors { flags } { | ||
521 | +proc add_options_for_double_vectors { flags } { | ||
522 | if [is-effective-target arm_neon_ok] { | ||
523 | - return "$flags -mvectorize-with-neon-quad" | ||
524 | + return "$flags -mvectorize-with-neon-double" | ||
525 | } | ||
526 | |||
527 | return $flags | ||
528 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch new file mode 100644 index 000000000..2f70b1b9c --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106816.patch | |||
@@ -0,0 +1,387 @@ | |||
1 | 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | |||
6 | 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> | ||
7 | |||
8 | * config/arm/neon.md (neon_move_lo_quad_<mode>): Delete. | ||
9 | (neon_move_hi_quad_<mode>): Likewise. | ||
10 | (move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves. | ||
11 | |||
12 | 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> | ||
13 | |||
14 | gcc/ | ||
15 | Backport from mainline: | ||
16 | |||
17 | 2011-09-27 Richard Sandiford <richard.sandiford@linaro.org> | ||
18 | |||
19 | * config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi) | ||
20 | (neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di) | ||
21 | (neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si) | ||
22 | (neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands | ||
23 | that produce subreg moves. Define using VQX iterators. | ||
24 | |||
25 | 2011-09-28 Richard Sandiford <richard.sandiford@linaro.org> | ||
26 | |||
27 | gcc/ | ||
28 | Backport from mainline: | ||
29 | |||
30 | 2011-09-14 Richard Sandiford <richard.sandiford@linaro.org> | ||
31 | |||
32 | * simplify-rtx.c (simplify_subreg): Check that the inner mode is | ||
33 | a scalar integer before applying integer-only optimisations to | ||
34 | inner arithmetic. | ||
35 | |||
36 | === modified file 'gcc/config/arm/neon.md' | ||
37 | --- old/gcc/config/arm/neon.md 2011-07-04 14:03:49 +0000 | ||
38 | +++ new/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 | ||
39 | @@ -1235,66 +1235,14 @@ | ||
40 | (const_string "neon_int_1") (const_string "neon_int_5")))] | ||
41 | ) | ||
42 | |||
43 | -; FIXME: We wouldn't need the following insns if we could write subregs of | ||
44 | -; vector registers. Make an attempt at removing unnecessary moves, though | ||
45 | -; we're really at the mercy of the register allocator. | ||
46 | - | ||
47 | -(define_insn "neon_move_lo_quad_<mode>" | ||
48 | - [(set (match_operand:ANY128 0 "s_register_operand" "+w") | ||
49 | - (vec_concat:ANY128 | ||
50 | - (match_operand:<V_HALF> 1 "s_register_operand" "w") | ||
51 | - (vec_select:<V_HALF> | ||
52 | - (match_dup 0) | ||
53 | - (match_operand:ANY128 2 "vect_par_constant_high" ""))))] | ||
54 | - "TARGET_NEON" | ||
55 | -{ | ||
56 | - int dest = REGNO (operands[0]); | ||
57 | - int src = REGNO (operands[1]); | ||
58 | - | ||
59 | - if (dest != src) | ||
60 | - return "vmov\t%e0, %P1"; | ||
61 | - else | ||
62 | - return ""; | ||
63 | -} | ||
64 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
65 | -) | ||
66 | - | ||
67 | -(define_insn "neon_move_hi_quad_<mode>" | ||
68 | - [(set (match_operand:ANY128 0 "s_register_operand" "+w") | ||
69 | - (vec_concat:ANY128 | ||
70 | - (vec_select:<V_HALF> | ||
71 | - (match_dup 0) | ||
72 | - (match_operand:ANY128 2 "vect_par_constant_low" "")) | ||
73 | - (match_operand:<V_HALF> 1 "s_register_operand" "w")))] | ||
74 | - | ||
75 | - "TARGET_NEON" | ||
76 | -{ | ||
77 | - int dest = REGNO (operands[0]); | ||
78 | - int src = REGNO (operands[1]); | ||
79 | - | ||
80 | - if (dest != src) | ||
81 | - return "vmov\t%f0, %P1"; | ||
82 | - else | ||
83 | - return ""; | ||
84 | -} | ||
85 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
86 | -) | ||
87 | - | ||
88 | (define_expand "move_hi_quad_<mode>" | ||
89 | [(match_operand:ANY128 0 "s_register_operand" "") | ||
90 | (match_operand:<V_HALF> 1 "s_register_operand" "")] | ||
91 | "TARGET_NEON" | ||
92 | { | ||
93 | - rtvec v = rtvec_alloc (<V_mode_nunits>/2); | ||
94 | - rtx t1; | ||
95 | - int i; | ||
96 | - | ||
97 | - for (i=0; i < (<V_mode_nunits>/2); i++) | ||
98 | - RTVEC_ELT (v, i) = GEN_INT (i); | ||
99 | - | ||
100 | - t1 = gen_rtx_PARALLEL (<MODE>mode, v); | ||
101 | - emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1)); | ||
102 | - | ||
103 | + emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, | ||
104 | + GET_MODE_SIZE (<V_HALF>mode)), | ||
105 | + operands[1]); | ||
106 | DONE; | ||
107 | }) | ||
108 | |||
109 | @@ -1303,16 +1251,9 @@ | ||
110 | (match_operand:<V_HALF> 1 "s_register_operand" "")] | ||
111 | "TARGET_NEON" | ||
112 | { | ||
113 | - rtvec v = rtvec_alloc (<V_mode_nunits>/2); | ||
114 | - rtx t1; | ||
115 | - int i; | ||
116 | - | ||
117 | - for (i=0; i < (<V_mode_nunits>/2); i++) | ||
118 | - RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); | ||
119 | - | ||
120 | - t1 = gen_rtx_PARALLEL (<MODE>mode, v); | ||
121 | - emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1)); | ||
122 | - | ||
123 | + emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], | ||
124 | + <MODE>mode, 0), | ||
125 | + operands[1]); | ||
126 | DONE; | ||
127 | }) | ||
128 | |||
129 | @@ -2950,183 +2891,27 @@ | ||
130 | (set_attr "neon_type" "neon_bp_simple")] | ||
131 | ) | ||
132 | |||
133 | -(define_insn "neon_vget_highv16qi" | ||
134 | - [(set (match_operand:V8QI 0 "s_register_operand" "=w") | ||
135 | - (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") | ||
136 | - (parallel [(const_int 8) (const_int 9) | ||
137 | - (const_int 10) (const_int 11) | ||
138 | - (const_int 12) (const_int 13) | ||
139 | - (const_int 14) (const_int 15)])))] | ||
140 | - "TARGET_NEON" | ||
141 | -{ | ||
142 | - int dest = REGNO (operands[0]); | ||
143 | - int src = REGNO (operands[1]); | ||
144 | - | ||
145 | - if (dest != src + 2) | ||
146 | - return "vmov\t%P0, %f1"; | ||
147 | - else | ||
148 | - return ""; | ||
149 | -} | ||
150 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
151 | -) | ||
152 | - | ||
153 | -(define_insn "neon_vget_highv8hi" | ||
154 | - [(set (match_operand:V4HI 0 "s_register_operand" "=w") | ||
155 | - (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") | ||
156 | - (parallel [(const_int 4) (const_int 5) | ||
157 | - (const_int 6) (const_int 7)])))] | ||
158 | - "TARGET_NEON" | ||
159 | -{ | ||
160 | - int dest = REGNO (operands[0]); | ||
161 | - int src = REGNO (operands[1]); | ||
162 | - | ||
163 | - if (dest != src + 2) | ||
164 | - return "vmov\t%P0, %f1"; | ||
165 | - else | ||
166 | - return ""; | ||
167 | -} | ||
168 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
169 | -) | ||
170 | - | ||
171 | -(define_insn "neon_vget_highv4si" | ||
172 | - [(set (match_operand:V2SI 0 "s_register_operand" "=w") | ||
173 | - (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") | ||
174 | - (parallel [(const_int 2) (const_int 3)])))] | ||
175 | - "TARGET_NEON" | ||
176 | -{ | ||
177 | - int dest = REGNO (operands[0]); | ||
178 | - int src = REGNO (operands[1]); | ||
179 | - | ||
180 | - if (dest != src + 2) | ||
181 | - return "vmov\t%P0, %f1"; | ||
182 | - else | ||
183 | - return ""; | ||
184 | -} | ||
185 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
186 | -) | ||
187 | - | ||
188 | -(define_insn "neon_vget_highv4sf" | ||
189 | - [(set (match_operand:V2SF 0 "s_register_operand" "=w") | ||
190 | - (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") | ||
191 | - (parallel [(const_int 2) (const_int 3)])))] | ||
192 | - "TARGET_NEON" | ||
193 | -{ | ||
194 | - int dest = REGNO (operands[0]); | ||
195 | - int src = REGNO (operands[1]); | ||
196 | - | ||
197 | - if (dest != src + 2) | ||
198 | - return "vmov\t%P0, %f1"; | ||
199 | - else | ||
200 | - return ""; | ||
201 | -} | ||
202 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
203 | -) | ||
204 | - | ||
205 | -(define_insn "neon_vget_highv2di" | ||
206 | - [(set (match_operand:DI 0 "s_register_operand" "=w") | ||
207 | - (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") | ||
208 | - (parallel [(const_int 1)])))] | ||
209 | - "TARGET_NEON" | ||
210 | -{ | ||
211 | - int dest = REGNO (operands[0]); | ||
212 | - int src = REGNO (operands[1]); | ||
213 | - | ||
214 | - if (dest != src + 2) | ||
215 | - return "vmov\t%P0, %f1"; | ||
216 | - else | ||
217 | - return ""; | ||
218 | -} | ||
219 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
220 | -) | ||
221 | - | ||
222 | -(define_insn "neon_vget_lowv16qi" | ||
223 | - [(set (match_operand:V8QI 0 "s_register_operand" "=w") | ||
224 | - (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") | ||
225 | - (parallel [(const_int 0) (const_int 1) | ||
226 | - (const_int 2) (const_int 3) | ||
227 | - (const_int 4) (const_int 5) | ||
228 | - (const_int 6) (const_int 7)])))] | ||
229 | - "TARGET_NEON" | ||
230 | -{ | ||
231 | - int dest = REGNO (operands[0]); | ||
232 | - int src = REGNO (operands[1]); | ||
233 | - | ||
234 | - if (dest != src) | ||
235 | - return "vmov\t%P0, %e1"; | ||
236 | - else | ||
237 | - return ""; | ||
238 | -} | ||
239 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
240 | -) | ||
241 | - | ||
242 | -(define_insn "neon_vget_lowv8hi" | ||
243 | - [(set (match_operand:V4HI 0 "s_register_operand" "=w") | ||
244 | - (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") | ||
245 | - (parallel [(const_int 0) (const_int 1) | ||
246 | - (const_int 2) (const_int 3)])))] | ||
247 | - "TARGET_NEON" | ||
248 | -{ | ||
249 | - int dest = REGNO (operands[0]); | ||
250 | - int src = REGNO (operands[1]); | ||
251 | - | ||
252 | - if (dest != src) | ||
253 | - return "vmov\t%P0, %e1"; | ||
254 | - else | ||
255 | - return ""; | ||
256 | -} | ||
257 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
258 | -) | ||
259 | - | ||
260 | -(define_insn "neon_vget_lowv4si" | ||
261 | - [(set (match_operand:V2SI 0 "s_register_operand" "=w") | ||
262 | - (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") | ||
263 | - (parallel [(const_int 0) (const_int 1)])))] | ||
264 | - "TARGET_NEON" | ||
265 | -{ | ||
266 | - int dest = REGNO (operands[0]); | ||
267 | - int src = REGNO (operands[1]); | ||
268 | - | ||
269 | - if (dest != src) | ||
270 | - return "vmov\t%P0, %e1"; | ||
271 | - else | ||
272 | - return ""; | ||
273 | -} | ||
274 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
275 | -) | ||
276 | - | ||
277 | -(define_insn "neon_vget_lowv4sf" | ||
278 | - [(set (match_operand:V2SF 0 "s_register_operand" "=w") | ||
279 | - (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") | ||
280 | - (parallel [(const_int 0) (const_int 1)])))] | ||
281 | - "TARGET_NEON" | ||
282 | -{ | ||
283 | - int dest = REGNO (operands[0]); | ||
284 | - int src = REGNO (operands[1]); | ||
285 | - | ||
286 | - if (dest != src) | ||
287 | - return "vmov\t%P0, %e1"; | ||
288 | - else | ||
289 | - return ""; | ||
290 | -} | ||
291 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
292 | -) | ||
293 | - | ||
294 | -(define_insn "neon_vget_lowv2di" | ||
295 | - [(set (match_operand:DI 0 "s_register_operand" "=w") | ||
296 | - (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") | ||
297 | - (parallel [(const_int 0)])))] | ||
298 | - "TARGET_NEON" | ||
299 | -{ | ||
300 | - int dest = REGNO (operands[0]); | ||
301 | - int src = REGNO (operands[1]); | ||
302 | - | ||
303 | - if (dest != src) | ||
304 | - return "vmov\t%P0, %e1"; | ||
305 | - else | ||
306 | - return ""; | ||
307 | -} | ||
308 | - [(set_attr "neon_type" "neon_bp_simple")] | ||
309 | -) | ||
310 | +(define_expand "neon_vget_high<mode>" | ||
311 | + [(match_operand:<V_HALF> 0 "s_register_operand") | ||
312 | + (match_operand:VQX 1 "s_register_operand")] | ||
313 | + "TARGET_NEON" | ||
314 | +{ | ||
315 | + emit_move_insn (operands[0], | ||
316 | + simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, | ||
317 | + GET_MODE_SIZE (<V_HALF>mode))); | ||
318 | + DONE; | ||
319 | +}) | ||
320 | + | ||
321 | +(define_expand "neon_vget_low<mode>" | ||
322 | + [(match_operand:<V_HALF> 0 "s_register_operand") | ||
323 | + (match_operand:VQX 1 "s_register_operand")] | ||
324 | + "TARGET_NEON" | ||
325 | +{ | ||
326 | + emit_move_insn (operands[0], | ||
327 | + simplify_gen_subreg (<V_HALF>mode, operands[1], | ||
328 | + <MODE>mode, 0)); | ||
329 | + DONE; | ||
330 | +}) | ||
331 | |||
332 | (define_insn "neon_vcvt<mode>" | ||
333 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | ||
334 | |||
335 | === modified file 'gcc/simplify-rtx.c' | ||
336 | --- old/gcc/simplify-rtx.c 2011-08-13 08:32:32 +0000 | ||
337 | +++ new/gcc/simplify-rtx.c 2011-09-28 15:11:59 +0000 | ||
338 | @@ -5567,6 +5567,7 @@ | ||
339 | /* Optimize SUBREG truncations of zero and sign extended values. */ | ||
340 | if ((GET_CODE (op) == ZERO_EXTEND | ||
341 | || GET_CODE (op) == SIGN_EXTEND) | ||
342 | + && SCALAR_INT_MODE_P (innermode) | ||
343 | && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) | ||
344 | { | ||
345 | unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); | ||
346 | @@ -5605,6 +5606,7 @@ | ||
347 | if ((GET_CODE (op) == LSHIFTRT | ||
348 | || GET_CODE (op) == ASHIFTRT) | ||
349 | && SCALAR_INT_MODE_P (outermode) | ||
350 | + && SCALAR_INT_MODE_P (innermode) | ||
351 | /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE | ||
352 | to avoid the possibility that an outer LSHIFTRT shifts by more | ||
353 | than the sign extension's sign_bit_copies and introduces zeros | ||
354 | @@ -5624,6 +5626,7 @@ | ||
355 | if ((GET_CODE (op) == LSHIFTRT | ||
356 | || GET_CODE (op) == ASHIFTRT) | ||
357 | && SCALAR_INT_MODE_P (outermode) | ||
358 | + && SCALAR_INT_MODE_P (innermode) | ||
359 | && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) | ||
360 | && CONST_INT_P (XEXP (op, 1)) | ||
361 | && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND | ||
362 | @@ -5638,6 +5641,7 @@ | ||
363 | the outer subreg is effectively a truncation to the original mode. */ | ||
364 | if (GET_CODE (op) == ASHIFT | ||
365 | && SCALAR_INT_MODE_P (outermode) | ||
366 | + && SCALAR_INT_MODE_P (innermode) | ||
367 | && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) | ||
368 | && CONST_INT_P (XEXP (op, 1)) | ||
369 | && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND | ||
370 | @@ -5651,7 +5655,7 @@ | ||
371 | /* Recognize a word extraction from a multi-word subreg. */ | ||
372 | if ((GET_CODE (op) == LSHIFTRT | ||
373 | || GET_CODE (op) == ASHIFTRT) | ||
374 | - && SCALAR_INT_MODE_P (outermode) | ||
375 | + && SCALAR_INT_MODE_P (innermode) | ||
376 | && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD | ||
377 | && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) | ||
378 | && CONST_INT_P (XEXP (op, 1)) | ||
379 | @@ -5673,6 +5677,7 @@ | ||
380 | |||
381 | if ((GET_CODE (op) == LSHIFTRT | ||
382 | || GET_CODE (op) == ASHIFTRT) | ||
383 | + && SCALAR_INT_MODE_P (innermode) | ||
384 | && MEM_P (XEXP (op, 0)) | ||
385 | && CONST_INT_P (XEXP (op, 1)) | ||
386 | && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) | ||
387 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch new file mode 100644 index 000000000..d44f8cf1a --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106817.patch | |||
@@ -0,0 +1,290 @@ | |||
1 | 2011-10-01 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline -r179380 and -r179381 | ||
5 | |||
6 | * ddg.c (autoinc_var_is_used_p): New function. | ||
7 | (create_ddg_dep_from_intra_loop_link, | ||
8 | add_cross_iteration_register_deps): Call it. | ||
9 | * ddg.h (autoinc_var_is_used_p): Declare. | ||
10 | * modulo-sched.c (sms_schedule): Handle instructions with REG_INC. | ||
11 | (generate_reg_moves): Call autoinc_var_is_used_p. Skip | ||
12 | instructions that do not set a register and verify no regmoves | ||
13 | are created for !single_set instructions. | ||
14 | |||
15 | gcc/testsuite/ | ||
16 | |||
17 | * gcc.dg/sms-10.c: New file | ||
18 | |||
19 | === modified file 'gcc/ddg.c' | ||
20 | --- old/gcc/ddg.c 2011-07-31 11:29:10 +0000 | ||
21 | +++ new/gcc/ddg.c 2011-10-02 06:56:53 +0000 | ||
22 | @@ -145,6 +145,27 @@ | ||
23 | return rtx_mem_access_p (PATTERN (insn)); | ||
24 | } | ||
25 | |||
26 | +/* Return true if DEF_INSN contains address being auto-inc or auto-dec | ||
27 | + which is used in USE_INSN. Otherwise return false. The result is | ||
28 | + being used to decide whether to remove the edge between def_insn and | ||
29 | + use_insn when -fmodulo-sched-allow-regmoves is set. This function | ||
30 | + doesn't need to consider the specific address register; no reg_moves | ||
31 | + will be allowed for any life range defined by def_insn and used | ||
32 | + by use_insn, if use_insn uses an address register auto-inc'ed by | ||
33 | + def_insn. */ | ||
34 | +bool | ||
35 | +autoinc_var_is_used_p (rtx def_insn, rtx use_insn) | ||
36 | +{ | ||
37 | + rtx note; | ||
38 | + | ||
39 | + for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1)) | ||
40 | + if (REG_NOTE_KIND (note) == REG_INC | ||
41 | + && reg_referenced_p (XEXP (note, 0), PATTERN (use_insn))) | ||
42 | + return true; | ||
43 | + | ||
44 | + return false; | ||
45 | +} | ||
46 | + | ||
47 | /* Computes the dependence parameters (latency, distance etc.), creates | ||
48 | a ddg_edge and adds it to the given DDG. */ | ||
49 | static void | ||
50 | @@ -173,10 +194,15 @@ | ||
51 | compensate for that by generating reg-moves based on the life-range | ||
52 | analysis. The anti-deps that will be deleted are the ones which | ||
53 | have true-deps edges in the opposite direction (in other words | ||
54 | - the kernel has only one def of the relevant register). TODO: | ||
55 | - support the removal of all anti-deps edges, i.e. including those | ||
56 | + the kernel has only one def of the relevant register). | ||
57 | + If the address that is being auto-inc or auto-dec in DEST_NODE | ||
58 | + is used in SRC_NODE then do not remove the edge to make sure | ||
59 | + reg-moves will not be created for this address. | ||
60 | + TODO: support the removal of all anti-deps edges, i.e. including those | ||
61 | whose register has multiple defs in the loop. */ | ||
62 | - if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP)) | ||
63 | + if (flag_modulo_sched_allow_regmoves | ||
64 | + && (t == ANTI_DEP && dt == REG_DEP) | ||
65 | + && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) | ||
66 | { | ||
67 | rtx set; | ||
68 | |||
69 | @@ -302,10 +328,14 @@ | ||
70 | gcc_assert (first_def_node); | ||
71 | |||
72 | /* Always create the edge if the use node is a branch in | ||
73 | - order to prevent the creation of reg-moves. */ | ||
74 | + order to prevent the creation of reg-moves. | ||
75 | + If the address that is being auto-inc or auto-dec in LAST_DEF | ||
76 | + is used in USE_INSN then do not remove the edge to make sure | ||
77 | + reg-moves will not be created for that address. */ | ||
78 | if (DF_REF_ID (last_def) != DF_REF_ID (first_def) | ||
79 | || !flag_modulo_sched_allow_regmoves | ||
80 | - || JUMP_P (use_node->insn)) | ||
81 | + || JUMP_P (use_node->insn) | ||
82 | + || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) | ||
83 | create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, | ||
84 | REG_DEP, 1); | ||
85 | |||
86 | |||
87 | === modified file 'gcc/ddg.h' | ||
88 | --- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 | ||
89 | +++ new/gcc/ddg.h 2011-10-02 06:56:53 +0000 | ||
90 | @@ -186,4 +186,6 @@ | ||
91 | int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to); | ||
92 | int longest_simple_path (ddg_ptr, int from, int to, sbitmap via); | ||
93 | |||
94 | +bool autoinc_var_is_used_p (rtx, rtx); | ||
95 | + | ||
96 | #endif /* GCC_DDG_H */ | ||
97 | |||
98 | === modified file 'gcc/modulo-sched.c' | ||
99 | --- old/gcc/modulo-sched.c 2011-09-14 11:06:06 +0000 | ||
100 | +++ new/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 | ||
101 | @@ -477,7 +477,12 @@ | ||
102 | sbitmap *uses_of_defs; | ||
103 | rtx last_reg_move; | ||
104 | rtx prev_reg, old_reg; | ||
105 | - | ||
106 | + rtx set = single_set (u->insn); | ||
107 | + | ||
108 | + /* Skip instructions that do not set a register. */ | ||
109 | + if ((set && !REG_P (SET_DEST (set)))) | ||
110 | + continue; | ||
111 | + | ||
112 | /* Compute the number of reg_moves needed for u, by looking at life | ||
113 | ranges started at u (excluding self-loops). */ | ||
114 | for (e = u->out; e; e = e->next_out) | ||
115 | @@ -494,6 +499,20 @@ | ||
116 | && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) | ||
117 | nreg_moves4e--; | ||
118 | |||
119 | + if (nreg_moves4e >= 1) | ||
120 | + { | ||
121 | + /* !single_set instructions are not supported yet and | ||
122 | + thus we do not expect to encounter them in the loop | ||
123 | + except from the doloop part. For the latter case | ||
124 | + we assume no regmoves are generated as the doloop | ||
125 | + instructions are tied to the branch with an edge. */ | ||
126 | + gcc_assert (set); | ||
127 | + /* If the instruction contains auto-inc register then | ||
128 | + validate that the regmove is being generated for the | ||
129 | + target register rather than the inc'ed register. */ | ||
130 | + gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); | ||
131 | + } | ||
132 | + | ||
133 | nreg_moves = MAX (nreg_moves, nreg_moves4e); | ||
134 | } | ||
135 | |||
136 | @@ -1266,12 +1285,10 @@ | ||
137 | continue; | ||
138 | } | ||
139 | |||
140 | - /* Don't handle BBs with calls or barriers or auto-increment insns | ||
141 | - (to avoid creating invalid reg-moves for the auto-increment insns), | ||
142 | + /* Don't handle BBs with calls or barriers | ||
143 | or !single_set with the exception of instructions that include | ||
144 | count_reg---these instructions are part of the control part | ||
145 | that do-loop recognizes. | ||
146 | - ??? Should handle auto-increment insns. | ||
147 | ??? Should handle insns defining subregs. */ | ||
148 | for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | ||
149 | { | ||
150 | @@ -1282,7 +1299,6 @@ | ||
151 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | ||
152 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE | ||
153 | && !reg_mentioned_p (count_reg, insn)) | ||
154 | - || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) | ||
155 | || (INSN_P (insn) && (set = single_set (insn)) | ||
156 | && GET_CODE (SET_DEST (set)) == SUBREG)) | ||
157 | break; | ||
158 | @@ -1296,8 +1312,6 @@ | ||
159 | fprintf (dump_file, "SMS loop-with-call\n"); | ||
160 | else if (BARRIER_P (insn)) | ||
161 | fprintf (dump_file, "SMS loop-with-barrier\n"); | ||
162 | - else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) | ||
163 | - fprintf (dump_file, "SMS reg inc\n"); | ||
164 | else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | ||
165 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) | ||
166 | fprintf (dump_file, "SMS loop-with-not-single-set\n"); | ||
167 | |||
168 | === added file 'gcc/testsuite/gcc.dg/sms-10.c' | ||
169 | --- old/gcc/testsuite/gcc.dg/sms-10.c 1970-01-01 00:00:00 +0000 | ||
170 | +++ new/gcc/testsuite/gcc.dg/sms-10.c 2011-10-02 06:56:53 +0000 | ||
171 | @@ -0,0 +1,118 @@ | ||
172 | + /* { dg-do run } */ | ||
173 | + /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ | ||
174 | + | ||
175 | + | ||
176 | +typedef __SIZE_TYPE__ size_t; | ||
177 | +extern void *malloc (size_t); | ||
178 | +extern void free (void *); | ||
179 | +extern void abort (void); | ||
180 | + | ||
181 | +struct regstat_n_sets_and_refs_t | ||
182 | +{ | ||
183 | + int sets; | ||
184 | + int refs; | ||
185 | +}; | ||
186 | + | ||
187 | +struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs; | ||
188 | + | ||
189 | +struct df_reg_info | ||
190 | +{ | ||
191 | + unsigned int n_refs; | ||
192 | +}; | ||
193 | + | ||
194 | +struct df_d | ||
195 | +{ | ||
196 | + struct df_reg_info **def_regs; | ||
197 | + struct df_reg_info **use_regs; | ||
198 | +}; | ||
199 | +struct df_d *df; | ||
200 | + | ||
201 | +static inline int | ||
202 | +REG_N_SETS (int regno) | ||
203 | +{ | ||
204 | + return regstat_n_sets_and_refs[regno].sets; | ||
205 | +} | ||
206 | + | ||
207 | +__attribute__ ((noinline)) | ||
208 | + int max_reg_num (void) | ||
209 | +{ | ||
210 | + return 100; | ||
211 | +} | ||
212 | + | ||
213 | +__attribute__ ((noinline)) | ||
214 | + void regstat_init_n_sets_and_refs (void) | ||
215 | +{ | ||
216 | + unsigned int i; | ||
217 | + unsigned int max_regno = max_reg_num (); | ||
218 | + | ||
219 | + for (i = 0; i < max_regno; i++) | ||
220 | + { | ||
221 | + (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs)); | ||
222 | + (regstat_n_sets_and_refs[i].refs = | ||
223 | + (df->use_regs[(i)]->n_refs) + REG_N_SETS (i)); | ||
224 | + } | ||
225 | +} | ||
226 | + | ||
227 | +int a_sets[100] = | ||
228 | + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | ||
229 | + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||
230 | + 40, 41, 42, | ||
231 | + 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, | ||
232 | + 62, 63, 64, | ||
233 | + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, | ||
234 | + 84, 85, 86, | ||
235 | + 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 | ||
236 | +}; | ||
237 | + | ||
238 | +int a_refs[100] = | ||
239 | + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, | ||
240 | + 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, | ||
241 | + 78, 80, 82, | ||
242 | + 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, | ||
243 | + 118, 120, | ||
244 | + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, | ||
245 | + 152, 154, 156, | ||
246 | + 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, | ||
247 | + 188, 190, 192, | ||
248 | + 194, 196, 198 | ||
249 | +}; | ||
250 | + | ||
251 | +int | ||
252 | +main () | ||
253 | +{ | ||
254 | + struct df_reg_info *b[100], *c[100]; | ||
255 | + struct df_d df1; | ||
256 | + size_t s = sizeof (struct df_reg_info); | ||
257 | + struct regstat_n_sets_and_refs_t a[100]; | ||
258 | + | ||
259 | + df = &df1; | ||
260 | + regstat_n_sets_and_refs = a; | ||
261 | + int i; | ||
262 | + | ||
263 | + for (i = 0; i < 100; i++) | ||
264 | + { | ||
265 | + b[i] = (struct df_reg_info *) malloc (s); | ||
266 | + b[i]->n_refs = i; | ||
267 | + c[i] = (struct df_reg_info *) malloc (s); | ||
268 | + c[i]->n_refs = i; | ||
269 | + } | ||
270 | + | ||
271 | + df1.def_regs = b; | ||
272 | + df1.use_regs = c; | ||
273 | + regstat_init_n_sets_and_refs (); | ||
274 | + | ||
275 | + for (i = 0; i < 100; i++) | ||
276 | + if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i])) | ||
277 | + abort (); | ||
278 | + | ||
279 | + for (i = 0; i < 100; i++) | ||
280 | + { | ||
281 | + free (b[i]); | ||
282 | + free (c[i]); | ||
283 | + } | ||
284 | + | ||
285 | + return 0; | ||
286 | +} | ||
287 | + | ||
288 | +/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */ | ||
289 | +/* { dg-final { cleanup-rtl-dump "sms" } } */ | ||
290 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch new file mode 100644 index 000000000..ef98142bc --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106818.patch | |||
@@ -0,0 +1,105 @@ | |||
1 | 2011-10-03 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-09-13 Sevak Sargsyan <sevak.sargsyan@ispras.ru> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New | ||
9 | define_insn patterns for combine. | ||
10 | |||
11 | gcc/testsuite/ | ||
12 | * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test. | ||
13 | |||
14 | === modified file 'gcc/config/arm/neon.md' | ||
15 | --- old/gcc/config/arm/neon.md 2011-09-28 15:14:59 +0000 | ||
16 | +++ new/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 | ||
17 | @@ -5428,3 +5428,32 @@ | ||
18 | emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); | ||
19 | DONE; | ||
20 | }) | ||
21 | + | ||
22 | +(define_insn "neon_vabd<mode>_2" | ||
23 | + [(set (match_operand:VDQ 0 "s_register_operand" "=w") | ||
24 | + (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | ||
25 | + (match_operand:VDQ 2 "s_register_operand" "w"))))] | ||
26 | + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | ||
27 | + "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" | ||
28 | + [(set (attr "neon_type") | ||
29 | + (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) | ||
30 | + (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
31 | + (const_string "neon_fp_vadd_ddd_vabs_dd") | ||
32 | + (const_string "neon_fp_vadd_qqq_vabs_qq")) | ||
33 | + (const_string "neon_int_5")))] | ||
34 | +) | ||
35 | + | ||
36 | +(define_insn "neon_vabd<mode>_3" | ||
37 | + [(set (match_operand:VDQ 0 "s_register_operand" "=w") | ||
38 | + (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") | ||
39 | + (match_operand:VDQ 2 "s_register_operand" "w")] | ||
40 | + UNSPEC_VSUB)))] | ||
41 | + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | ||
42 | + "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" | ||
43 | + [(set (attr "neon_type") | ||
44 | + (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) | ||
45 | + (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) | ||
46 | + (const_string "neon_fp_vadd_ddd_vabs_dd") | ||
47 | + (const_string "neon_fp_vadd_qqq_vabs_qq")) | ||
48 | + (const_string "neon_int_5")))] | ||
49 | +) | ||
50 | |||
51 | === added file 'gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c' | ||
52 | --- old/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 1970-01-01 00:00:00 +0000 | ||
53 | +++ new/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c 2011-10-03 01:32:17 +0000 | ||
54 | @@ -0,0 +1,50 @@ | ||
55 | +/* { dg-do compile } */ | ||
56 | +/* { dg-require-effective-target arm_neon_ok } */ | ||
57 | +/* { dg-options "-O2 -funsafe-math-optimizations" } */ | ||
58 | +/* { dg-add-options arm_neon } */ | ||
59 | + | ||
60 | +#include <arm_neon.h> | ||
61 | +float32x2_t f_sub_abs_to_vabd_32() | ||
62 | +{ | ||
63 | + float32x2_t val1 = vdup_n_f32 (10); | ||
64 | + float32x2_t val2 = vdup_n_f32 (30); | ||
65 | + float32x2_t sres = vsub_f32(val1, val2); | ||
66 | + float32x2_t res = vabs_f32 (sres); | ||
67 | + | ||
68 | + return res; | ||
69 | +} | ||
70 | +/* { dg-final { scan-assembler "vabd\.f32" } }*/ | ||
71 | + | ||
72 | +#include <arm_neon.h> | ||
73 | +int8x8_t sub_abs_to_vabd_8() | ||
74 | +{ | ||
75 | + int8x8_t val1 = vdup_n_s8 (10); | ||
76 | + int8x8_t val2 = vdup_n_s8 (30); | ||
77 | + int8x8_t sres = vsub_s8(val1, val2); | ||
78 | + int8x8_t res = vabs_s8 (sres); | ||
79 | + | ||
80 | + return res; | ||
81 | +} | ||
82 | +/* { dg-final { scan-assembler "vabd\.s8" } }*/ | ||
83 | + | ||
84 | +int16x4_t sub_abs_to_vabd_16() | ||
85 | +{ | ||
86 | + int16x4_t val1 = vdup_n_s16 (10); | ||
87 | + int16x4_t val2 = vdup_n_s16 (30); | ||
88 | + int16x4_t sres = vsub_s16(val1, val2); | ||
89 | + int16x4_t res = vabs_s16 (sres); | ||
90 | + | ||
91 | + return res; | ||
92 | +} | ||
93 | +/* { dg-final { scan-assembler "vabd\.s16" } }*/ | ||
94 | + | ||
95 | +int32x2_t sub_abs_to_vabd_32() | ||
96 | +{ | ||
97 | + int32x2_t val1 = vdup_n_s32 (10); | ||
98 | + int32x2_t val2 = vdup_n_s32 (30); | ||
99 | + int32x2_t sres = vsub_s32(val1, val2); | ||
100 | + int32x2_t res = vabs_s32 (sres); | ||
101 | + | ||
102 | + return res; | ||
103 | +} | ||
104 | +/* { dg-final { scan-assembler "vabd\.s32" } }*/ | ||
105 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch new file mode 100644 index 000000000..e097ec27f --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106819.patch | |||
@@ -0,0 +1,436 @@ | |||
1 | 2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | |||
6 | 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> | ||
7 | |||
8 | * config/arm/predicates.md (expandable_comparison_operator): New | ||
9 | predicate, extracted from... | ||
10 | (arm_comparison_operator): ...here. | ||
11 | * config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4) | ||
12 | (cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc) | ||
13 | (movdfcc): Use expandable_comparison_operator. | ||
14 | |||
15 | gcc/testsuite/ | ||
16 | Backport from mainline: | ||
17 | |||
18 | 2011-09-22 Richard Sandiford <richard.sandiford@linaro.org> | ||
19 | |||
20 | * gcc.target/arm/cmp-1.c: New test. | ||
21 | * gcc.target/arm/cmp-2.c: Likewise. | ||
22 | |||
23 | 2011-10-03 Richard Sandiford <richard.sandiford@linaro.org> | ||
24 | |||
25 | gcc/ | ||
26 | Backport from mainline: | ||
27 | |||
28 | 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
29 | |||
30 | PR target/49030 | ||
31 | * config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare. | ||
32 | * config/arm/arm.c (maybe_get_arm_condition_code): New function, | ||
33 | reusing the old code from get_arm_condition_code. Return ARM_NV | ||
34 | for invalid comparison codes. | ||
35 | (get_arm_condition_code): Redefine in terms of | ||
36 | maybe_get_arm_condition_code. | ||
37 | * config/arm/predicates.md (arm_comparison_operator): Use | ||
38 | maybe_get_arm_condition_code. | ||
39 | |||
40 | gcc/testsuite/ | ||
41 | Backport from mainline: | ||
42 | |||
43 | 2011-09-07 Richard Sandiford <richard.sandiford@linaro.org> | ||
44 | |||
45 | PR target/49030 | ||
46 | * gcc.dg/torture/pr49030.c: New test. | ||
47 | |||
48 | === modified file 'gcc/config/arm/arm-protos.h' | ||
49 | --- old/gcc/config/arm/arm-protos.h 2011-09-15 09:45:31 +0000 | ||
50 | +++ new/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 | ||
51 | @@ -180,6 +180,7 @@ | ||
52 | #endif | ||
53 | extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); | ||
54 | #ifdef RTX_CODE | ||
55 | +extern enum arm_cond_code maybe_get_arm_condition_code (rtx); | ||
56 | extern void thumb1_final_prescan_insn (rtx); | ||
57 | extern void thumb2_final_prescan_insn (rtx); | ||
58 | extern const char *thumb_load_double_from_address (rtx *); | ||
59 | |||
60 | === modified file 'gcc/config/arm/arm.c' | ||
61 | --- old/gcc/config/arm/arm.c 2011-09-19 07:44:24 +0000 | ||
62 | +++ new/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 | ||
63 | @@ -17494,10 +17494,10 @@ | ||
64 | decremented/zeroed by arm_asm_output_opcode as the insns are output. */ | ||
65 | |||
66 | /* Returns the index of the ARM condition code string in | ||
67 | - `arm_condition_codes'. COMPARISON should be an rtx like | ||
68 | - `(eq (...) (...))'. */ | ||
69 | -static enum arm_cond_code | ||
70 | -get_arm_condition_code (rtx comparison) | ||
71 | + `arm_condition_codes', or ARM_NV if the comparison is invalid. | ||
72 | + COMPARISON should be an rtx like `(eq (...) (...))'. */ | ||
73 | +enum arm_cond_code | ||
74 | +maybe_get_arm_condition_code (rtx comparison) | ||
75 | { | ||
76 | enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); | ||
77 | enum arm_cond_code code; | ||
78 | @@ -17521,11 +17521,11 @@ | ||
79 | case CC_DLTUmode: code = ARM_CC; | ||
80 | |||
81 | dominance: | ||
82 | - gcc_assert (comp_code == EQ || comp_code == NE); | ||
83 | - | ||
84 | if (comp_code == EQ) | ||
85 | return ARM_INVERSE_CONDITION_CODE (code); | ||
86 | - return code; | ||
87 | + if (comp_code == NE) | ||
88 | + return code; | ||
89 | + return ARM_NV; | ||
90 | |||
91 | case CC_NOOVmode: | ||
92 | switch (comp_code) | ||
93 | @@ -17534,7 +17534,7 @@ | ||
94 | case EQ: return ARM_EQ; | ||
95 | case GE: return ARM_PL; | ||
96 | case LT: return ARM_MI; | ||
97 | - default: gcc_unreachable (); | ||
98 | + default: return ARM_NV; | ||
99 | } | ||
100 | |||
101 | case CC_Zmode: | ||
102 | @@ -17542,7 +17542,7 @@ | ||
103 | { | ||
104 | case NE: return ARM_NE; | ||
105 | case EQ: return ARM_EQ; | ||
106 | - default: gcc_unreachable (); | ||
107 | + default: return ARM_NV; | ||
108 | } | ||
109 | |||
110 | case CC_Nmode: | ||
111 | @@ -17550,7 +17550,7 @@ | ||
112 | { | ||
113 | case NE: return ARM_MI; | ||
114 | case EQ: return ARM_PL; | ||
115 | - default: gcc_unreachable (); | ||
116 | + default: return ARM_NV; | ||
117 | } | ||
118 | |||
119 | case CCFPEmode: | ||
120 | @@ -17575,7 +17575,7 @@ | ||
121 | /* UNEQ and LTGT do not have a representation. */ | ||
122 | case UNEQ: /* Fall through. */ | ||
123 | case LTGT: /* Fall through. */ | ||
124 | - default: gcc_unreachable (); | ||
125 | + default: return ARM_NV; | ||
126 | } | ||
127 | |||
128 | case CC_SWPmode: | ||
129 | @@ -17591,7 +17591,7 @@ | ||
130 | case GTU: return ARM_CC; | ||
131 | case LEU: return ARM_CS; | ||
132 | case LTU: return ARM_HI; | ||
133 | - default: gcc_unreachable (); | ||
134 | + default: return ARM_NV; | ||
135 | } | ||
136 | |||
137 | case CC_Cmode: | ||
138 | @@ -17599,7 +17599,7 @@ | ||
139 | { | ||
140 | case LTU: return ARM_CS; | ||
141 | case GEU: return ARM_CC; | ||
142 | - default: gcc_unreachable (); | ||
143 | + default: return ARM_NV; | ||
144 | } | ||
145 | |||
146 | case CC_CZmode: | ||
147 | @@ -17611,7 +17611,7 @@ | ||
148 | case GTU: return ARM_HI; | ||
149 | case LEU: return ARM_LS; | ||
150 | case LTU: return ARM_CC; | ||
151 | - default: gcc_unreachable (); | ||
152 | + default: return ARM_NV; | ||
153 | } | ||
154 | |||
155 | case CC_NCVmode: | ||
156 | @@ -17621,7 +17621,7 @@ | ||
157 | case LT: return ARM_LT; | ||
158 | case GEU: return ARM_CS; | ||
159 | case LTU: return ARM_CC; | ||
160 | - default: gcc_unreachable (); | ||
161 | + default: return ARM_NV; | ||
162 | } | ||
163 | |||
164 | case CCmode: | ||
165 | @@ -17637,13 +17637,22 @@ | ||
166 | case GTU: return ARM_HI; | ||
167 | case LEU: return ARM_LS; | ||
168 | case LTU: return ARM_CC; | ||
169 | - default: gcc_unreachable (); | ||
170 | + default: return ARM_NV; | ||
171 | } | ||
172 | |||
173 | default: gcc_unreachable (); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | +/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ | ||
178 | +static enum arm_cond_code | ||
179 | +get_arm_condition_code (rtx comparison) | ||
180 | +{ | ||
181 | + enum arm_cond_code code = maybe_get_arm_condition_code (comparison); | ||
182 | + gcc_assert (code != ARM_NV); | ||
183 | + return code; | ||
184 | +} | ||
185 | + | ||
186 | /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed | ||
187 | instructions. */ | ||
188 | void | ||
189 | |||
190 | === modified file 'gcc/config/arm/arm.md' | ||
191 | --- old/gcc/config/arm/arm.md 2011-09-12 14:14:00 +0000 | ||
192 | +++ new/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 | ||
193 | @@ -6543,7 +6543,7 @@ | ||
194 | |||
195 | (define_expand "cbranchsi4" | ||
196 | [(set (pc) (if_then_else | ||
197 | - (match_operator 0 "arm_comparison_operator" | ||
198 | + (match_operator 0 "expandable_comparison_operator" | ||
199 | [(match_operand:SI 1 "s_register_operand" "") | ||
200 | (match_operand:SI 2 "nonmemory_operand" "")]) | ||
201 | (label_ref (match_operand 3 "" "")) | ||
202 | @@ -6594,7 +6594,7 @@ | ||
203 | |||
204 | (define_expand "cbranchsf4" | ||
205 | [(set (pc) (if_then_else | ||
206 | - (match_operator 0 "arm_comparison_operator" | ||
207 | + (match_operator 0 "expandable_comparison_operator" | ||
208 | [(match_operand:SF 1 "s_register_operand" "") | ||
209 | (match_operand:SF 2 "arm_float_compare_operand" "")]) | ||
210 | (label_ref (match_operand 3 "" "")) | ||
211 | @@ -6606,7 +6606,7 @@ | ||
212 | |||
213 | (define_expand "cbranchdf4" | ||
214 | [(set (pc) (if_then_else | ||
215 | - (match_operator 0 "arm_comparison_operator" | ||
216 | + (match_operator 0 "expandable_comparison_operator" | ||
217 | [(match_operand:DF 1 "s_register_operand" "") | ||
218 | (match_operand:DF 2 "arm_float_compare_operand" "")]) | ||
219 | (label_ref (match_operand 3 "" "")) | ||
220 | @@ -6618,7 +6618,7 @@ | ||
221 | |||
222 | (define_expand "cbranchdi4" | ||
223 | [(set (pc) (if_then_else | ||
224 | - (match_operator 0 "arm_comparison_operator" | ||
225 | + (match_operator 0 "expandable_comparison_operator" | ||
226 | [(match_operand:DI 1 "cmpdi_operand" "") | ||
227 | (match_operand:DI 2 "cmpdi_operand" "")]) | ||
228 | (label_ref (match_operand 3 "" "")) | ||
229 | @@ -7473,7 +7473,7 @@ | ||
230 | |||
231 | (define_expand "cstoresi4" | ||
232 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
233 | - (match_operator:SI 1 "arm_comparison_operator" | ||
234 | + (match_operator:SI 1 "expandable_comparison_operator" | ||
235 | [(match_operand:SI 2 "s_register_operand" "") | ||
236 | (match_operand:SI 3 "reg_or_int_operand" "")]))] | ||
237 | "TARGET_32BIT || TARGET_THUMB1" | ||
238 | @@ -7609,7 +7609,7 @@ | ||
239 | |||
240 | (define_expand "cstoresf4" | ||
241 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
242 | - (match_operator:SI 1 "arm_comparison_operator" | ||
243 | + (match_operator:SI 1 "expandable_comparison_operator" | ||
244 | [(match_operand:SF 2 "s_register_operand" "") | ||
245 | (match_operand:SF 3 "arm_float_compare_operand" "")]))] | ||
246 | "TARGET_32BIT && TARGET_HARD_FLOAT" | ||
247 | @@ -7619,7 +7619,7 @@ | ||
248 | |||
249 | (define_expand "cstoredf4" | ||
250 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
251 | - (match_operator:SI 1 "arm_comparison_operator" | ||
252 | + (match_operator:SI 1 "expandable_comparison_operator" | ||
253 | [(match_operand:DF 2 "s_register_operand" "") | ||
254 | (match_operand:DF 3 "arm_float_compare_operand" "")]))] | ||
255 | "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" | ||
256 | @@ -7629,7 +7629,7 @@ | ||
257 | |||
258 | (define_expand "cstoredi4" | ||
259 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
260 | - (match_operator:SI 1 "arm_comparison_operator" | ||
261 | + (match_operator:SI 1 "expandable_comparison_operator" | ||
262 | [(match_operand:DI 2 "cmpdi_operand" "") | ||
263 | (match_operand:DI 3 "cmpdi_operand" "")]))] | ||
264 | "TARGET_32BIT" | ||
265 | @@ -7749,7 +7749,7 @@ | ||
266 | |||
267 | (define_expand "movsicc" | ||
268 | [(set (match_operand:SI 0 "s_register_operand" "") | ||
269 | - (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") | ||
270 | + (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") | ||
271 | (match_operand:SI 2 "arm_not_operand" "") | ||
272 | (match_operand:SI 3 "arm_not_operand" "")))] | ||
273 | "TARGET_32BIT" | ||
274 | @@ -7769,7 +7769,7 @@ | ||
275 | |||
276 | (define_expand "movsfcc" | ||
277 | [(set (match_operand:SF 0 "s_register_operand" "") | ||
278 | - (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") | ||
279 | + (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") | ||
280 | (match_operand:SF 2 "s_register_operand" "") | ||
281 | (match_operand:SF 3 "nonmemory_operand" "")))] | ||
282 | "TARGET_32BIT && TARGET_HARD_FLOAT" | ||
283 | @@ -7795,7 +7795,7 @@ | ||
284 | |||
285 | (define_expand "movdfcc" | ||
286 | [(set (match_operand:DF 0 "s_register_operand" "") | ||
287 | - (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") | ||
288 | + (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") | ||
289 | (match_operand:DF 2 "s_register_operand" "") | ||
290 | (match_operand:DF 3 "arm_float_add_operand" "")))] | ||
291 | "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" | ||
292 | |||
293 | === modified file 'gcc/config/arm/predicates.md' | ||
294 | --- old/gcc/config/arm/predicates.md 2011-09-15 09:45:31 +0000 | ||
295 | +++ new/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 | ||
296 | @@ -242,11 +242,15 @@ | ||
297 | |||
298 | ;; True for integer comparisons and, if FP is active, for comparisons | ||
299 | ;; other than LTGT or UNEQ. | ||
300 | +(define_special_predicate "expandable_comparison_operator" | ||
301 | + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, | ||
302 | + unordered,ordered,unlt,unle,unge,ungt")) | ||
303 | + | ||
304 | +;; Likewise, but only accept comparisons that are directly supported | ||
305 | +;; by ARM condition codes. | ||
306 | (define_special_predicate "arm_comparison_operator" | ||
307 | - (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") | ||
308 | - (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT | ||
309 | - && (TARGET_FPA || TARGET_VFP)") | ||
310 | - (match_code "unordered,ordered,unlt,unle,unge,ungt")))) | ||
311 | + (and (match_operand 0 "expandable_comparison_operator") | ||
312 | + (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) | ||
313 | |||
314 | (define_special_predicate "lt_ge_comparison_operator" | ||
315 | (match_code "lt,ge")) | ||
316 | |||
317 | === added file 'gcc/testsuite/gcc.dg/torture/pr49030.c' | ||
318 | --- old/gcc/testsuite/gcc.dg/torture/pr49030.c 1970-01-01 00:00:00 +0000 | ||
319 | +++ new/gcc/testsuite/gcc.dg/torture/pr49030.c 2011-10-03 09:46:40 +0000 | ||
320 | @@ -0,0 +1,19 @@ | ||
321 | +void | ||
322 | +sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples, | ||
323 | + unsigned long dst_skip) | ||
324 | +{ | ||
325 | + long long y; | ||
326 | + while (nsamples--) | ||
327 | + { | ||
328 | + y = (long long) (*src * 8388608.0f) << 8; | ||
329 | + if (y > 2147483647) { | ||
330 | + *(int *) dst = 2147483647; | ||
331 | + } else if (y < -2147483647 - 1) { | ||
332 | + *(int *) dst = -2147483647 - 1; | ||
333 | + } else { | ||
334 | + *(int *) dst = (int) y; | ||
335 | + } | ||
336 | + dst += dst_skip; | ||
337 | + src++; | ||
338 | + } | ||
339 | +} | ||
340 | |||
341 | === added file 'gcc/testsuite/gcc.target/arm/cmp-1.c' | ||
342 | --- old/gcc/testsuite/gcc.target/arm/cmp-1.c 1970-01-01 00:00:00 +0000 | ||
343 | +++ new/gcc/testsuite/gcc.target/arm/cmp-1.c 2011-10-03 09:47:33 +0000 | ||
344 | @@ -0,0 +1,37 @@ | ||
345 | +/* { dg-do compile } */ | ||
346 | +/* { dg-options "-O" } */ | ||
347 | +/* { dg-final { scan-assembler-not "\tbl\t" } } */ | ||
348 | +/* { dg-final { scan-assembler-not "__aeabi" } } */ | ||
349 | +int x, y; | ||
350 | + | ||
351 | +#define TEST_EXPR(NAME, ARGS, EXPR) \ | ||
352 | + int NAME##1 ARGS { return (EXPR); } \ | ||
353 | + int NAME##2 ARGS { return !(EXPR); } \ | ||
354 | + int NAME##3 ARGS { return (EXPR) ? x : y; } \ | ||
355 | + void NAME##4 ARGS { if (EXPR) x++; } \ | ||
356 | + void NAME##5 ARGS { if (!(EXPR)) x++; } | ||
357 | + | ||
358 | +#define TEST(NAME, TYPE, OPERATOR) \ | ||
359 | + TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2) \ | ||
360 | + TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2) \ | ||
361 | + TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2) \ | ||
362 | + TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \ | ||
363 | + TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100) \ | ||
364 | + TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1) | ||
365 | + | ||
366 | +#define TEST_OP(NAME, OPERATOR) \ | ||
367 | + TEST (sc_##NAME, signed char, OPERATOR) \ | ||
368 | + TEST (uc_##NAME, unsigned char, OPERATOR) \ | ||
369 | + TEST (ss_##NAME, short, OPERATOR) \ | ||
370 | + TEST (us_##NAME, unsigned short, OPERATOR) \ | ||
371 | + TEST (si_##NAME, int, OPERATOR) \ | ||
372 | + TEST (ui_##NAME, unsigned int, OPERATOR) \ | ||
373 | + TEST (sll_##NAME, long long, OPERATOR) \ | ||
374 | + TEST (ull_##NAME, unsigned long long, OPERATOR) | ||
375 | + | ||
376 | +TEST_OP (eq, ==) | ||
377 | +TEST_OP (ne, !=) | ||
378 | +TEST_OP (lt, <) | ||
379 | +TEST_OP (gt, >) | ||
380 | +TEST_OP (le, <=) | ||
381 | +TEST_OP (ge, >=) | ||
382 | |||
383 | === added file 'gcc/testsuite/gcc.target/arm/cmp-2.c' | ||
384 | --- old/gcc/testsuite/gcc.target/arm/cmp-2.c 1970-01-01 00:00:00 +0000 | ||
385 | +++ new/gcc/testsuite/gcc.target/arm/cmp-2.c 2011-10-03 09:47:33 +0000 | ||
386 | @@ -0,0 +1,49 @@ | ||
387 | +/* { dg-do compile } */ | ||
388 | +/* { dg-require-effective-target arm_vfp_ok } */ | ||
389 | +/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ | ||
390 | +/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */ | ||
391 | +/* { dg-final { scan-assembler-not "\tbl\t" } } */ | ||
392 | +/* { dg-final { scan-assembler-not "__aeabi" } } */ | ||
393 | +int x, y; | ||
394 | + | ||
395 | +#define EQ(X, Y) ((X) == (Y)) | ||
396 | +#define NE(X, Y) ((X) != (Y)) | ||
397 | +#define LT(X, Y) ((X) < (Y)) | ||
398 | +#define GT(X, Y) ((X) > (Y)) | ||
399 | +#define LE(X, Y) ((X) <= (Y)) | ||
400 | +#define GE(X, Y) ((X) >= (Y)) | ||
401 | + | ||
402 | +#define TEST_EXPR(NAME, ARGS, EXPR) \ | ||
403 | + int NAME##1 ARGS { return (EXPR); } \ | ||
404 | + int NAME##2 ARGS { return !(EXPR); } \ | ||
405 | + int NAME##3 ARGS { return (EXPR) ? x : y; } \ | ||
406 | + void NAME##4 ARGS { if (EXPR) x++; } \ | ||
407 | + void NAME##5 ARGS { if (!(EXPR)) x++; } | ||
408 | + | ||
409 | +#define TEST(NAME, TYPE, OPERATOR) \ | ||
410 | + TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2)) \ | ||
411 | + TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2)) \ | ||
412 | + TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2)) \ | ||
413 | + TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2)) \ | ||
414 | + TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100)) \ | ||
415 | + TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1)) | ||
416 | + | ||
417 | +#define TEST_OP(NAME, OPERATOR) \ | ||
418 | + TEST (f_##NAME, float, OPERATOR) \ | ||
419 | + TEST (d_##NAME, double, OPERATOR) \ | ||
420 | + TEST (ld_##NAME, long double, OPERATOR) | ||
421 | + | ||
422 | +TEST_OP (eq, EQ) | ||
423 | +TEST_OP (ne, NE) | ||
424 | +TEST_OP (lt, LT) | ||
425 | +TEST_OP (gt, GT) | ||
426 | +TEST_OP (le, LE) | ||
427 | +TEST_OP (ge, GE) | ||
428 | +TEST_OP (blt, __builtin_isless) | ||
429 | +TEST_OP (bgt, __builtin_isgreater) | ||
430 | +TEST_OP (ble, __builtin_islessequal) | ||
431 | +TEST_OP (bge, __builtin_isgreaterequal) | ||
432 | +/* This one should be expanded into separate ordered and equality | ||
433 | + comparisons. */ | ||
434 | +TEST_OP (blg, __builtin_islessgreater) | ||
435 | +TEST_OP (bun, __builtin_isunordered) | ||
436 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch new file mode 100644 index 000000000..4a886ce56 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106820.patch | |||
@@ -0,0 +1,378 @@ | |||
1 | 2011-10-06 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-09-25 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-data-ref.c (dr_analyze_innermost): Add new argument. | ||
9 | Allow not simple iv if analyzing basic block. | ||
10 | (create_data_ref): Update call to dr_analyze_innermost. | ||
11 | (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise. | ||
12 | * tree-loop-distribution.c (generate_memset_zero): Likewise. | ||
13 | * tree-predcom.c (find_looparound_phi): Likewise. | ||
14 | * tree-data-ref.h (dr_analyze_innermost): Add new argument. | ||
15 | |||
16 | gcc/testsuite/ | ||
17 | * gcc.dg/vect/bb-slp-24.c: New. | ||
18 | |||
19 | |||
20 | 2011-09-15 Ira Rosen <ira.rosen@linaro.org> | ||
21 | |||
22 | gcc/ | ||
23 | * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow | ||
24 | read-after-read dependencies in basic block SLP. | ||
25 | |||
26 | gcc/testsuite/ | ||
27 | * gcc.dg/vect/bb-slp-25.c: New. | ||
28 | |||
29 | |||
30 | 2011-04-21 Richard Sandiford <richard.sandiford@linaro.org> | ||
31 | |||
32 | gcc/ | ||
33 | * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use | ||
34 | operand_equal_p to compare DR_BASE_ADDRESSes. | ||
35 | (vect_check_interleaving): Likewise. | ||
36 | |||
37 | gcc/testsuite/ | ||
38 | * gcc.dg/vect/vect-119.c: New test. | ||
39 | |||
40 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-24.c' | ||
41 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 1970-01-01 00:00:00 +0000 | ||
42 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-24.c 2011-10-02 08:43:10 +0000 | ||
43 | @@ -0,0 +1,59 @@ | ||
44 | +/* { dg-require-effective-target vect_int } */ | ||
45 | + | ||
46 | +#include <stdarg.h> | ||
47 | +#include "tree-vect.h" | ||
48 | + | ||
49 | +#define A 3 | ||
50 | +#define N 256 | ||
51 | + | ||
52 | +short src[N], dst[N]; | ||
53 | + | ||
54 | +void foo (short * __restrict__ dst, short * __restrict__ src, int h, | ||
55 | + int stride, int dummy) | ||
56 | +{ | ||
57 | + int i; | ||
58 | + h /= 8; | ||
59 | + for (i = 0; i < h; i++) | ||
60 | + { | ||
61 | + dst[0] += A*src[0]; | ||
62 | + dst[1] += A*src[1]; | ||
63 | + dst[2] += A*src[2]; | ||
64 | + dst[3] += A*src[3]; | ||
65 | + dst[4] += A*src[4]; | ||
66 | + dst[5] += A*src[5]; | ||
67 | + dst[6] += A*src[6]; | ||
68 | + dst[7] += A*src[7]; | ||
69 | + dst += stride; | ||
70 | + src += stride; | ||
71 | + if (dummy == 32) | ||
72 | + abort (); | ||
73 | + } | ||
74 | +} | ||
75 | + | ||
76 | + | ||
77 | +int main (void) | ||
78 | +{ | ||
79 | + int i; | ||
80 | + | ||
81 | + check_vect (); | ||
82 | + | ||
83 | + for (i = 0; i < N; i++) | ||
84 | + { | ||
85 | + dst[i] = 0; | ||
86 | + src[i] = i; | ||
87 | + } | ||
88 | + | ||
89 | + foo (dst, src, N, 8, 0); | ||
90 | + | ||
91 | + for (i = 0; i < N; i++) | ||
92 | + { | ||
93 | + if (dst[i] != A * i) | ||
94 | + abort (); | ||
95 | + } | ||
96 | + | ||
97 | + return 0; | ||
98 | +} | ||
99 | + | ||
100 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ | ||
101 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
102 | + | ||
103 | |||
104 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-25.c' | ||
105 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 1970-01-01 00:00:00 +0000 | ||
106 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-25.c 2011-10-02 08:43:10 +0000 | ||
107 | @@ -0,0 +1,59 @@ | ||
108 | +/* { dg-require-effective-target vect_int } */ | ||
109 | + | ||
110 | +#include <stdarg.h> | ||
111 | +#include "tree-vect.h" | ||
112 | + | ||
113 | +#define A 3 | ||
114 | +#define B 4 | ||
115 | +#define N 256 | ||
116 | + | ||
117 | +short src[N], dst[N]; | ||
118 | + | ||
119 | +void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) | ||
120 | +{ | ||
121 | + int i; | ||
122 | + h /= 16; | ||
123 | + for (i = 0; i < h; i++) | ||
124 | + { | ||
125 | + dst[0] += A*src[0] + src[stride]; | ||
126 | + dst[1] += A*src[1] + src[1+stride]; | ||
127 | + dst[2] += A*src[2] + src[2+stride]; | ||
128 | + dst[3] += A*src[3] + src[3+stride]; | ||
129 | + dst[4] += A*src[4] + src[4+stride]; | ||
130 | + dst[5] += A*src[5] + src[5+stride]; | ||
131 | + dst[6] += A*src[6] + src[6+stride]; | ||
132 | + dst[7] += A*src[7] + src[7+stride]; | ||
133 | + dst += 8; | ||
134 | + src += 8; | ||
135 | + if (dummy == 32) | ||
136 | + abort (); | ||
137 | + } | ||
138 | +} | ||
139 | + | ||
140 | + | ||
141 | +int main (void) | ||
142 | +{ | ||
143 | + int i; | ||
144 | + | ||
145 | + check_vect (); | ||
146 | + | ||
147 | + for (i = 0; i < N; i++) | ||
148 | + { | ||
149 | + dst[i] = 0; | ||
150 | + src[i] = i; | ||
151 | + } | ||
152 | + | ||
153 | + foo (dst, src, N, 8, 0); | ||
154 | + | ||
155 | + for (i = 0; i < N/2; i++) | ||
156 | + { | ||
157 | + if (dst[i] != A * i + i + 8) | ||
158 | + abort (); | ||
159 | + } | ||
160 | + | ||
161 | + return 0; | ||
162 | +} | ||
163 | + | ||
164 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ | ||
165 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
166 | + | ||
167 | |||
168 | === added file 'gcc/testsuite/gcc.dg/vect/vect-119.c' | ||
169 | --- old/gcc/testsuite/gcc.dg/vect/vect-119.c 1970-01-01 00:00:00 +0000 | ||
170 | +++ new/gcc/testsuite/gcc.dg/vect/vect-119.c 2011-10-02 08:43:10 +0000 | ||
171 | @@ -0,0 +1,28 @@ | ||
172 | +/* { dg-do compile } */ | ||
173 | + | ||
174 | +#define OUTER 32 | ||
175 | +#define INNER 40 | ||
176 | + | ||
177 | +static unsigned int | ||
178 | +bar (const unsigned int x[INNER][2], unsigned int sum) | ||
179 | +{ | ||
180 | + int i; | ||
181 | + | ||
182 | + for (i = 0; i < INNER; i++) | ||
183 | + sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; | ||
184 | + return sum; | ||
185 | +} | ||
186 | + | ||
187 | +unsigned int foo (const unsigned int x[OUTER][INNER][2]) | ||
188 | +{ | ||
189 | + int i; | ||
190 | + unsigned int sum; | ||
191 | + | ||
192 | + sum = 0.0f; | ||
193 | + for (i = 0; i < OUTER; i++) | ||
194 | + sum = bar (x[i], sum); | ||
195 | + return sum; | ||
196 | +} | ||
197 | + | ||
198 | +/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */ | ||
199 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
200 | |||
201 | === modified file 'gcc/tree-data-ref.c' | ||
202 | --- old/gcc/tree-data-ref.c 2011-05-26 14:27:33 +0000 | ||
203 | +++ new/gcc/tree-data-ref.c 2011-10-02 08:43:10 +0000 | ||
204 | @@ -721,11 +721,11 @@ | ||
205 | } | ||
206 | |||
207 | /* Analyzes the behavior of the memory reference DR in the innermost loop or | ||
208 | - basic block that contains it. Returns true if analysis succeed or false | ||
209 | + basic block that contains it. Returns true if analysis succeeds or false | ||
210 | otherwise. */ | ||
211 | |||
212 | bool | ||
213 | -dr_analyze_innermost (struct data_reference *dr) | ||
214 | +dr_analyze_innermost (struct data_reference *dr, struct loop *nest) | ||
215 | { | ||
216 | gimple stmt = DR_STMT (dr); | ||
217 | struct loop *loop = loop_containing_stmt (stmt); | ||
218 | @@ -768,14 +768,25 @@ | ||
219 | } | ||
220 | else | ||
221 | base = build_fold_addr_expr (base); | ||
222 | + | ||
223 | if (in_loop) | ||
224 | { | ||
225 | if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, | ||
226 | false)) | ||
227 | { | ||
228 | - if (dump_file && (dump_flags & TDF_DETAILS)) | ||
229 | - fprintf (dump_file, "failed: evolution of base is not affine.\n"); | ||
230 | - return false; | ||
231 | + if (nest) | ||
232 | + { | ||
233 | + if (dump_file && (dump_flags & TDF_DETAILS)) | ||
234 | + fprintf (dump_file, "failed: evolution of base is not" | ||
235 | + " affine.\n"); | ||
236 | + return false; | ||
237 | + } | ||
238 | + else | ||
239 | + { | ||
240 | + base_iv.base = base; | ||
241 | + base_iv.step = ssize_int (0); | ||
242 | + base_iv.no_overflow = true; | ||
243 | + } | ||
244 | } | ||
245 | } | ||
246 | else | ||
247 | @@ -800,10 +811,18 @@ | ||
248 | else if (!simple_iv (loop, loop_containing_stmt (stmt), | ||
249 | poffset, &offset_iv, false)) | ||
250 | { | ||
251 | - if (dump_file && (dump_flags & TDF_DETAILS)) | ||
252 | - fprintf (dump_file, "failed: evolution of offset is not" | ||
253 | - " affine.\n"); | ||
254 | - return false; | ||
255 | + if (nest) | ||
256 | + { | ||
257 | + if (dump_file && (dump_flags & TDF_DETAILS)) | ||
258 | + fprintf (dump_file, "failed: evolution of offset is not" | ||
259 | + " affine.\n"); | ||
260 | + return false; | ||
261 | + } | ||
262 | + else | ||
263 | + { | ||
264 | + offset_iv.base = poffset; | ||
265 | + offset_iv.step = ssize_int (0); | ||
266 | + } | ||
267 | } | ||
268 | } | ||
269 | |||
270 | @@ -967,7 +986,7 @@ | ||
271 | DR_REF (dr) = memref; | ||
272 | DR_IS_READ (dr) = is_read; | ||
273 | |||
274 | - dr_analyze_innermost (dr); | ||
275 | + dr_analyze_innermost (dr, nest); | ||
276 | dr_analyze_indices (dr, nest, loop); | ||
277 | dr_analyze_alias (dr); | ||
278 | |||
279 | @@ -5185,7 +5204,7 @@ | ||
280 | DR_STMT (dr) = stmt; | ||
281 | DR_REF (dr) = op0; | ||
282 | |||
283 | - res = dr_analyze_innermost (dr) | ||
284 | + res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) | ||
285 | && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); | ||
286 | |||
287 | free_data_ref (dr); | ||
288 | @@ -5225,7 +5244,7 @@ | ||
289 | |||
290 | DR_STMT (dr) = stmt; | ||
291 | DR_REF (dr) = *ref->pos; | ||
292 | - dr_analyze_innermost (dr); | ||
293 | + dr_analyze_innermost (dr, loop_containing_stmt (stmt)); | ||
294 | base_address = DR_BASE_ADDRESS (dr); | ||
295 | |||
296 | if (!base_address) | ||
297 | |||
298 | === modified file 'gcc/tree-data-ref.h' | ||
299 | --- old/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000 | ||
300 | +++ new/gcc/tree-data-ref.h 2011-10-02 08:43:10 +0000 | ||
301 | @@ -386,7 +386,7 @@ | ||
302 | DEF_VEC_ALLOC_O (data_ref_loc, heap); | ||
303 | |||
304 | bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); | ||
305 | -bool dr_analyze_innermost (struct data_reference *); | ||
306 | +bool dr_analyze_innermost (struct data_reference *, struct loop *); | ||
307 | extern bool compute_data_dependences_for_loop (struct loop *, bool, | ||
308 | VEC (loop_p, heap) **, | ||
309 | VEC (data_reference_p, heap) **, | ||
310 | |||
311 | === modified file 'gcc/tree-loop-distribution.c' | ||
312 | --- old/gcc/tree-loop-distribution.c 2011-05-11 13:07:54 +0000 | ||
313 | +++ new/gcc/tree-loop-distribution.c 2011-10-02 08:43:10 +0000 | ||
314 | @@ -267,7 +267,7 @@ | ||
315 | |||
316 | DR_STMT (dr) = stmt; | ||
317 | DR_REF (dr) = op0; | ||
318 | - res = dr_analyze_innermost (dr); | ||
319 | + res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); | ||
320 | gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); | ||
321 | |||
322 | nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); | ||
323 | |||
324 | === modified file 'gcc/tree-predcom.c' | ||
325 | --- old/gcc/tree-predcom.c 2011-02-11 14:19:44 +0000 | ||
326 | +++ new/gcc/tree-predcom.c 2011-10-02 08:43:10 +0000 | ||
327 | @@ -1114,7 +1114,7 @@ | ||
328 | memset (&init_dr, 0, sizeof (struct data_reference)); | ||
329 | DR_REF (&init_dr) = init_ref; | ||
330 | DR_STMT (&init_dr) = phi; | ||
331 | - if (!dr_analyze_innermost (&init_dr)) | ||
332 | + if (!dr_analyze_innermost (&init_dr, loop)) | ||
333 | return NULL; | ||
334 | |||
335 | if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) | ||
336 | |||
337 | === modified file 'gcc/tree-vect-data-refs.c' | ||
338 | --- old/gcc/tree-vect-data-refs.c 2011-07-04 11:13:51 +0000 | ||
339 | +++ new/gcc/tree-vect-data-refs.c 2011-10-02 08:43:10 +0000 | ||
340 | @@ -353,11 +353,7 @@ | ||
341 | |||
342 | /* Check that the data-refs have same bases and offsets. If not, we can't | ||
343 | determine if they are dependent. */ | ||
344 | - if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) | ||
345 | - && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR | ||
346 | - || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR | ||
347 | - || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) | ||
348 | - != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) | ||
349 | + if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) | ||
350 | || !dr_equal_offsets_p (dra, drb)) | ||
351 | return true; | ||
352 | |||
353 | @@ -403,11 +399,7 @@ | ||
354 | |||
355 | /* Check that the data-refs have same first location (except init) and they | ||
356 | are both either store or load (not load and store). */ | ||
357 | - if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) | ||
358 | - && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR | ||
359 | - || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR | ||
360 | - || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) | ||
361 | - != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) | ||
362 | + if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) | ||
363 | || !dr_equal_offsets_p (dra, drb) | ||
364 | || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) | ||
365 | || DR_IS_READ (dra) != DR_IS_READ (drb)) | ||
366 | @@ -615,6 +607,11 @@ | ||
367 | if (vect_check_interleaving (dra, drb)) | ||
368 | return false; | ||
369 | |||
370 | + /* Read-read is OK (we need this check here, after checking for | ||
371 | + interleaving). */ | ||
372 | + if (DR_IS_READ (dra) && DR_IS_READ (drb)) | ||
373 | + return false; | ||
374 | + | ||
375 | if (vect_print_dump_info (REPORT_DR_DETAILS)) | ||
376 | { | ||
377 | fprintf (vect_dump, "can't determine dependence between "); | ||
378 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch new file mode 100644 index 000000000..f25a37858 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106821.patch | |||
@@ -0,0 +1,240 @@ | |||
1 | 2011-10-06 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | gcc/testsuite/ | ||
4 | * gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block | ||
5 | vectorizable. | ||
6 | |||
7 | Backport from mainline: | ||
8 | |||
9 | 2011-09-25 Ira Rosen <ira.rosen@linaro.org> | ||
10 | |||
11 | gcc/ | ||
12 | * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part | ||
13 | of vect_slp_analyze_bb here. | ||
14 | (vect_slp_analyze_bb): Loop over vector sizes calling vect_slp_analyze_bb_1. | ||
15 | |||
16 | gcc/testsuite/ | ||
17 | * lib/target-supports.exp (check_effective_target_vect64): New. | ||
18 | * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case | ||
19 | of multiple vector sizes. | ||
20 | * gcc.dg/vect/bb-slp-26.c: New. | ||
21 | |||
22 | === modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' | ||
23 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2010-11-22 12:16:52 +0000 | ||
24 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 | ||
25 | @@ -49,6 +49,7 @@ | ||
26 | } | ||
27 | |||
28 | /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ | ||
29 | -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ | ||
30 | +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ | ||
31 | +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ | ||
32 | /* { dg-final { cleanup-tree-dump "slp" } } */ | ||
33 | |||
34 | |||
35 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-26.c' | ||
36 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 1970-01-01 00:00:00 +0000 | ||
37 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-26.c 2011-10-02 10:40:34 +0000 | ||
38 | @@ -0,0 +1,60 @@ | ||
39 | +/* { dg-require-effective-target vect_int } */ | ||
40 | + | ||
41 | +#include <stdarg.h> | ||
42 | +#include "tree-vect.h" | ||
43 | + | ||
44 | +#define A 3 | ||
45 | +#define B 4 | ||
46 | +#define N 256 | ||
47 | + | ||
48 | +char src[N], dst[N]; | ||
49 | + | ||
50 | +void foo (char * __restrict__ dst, char * __restrict__ src, int h, | ||
51 | + int stride, int dummy) | ||
52 | +{ | ||
53 | + int i; | ||
54 | + h /= 16; | ||
55 | + for (i = 0; i < h; i++) | ||
56 | + { | ||
57 | + dst[0] += A*src[0]; | ||
58 | + dst[1] += A*src[1]; | ||
59 | + dst[2] += A*src[2]; | ||
60 | + dst[3] += A*src[3]; | ||
61 | + dst[4] += A*src[4]; | ||
62 | + dst[5] += A*src[5]; | ||
63 | + dst[6] += A*src[6]; | ||
64 | + dst[7] += A*src[7]; | ||
65 | + dst += 8; | ||
66 | + src += 8; | ||
67 | + if (dummy == 32) | ||
68 | + abort (); | ||
69 | + } | ||
70 | +} | ||
71 | + | ||
72 | + | ||
73 | +int main (void) | ||
74 | +{ | ||
75 | + int i; | ||
76 | + | ||
77 | + check_vect (); | ||
78 | + | ||
79 | + for (i = 0; i < N; i++) | ||
80 | + { | ||
81 | + dst[i] = 0; | ||
82 | + src[i] = i/8; | ||
83 | + } | ||
84 | + | ||
85 | + foo (dst, src, N, 8, 0); | ||
86 | + | ||
87 | + for (i = 0; i < N/2; i++) | ||
88 | + { | ||
89 | + if (dst[i] != A * src[i]) | ||
90 | + abort (); | ||
91 | + } | ||
92 | + | ||
93 | + return 0; | ||
94 | +} | ||
95 | + | ||
96 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ | ||
97 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
98 | + | ||
99 | |||
100 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
101 | --- old/gcc/testsuite/lib/target-supports.exp 2011-09-20 07:54:28 +0000 | ||
102 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 | ||
103 | @@ -3283,6 +3283,24 @@ | ||
104 | return $et_vect_multiple_sizes_saved | ||
105 | } | ||
106 | |||
107 | +# Return 1 if the target supports vectors of 64 bits. | ||
108 | + | ||
109 | +proc check_effective_target_vect64 { } { | ||
110 | + global et_vect64 | ||
111 | + | ||
112 | + if [info exists et_vect64_saved] { | ||
113 | + verbose "check_effective_target_vect64: using cached result" 2 | ||
114 | + } else { | ||
115 | + set et_vect64_saved 0 | ||
116 | + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { | ||
117 | + set et_vect64_saved 1 | ||
118 | + } | ||
119 | + } | ||
120 | + | ||
121 | + verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 | ||
122 | + return $et_vect64_saved | ||
123 | +} | ||
124 | + | ||
125 | # Return 1 if the target supports section-anchors | ||
126 | |||
127 | proc check_effective_target_section_anchors { } { | ||
128 | |||
129 | === modified file 'gcc/tree-vect-slp.c' | ||
130 | --- old/gcc/tree-vect-slp.c 2011-07-06 12:04:10 +0000 | ||
131 | +++ new/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 | ||
132 | @@ -1664,42 +1664,18 @@ | ||
133 | |||
134 | /* Check if the basic block can be vectorized. */ | ||
135 | |||
136 | -bb_vec_info | ||
137 | -vect_slp_analyze_bb (basic_block bb) | ||
138 | +static bb_vec_info | ||
139 | +vect_slp_analyze_bb_1 (basic_block bb) | ||
140 | { | ||
141 | bb_vec_info bb_vinfo; | ||
142 | VEC (ddr_p, heap) *ddrs; | ||
143 | VEC (slp_instance, heap) *slp_instances; | ||
144 | slp_instance instance; | ||
145 | - int i, insns = 0; | ||
146 | - gimple_stmt_iterator gsi; | ||
147 | + int i; | ||
148 | int min_vf = 2; | ||
149 | int max_vf = MAX_VECTORIZATION_FACTOR; | ||
150 | bool data_dependence_in_bb = false; | ||
151 | |||
152 | - current_vector_size = 0; | ||
153 | - | ||
154 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
155 | - fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); | ||
156 | - | ||
157 | - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | ||
158 | - { | ||
159 | - gimple stmt = gsi_stmt (gsi); | ||
160 | - if (!is_gimple_debug (stmt) | ||
161 | - && !gimple_nop_p (stmt) | ||
162 | - && gimple_code (stmt) != GIMPLE_LABEL) | ||
163 | - insns++; | ||
164 | - } | ||
165 | - | ||
166 | - if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) | ||
167 | - { | ||
168 | - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | ||
169 | - fprintf (vect_dump, "not vectorized: too many instructions in basic " | ||
170 | - "block.\n"); | ||
171 | - | ||
172 | - return NULL; | ||
173 | - } | ||
174 | - | ||
175 | bb_vinfo = new_bb_vec_info (bb); | ||
176 | if (!bb_vinfo) | ||
177 | return NULL; | ||
178 | @@ -1819,6 +1795,61 @@ | ||
179 | } | ||
180 | |||
181 | |||
182 | +bb_vec_info | ||
183 | +vect_slp_analyze_bb (basic_block bb) | ||
184 | +{ | ||
185 | + bb_vec_info bb_vinfo; | ||
186 | + int insns = 0; | ||
187 | + gimple_stmt_iterator gsi; | ||
188 | + unsigned int vector_sizes; | ||
189 | + | ||
190 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
191 | + fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); | ||
192 | + | ||
193 | + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | ||
194 | + { | ||
195 | + gimple stmt = gsi_stmt (gsi); | ||
196 | + if (!is_gimple_debug (stmt) | ||
197 | + && !gimple_nop_p (stmt) | ||
198 | + && gimple_code (stmt) != GIMPLE_LABEL) | ||
199 | + insns++; | ||
200 | + } | ||
201 | + | ||
202 | + if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) | ||
203 | + { | ||
204 | + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | ||
205 | + fprintf (vect_dump, "not vectorized: too many instructions in basic " | ||
206 | + "block.\n"); | ||
207 | + | ||
208 | + return NULL; | ||
209 | + } | ||
210 | + | ||
211 | + /* Autodetect first vector size we try. */ | ||
212 | + current_vector_size = 0; | ||
213 | + vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); | ||
214 | + | ||
215 | + while (1) | ||
216 | + { | ||
217 | + bb_vinfo = vect_slp_analyze_bb_1 (bb); | ||
218 | + if (bb_vinfo) | ||
219 | + return bb_vinfo; | ||
220 | + | ||
221 | + destroy_bb_vec_info (bb_vinfo); | ||
222 | + | ||
223 | + vector_sizes &= ~current_vector_size; | ||
224 | + if (vector_sizes == 0 | ||
225 | + || current_vector_size == 0) | ||
226 | + return NULL; | ||
227 | + | ||
228 | + /* Try the next biggest vector size. */ | ||
229 | + current_vector_size = 1 << floor_log2 (vector_sizes); | ||
230 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
231 | + fprintf (vect_dump, "***** Re-trying analysis with " | ||
232 | + "vector size %d\n", current_vector_size); | ||
233 | + } | ||
234 | +} | ||
235 | + | ||
236 | + | ||
237 | /* SLP costs are calculated according to SLP instance unrolling factor (i.e., | ||
238 | the number of created vector stmts depends on the unrolling factor). | ||
239 | However, the actual number of vector stmts for every SLP node depends on | ||
240 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch new file mode 100644 index 000000000..13e6fd26e --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106825.patch | |||
@@ -0,0 +1,124 @@ | |||
1 | 2011-10-13 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-10-07 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/predicates.md (shift_amount_operand): Remove constant | ||
9 | range check. | ||
10 | (shift_operator): Check range of constants for all shift operators. | ||
11 | |||
12 | gcc/testsuite/ | ||
13 | * gcc.dg/pr50193-1.c: New file. | ||
14 | * gcc.target/arm/shiftable.c: New file. | ||
15 | |||
16 | === modified file 'gcc/config/arm/predicates.md' | ||
17 | --- old/gcc/config/arm/predicates.md 2011-10-03 09:47:33 +0000 | ||
18 | +++ new/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 | ||
19 | @@ -129,11 +129,12 @@ | ||
20 | (ior (match_operand 0 "arm_rhs_operand") | ||
21 | (match_operand 0 "memory_operand"))) | ||
22 | |||
23 | +;; This doesn't have to do much because the constant is already checked | ||
24 | +;; in the shift_operator predicate. | ||
25 | (define_predicate "shift_amount_operand" | ||
26 | (ior (and (match_test "TARGET_ARM") | ||
27 | (match_operand 0 "s_register_operand")) | ||
28 | - (and (match_operand 0 "const_int_operand") | ||
29 | - (match_test "INTVAL (op) > 0")))) | ||
30 | + (match_operand 0 "const_int_operand"))) | ||
31 | |||
32 | (define_predicate "arm_add_operand" | ||
33 | (ior (match_operand 0 "arm_rhs_operand") | ||
34 | @@ -219,13 +220,20 @@ | ||
35 | (match_test "mode == GET_MODE (op)"))) | ||
36 | |||
37 | ;; True for shift operators. | ||
38 | +;; Notes: | ||
39 | +;; * mult is only permitted with a constant shift amount | ||
40 | +;; * patterns that permit register shift amounts only in ARM mode use | ||
41 | +;; shift_amount_operand, patterns that always allow registers do not, | ||
42 | +;; so we don't have to worry about that sort of thing here. | ||
43 | (define_special_predicate "shift_operator" | ||
44 | (and (ior (ior (and (match_code "mult") | ||
45 | (match_test "power_of_two_operand (XEXP (op, 1), mode)")) | ||
46 | (and (match_code "rotate") | ||
47 | (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT | ||
48 | && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) | ||
49 | - (match_code "ashift,ashiftrt,lshiftrt,rotatert")) | ||
50 | + (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") | ||
51 | + (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT | ||
52 | + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) | ||
53 | (match_test "mode == GET_MODE (op)"))) | ||
54 | |||
55 | ;; True for MULT, to identify which variant of shift_operator is in use. | ||
56 | |||
57 | === added file 'gcc/testsuite/gcc.target/arm/shiftable.c' | ||
58 | --- old/gcc/testsuite/gcc.target/arm/shiftable.c 1970-01-01 00:00:00 +0000 | ||
59 | +++ new/gcc/testsuite/gcc.target/arm/shiftable.c 2011-10-10 11:43:28 +0000 | ||
60 | @@ -0,0 +1,63 @@ | ||
61 | +/* { dg-do compile } */ | ||
62 | +/* { dg-options "-O2" } */ | ||
63 | +/* { dg-require-effective-target arm32 } */ | ||
64 | + | ||
65 | +/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some | ||
66 | + of these as a left shift, others as a multiply. Check that we match the | ||
67 | + right one. */ | ||
68 | + | ||
69 | +int | ||
70 | +plus (int a, int b) | ||
71 | +{ | ||
72 | + return (a * 64) + b; | ||
73 | +} | ||
74 | + | ||
75 | +/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ | ||
76 | + | ||
77 | +int | ||
78 | +minus (int a, int b) | ||
79 | +{ | ||
80 | + return a - (b * 64); | ||
81 | +} | ||
82 | + | ||
83 | +/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ | ||
84 | + | ||
85 | +int | ||
86 | +ior (int a, int b) | ||
87 | +{ | ||
88 | + return (a * 64) | b; | ||
89 | +} | ||
90 | + | ||
91 | +/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ | ||
92 | + | ||
93 | +int | ||
94 | +xor (int a, int b) | ||
95 | +{ | ||
96 | + return (a * 64) ^ b; | ||
97 | +} | ||
98 | + | ||
99 | +/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ | ||
100 | + | ||
101 | +int | ||
102 | +and (int a, int b) | ||
103 | +{ | ||
104 | + return (a * 64) & b; | ||
105 | +} | ||
106 | + | ||
107 | +/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ | ||
108 | + | ||
109 | +int | ||
110 | +rsb (int a, int b) | ||
111 | +{ | ||
112 | + return (a * 64) - b; | ||
113 | +} | ||
114 | + | ||
115 | +/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ | ||
116 | + | ||
117 | +int | ||
118 | +mvn (int a, int b) | ||
119 | +{ | ||
120 | + return ~(a * 64); | ||
121 | +} | ||
122 | + | ||
123 | +/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ | ||
124 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch new file mode 100644 index 000000000..6642f8a66 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106826.patch | |||
@@ -0,0 +1,362 @@ | |||
1 | 2011-10-16 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-09-27 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block | ||
9 | vectorization. | ||
10 | (vectorizable_type_promotion): Likewise. | ||
11 | (vect_analyze_stmt): Call vectorizable_type_demotion and | ||
12 | vectorizable_type_promotion for basic blocks. | ||
13 | (supportable_widening_operation): Don't assume loop vectorization. | ||
14 | * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for | ||
15 | basic blocks. Update vectorization factor for basic block | ||
16 | vectorization. | ||
17 | (vect_analyze_slp_instance): Allow multiple types for basic block | ||
18 | vectorization. Recheck unrolling factor after construction of SLP | ||
19 | instance. | ||
20 | |||
21 | gcc/testsuite/ | ||
22 | * gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit | ||
23 | vectors. | ||
24 | * gcc.dg/vect/bb-slp-27.c: New. | ||
25 | * gcc.dg/vect/bb-slp-28.c: New. | ||
26 | |||
27 | |||
28 | 2011-10-04 Ira Rosen <ira.rosen@linaro.org> | ||
29 | |||
30 | gcc/testsuite/ | ||
31 | * lib/target-supports.exp (check_effective_target_vect_multiple_sizes): | ||
32 | Make et_vect_multiple_sizes_saved global. | ||
33 | (check_effective_target_vect64): Make et_vect64_saved global. | ||
34 | |||
35 | === modified file 'gcc/testsuite/gcc.dg/vect/bb-slp-11.c' | ||
36 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-02 10:40:34 +0000 | ||
37 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-11.c 2011-10-06 11:08:08 +0000 | ||
38 | @@ -48,8 +48,6 @@ | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | -/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ | ||
43 | -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ | ||
44 | -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ | ||
45 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */ | ||
46 | /* { dg-final { cleanup-tree-dump "slp" } } */ | ||
47 | |||
48 | |||
49 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-27.c' | ||
50 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 1970-01-01 00:00:00 +0000 | ||
51 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-27.c 2011-10-06 11:08:08 +0000 | ||
52 | @@ -0,0 +1,49 @@ | ||
53 | +/* { dg-require-effective-target vect_int } */ | ||
54 | + | ||
55 | +#include <stdarg.h> | ||
56 | +#include "tree-vect.h" | ||
57 | + | ||
58 | +#define A 3 | ||
59 | +#define N 16 | ||
60 | + | ||
61 | +short src[N], dst[N]; | ||
62 | + | ||
63 | +void foo (int a) | ||
64 | +{ | ||
65 | + dst[0] += a*src[0]; | ||
66 | + dst[1] += a*src[1]; | ||
67 | + dst[2] += a*src[2]; | ||
68 | + dst[3] += a*src[3]; | ||
69 | + dst[4] += a*src[4]; | ||
70 | + dst[5] += a*src[5]; | ||
71 | + dst[6] += a*src[6]; | ||
72 | + dst[7] += a*src[7]; | ||
73 | +} | ||
74 | + | ||
75 | + | ||
76 | +int main (void) | ||
77 | +{ | ||
78 | + int i; | ||
79 | + | ||
80 | + check_vect (); | ||
81 | + | ||
82 | + for (i = 0; i < N; i++) | ||
83 | + { | ||
84 | + dst[i] = 0; | ||
85 | + src[i] = i; | ||
86 | + } | ||
87 | + | ||
88 | + foo (A); | ||
89 | + | ||
90 | + for (i = 0; i < 8; i++) | ||
91 | + { | ||
92 | + if (dst[i] != A * i) | ||
93 | + abort (); | ||
94 | + } | ||
95 | + | ||
96 | + return 0; | ||
97 | +} | ||
98 | + | ||
99 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */ | ||
100 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
101 | + | ||
102 | |||
103 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-28.c' | ||
104 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 1970-01-01 00:00:00 +0000 | ||
105 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-28.c 2011-10-06 11:08:08 +0000 | ||
106 | @@ -0,0 +1,71 @@ | ||
107 | +/* { dg-require-effective-target vect_int } */ | ||
108 | + | ||
109 | +#include <stdarg.h> | ||
110 | +#include "tree-vect.h" | ||
111 | + | ||
112 | +#define A 300 | ||
113 | +#define N 16 | ||
114 | + | ||
115 | +char src[N]; | ||
116 | +short dst[N]; | ||
117 | +short src1[N], dst1[N]; | ||
118 | + | ||
119 | +void foo (int a) | ||
120 | +{ | ||
121 | + dst[0] = (short) (a * (int) src[0]); | ||
122 | + dst[1] = (short) (a * (int) src[1]); | ||
123 | + dst[2] = (short) (a * (int) src[2]); | ||
124 | + dst[3] = (short) (a * (int) src[3]); | ||
125 | + dst[4] = (short) (a * (int) src[4]); | ||
126 | + dst[5] = (short) (a * (int) src[5]); | ||
127 | + dst[6] = (short) (a * (int) src[6]); | ||
128 | + dst[7] = (short) (a * (int) src[7]); | ||
129 | + dst[8] = (short) (a * (int) src[8]); | ||
130 | + dst[9] = (short) (a * (int) src[9]); | ||
131 | + dst[10] = (short) (a * (int) src[10]); | ||
132 | + dst[11] = (short) (a * (int) src[11]); | ||
133 | + dst[12] = (short) (a * (int) src[12]); | ||
134 | + dst[13] = (short) (a * (int) src[13]); | ||
135 | + dst[14] = (short) (a * (int) src[14]); | ||
136 | + dst[15] = (short) (a * (int) src[15]); | ||
137 | + | ||
138 | + dst1[0] += src1[0]; | ||
139 | + dst1[1] += src1[1]; | ||
140 | + dst1[2] += src1[2]; | ||
141 | + dst1[3] += src1[3]; | ||
142 | + dst1[4] += src1[4]; | ||
143 | + dst1[5] += src1[5]; | ||
144 | + dst1[6] += src1[6]; | ||
145 | + dst1[7] += src1[7]; | ||
146 | +} | ||
147 | + | ||
148 | + | ||
149 | +int main (void) | ||
150 | +{ | ||
151 | + int i; | ||
152 | + | ||
153 | + check_vect (); | ||
154 | + | ||
155 | + for (i = 0; i < N; i++) | ||
156 | + { | ||
157 | + dst[i] = 2; | ||
158 | + dst1[i] = 0; | ||
159 | + src[i] = i; | ||
160 | + src1[i] = i+2; | ||
161 | + } | ||
162 | + | ||
163 | + foo (A); | ||
164 | + | ||
165 | + for (i = 0; i < N; i++) | ||
166 | + { | ||
167 | + if (dst[i] != A * i | ||
168 | + || (i < N/2 && dst1[i] != i + 2)) | ||
169 | + abort (); | ||
170 | + } | ||
171 | + | ||
172 | + return 0; | ||
173 | +} | ||
174 | + | ||
175 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_pack_trunc && vect_unpack } } } } } */ | ||
176 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
177 | + | ||
178 | |||
179 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
180 | --- old/gcc/testsuite/lib/target-supports.exp 2011-10-02 10:40:34 +0000 | ||
181 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 | ||
182 | @@ -3268,7 +3268,7 @@ | ||
183 | # Return 1 if the target supports multiple vector sizes | ||
184 | |||
185 | proc check_effective_target_vect_multiple_sizes { } { | ||
186 | - global et_vect_multiple_sizes | ||
187 | + global et_vect_multiple_sizes_saved | ||
188 | |||
189 | if [info exists et_vect_multiple_sizes_saved] { | ||
190 | verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 | ||
191 | @@ -3286,7 +3286,7 @@ | ||
192 | # Return 1 if the target supports vectors of 64 bits. | ||
193 | |||
194 | proc check_effective_target_vect64 { } { | ||
195 | - global et_vect64 | ||
196 | + global et_vect64_saved | ||
197 | |||
198 | if [info exists et_vect64_saved] { | ||
199 | verbose "check_effective_target_vect64: using cached result" 2 | ||
200 | |||
201 | === modified file 'gcc/tree-vect-slp.c' | ||
202 | --- old/gcc/tree-vect-slp.c 2011-10-02 10:40:34 +0000 | ||
203 | +++ new/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 | ||
204 | @@ -386,20 +386,15 @@ | ||
205 | return false; | ||
206 | } | ||
207 | |||
208 | - ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); | ||
209 | - if (ncopies != 1) | ||
210 | - { | ||
211 | - if (vect_print_dump_info (REPORT_SLP)) | ||
212 | - fprintf (vect_dump, "SLP with multiple types "); | ||
213 | - | ||
214 | - /* FORNOW: multiple types are unsupported in BB SLP. */ | ||
215 | - if (bb_vinfo) | ||
216 | - return false; | ||
217 | - } | ||
218 | - | ||
219 | /* In case of multiple types we need to detect the smallest type. */ | ||
220 | if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) | ||
221 | - *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); | ||
222 | + { | ||
223 | + *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); | ||
224 | + if (bb_vinfo) | ||
225 | + vectorization_factor = *max_nunits; | ||
226 | + } | ||
227 | + | ||
228 | + ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); | ||
229 | |||
230 | if (is_gimple_call (stmt)) | ||
231 | rhs_code = CALL_EXPR; | ||
232 | @@ -1183,7 +1178,6 @@ | ||
233 | if (loop_vinfo) | ||
234 | vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | ||
235 | else | ||
236 | - /* No multitypes in BB SLP. */ | ||
237 | vectorization_factor = nunits; | ||
238 | |||
239 | /* Calculate the unrolling factor. */ | ||
240 | @@ -1246,16 +1240,23 @@ | ||
241 | &max_nunits, &load_permutation, &loads, | ||
242 | vectorization_factor)) | ||
243 | { | ||
244 | + /* Calculate the unrolling factor based on the smallest type. */ | ||
245 | + if (max_nunits > nunits) | ||
246 | + unrolling_factor = least_common_multiple (max_nunits, group_size) | ||
247 | + / group_size; | ||
248 | + | ||
249 | + if (unrolling_factor != 1 && !loop_vinfo) | ||
250 | + { | ||
251 | + if (vect_print_dump_info (REPORT_SLP)) | ||
252 | + fprintf (vect_dump, "Build SLP failed: unrolling required in basic" | ||
253 | + " block SLP"); | ||
254 | + return false; | ||
255 | + } | ||
256 | + | ||
257 | /* Create a new SLP instance. */ | ||
258 | new_instance = XNEW (struct _slp_instance); | ||
259 | SLP_INSTANCE_TREE (new_instance) = node; | ||
260 | SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; | ||
261 | - /* Calculate the unrolling factor based on the smallest type in the | ||
262 | - loop. */ | ||
263 | - if (max_nunits > nunits) | ||
264 | - unrolling_factor = least_common_multiple (max_nunits, group_size) | ||
265 | - / group_size; | ||
266 | - | ||
267 | SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; | ||
268 | SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; | ||
269 | SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; | ||
270 | |||
271 | === modified file 'gcc/tree-vect-stmts.c' | ||
272 | --- old/gcc/tree-vect-stmts.c 2011-10-04 08:57:25 +0000 | ||
273 | +++ new/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 | ||
274 | @@ -3081,11 +3081,9 @@ | ||
275 | VEC (tree, heap) *vec_oprnds0 = NULL; | ||
276 | VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; | ||
277 | tree last_oprnd, intermediate_type; | ||
278 | - | ||
279 | - /* FORNOW: not supported by basic block SLP vectorization. */ | ||
280 | - gcc_assert (loop_vinfo); | ||
281 | - | ||
282 | - if (!STMT_VINFO_RELEVANT_P (stmt_info)) | ||
283 | + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
284 | + | ||
285 | + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) | ||
286 | return false; | ||
287 | |||
288 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) | ||
289 | @@ -3113,7 +3111,7 @@ | ||
290 | && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) | ||
291 | && CONVERT_EXPR_CODE_P (code)))) | ||
292 | return false; | ||
293 | - if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, | ||
294 | + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, | ||
295 | &def_stmt, &def, &dt[0], &vectype_in)) | ||
296 | { | ||
297 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
298 | @@ -3360,11 +3358,9 @@ | ||
299 | int multi_step_cvt = 0; | ||
300 | VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; | ||
301 | VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; | ||
302 | - | ||
303 | - /* FORNOW: not supported by basic block SLP vectorization. */ | ||
304 | - gcc_assert (loop_vinfo); | ||
305 | - | ||
306 | - if (!STMT_VINFO_RELEVANT_P (stmt_info)) | ||
307 | + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
308 | + | ||
309 | + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) | ||
310 | return false; | ||
311 | |||
312 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) | ||
313 | @@ -3393,7 +3389,7 @@ | ||
314 | && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) | ||
315 | && CONVERT_EXPR_CODE_P (code)))) | ||
316 | return false; | ||
317 | - if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, | ||
318 | + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, | ||
319 | &def_stmt, &def, &dt[0], &vectype_in)) | ||
320 | { | ||
321 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
322 | @@ -5153,7 +5149,9 @@ | ||
323 | else | ||
324 | { | ||
325 | if (bb_vinfo) | ||
326 | - ok = (vectorizable_shift (stmt, NULL, NULL, node) | ||
327 | + ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) | ||
328 | + || vectorizable_type_demotion (stmt, NULL, NULL, node) | ||
329 | + || vectorizable_shift (stmt, NULL, NULL, node) | ||
330 | || vectorizable_operation (stmt, NULL, NULL, node) | ||
331 | || vectorizable_assignment (stmt, NULL, NULL, node) | ||
332 | || vectorizable_load (stmt, NULL, NULL, node, NULL) | ||
333 | @@ -5780,7 +5778,7 @@ | ||
334 | { | ||
335 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
336 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
337 | - struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); | ||
338 | + struct loop *vect_loop = NULL; | ||
339 | bool ordered_p; | ||
340 | enum machine_mode vec_mode; | ||
341 | enum insn_code icode1, icode2; | ||
342 | @@ -5789,6 +5787,9 @@ | ||
343 | tree wide_vectype = vectype_out; | ||
344 | enum tree_code c1, c2; | ||
345 | |||
346 | + if (loop_info) | ||
347 | + vect_loop = LOOP_VINFO_LOOP (loop_info); | ||
348 | + | ||
349 | /* The result of a vectorized widening operation usually requires two vectors | ||
350 | (because the widened results do not fit int one vector). The generated | ||
351 | vector results would normally be expected to be generated in the same | ||
352 | @@ -5809,7 +5810,8 @@ | ||
353 | iterations in parallel). We therefore don't allow to change the order | ||
354 | of the computation in the inner-loop during outer-loop vectorization. */ | ||
355 | |||
356 | - if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction | ||
357 | + if (vect_loop | ||
358 | + && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction | ||
359 | && !nested_in_vect_loop_p (vect_loop, stmt)) | ||
360 | ordered_p = false; | ||
361 | else | ||
362 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch new file mode 100644 index 000000000..d49ebab60 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106827.patch | |||
@@ -0,0 +1,628 @@ | |||
1 | 2011-10-17 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | Backport from mainline r178852: | ||
4 | |||
5 | 2011-09-14 Julian Brown <julian@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/arm.c (arm_override_options): Add unaligned_access | ||
9 | support. | ||
10 | (arm_file_start): Emit attribute for unaligned access as appropriate. | ||
11 | * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD) | ||
12 | (UNSPEC_UNALIGNED_STORE): Add constants for unspecs. | ||
13 | (insv, extzv): Add unaligned-access support. | ||
14 | (extv): Change to expander. Likewise. | ||
15 | (extzv_t1, extv_regsi): Add helpers. | ||
16 | (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu) | ||
17 | (unaligned_storesi, unaligned_storehi): New. | ||
18 | (*extv_reg): New (previous extv implementation). | ||
19 | * config/arm/arm.opt (munaligned_access): Add option. | ||
20 | * config/arm/constraints.md (Uw): New constraint. | ||
21 | * expmed.c (store_bit_field_1): Adjust bitfield numbering according | ||
22 | to size of access, not size of unit, when BITS_BIG_ENDIAN != | ||
23 | BYTES_BIG_ENDIAN. Don't use bitfield accesses for | ||
24 | volatile accesses when -fstrict-volatile-bitfields is in effect. | ||
25 | (extract_bit_field_1): Likewise. | ||
26 | |||
27 | Backport from mainline r172697: | ||
28 | |||
29 | 2011-04-19 Wei Guozhi <carrot@google.com> | ||
30 | |||
31 | PR target/47855 | ||
32 | gcc/ | ||
33 | * config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype. | ||
34 | * config/arm/arm.c (thumb1_legitimate_address_p): Remove the static | ||
35 | linkage. | ||
36 | * config/arm/constraints.md (Uu): New constraint. | ||
37 | * config/arm/arm.md (*arm_movqi_insn): Compute attr "length". | ||
38 | |||
39 | === modified file 'gcc/config/arm/arm-protos.h' | ||
40 | --- old/gcc/config/arm/arm-protos.h 2011-10-03 09:46:40 +0000 | ||
41 | +++ new/gcc/config/arm/arm-protos.h 2011-10-11 01:56:19 +0000 | ||
42 | @@ -59,6 +59,7 @@ | ||
43 | int); | ||
44 | extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, | ||
45 | int); | ||
46 | +extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); | ||
47 | extern int arm_const_double_rtx (rtx); | ||
48 | extern int neg_const_double_rtx_ok_for_fpa (rtx); | ||
49 | extern int vfp3_const_double_rtx (rtx); | ||
50 | |||
51 | === modified file 'gcc/config/arm/arm.c' | ||
52 | --- old/gcc/config/arm/arm.c 2011-10-03 09:46:40 +0000 | ||
53 | +++ new/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 | ||
54 | @@ -2065,6 +2065,28 @@ | ||
55 | fix_cm3_ldrd = 0; | ||
56 | } | ||
57 | |||
58 | + /* Enable -munaligned-access by default for | ||
59 | + - all ARMv6 architecture-based processors | ||
60 | + - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. | ||
61 | + | ||
62 | + Disable -munaligned-access by default for | ||
63 | + - all pre-ARMv6 architecture-based processors | ||
64 | + - ARMv6-M architecture-based processors. */ | ||
65 | + | ||
66 | + if (unaligned_access == 2) | ||
67 | + { | ||
68 | + if (arm_arch6 && (arm_arch_notm || arm_arch7)) | ||
69 | + unaligned_access = 1; | ||
70 | + else | ||
71 | + unaligned_access = 0; | ||
72 | + } | ||
73 | + else if (unaligned_access == 1 | ||
74 | + && !(arm_arch6 && (arm_arch_notm || arm_arch7))) | ||
75 | + { | ||
76 | + warning (0, "target CPU does not support unaligned accesses"); | ||
77 | + unaligned_access = 0; | ||
78 | + } | ||
79 | + | ||
80 | if (TARGET_THUMB1 && flag_schedule_insns) | ||
81 | { | ||
82 | /* Don't warn since it's on by default in -O2. */ | ||
83 | @@ -6106,7 +6128,7 @@ | ||
84 | addresses based on the frame pointer or arg pointer until the | ||
85 | reload pass starts. This is so that eliminating such addresses | ||
86 | into stack based ones won't produce impossible code. */ | ||
87 | -static int | ||
88 | +int | ||
89 | thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) | ||
90 | { | ||
91 | /* ??? Not clear if this is right. Experiment. */ | ||
92 | @@ -22226,6 +22248,10 @@ | ||
93 | val = 6; | ||
94 | asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); | ||
95 | |||
96 | + /* Tag_CPU_unaligned_access. */ | ||
97 | + asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n", | ||
98 | + unaligned_access); | ||
99 | + | ||
100 | /* Tag_ABI_FP_16bit_format. */ | ||
101 | if (arm_fp16_format) | ||
102 | asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", | ||
103 | |||
104 | === modified file 'gcc/config/arm/arm.md' | ||
105 | --- old/gcc/config/arm/arm.md 2011-10-03 09:47:33 +0000 | ||
106 | +++ new/gcc/config/arm/arm.md 2011-10-11 02:31:01 +0000 | ||
107 | @@ -113,6 +113,10 @@ | ||
108 | (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from | ||
109 | ; another symbolic address. | ||
110 | (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. | ||
111 | + (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access | ||
112 | + ; unaligned locations, on architectures which support | ||
113 | + ; that. | ||
114 | + (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh. | ||
115 | ] | ||
116 | ) | ||
117 | |||
118 | @@ -2463,10 +2467,10 @@ | ||
119 | ;;; this insv pattern, so this pattern needs to be reevalutated. | ||
120 | |||
121 | (define_expand "insv" | ||
122 | - [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "") | ||
123 | - (match_operand:SI 1 "general_operand" "") | ||
124 | - (match_operand:SI 2 "general_operand" "")) | ||
125 | - (match_operand:SI 3 "reg_or_int_operand" ""))] | ||
126 | + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") | ||
127 | + (match_operand 1 "general_operand" "") | ||
128 | + (match_operand 2 "general_operand" "")) | ||
129 | + (match_operand 3 "reg_or_int_operand" ""))] | ||
130 | "TARGET_ARM || arm_arch_thumb2" | ||
131 | " | ||
132 | { | ||
133 | @@ -2477,35 +2481,70 @@ | ||
134 | |||
135 | if (arm_arch_thumb2) | ||
136 | { | ||
137 | - bool use_bfi = TRUE; | ||
138 | - | ||
139 | - if (GET_CODE (operands[3]) == CONST_INT) | ||
140 | - { | ||
141 | - HOST_WIDE_INT val = INTVAL (operands[3]) & mask; | ||
142 | - | ||
143 | - if (val == 0) | ||
144 | - { | ||
145 | - emit_insn (gen_insv_zero (operands[0], operands[1], | ||
146 | - operands[2])); | ||
147 | + if (unaligned_access && MEM_P (operands[0]) | ||
148 | + && s_register_operand (operands[3], GET_MODE (operands[3])) | ||
149 | + && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) | ||
150 | + { | ||
151 | + rtx base_addr; | ||
152 | + | ||
153 | + if (BYTES_BIG_ENDIAN) | ||
154 | + start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width | ||
155 | + - start_bit; | ||
156 | + | ||
157 | + if (width == 32) | ||
158 | + { | ||
159 | + base_addr = adjust_address (operands[0], SImode, | ||
160 | + start_bit / BITS_PER_UNIT); | ||
161 | + emit_insn (gen_unaligned_storesi (base_addr, operands[3])); | ||
162 | + } | ||
163 | + else | ||
164 | + { | ||
165 | + rtx tmp = gen_reg_rtx (HImode); | ||
166 | + | ||
167 | + base_addr = adjust_address (operands[0], HImode, | ||
168 | + start_bit / BITS_PER_UNIT); | ||
169 | + emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); | ||
170 | + emit_insn (gen_unaligned_storehi (base_addr, tmp)); | ||
171 | + } | ||
172 | + DONE; | ||
173 | + } | ||
174 | + else if (s_register_operand (operands[0], GET_MODE (operands[0]))) | ||
175 | + { | ||
176 | + bool use_bfi = TRUE; | ||
177 | + | ||
178 | + if (GET_CODE (operands[3]) == CONST_INT) | ||
179 | + { | ||
180 | + HOST_WIDE_INT val = INTVAL (operands[3]) & mask; | ||
181 | + | ||
182 | + if (val == 0) | ||
183 | + { | ||
184 | + emit_insn (gen_insv_zero (operands[0], operands[1], | ||
185 | + operands[2])); | ||
186 | + DONE; | ||
187 | + } | ||
188 | + | ||
189 | + /* See if the set can be done with a single orr instruction. */ | ||
190 | + if (val == mask && const_ok_for_arm (val << start_bit)) | ||
191 | + use_bfi = FALSE; | ||
192 | + } | ||
193 | + | ||
194 | + if (use_bfi) | ||
195 | + { | ||
196 | + if (GET_CODE (operands[3]) != REG) | ||
197 | + operands[3] = force_reg (SImode, operands[3]); | ||
198 | + | ||
199 | + emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], | ||
200 | + operands[3])); | ||
201 | DONE; | ||
202 | } | ||
203 | - | ||
204 | - /* See if the set can be done with a single orr instruction. */ | ||
205 | - if (val == mask && const_ok_for_arm (val << start_bit)) | ||
206 | - use_bfi = FALSE; | ||
207 | - } | ||
208 | - | ||
209 | - if (use_bfi) | ||
210 | - { | ||
211 | - if (GET_CODE (operands[3]) != REG) | ||
212 | - operands[3] = force_reg (SImode, operands[3]); | ||
213 | - | ||
214 | - emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], | ||
215 | - operands[3])); | ||
216 | - DONE; | ||
217 | - } | ||
218 | + } | ||
219 | + else | ||
220 | + FAIL; | ||
221 | } | ||
222 | |||
223 | + if (!s_register_operand (operands[0], GET_MODE (operands[0]))) | ||
224 | + FAIL; | ||
225 | + | ||
226 | target = copy_rtx (operands[0]); | ||
227 | /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical | ||
228 | subreg as the final target. */ | ||
229 | @@ -3697,12 +3736,10 @@ | ||
230 | ;; to reduce register pressure later on. | ||
231 | |||
232 | (define_expand "extzv" | ||
233 | - [(set (match_dup 4) | ||
234 | - (ashift:SI (match_operand:SI 1 "register_operand" "") | ||
235 | - (match_operand:SI 2 "const_int_operand" ""))) | ||
236 | - (set (match_operand:SI 0 "register_operand" "") | ||
237 | - (lshiftrt:SI (match_dup 4) | ||
238 | - (match_operand:SI 3 "const_int_operand" "")))] | ||
239 | + [(set (match_operand 0 "s_register_operand" "") | ||
240 | + (zero_extract (match_operand 1 "nonimmediate_operand" "") | ||
241 | + (match_operand 2 "const_int_operand" "") | ||
242 | + (match_operand 3 "const_int_operand" "")))] | ||
243 | "TARGET_THUMB1 || arm_arch_thumb2" | ||
244 | " | ||
245 | { | ||
246 | @@ -3711,10 +3748,57 @@ | ||
247 | |||
248 | if (arm_arch_thumb2) | ||
249 | { | ||
250 | - emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], | ||
251 | - operands[3])); | ||
252 | - DONE; | ||
253 | + HOST_WIDE_INT width = INTVAL (operands[2]); | ||
254 | + HOST_WIDE_INT bitpos = INTVAL (operands[3]); | ||
255 | + | ||
256 | + if (unaligned_access && MEM_P (operands[1]) | ||
257 | + && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) | ||
258 | + { | ||
259 | + rtx base_addr; | ||
260 | + | ||
261 | + if (BYTES_BIG_ENDIAN) | ||
262 | + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width | ||
263 | + - bitpos; | ||
264 | + | ||
265 | + if (width == 32) | ||
266 | + { | ||
267 | + base_addr = adjust_address (operands[1], SImode, | ||
268 | + bitpos / BITS_PER_UNIT); | ||
269 | + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); | ||
270 | + } | ||
271 | + else | ||
272 | + { | ||
273 | + rtx dest = operands[0]; | ||
274 | + rtx tmp = gen_reg_rtx (SImode); | ||
275 | + | ||
276 | + /* We may get a paradoxical subreg here. Strip it off. */ | ||
277 | + if (GET_CODE (dest) == SUBREG | ||
278 | + && GET_MODE (dest) == SImode | ||
279 | + && GET_MODE (SUBREG_REG (dest)) == HImode) | ||
280 | + dest = SUBREG_REG (dest); | ||
281 | + | ||
282 | + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) | ||
283 | + FAIL; | ||
284 | + | ||
285 | + base_addr = adjust_address (operands[1], HImode, | ||
286 | + bitpos / BITS_PER_UNIT); | ||
287 | + emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); | ||
288 | + emit_move_insn (gen_lowpart (SImode, dest), tmp); | ||
289 | + } | ||
290 | + DONE; | ||
291 | + } | ||
292 | + else if (s_register_operand (operands[1], GET_MODE (operands[1]))) | ||
293 | + { | ||
294 | + emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], | ||
295 | + operands[3])); | ||
296 | + DONE; | ||
297 | + } | ||
298 | + else | ||
299 | + FAIL; | ||
300 | } | ||
301 | + | ||
302 | + if (!s_register_operand (operands[1], GET_MODE (operands[1]))) | ||
303 | + FAIL; | ||
304 | |||
305 | operands[3] = GEN_INT (rshift); | ||
306 | |||
307 | @@ -3724,12 +3808,154 @@ | ||
308 | DONE; | ||
309 | } | ||
310 | |||
311 | - operands[2] = GEN_INT (lshift); | ||
312 | - operands[4] = gen_reg_rtx (SImode); | ||
313 | + emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), | ||
314 | + operands[3], gen_reg_rtx (SImode))); | ||
315 | + DONE; | ||
316 | }" | ||
317 | ) | ||
318 | |||
319 | -(define_insn "extv" | ||
320 | +;; Helper for extzv, for the Thumb-1 register-shifts case. | ||
321 | + | ||
322 | +(define_expand "extzv_t1" | ||
323 | + [(set (match_operand:SI 4 "s_register_operand" "") | ||
324 | + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") | ||
325 | + (match_operand:SI 2 "const_int_operand" ""))) | ||
326 | + (set (match_operand:SI 0 "s_register_operand" "") | ||
327 | + (lshiftrt:SI (match_dup 4) | ||
328 | + (match_operand:SI 3 "const_int_operand" "")))] | ||
329 | + "TARGET_THUMB1" | ||
330 | + "") | ||
331 | + | ||
332 | +(define_expand "extv" | ||
333 | + [(set (match_operand 0 "s_register_operand" "") | ||
334 | + (sign_extract (match_operand 1 "nonimmediate_operand" "") | ||
335 | + (match_operand 2 "const_int_operand" "") | ||
336 | + (match_operand 3 "const_int_operand" "")))] | ||
337 | + "arm_arch_thumb2" | ||
338 | +{ | ||
339 | + HOST_WIDE_INT width = INTVAL (operands[2]); | ||
340 | + HOST_WIDE_INT bitpos = INTVAL (operands[3]); | ||
341 | + | ||
342 | + if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) | ||
343 | + && (bitpos % BITS_PER_UNIT) == 0) | ||
344 | + { | ||
345 | + rtx base_addr; | ||
346 | + | ||
347 | + if (BYTES_BIG_ENDIAN) | ||
348 | + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; | ||
349 | + | ||
350 | + if (width == 32) | ||
351 | + { | ||
352 | + base_addr = adjust_address (operands[1], SImode, | ||
353 | + bitpos / BITS_PER_UNIT); | ||
354 | + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); | ||
355 | + } | ||
356 | + else | ||
357 | + { | ||
358 | + rtx dest = operands[0]; | ||
359 | + rtx tmp = gen_reg_rtx (SImode); | ||
360 | + | ||
361 | + /* We may get a paradoxical subreg here. Strip it off. */ | ||
362 | + if (GET_CODE (dest) == SUBREG | ||
363 | + && GET_MODE (dest) == SImode | ||
364 | + && GET_MODE (SUBREG_REG (dest)) == HImode) | ||
365 | + dest = SUBREG_REG (dest); | ||
366 | + | ||
367 | + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) | ||
368 | + FAIL; | ||
369 | + | ||
370 | + base_addr = adjust_address (operands[1], HImode, | ||
371 | + bitpos / BITS_PER_UNIT); | ||
372 | + emit_insn (gen_unaligned_loadhis (tmp, base_addr)); | ||
373 | + emit_move_insn (gen_lowpart (SImode, dest), tmp); | ||
374 | + } | ||
375 | + | ||
376 | + DONE; | ||
377 | + } | ||
378 | + else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) | ||
379 | + FAIL; | ||
380 | + else if (GET_MODE (operands[0]) == SImode | ||
381 | + && GET_MODE (operands[1]) == SImode) | ||
382 | + { | ||
383 | + emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], | ||
384 | + operands[3])); | ||
385 | + DONE; | ||
386 | + } | ||
387 | + | ||
388 | + FAIL; | ||
389 | +}) | ||
390 | + | ||
391 | +; Helper to expand register forms of extv with the proper modes. | ||
392 | + | ||
393 | +(define_expand "extv_regsi" | ||
394 | + [(set (match_operand:SI 0 "s_register_operand" "") | ||
395 | + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") | ||
396 | + (match_operand 2 "const_int_operand" "") | ||
397 | + (match_operand 3 "const_int_operand" "")))] | ||
398 | + "" | ||
399 | +{ | ||
400 | +}) | ||
401 | + | ||
402 | +; ARMv6+ unaligned load/store instructions (used for packed structure accesses). | ||
403 | + | ||
404 | +(define_insn "unaligned_loadsi" | ||
405 | + [(set (match_operand:SI 0 "s_register_operand" "=l,r") | ||
406 | + (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] | ||
407 | + UNSPEC_UNALIGNED_LOAD))] | ||
408 | + "unaligned_access && TARGET_32BIT" | ||
409 | + "ldr%?\t%0, %1\t@ unaligned" | ||
410 | + [(set_attr "arch" "t2,any") | ||
411 | + (set_attr "length" "2,4") | ||
412 | + (set_attr "predicable" "yes") | ||
413 | + (set_attr "type" "load1")]) | ||
414 | + | ||
415 | +(define_insn "unaligned_loadhis" | ||
416 | + [(set (match_operand:SI 0 "s_register_operand" "=l,r") | ||
417 | + (sign_extend:SI | ||
418 | + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] | ||
419 | + UNSPEC_UNALIGNED_LOAD)))] | ||
420 | + "unaligned_access && TARGET_32BIT" | ||
421 | + "ldr%(sh%)\t%0, %1\t@ unaligned" | ||
422 | + [(set_attr "arch" "t2,any") | ||
423 | + (set_attr "length" "2,4") | ||
424 | + (set_attr "predicable" "yes") | ||
425 | + (set_attr "type" "load_byte")]) | ||
426 | + | ||
427 | +(define_insn "unaligned_loadhiu" | ||
428 | + [(set (match_operand:SI 0 "s_register_operand" "=l,r") | ||
429 | + (zero_extend:SI | ||
430 | + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] | ||
431 | + UNSPEC_UNALIGNED_LOAD)))] | ||
432 | + "unaligned_access && TARGET_32BIT" | ||
433 | + "ldr%(h%)\t%0, %1\t@ unaligned" | ||
434 | + [(set_attr "arch" "t2,any") | ||
435 | + (set_attr "length" "2,4") | ||
436 | + (set_attr "predicable" "yes") | ||
437 | + (set_attr "type" "load_byte")]) | ||
438 | + | ||
439 | +(define_insn "unaligned_storesi" | ||
440 | + [(set (match_operand:SI 0 "memory_operand" "=Uw,m") | ||
441 | + (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] | ||
442 | + UNSPEC_UNALIGNED_STORE))] | ||
443 | + "unaligned_access && TARGET_32BIT" | ||
444 | + "str%?\t%1, %0\t@ unaligned" | ||
445 | + [(set_attr "arch" "t2,any") | ||
446 | + (set_attr "length" "2,4") | ||
447 | + (set_attr "predicable" "yes") | ||
448 | + (set_attr "type" "store1")]) | ||
449 | + | ||
450 | +(define_insn "unaligned_storehi" | ||
451 | + [(set (match_operand:HI 0 "memory_operand" "=Uw,m") | ||
452 | + (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] | ||
453 | + UNSPEC_UNALIGNED_STORE))] | ||
454 | + "unaligned_access && TARGET_32BIT" | ||
455 | + "str%(h%)\t%1, %0\t@ unaligned" | ||
456 | + [(set_attr "arch" "t2,any") | ||
457 | + (set_attr "length" "2,4") | ||
458 | + (set_attr "predicable" "yes") | ||
459 | + (set_attr "type" "store1")]) | ||
460 | + | ||
461 | +(define_insn "*extv_reg" | ||
462 | [(set (match_operand:SI 0 "s_register_operand" "=r") | ||
463 | (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") | ||
464 | (match_operand:SI 2 "const_int_operand" "M") | ||
465 | @@ -6038,8 +6264,8 @@ | ||
466 | |||
467 | |||
468 | (define_insn "*arm_movqi_insn" | ||
469 | - [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") | ||
470 | - (match_operand:QI 1 "general_operand" "rI,K,m,r"))] | ||
471 | + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m") | ||
472 | + (match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))] | ||
473 | "TARGET_32BIT | ||
474 | && ( register_operand (operands[0], QImode) | ||
475 | || register_operand (operands[1], QImode))" | ||
476 | @@ -6047,10 +6273,14 @@ | ||
477 | mov%?\\t%0, %1 | ||
478 | mvn%?\\t%0, #%B1 | ||
479 | ldr%(b%)\\t%0, %1 | ||
480 | + str%(b%)\\t%1, %0 | ||
481 | + ldr%(b%)\\t%0, %1 | ||
482 | str%(b%)\\t%1, %0" | ||
483 | - [(set_attr "type" "*,*,load1,store1") | ||
484 | - (set_attr "insn" "mov,mvn,*,*") | ||
485 | - (set_attr "predicable" "yes")] | ||
486 | + [(set_attr "type" "*,*,load1,store1,load1,store1") | ||
487 | + (set_attr "insn" "mov,mvn,*,*,*,*") | ||
488 | + (set_attr "predicable" "yes") | ||
489 | + (set_attr "arch" "any,any,t2,t2,any,any") | ||
490 | + (set_attr "length" "4,4,2,2,4,4")] | ||
491 | ) | ||
492 | |||
493 | (define_insn "*thumb1_movqi_insn" | ||
494 | |||
495 | === modified file 'gcc/config/arm/arm.opt' | ||
496 | --- old/gcc/config/arm/arm.opt 2011-09-19 07:44:24 +0000 | ||
497 | +++ new/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 | ||
498 | @@ -173,3 +173,7 @@ | ||
499 | Target Report Var(fix_cm3_ldrd) Init(2) | ||
500 | Avoid overlapping destination and address registers on LDRD instructions | ||
501 | that may trigger Cortex-M3 errata. | ||
502 | + | ||
503 | +munaligned-access | ||
504 | +Target Report Var(unaligned_access) Init(2) | ||
505 | +Enable unaligned word and halfword accesses to packed data. | ||
506 | |||
507 | === modified file 'gcc/config/arm/constraints.md' | ||
508 | --- old/gcc/config/arm/constraints.md 2011-09-12 14:14:00 +0000 | ||
509 | +++ new/gcc/config/arm/constraints.md 2011-10-11 02:31:01 +0000 | ||
510 | @@ -36,6 +36,7 @@ | ||
511 | ;; The following memory constraints have been used: | ||
512 | ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us | ||
513 | ;; in ARM state: Uq | ||
514 | +;; in Thumb state: Uu, Uw | ||
515 | |||
516 | |||
517 | (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS" | ||
518 | @@ -344,6 +345,27 @@ | ||
519 | (and (match_code "mem") | ||
520 | (match_test "REG_P (XEXP (op, 0))"))) | ||
521 | |||
522 | +(define_memory_constraint "Uu" | ||
523 | + "@internal | ||
524 | + In Thumb state an address that is valid in 16bit encoding." | ||
525 | + (and (match_code "mem") | ||
526 | + (match_test "TARGET_THUMB | ||
527 | + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), | ||
528 | + 0)"))) | ||
529 | + | ||
530 | +; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p | ||
531 | +; are actually LDM/STM instructions, so cannot be used to access unaligned | ||
532 | +; data. | ||
533 | +(define_memory_constraint "Uw" | ||
534 | + "@internal | ||
535 | + In Thumb state an address that is valid in 16bit encoding, and that can be | ||
536 | + used for unaligned accesses." | ||
537 | + (and (match_code "mem") | ||
538 | + (match_test "TARGET_THUMB | ||
539 | + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), | ||
540 | + 0) | ||
541 | + && GET_CODE (XEXP (op, 0)) != POST_INC"))) | ||
542 | + | ||
543 | ;; We used to have constraint letters for S and R in ARM state, but | ||
544 | ;; all uses of these now appear to have been removed. | ||
545 | |||
546 | |||
547 | === modified file 'gcc/expmed.c' | ||
548 | --- old/gcc/expmed.c 2011-05-22 19:02:59 +0000 | ||
549 | +++ new/gcc/expmed.c 2011-10-11 02:31:01 +0000 | ||
550 | @@ -657,6 +657,10 @@ | ||
551 | && GET_MODE (value) != BLKmode | ||
552 | && bitsize > 0 | ||
553 | && GET_MODE_BITSIZE (op_mode) >= bitsize | ||
554 | + /* Do not use insv for volatile bitfields when | ||
555 | + -fstrict-volatile-bitfields is in effect. */ | ||
556 | + && !(MEM_P (op0) && MEM_VOLATILE_P (op0) | ||
557 | + && flag_strict_volatile_bitfields > 0) | ||
558 | && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) | ||
559 | && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) | ||
560 | && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), | ||
561 | @@ -700,19 +704,21 @@ | ||
562 | copy_back = true; | ||
563 | } | ||
564 | |||
565 | - /* On big-endian machines, we count bits from the most significant. | ||
566 | - If the bit field insn does not, we must invert. */ | ||
567 | - | ||
568 | - if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | ||
569 | - xbitpos = unit - bitsize - xbitpos; | ||
570 | - | ||
571 | /* We have been counting XBITPOS within UNIT. | ||
572 | Count instead within the size of the register. */ | ||
573 | - if (BITS_BIG_ENDIAN && !MEM_P (xop0)) | ||
574 | + if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) | ||
575 | xbitpos += GET_MODE_BITSIZE (op_mode) - unit; | ||
576 | |||
577 | unit = GET_MODE_BITSIZE (op_mode); | ||
578 | |||
579 | + /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count | ||
580 | + "backwards" from the size of the unit we are inserting into. | ||
581 | + Otherwise, we count bits from the most significant on a | ||
582 | + BYTES/BITS_BIG_ENDIAN machine. */ | ||
583 | + | ||
584 | + if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | ||
585 | + xbitpos = unit - bitsize - xbitpos; | ||
586 | + | ||
587 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ | ||
588 | value1 = value; | ||
589 | if (GET_MODE (value) != op_mode) | ||
590 | @@ -1528,6 +1534,10 @@ | ||
591 | if (ext_mode != MAX_MACHINE_MODE | ||
592 | && bitsize > 0 | ||
593 | && GET_MODE_BITSIZE (ext_mode) >= bitsize | ||
594 | + /* Do not use extv/extzv for volatile bitfields when | ||
595 | + -fstrict-volatile-bitfields is in effect. */ | ||
596 | + && !(MEM_P (op0) && MEM_VOLATILE_P (op0) | ||
597 | + && flag_strict_volatile_bitfields > 0) | ||
598 | /* If op0 is a register, we need it in EXT_MODE to make it | ||
599 | acceptable to the format of ext(z)v. */ | ||
600 | && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | ||
601 | @@ -1552,17 +1562,20 @@ | ||
602 | /* Get ref to first byte containing part of the field. */ | ||
603 | xop0 = adjust_address (xop0, byte_mode, xoffset); | ||
604 | |||
605 | - /* On big-endian machines, we count bits from the most significant. | ||
606 | - If the bit field insn does not, we must invert. */ | ||
607 | - if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | ||
608 | - xbitpos = unit - bitsize - xbitpos; | ||
609 | - | ||
610 | /* Now convert from counting within UNIT to counting in EXT_MODE. */ | ||
611 | - if (BITS_BIG_ENDIAN && !MEM_P (xop0)) | ||
612 | + if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) | ||
613 | xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; | ||
614 | |||
615 | unit = GET_MODE_BITSIZE (ext_mode); | ||
616 | |||
617 | + /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count | ||
618 | + "backwards" from the size of the unit we are extracting from. | ||
619 | + Otherwise, we count bits from the most significant on a | ||
620 | + BYTES/BITS_BIG_ENDIAN machine. */ | ||
621 | + | ||
622 | + if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | ||
623 | + xbitpos = unit - bitsize - xbitpos; | ||
624 | + | ||
625 | if (xtarget == 0) | ||
626 | xtarget = xspec_target = gen_reg_rtx (tmode); | ||
627 | |||
628 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch new file mode 100644 index 000000000..3c0ff0085 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106828.patch | |||
@@ -0,0 +1,1951 @@ | |||
1 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | Backport from mainline: | ||
5 | |||
6 | 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> | ||
7 | |||
8 | * modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages. | ||
9 | (SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete. | ||
10 | (node_sched_params): Remove first_reg_move and nreg_moves. | ||
11 | (ps_num_consecutive_stages, extend_node_sched_params): New functions. | ||
12 | (update_node_sched_params): Move up file. | ||
13 | (print_node_sched_params): Print the stage. Don't dump info related | ||
14 | to first_reg_move and nreg_moves. | ||
15 | (set_columns_for_row): New function. | ||
16 | (set_columns_for_ps): Move up file and use set_columns_for_row. | ||
17 | (schedule_reg_move): New function. | ||
18 | (schedule_reg_moves): Call extend_node_sched_params and | ||
19 | schedule_reg_move. Extend size of uses bitmap. Initialize | ||
20 | num_consecutive_stages. Return false if a move could not be | ||
21 | scheduled. | ||
22 | (apply_reg_moves): Don't emit moves here. | ||
23 | (permute_partial_schedule): Handle register moves. | ||
24 | (duplicate_insns_of_cycles): Remove for_prolog. Emit moves according | ||
25 | to the same stage-count test as ddg nodes. | ||
26 | (generate_prolog_epilog): Update calls accordingly. | ||
27 | (sms_schedule): Allow move-scheduling to add a new first stage. | ||
28 | |||
29 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
30 | |||
31 | gcc/ | ||
32 | Backport from mainline: | ||
33 | |||
34 | 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> | ||
35 | |||
36 | * modulo-sched.c (ps_insn): Adjust comment. | ||
37 | (ps_reg_move_info): New structure. | ||
38 | (partial_schedule): Add reg_moves field. | ||
39 | (SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params. | ||
40 | (node_sched_params): Turn first_reg_move into an identifier. | ||
41 | (ps_reg_move): New function. | ||
42 | (ps_rtl_insn): Cope with register moves. | ||
43 | (ps_first_note): Adjust comment and assert that the instruction | ||
44 | isn't a register move. | ||
45 | (node_sched_params): Replace with... | ||
46 | (node_sched_param_vec): ...this vector. | ||
47 | (set_node_sched_params): Adjust accordingly. | ||
48 | (print_node_sched_params): Take a partial schedule instead of a ddg. | ||
49 | Use ps_rtl_insn and ps_reg_move. | ||
50 | (generate_reg_moves): Rename to... | ||
51 | (schedule_reg_moves): ...this. Remove rescan parameter. Record each | ||
52 | move in the partial schedule, but don't emit it here. Don't perform | ||
53 | register substitutions here either. | ||
54 | (apply_reg_moves): New function. | ||
55 | (duplicate_insns_of_cycles): Use register indices directly, | ||
56 | rather than finding instructions using PREV_INSN. Use ps_reg_move. | ||
57 | (sms_schedule): Call schedule_reg_moves before committing to | ||
58 | a partial schedule. Try the next ii if the schedule fails. | ||
59 | Use apply_reg_moves instead of generate_reg_moves. Adjust | ||
60 | call to print_node_sched_params. Free node_sched_param_vec | ||
61 | instead of node_sched_params. | ||
62 | (create_partial_schedule): Initialize reg_moves. | ||
63 | (free_partial_schedule): Free reg_moves. | ||
64 | |||
65 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
66 | |||
67 | gcc/ | ||
68 | Backport from mainline: | ||
69 | |||
70 | 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> | ||
71 | |||
72 | * modulo-sched.c (ps_insn): Replace node field with an identifier. | ||
73 | (SCHED_ASAP): Replace with.. | ||
74 | (NODE_ASAP): ...this macro. | ||
75 | (SCHED_PARAMS): New macro. | ||
76 | (SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW) | ||
77 | (SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS. | ||
78 | (node_sched_params): Remove asap. | ||
79 | (ps_rtl_insn, ps_first_note): New functions. | ||
80 | (set_node_sched_params): Use XCNEWVEC. Don't copy across the | ||
81 | asap values. | ||
82 | (print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP. | ||
83 | (generate_reg_moves): Pass ids to the SCHED_* macros. | ||
84 | (update_node_sched_params): Take a ps insn identifier rather than | ||
85 | a node as parameter. Use ps_rtl_insn. | ||
86 | (set_columns_for_ps): Update for above field and SCHED_* macro changes. | ||
87 | (permute_partial_schedule): Use ps_rtl_insn and ps_first_note. | ||
88 | (optimize_sc): Update for above field and SCHED_* macro changes. | ||
89 | Update calls to try_scheduling_node_in_cycle and | ||
90 | update_node_sched_params. | ||
91 | (duplicate_insns_of_cycles): Adjust for above field and SCHED_* | ||
92 | macro changes. Use ps_rtl_insn and ps_first_note. | ||
93 | (sms_schedule): Pass ids to the SCHED_* macros. | ||
94 | (get_sched_window): Adjust for above field and SCHED_* macro changes. | ||
95 | Use NODE_ASAP instead of SCHED_ASAP. | ||
96 | (try_scheduling_node_in_cycle): Remove node parameter. Update | ||
97 | call to ps_add_node_check_conflicts. Pass ids to the SCHED_* | ||
98 | macros. | ||
99 | (sms_schedule_by_order): Update call to try_scheduling_node_in_cycle. | ||
100 | (ps_insert_empty_row): Adjust for above field changes. | ||
101 | (compute_split_row): Use ids rather than nodes. | ||
102 | (verify_partial_schedule): Adjust for above field changes. | ||
103 | (print_partial_schedule): Use ps_rtl_insn. | ||
104 | (create_ps_insn): Take an id rather than a node. | ||
105 | (ps_insn_find_column): Adjust for above field changes. | ||
106 | Use ps_rtl_insn. | ||
107 | (ps_insn_advance_column): Adjust for above field changes. | ||
108 | (add_node_to_ps): Remove node parameter. Update call to | ||
109 | create_ps_insn. | ||
110 | (ps_has_conflicts): Use ps_rtl_insn. | ||
111 | (ps_add_node_check_conflicts): Replace node parameter with an id. | ||
112 | |||
113 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
114 | |||
115 | gcc/ | ||
116 | Backport from mainline: | ||
117 | |||
118 | 2011-10-10 Richard Sandiford <richard.sandiford@linaro.org> | ||
119 | |||
120 | * modulo-sched.c (undo_replace_buff_elem): Delete. | ||
121 | (generate_reg_moves): Don't build and return an undo list. | ||
122 | (free_undo_replace_buff): Delete. | ||
123 | (sms_schedule): Adjust call to generate_reg_moves. | ||
124 | Don't call free_undo_replace_buff. | ||
125 | |||
126 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
127 | |||
128 | gcc/ | ||
129 | Backport from mainline: | ||
130 | |||
131 | 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> | ||
132 | |||
133 | * modulo-sched.c (get_sched_window): Use a table for the debug output. | ||
134 | Print the current ii. | ||
135 | (sms_schedule_by_order): Reduce whitespace in dump line. | ||
136 | |||
137 | 2011-10-17 Richard Sandiford <richard.sandiford@linaro.org> | ||
138 | |||
139 | gcc/ | ||
140 | Backport from mainline: | ||
141 | |||
142 | 2011-08-08 Richard Sandiford <richard.sandiford@linaro.org> | ||
143 | |||
144 | * modulo-sched.c (get_sched_window): Use just one loop for predecessors | ||
145 | and one loop for successors. Fix upper bound of memory range. | ||
146 | |||
147 | === modified file 'gcc/modulo-sched.c' | ||
148 | --- old/gcc/modulo-sched.c 2011-10-02 06:56:53 +0000 | ||
149 | +++ new/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 | ||
150 | @@ -124,8 +124,10 @@ | ||
151 | /* A single instruction in the partial schedule. */ | ||
152 | struct ps_insn | ||
153 | { | ||
154 | - /* The corresponding DDG_NODE. */ | ||
155 | - ddg_node_ptr node; | ||
156 | + /* Identifies the instruction to be scheduled. Values smaller than | ||
157 | + the ddg's num_nodes refer directly to ddg nodes. A value of | ||
158 | + X + num_nodes refers to register move X. */ | ||
159 | + int id; | ||
160 | |||
161 | /* The (absolute) cycle in which the PS instruction is scheduled. | ||
162 | Same as SCHED_TIME (node). */ | ||
163 | @@ -137,6 +139,33 @@ | ||
164 | |||
165 | }; | ||
166 | |||
167 | +/* Information about a register move that has been added to a partial | ||
168 | + schedule. */ | ||
169 | +struct ps_reg_move_info | ||
170 | +{ | ||
171 | + /* The source of the move is defined by the ps_insn with id DEF. | ||
172 | + The destination is used by the ps_insns with the ids in USES. */ | ||
173 | + int def; | ||
174 | + sbitmap uses; | ||
175 | + | ||
176 | + /* The original form of USES' instructions used OLD_REG, but they | ||
177 | + should now use NEW_REG. */ | ||
178 | + rtx old_reg; | ||
179 | + rtx new_reg; | ||
180 | + | ||
181 | + /* The number of consecutive stages that the move occupies. */ | ||
182 | + int num_consecutive_stages; | ||
183 | + | ||
184 | + /* An instruction that sets NEW_REG to the correct value. The first | ||
185 | + move associated with DEF will have an rhs of OLD_REG; later moves | ||
186 | + use the result of the previous move. */ | ||
187 | + rtx insn; | ||
188 | +}; | ||
189 | + | ||
190 | +typedef struct ps_reg_move_info ps_reg_move_info; | ||
191 | +DEF_VEC_O (ps_reg_move_info); | ||
192 | +DEF_VEC_ALLOC_O (ps_reg_move_info, heap); | ||
193 | + | ||
194 | /* Holds the partial schedule as an array of II rows. Each entry of the | ||
195 | array points to a linked list of PS_INSNs, which represents the | ||
196 | instructions that are scheduled for that row. */ | ||
197 | @@ -148,6 +177,10 @@ | ||
198 | /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ | ||
199 | ps_insn_ptr *rows; | ||
200 | |||
201 | + /* All the moves added for this partial schedule. Index X has | ||
202 | + a ps_insn id of X + g->num_nodes. */ | ||
203 | + VEC (ps_reg_move_info, heap) *reg_moves; | ||
204 | + | ||
205 | /* rows_length[i] holds the number of instructions in the row. | ||
206 | It is used only (as an optimization) to back off quickly from | ||
207 | trying to schedule a node in a full row; that is, to avoid running | ||
208 | @@ -165,17 +198,6 @@ | ||
209 | int stage_count; /* The stage count of the partial schedule. */ | ||
210 | }; | ||
211 | |||
212 | -/* We use this to record all the register replacements we do in | ||
213 | - the kernel so we can undo SMS if it is not profitable. */ | ||
214 | -struct undo_replace_buff_elem | ||
215 | -{ | ||
216 | - rtx insn; | ||
217 | - rtx orig_reg; | ||
218 | - rtx new_reg; | ||
219 | - struct undo_replace_buff_elem *next; | ||
220 | -}; | ||
221 | - | ||
222 | - | ||
223 | |||
224 | static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); | ||
225 | static void free_partial_schedule (partial_schedule_ptr); | ||
226 | @@ -183,9 +205,7 @@ | ||
227 | void print_partial_schedule (partial_schedule_ptr, FILE *); | ||
228 | static void verify_partial_schedule (partial_schedule_ptr, sbitmap); | ||
229 | static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, | ||
230 | - ddg_node_ptr node, int cycle, | ||
231 | - sbitmap must_precede, | ||
232 | - sbitmap must_follow); | ||
233 | + int, int, sbitmap, sbitmap); | ||
234 | static void rotate_partial_schedule (partial_schedule_ptr, int); | ||
235 | void set_row_column_for_ps (partial_schedule_ptr); | ||
236 | static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); | ||
237 | @@ -201,43 +221,27 @@ | ||
238 | static void permute_partial_schedule (partial_schedule_ptr, rtx); | ||
239 | static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, | ||
240 | rtx, rtx); | ||
241 | -static void duplicate_insns_of_cycles (partial_schedule_ptr, | ||
242 | - int, int, int, rtx); | ||
243 | static int calculate_stage_count (partial_schedule_ptr, int); | ||
244 | static void calculate_must_precede_follow (ddg_node_ptr, int, int, | ||
245 | int, int, sbitmap, sbitmap, sbitmap); | ||
246 | static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, | ||
247 | sbitmap, int, int *, int *, int *); | ||
248 | -static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr, | ||
249 | - int, int, sbitmap, int *, sbitmap, | ||
250 | - sbitmap); | ||
251 | +static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, | ||
252 | + sbitmap, int *, sbitmap, sbitmap); | ||
253 | static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); | ||
254 | |||
255 | -#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) | ||
256 | -#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) | ||
257 | -#define SCHED_FIRST_REG_MOVE(x) \ | ||
258 | - (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) | ||
259 | -#define SCHED_NREG_MOVES(x) \ | ||
260 | - (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) | ||
261 | -#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) | ||
262 | -#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) | ||
263 | -#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) | ||
264 | +#define NODE_ASAP(node) ((node)->aux.count) | ||
265 | + | ||
266 | +#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) | ||
267 | +#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) | ||
268 | +#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) | ||
269 | +#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) | ||
270 | +#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) | ||
271 | |||
272 | /* The scheduling parameters held for each node. */ | ||
273 | typedef struct node_sched_params | ||
274 | { | ||
275 | - int asap; /* A lower-bound on the absolute scheduling cycle. */ | ||
276 | - int time; /* The absolute scheduling cycle (time >= asap). */ | ||
277 | - | ||
278 | - /* The following field (first_reg_move) is a pointer to the first | ||
279 | - register-move instruction added to handle the modulo-variable-expansion | ||
280 | - of the register defined by this node. This register-move copies the | ||
281 | - original register defined by the node. */ | ||
282 | - rtx first_reg_move; | ||
283 | - | ||
284 | - /* The number of register-move instructions added, immediately preceding | ||
285 | - first_reg_move. */ | ||
286 | - int nreg_moves; | ||
287 | + int time; /* The absolute scheduling cycle. */ | ||
288 | |||
289 | int row; /* Holds time % ii. */ | ||
290 | int stage; /* Holds time / ii. */ | ||
291 | @@ -247,6 +251,9 @@ | ||
292 | int column; | ||
293 | } *node_sched_params_ptr; | ||
294 | |||
295 | +typedef struct node_sched_params node_sched_params; | ||
296 | +DEF_VEC_O (node_sched_params); | ||
297 | +DEF_VEC_ALLOC_O (node_sched_params, heap); | ||
298 | |||
299 | /* The following three functions are copied from the current scheduler | ||
300 | code in order to use sched_analyze() for computing the dependencies. | ||
301 | @@ -296,6 +303,49 @@ | ||
302 | 0 | ||
303 | }; | ||
304 | |||
305 | +/* Partial schedule instruction ID in PS is a register move. Return | ||
306 | + information about it. */ | ||
307 | +static struct ps_reg_move_info * | ||
308 | +ps_reg_move (partial_schedule_ptr ps, int id) | ||
309 | +{ | ||
310 | + gcc_checking_assert (id >= ps->g->num_nodes); | ||
311 | + return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); | ||
312 | +} | ||
313 | + | ||
314 | +/* Return the rtl instruction that is being scheduled by partial schedule | ||
315 | + instruction ID, which belongs to schedule PS. */ | ||
316 | +static rtx | ||
317 | +ps_rtl_insn (partial_schedule_ptr ps, int id) | ||
318 | +{ | ||
319 | + if (id < ps->g->num_nodes) | ||
320 | + return ps->g->nodes[id].insn; | ||
321 | + else | ||
322 | + return ps_reg_move (ps, id)->insn; | ||
323 | +} | ||
324 | + | ||
325 | +/* Partial schedule instruction ID, which belongs to PS, occurred in | ||
326 | + the original (unscheduled) loop. Return the first instruction | ||
327 | + in the loop that was associated with ps_rtl_insn (PS, ID). | ||
328 | + If the instruction had some notes before it, this is the first | ||
329 | + of those notes. */ | ||
330 | +static rtx | ||
331 | +ps_first_note (partial_schedule_ptr ps, int id) | ||
332 | +{ | ||
333 | + gcc_assert (id < ps->g->num_nodes); | ||
334 | + return ps->g->nodes[id].first_note; | ||
335 | +} | ||
336 | + | ||
337 | +/* Return the number of consecutive stages that are occupied by | ||
338 | + partial schedule instruction ID in PS. */ | ||
339 | +static int | ||
340 | +ps_num_consecutive_stages (partial_schedule_ptr ps, int id) | ||
341 | +{ | ||
342 | + if (id < ps->g->num_nodes) | ||
343 | + return 1; | ||
344 | + else | ||
345 | + return ps_reg_move (ps, id)->num_consecutive_stages; | ||
346 | +} | ||
347 | + | ||
348 | /* Given HEAD and TAIL which are the first and last insns in a loop; | ||
349 | return the register which controls the loop. Return zero if it has | ||
350 | more than one occurrence in the loop besides the control part or the | ||
351 | @@ -396,35 +446,59 @@ | ||
352 | } | ||
353 | |||
354 | |||
355 | -/* Points to the array that contains the sched data for each node. */ | ||
356 | -static node_sched_params_ptr node_sched_params; | ||
357 | +/* A vector that contains the sched data for each ps_insn. */ | ||
358 | +static VEC (node_sched_params, heap) *node_sched_param_vec; | ||
359 | |||
360 | -/* Allocate sched_params for each node and initialize it. Assumes that | ||
361 | - the aux field of each node contain the asap bound (computed earlier), | ||
362 | - and copies it into the sched_params field. */ | ||
363 | +/* Allocate sched_params for each node and initialize it. */ | ||
364 | static void | ||
365 | set_node_sched_params (ddg_ptr g) | ||
366 | { | ||
367 | - int i; | ||
368 | - | ||
369 | - /* Allocate for each node in the DDG a place to hold the "sched_data". */ | ||
370 | - /* Initialize ASAP/ALAP/HIGHT to zero. */ | ||
371 | - node_sched_params = (node_sched_params_ptr) | ||
372 | - xcalloc (g->num_nodes, | ||
373 | - sizeof (struct node_sched_params)); | ||
374 | - | ||
375 | - /* Set the pointer of the general data of the node to point to the | ||
376 | - appropriate sched_params structure. */ | ||
377 | - for (i = 0; i < g->num_nodes; i++) | ||
378 | - { | ||
379 | - /* Watch out for aliasing problems? */ | ||
380 | - node_sched_params[i].asap = g->nodes[i].aux.count; | ||
381 | - g->nodes[i].aux.info = &node_sched_params[i]; | ||
382 | - } | ||
383 | -} | ||
384 | - | ||
385 | -static void | ||
386 | -print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) | ||
387 | + VEC_truncate (node_sched_params, node_sched_param_vec, 0); | ||
388 | + VEC_safe_grow_cleared (node_sched_params, heap, | ||
389 | + node_sched_param_vec, g->num_nodes); | ||
390 | +} | ||
391 | + | ||
392 | +/* Make sure that node_sched_param_vec has an entry for every move in PS. */ | ||
393 | +static void | ||
394 | +extend_node_sched_params (partial_schedule_ptr ps) | ||
395 | +{ | ||
396 | + VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, | ||
397 | + ps->g->num_nodes + VEC_length (ps_reg_move_info, | ||
398 | + ps->reg_moves)); | ||
399 | +} | ||
400 | + | ||
401 | +/* Update the sched_params (time, row and stage) for node U using the II, | ||
402 | + the CYCLE of U and MIN_CYCLE. | ||
403 | + We're not simply taking the following | ||
404 | + SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); | ||
405 | + because the stages may not be aligned on cycle 0. */ | ||
406 | +static void | ||
407 | +update_node_sched_params (int u, int ii, int cycle, int min_cycle) | ||
408 | +{ | ||
409 | + int sc_until_cycle_zero; | ||
410 | + int stage; | ||
411 | + | ||
412 | + SCHED_TIME (u) = cycle; | ||
413 | + SCHED_ROW (u) = SMODULO (cycle, ii); | ||
414 | + | ||
415 | + /* The calculation of stage count is done adding the number | ||
416 | + of stages before cycle zero and after cycle zero. */ | ||
417 | + sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); | ||
418 | + | ||
419 | + if (SCHED_TIME (u) < 0) | ||
420 | + { | ||
421 | + stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); | ||
422 | + SCHED_STAGE (u) = sc_until_cycle_zero - stage; | ||
423 | + } | ||
424 | + else | ||
425 | + { | ||
426 | + stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); | ||
427 | + SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; | ||
428 | + } | ||
429 | +} | ||
430 | + | ||
431 | +static void | ||
432 | +print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) | ||
433 | { | ||
434 | int i; | ||
435 | |||
436 | @@ -432,22 +506,170 @@ | ||
437 | return; | ||
438 | for (i = 0; i < num_nodes; i++) | ||
439 | { | ||
440 | - node_sched_params_ptr nsp = &node_sched_params[i]; | ||
441 | - rtx reg_move = nsp->first_reg_move; | ||
442 | - int j; | ||
443 | + node_sched_params_ptr nsp = SCHED_PARAMS (i); | ||
444 | |||
445 | fprintf (file, "Node = %d; INSN = %d\n", i, | ||
446 | - (INSN_UID (g->nodes[i].insn))); | ||
447 | - fprintf (file, " asap = %d:\n", nsp->asap); | ||
448 | + INSN_UID (ps_rtl_insn (ps, i))); | ||
449 | + fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); | ||
450 | fprintf (file, " time = %d:\n", nsp->time); | ||
451 | - fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); | ||
452 | - for (j = 0; j < nsp->nreg_moves; j++) | ||
453 | + fprintf (file, " stage = %d:\n", nsp->stage); | ||
454 | + } | ||
455 | +} | ||
456 | + | ||
457 | +/* Set SCHED_COLUMN for each instruction in row ROW of PS. */ | ||
458 | +static void | ||
459 | +set_columns_for_row (partial_schedule_ptr ps, int row) | ||
460 | +{ | ||
461 | + ps_insn_ptr cur_insn; | ||
462 | + int column; | ||
463 | + | ||
464 | + column = 0; | ||
465 | + for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) | ||
466 | + SCHED_COLUMN (cur_insn->id) = column++; | ||
467 | +} | ||
468 | + | ||
469 | +/* Set SCHED_COLUMN for each instruction in PS. */ | ||
470 | +static void | ||
471 | +set_columns_for_ps (partial_schedule_ptr ps) | ||
472 | +{ | ||
473 | + int row; | ||
474 | + | ||
475 | + for (row = 0; row < ps->ii; row++) | ||
476 | + set_columns_for_row (ps, row); | ||
477 | +} | ||
478 | + | ||
479 | +/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. | ||
480 | + Its single predecessor has already been scheduled, as has its | ||
481 | + ddg node successors. (The move may also have another move as its | ||
482 | + successor, in which case that successor will be scheduled later.) | ||
483 | + | ||
484 | + The move is part of a chain that satisfies register dependencies | ||
485 | + between a producing ddg node and various consuming ddg nodes. | ||
486 | + If some of these dependencies have a distance of 1 (meaning that | ||
487 | + the use is upward-exposed) then DISTANCE1_USES is nonnull and | ||
488 | + contains the set of uses with distance-1 dependencies. | ||
489 | + DISTANCE1_USES is null otherwise. | ||
490 | + | ||
491 | + MUST_FOLLOW is a scratch bitmap that is big enough to hold | ||
492 | + all current ps_insn ids. | ||
493 | + | ||
494 | + Return true on success. */ | ||
495 | +static bool | ||
496 | +schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, | ||
497 | + sbitmap distance1_uses, sbitmap must_follow) | ||
498 | +{ | ||
499 | + unsigned int u; | ||
500 | + int this_time, this_distance, this_start, this_end, this_latency; | ||
501 | + int start, end, c, ii; | ||
502 | + sbitmap_iterator sbi; | ||
503 | + ps_reg_move_info *move; | ||
504 | + rtx this_insn; | ||
505 | + ps_insn_ptr psi; | ||
506 | + | ||
507 | + move = ps_reg_move (ps, i_reg_move); | ||
508 | + ii = ps->ii; | ||
509 | + if (dump_file) | ||
510 | + { | ||
511 | + fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" | ||
512 | + ", min cycle = %d\n\n", INSN_UID (move->insn), ii, | ||
513 | + PS_MIN_CYCLE (ps)); | ||
514 | + print_rtl_single (dump_file, move->insn); | ||
515 | + fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); | ||
516 | + fprintf (dump_file, "=========== =========== =====\n"); | ||
517 | + } | ||
518 | + | ||
519 | + start = INT_MIN; | ||
520 | + end = INT_MAX; | ||
521 | + | ||
522 | + /* For dependencies of distance 1 between a producer ddg node A | ||
523 | + and consumer ddg node B, we have a chain of dependencies: | ||
524 | + | ||
525 | + A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B | ||
526 | + | ||
527 | + where Mi is the ith move. For dependencies of distance 0 between | ||
528 | + a producer ddg node A and consumer ddg node C, we have a chain of | ||
529 | + dependencies: | ||
530 | + | ||
531 | + A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C | ||
532 | + | ||
533 | + where Mi' occupies the same position as Mi but occurs a stage later. | ||
534 | + We can only schedule each move once, so if we have both types of | ||
535 | + chain, we model the second as: | ||
536 | + | ||
537 | + A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C | ||
538 | + | ||
539 | + First handle the dependencies between the previously-scheduled | ||
540 | + predecessor and the move. */ | ||
541 | + this_insn = ps_rtl_insn (ps, move->def); | ||
542 | + this_latency = insn_latency (this_insn, move->insn); | ||
543 | + this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; | ||
544 | + this_time = SCHED_TIME (move->def) - this_distance * ii; | ||
545 | + this_start = this_time + this_latency; | ||
546 | + this_end = this_time + ii; | ||
547 | + if (dump_file) | ||
548 | + fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", | ||
549 | + this_start, this_end, SCHED_TIME (move->def), | ||
550 | + INSN_UID (this_insn), this_latency, this_distance, | ||
551 | + INSN_UID (move->insn)); | ||
552 | + | ||
553 | + if (start < this_start) | ||
554 | + start = this_start; | ||
555 | + if (end > this_end) | ||
556 | + end = this_end; | ||
557 | + | ||
558 | + /* Handle the dependencies between the move and previously-scheduled | ||
559 | + successors. */ | ||
560 | + EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) | ||
561 | + { | ||
562 | + this_insn = ps_rtl_insn (ps, u); | ||
563 | + this_latency = insn_latency (move->insn, this_insn); | ||
564 | + if (distance1_uses && !TEST_BIT (distance1_uses, u)) | ||
565 | + this_distance = -1; | ||
566 | + else | ||
567 | + this_distance = 0; | ||
568 | + this_time = SCHED_TIME (u) + this_distance * ii; | ||
569 | + this_start = this_time - ii; | ||
570 | + this_end = this_time - this_latency; | ||
571 | + if (dump_file) | ||
572 | + fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", | ||
573 | + this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), | ||
574 | + this_latency, this_distance, INSN_UID (this_insn)); | ||
575 | + | ||
576 | + if (start < this_start) | ||
577 | + start = this_start; | ||
578 | + if (end > this_end) | ||
579 | + end = this_end; | ||
580 | + } | ||
581 | + | ||
582 | + if (dump_file) | ||
583 | + { | ||
584 | + fprintf (dump_file, "----------- ----------- -----\n"); | ||
585 | + fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); | ||
586 | + } | ||
587 | + | ||
588 | + sbitmap_zero (must_follow); | ||
589 | + SET_BIT (must_follow, move->def); | ||
590 | + | ||
591 | + start = MAX (start, end - (ii - 1)); | ||
592 | + for (c = end; c >= start; c--) | ||
593 | + { | ||
594 | + psi = ps_add_node_check_conflicts (ps, i_reg_move, c, | ||
595 | + move->uses, must_follow); | ||
596 | + if (psi) | ||
597 | { | ||
598 | - fprintf (file, " reg_move = "); | ||
599 | - print_rtl_single (file, reg_move); | ||
600 | - reg_move = PREV_INSN (reg_move); | ||
601 | + update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); | ||
602 | + if (dump_file) | ||
603 | + fprintf (dump_file, "\nScheduled register move INSN %d at" | ||
604 | + " time %d, row %d\n\n", INSN_UID (move->insn), c, | ||
605 | + SCHED_ROW (i_reg_move)); | ||
606 | + return true; | ||
607 | } | ||
608 | } | ||
609 | + | ||
610 | + if (dump_file) | ||
611 | + fprintf (dump_file, "\nNo available slot\n\n"); | ||
612 | + | ||
613 | + return false; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | @@ -461,22 +683,23 @@ | ||
618 | nreg_moves = ----------------------------------- + 1 - { dependence. | ||
619 | ii { 1 if not. | ||
620 | */ | ||
621 | -static struct undo_replace_buff_elem * | ||
622 | -generate_reg_moves (partial_schedule_ptr ps, bool rescan) | ||
623 | +static bool | ||
624 | +schedule_reg_moves (partial_schedule_ptr ps) | ||
625 | { | ||
626 | ddg_ptr g = ps->g; | ||
627 | int ii = ps->ii; | ||
628 | int i; | ||
629 | - struct undo_replace_buff_elem *reg_move_replaces = NULL; | ||
630 | |||
631 | for (i = 0; i < g->num_nodes; i++) | ||
632 | { | ||
633 | ddg_node_ptr u = &g->nodes[i]; | ||
634 | ddg_edge_ptr e; | ||
635 | int nreg_moves = 0, i_reg_move; | ||
636 | - sbitmap *uses_of_defs; | ||
637 | - rtx last_reg_move; | ||
638 | rtx prev_reg, old_reg; | ||
639 | + int first_move; | ||
640 | + int distances[2]; | ||
641 | + sbitmap must_follow; | ||
642 | + sbitmap distance1_uses; | ||
643 | rtx set = single_set (u->insn); | ||
644 | |||
645 | /* Skip instructions that do not set a register. */ | ||
646 | @@ -485,18 +708,21 @@ | ||
647 | |||
648 | /* Compute the number of reg_moves needed for u, by looking at life | ||
649 | ranges started at u (excluding self-loops). */ | ||
650 | + distances[0] = distances[1] = false; | ||
651 | for (e = u->out; e; e = e->next_out) | ||
652 | if (e->type == TRUE_DEP && e->dest != e->src) | ||
653 | { | ||
654 | - int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; | ||
655 | + int nreg_moves4e = (SCHED_TIME (e->dest->cuid) | ||
656 | + - SCHED_TIME (e->src->cuid)) / ii; | ||
657 | |||
658 | if (e->distance == 1) | ||
659 | - nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; | ||
660 | + nreg_moves4e = (SCHED_TIME (e->dest->cuid) | ||
661 | + - SCHED_TIME (e->src->cuid) + ii) / ii; | ||
662 | |||
663 | /* If dest precedes src in the schedule of the kernel, then dest | ||
664 | will read before src writes and we can save one reg_copy. */ | ||
665 | - if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) | ||
666 | - && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) | ||
667 | + if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) | ||
668 | + && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) | ||
669 | nreg_moves4e--; | ||
670 | |||
671 | if (nreg_moves4e >= 1) | ||
672 | @@ -513,125 +739,105 @@ | ||
673 | gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); | ||
674 | } | ||
675 | |||
676 | + if (nreg_moves4e) | ||
677 | + { | ||
678 | + gcc_assert (e->distance < 2); | ||
679 | + distances[e->distance] = true; | ||
680 | + } | ||
681 | nreg_moves = MAX (nreg_moves, nreg_moves4e); | ||
682 | } | ||
683 | |||
684 | if (nreg_moves == 0) | ||
685 | continue; | ||
686 | |||
687 | + /* Create NREG_MOVES register moves. */ | ||
688 | + first_move = VEC_length (ps_reg_move_info, ps->reg_moves); | ||
689 | + VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, | ||
690 | + first_move + nreg_moves); | ||
691 | + extend_node_sched_params (ps); | ||
692 | + | ||
693 | + /* Record the moves associated with this node. */ | ||
694 | + first_move += ps->g->num_nodes; | ||
695 | + | ||
696 | + /* Generate each move. */ | ||
697 | + old_reg = prev_reg = SET_DEST (single_set (u->insn)); | ||
698 | + for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) | ||
699 | + { | ||
700 | + ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); | ||
701 | + | ||
702 | + move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; | ||
703 | + move->uses = sbitmap_alloc (first_move + nreg_moves); | ||
704 | + move->old_reg = old_reg; | ||
705 | + move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); | ||
706 | + move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; | ||
707 | + move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); | ||
708 | + sbitmap_zero (move->uses); | ||
709 | + | ||
710 | + prev_reg = move->new_reg; | ||
711 | + } | ||
712 | + | ||
713 | + distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; | ||
714 | + | ||
715 | /* Every use of the register defined by node may require a different | ||
716 | copy of this register, depending on the time the use is scheduled. | ||
717 | - Set a bitmap vector, telling which nodes use each copy of this | ||
718 | - register. */ | ||
719 | - uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); | ||
720 | - sbitmap_vector_zero (uses_of_defs, nreg_moves); | ||
721 | + Record which uses require which move results. */ | ||
722 | for (e = u->out; e; e = e->next_out) | ||
723 | if (e->type == TRUE_DEP && e->dest != e->src) | ||
724 | { | ||
725 | - int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; | ||
726 | + int dest_copy = (SCHED_TIME (e->dest->cuid) | ||
727 | + - SCHED_TIME (e->src->cuid)) / ii; | ||
728 | |||
729 | if (e->distance == 1) | ||
730 | - dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; | ||
731 | + dest_copy = (SCHED_TIME (e->dest->cuid) | ||
732 | + - SCHED_TIME (e->src->cuid) + ii) / ii; | ||
733 | |||
734 | - if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) | ||
735 | - && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) | ||
736 | + if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) | ||
737 | + && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) | ||
738 | dest_copy--; | ||
739 | |||
740 | if (dest_copy) | ||
741 | - SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); | ||
742 | + { | ||
743 | + ps_reg_move_info *move; | ||
744 | + | ||
745 | + move = ps_reg_move (ps, first_move + dest_copy - 1); | ||
746 | + SET_BIT (move->uses, e->dest->cuid); | ||
747 | + if (e->distance == 1) | ||
748 | + SET_BIT (distance1_uses, e->dest->cuid); | ||
749 | + } | ||
750 | } | ||
751 | |||
752 | - /* Now generate the reg_moves, attaching relevant uses to them. */ | ||
753 | - SCHED_NREG_MOVES (u) = nreg_moves; | ||
754 | - old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); | ||
755 | - /* Insert the reg-moves right before the notes which precede | ||
756 | - the insn they relates to. */ | ||
757 | - last_reg_move = u->first_note; | ||
758 | - | ||
759 | + must_follow = sbitmap_alloc (first_move + nreg_moves); | ||
760 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) | ||
761 | + if (!schedule_reg_move (ps, first_move + i_reg_move, | ||
762 | + distance1_uses, must_follow)) | ||
763 | + break; | ||
764 | + sbitmap_free (must_follow); | ||
765 | + if (distance1_uses) | ||
766 | + sbitmap_free (distance1_uses); | ||
767 | + if (i_reg_move < nreg_moves) | ||
768 | + return false; | ||
769 | + } | ||
770 | + return true; | ||
771 | +} | ||
772 | + | ||
773 | +/* Emit the moves associated with PS. Apply the substitutions | ||
774 | + associated with them. */ | ||
775 | +static void | ||
776 | +apply_reg_moves (partial_schedule_ptr ps) | ||
777 | +{ | ||
778 | + ps_reg_move_info *move; | ||
779 | + int i; | ||
780 | + | ||
781 | + FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) | ||
782 | + { | ||
783 | + unsigned int i_use; | ||
784 | + sbitmap_iterator sbi; | ||
785 | + | ||
786 | + EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) | ||
787 | { | ||
788 | - unsigned int i_use = 0; | ||
789 | - rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); | ||
790 | - rtx reg_move = gen_move_insn (new_reg, prev_reg); | ||
791 | - sbitmap_iterator sbi; | ||
792 | - | ||
793 | - add_insn_before (reg_move, last_reg_move, NULL); | ||
794 | - last_reg_move = reg_move; | ||
795 | - | ||
796 | - if (!SCHED_FIRST_REG_MOVE (u)) | ||
797 | - SCHED_FIRST_REG_MOVE (u) = reg_move; | ||
798 | - | ||
799 | - EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) | ||
800 | - { | ||
801 | - struct undo_replace_buff_elem *rep; | ||
802 | - | ||
803 | - rep = (struct undo_replace_buff_elem *) | ||
804 | - xcalloc (1, sizeof (struct undo_replace_buff_elem)); | ||
805 | - rep->insn = g->nodes[i_use].insn; | ||
806 | - rep->orig_reg = old_reg; | ||
807 | - rep->new_reg = new_reg; | ||
808 | - | ||
809 | - if (! reg_move_replaces) | ||
810 | - reg_move_replaces = rep; | ||
811 | - else | ||
812 | - { | ||
813 | - rep->next = reg_move_replaces; | ||
814 | - reg_move_replaces = rep; | ||
815 | - } | ||
816 | - | ||
817 | - replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); | ||
818 | - if (rescan) | ||
819 | - df_insn_rescan (g->nodes[i_use].insn); | ||
820 | - } | ||
821 | - | ||
822 | - prev_reg = new_reg; | ||
823 | + replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); | ||
824 | + df_insn_rescan (ps->g->nodes[i_use].insn); | ||
825 | } | ||
826 | - sbitmap_vector_free (uses_of_defs); | ||
827 | - } | ||
828 | - return reg_move_replaces; | ||
829 | -} | ||
830 | - | ||
831 | -/* Free memory allocated for the undo buffer. */ | ||
832 | -static void | ||
833 | -free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) | ||
834 | -{ | ||
835 | - | ||
836 | - while (reg_move_replaces) | ||
837 | - { | ||
838 | - struct undo_replace_buff_elem *rep = reg_move_replaces; | ||
839 | - | ||
840 | - reg_move_replaces = reg_move_replaces->next; | ||
841 | - free (rep); | ||
842 | - } | ||
843 | -} | ||
844 | - | ||
845 | -/* Update the sched_params (time, row and stage) for node U using the II, | ||
846 | - the CYCLE of U and MIN_CYCLE. | ||
847 | - We're not simply taking the following | ||
848 | - SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); | ||
849 | - because the stages may not be aligned on cycle 0. */ | ||
850 | -static void | ||
851 | -update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle) | ||
852 | -{ | ||
853 | - int sc_until_cycle_zero; | ||
854 | - int stage; | ||
855 | - | ||
856 | - SCHED_TIME (u) = cycle; | ||
857 | - SCHED_ROW (u) = SMODULO (cycle, ii); | ||
858 | - | ||
859 | - /* The calculation of stage count is done adding the number | ||
860 | - of stages before cycle zero and after cycle zero. */ | ||
861 | - sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); | ||
862 | - | ||
863 | - if (SCHED_TIME (u) < 0) | ||
864 | - { | ||
865 | - stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); | ||
866 | - SCHED_STAGE (u) = sc_until_cycle_zero - stage; | ||
867 | - } | ||
868 | - else | ||
869 | - { | ||
870 | - stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); | ||
871 | - SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; | ||
872 | } | ||
873 | } | ||
874 | |||
875 | @@ -647,18 +853,19 @@ | ||
876 | for (row = 0; row < ii; row++) | ||
877 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | ||
878 | { | ||
879 | - ddg_node_ptr u = crr_insn->node; | ||
880 | + int u = crr_insn->id; | ||
881 | int normalized_time = SCHED_TIME (u) - amount; | ||
882 | int new_min_cycle = PS_MIN_CYCLE (ps) - amount; | ||
883 | |||
884 | if (dump_file) | ||
885 | { | ||
886 | /* Print the scheduling times after the rotation. */ | ||
887 | + rtx insn = ps_rtl_insn (ps, u); | ||
888 | + | ||
889 | fprintf (dump_file, "crr_insn->node=%d (insn id %d), " | ||
890 | - "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, | ||
891 | - INSN_UID (crr_insn->node->insn), normalized_time, | ||
892 | - new_min_cycle); | ||
893 | - if (JUMP_P (crr_insn->node->insn)) | ||
894 | + "crr_insn->cycle=%d, min_cycle=%d", u, | ||
895 | + INSN_UID (insn), normalized_time, new_min_cycle); | ||
896 | + if (JUMP_P (insn)) | ||
897 | fprintf (dump_file, " (branch)"); | ||
898 | fprintf (dump_file, "\n"); | ||
899 | } | ||
900 | @@ -671,22 +878,6 @@ | ||
901 | } | ||
902 | } | ||
903 | |||
904 | -/* Set SCHED_COLUMN of each node according to its position in PS. */ | ||
905 | -static void | ||
906 | -set_columns_for_ps (partial_schedule_ptr ps) | ||
907 | -{ | ||
908 | - int row; | ||
909 | - | ||
910 | - for (row = 0; row < ps->ii; row++) | ||
911 | - { | ||
912 | - ps_insn_ptr cur_insn = ps->rows[row]; | ||
913 | - int column = 0; | ||
914 | - | ||
915 | - for (; cur_insn; cur_insn = cur_insn->next_in_row) | ||
916 | - SCHED_COLUMN (cur_insn->node) = column++; | ||
917 | - } | ||
918 | -} | ||
919 | - | ||
920 | /* Permute the insns according to their order in PS, from row 0 to | ||
921 | row ii-1, and position them right before LAST. This schedules | ||
922 | the insns of the loop kernel. */ | ||
923 | @@ -699,9 +890,18 @@ | ||
924 | |||
925 | for (row = 0; row < ii ; row++) | ||
926 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | ||
927 | - if (PREV_INSN (last) != ps_ij->node->insn) | ||
928 | - reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, | ||
929 | - PREV_INSN (last)); | ||
930 | + { | ||
931 | + rtx insn = ps_rtl_insn (ps, ps_ij->id); | ||
932 | + | ||
933 | + if (PREV_INSN (last) != insn) | ||
934 | + { | ||
935 | + if (ps_ij->id < ps->g->num_nodes) | ||
936 | + reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, | ||
937 | + PREV_INSN (last)); | ||
938 | + else | ||
939 | + add_insn_before (insn, last, NULL); | ||
940 | + } | ||
941 | + } | ||
942 | } | ||
943 | |||
944 | /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE | ||
945 | @@ -750,7 +950,7 @@ | ||
946 | to row ii-1. If they are equal just bail out. */ | ||
947 | stage_count = calculate_stage_count (ps, amount); | ||
948 | stage_count_curr = | ||
949 | - calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1)); | ||
950 | + calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); | ||
951 | |||
952 | if (stage_count == stage_count_curr) | ||
953 | { | ||
954 | @@ -779,7 +979,7 @@ | ||
955 | print_partial_schedule (ps, dump_file); | ||
956 | } | ||
957 | |||
958 | - if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1) | ||
959 | + if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) | ||
960 | { | ||
961 | ok = true; | ||
962 | goto clear; | ||
963 | @@ -794,7 +994,7 @@ | ||
964 | { | ||
965 | bool success; | ||
966 | ps_insn_ptr next_ps_i; | ||
967 | - int branch_cycle = SCHED_TIME (g->closing_branch); | ||
968 | + int branch_cycle = SCHED_TIME (g->closing_branch->cuid); | ||
969 | int row = SMODULO (branch_cycle, ps->ii); | ||
970 | int num_splits = 0; | ||
971 | sbitmap must_precede, must_follow, tmp_precede, tmp_follow; | ||
972 | @@ -850,13 +1050,12 @@ | ||
973 | branch so we can remove it from it's current cycle. */ | ||
974 | for (next_ps_i = ps->rows[row]; | ||
975 | next_ps_i; next_ps_i = next_ps_i->next_in_row) | ||
976 | - if (next_ps_i->node->cuid == g->closing_branch->cuid) | ||
977 | + if (next_ps_i->id == g->closing_branch->cuid) | ||
978 | break; | ||
979 | |||
980 | remove_node_from_ps (ps, next_ps_i); | ||
981 | success = | ||
982 | - try_scheduling_node_in_cycle (ps, g->closing_branch, | ||
983 | - g->closing_branch->cuid, c, | ||
984 | + try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, | ||
985 | sched_nodes, &num_splits, | ||
986 | tmp_precede, tmp_follow); | ||
987 | gcc_assert (num_splits == 0); | ||
988 | @@ -874,8 +1073,7 @@ | ||
989 | must_precede, branch_cycle, start, end, | ||
990 | step); | ||
991 | success = | ||
992 | - try_scheduling_node_in_cycle (ps, g->closing_branch, | ||
993 | - g->closing_branch->cuid, | ||
994 | + try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, | ||
995 | branch_cycle, sched_nodes, | ||
996 | &num_splits, tmp_precede, | ||
997 | tmp_follow); | ||
998 | @@ -889,7 +1087,7 @@ | ||
999 | fprintf (dump_file, | ||
1000 | "SMS success in moving branch to cycle %d\n", c); | ||
1001 | |||
1002 | - update_node_sched_params (g->closing_branch, ii, c, | ||
1003 | + update_node_sched_params (g->closing_branch->cuid, ii, c, | ||
1004 | PS_MIN_CYCLE (ps)); | ||
1005 | ok = true; | ||
1006 | } | ||
1007 | @@ -905,7 +1103,7 @@ | ||
1008 | |||
1009 | static void | ||
1010 | duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, | ||
1011 | - int to_stage, int for_prolog, rtx count_reg) | ||
1012 | + int to_stage, rtx count_reg) | ||
1013 | { | ||
1014 | int row; | ||
1015 | ps_insn_ptr ps_ij; | ||
1016 | @@ -913,9 +1111,9 @@ | ||
1017 | for (row = 0; row < ps->ii; row++) | ||
1018 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | ||
1019 | { | ||
1020 | - ddg_node_ptr u_node = ps_ij->node; | ||
1021 | - int j, i_reg_moves; | ||
1022 | - rtx reg_move = NULL_RTX; | ||
1023 | + int u = ps_ij->id; | ||
1024 | + int first_u, last_u; | ||
1025 | + rtx u_insn; | ||
1026 | |||
1027 | /* Do not duplicate any insn which refers to count_reg as it | ||
1028 | belongs to the control part. | ||
1029 | @@ -923,52 +1121,20 @@ | ||
1030 | be ignored. | ||
1031 | TODO: This should be done by analyzing the control part of | ||
1032 | the loop. */ | ||
1033 | - if (reg_mentioned_p (count_reg, u_node->insn) | ||
1034 | - || JUMP_P (ps_ij->node->insn)) | ||
1035 | + u_insn = ps_rtl_insn (ps, u); | ||
1036 | + if (reg_mentioned_p (count_reg, u_insn) | ||
1037 | + || JUMP_P (u_insn)) | ||
1038 | continue; | ||
1039 | |||
1040 | - if (for_prolog) | ||
1041 | - { | ||
1042 | - /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing | ||
1043 | - number of reg_moves starting with the second occurrence of | ||
1044 | - u_node, which is generated if its SCHED_STAGE <= to_stage. */ | ||
1045 | - i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; | ||
1046 | - i_reg_moves = MAX (i_reg_moves, 0); | ||
1047 | - i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); | ||
1048 | - | ||
1049 | - /* The reg_moves start from the *first* reg_move backwards. */ | ||
1050 | - if (i_reg_moves) | ||
1051 | - { | ||
1052 | - reg_move = SCHED_FIRST_REG_MOVE (u_node); | ||
1053 | - for (j = 1; j < i_reg_moves; j++) | ||
1054 | - reg_move = PREV_INSN (reg_move); | ||
1055 | - } | ||
1056 | - } | ||
1057 | - else /* It's for the epilog. */ | ||
1058 | - { | ||
1059 | - /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, | ||
1060 | - starting to decrease one stage after u_node no longer occurs; | ||
1061 | - that is, generate all reg_moves until | ||
1062 | - SCHED_STAGE (u_node) == from_stage - 1. */ | ||
1063 | - i_reg_moves = SCHED_NREG_MOVES (u_node) | ||
1064 | - - (from_stage - SCHED_STAGE (u_node) - 1); | ||
1065 | - i_reg_moves = MAX (i_reg_moves, 0); | ||
1066 | - i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); | ||
1067 | - | ||
1068 | - /* The reg_moves start from the *last* reg_move forwards. */ | ||
1069 | - if (i_reg_moves) | ||
1070 | - { | ||
1071 | - reg_move = SCHED_FIRST_REG_MOVE (u_node); | ||
1072 | - for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) | ||
1073 | - reg_move = PREV_INSN (reg_move); | ||
1074 | - } | ||
1075 | - } | ||
1076 | - | ||
1077 | - for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) | ||
1078 | - emit_insn (copy_rtx (PATTERN (reg_move))); | ||
1079 | - if (SCHED_STAGE (u_node) >= from_stage | ||
1080 | - && SCHED_STAGE (u_node) <= to_stage) | ||
1081 | - duplicate_insn_chain (u_node->first_note, u_node->insn); | ||
1082 | + first_u = SCHED_STAGE (u); | ||
1083 | + last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; | ||
1084 | + if (from_stage <= last_u && to_stage >= first_u) | ||
1085 | + { | ||
1086 | + if (u < ps->g->num_nodes) | ||
1087 | + duplicate_insn_chain (ps_first_note (ps, u), u_insn); | ||
1088 | + else | ||
1089 | + emit_insn (copy_rtx (PATTERN (u_insn))); | ||
1090 | + } | ||
1091 | } | ||
1092 | } | ||
1093 | |||
1094 | @@ -1002,7 +1168,7 @@ | ||
1095 | } | ||
1096 | |||
1097 | for (i = 0; i < last_stage; i++) | ||
1098 | - duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); | ||
1099 | + duplicate_insns_of_cycles (ps, 0, i, count_reg); | ||
1100 | |||
1101 | /* Put the prolog on the entry edge. */ | ||
1102 | e = loop_preheader_edge (loop); | ||
1103 | @@ -1014,7 +1180,7 @@ | ||
1104 | start_sequence (); | ||
1105 | |||
1106 | for (i = 0; i < last_stage; i++) | ||
1107 | - duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); | ||
1108 | + duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); | ||
1109 | |||
1110 | /* Put the epilogue on the exit edge. */ | ||
1111 | gcc_assert (single_exit (loop)); | ||
1112 | @@ -1350,10 +1516,9 @@ | ||
1113 | { | ||
1114 | rtx head, tail; | ||
1115 | rtx count_reg, count_init; | ||
1116 | - int mii, rec_mii; | ||
1117 | - unsigned stage_count = 0; | ||
1118 | + int mii, rec_mii, stage_count, min_cycle; | ||
1119 | HOST_WIDEST_INT loop_count = 0; | ||
1120 | - bool opt_sc_p = false; | ||
1121 | + bool opt_sc_p; | ||
1122 | |||
1123 | if (! (g = g_arr[loop->num])) | ||
1124 | continue; | ||
1125 | @@ -1430,62 +1595,63 @@ | ||
1126 | fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", | ||
1127 | rec_mii, mii, maxii); | ||
1128 | |||
1129 | - /* After sms_order_nodes and before sms_schedule_by_order, to copy over | ||
1130 | - ASAP. */ | ||
1131 | - set_node_sched_params (g); | ||
1132 | - | ||
1133 | - ps = sms_schedule_by_order (g, mii, maxii, node_order); | ||
1134 | - | ||
1135 | - if (ps) | ||
1136 | + for (;;) | ||
1137 | { | ||
1138 | - /* Try to achieve optimized SC by normalizing the partial | ||
1139 | - schedule (having the cycles start from cycle zero). | ||
1140 | - The branch location must be placed in row ii-1 in the | ||
1141 | - final scheduling. If failed, shift all instructions to | ||
1142 | - position the branch in row ii-1. */ | ||
1143 | - opt_sc_p = optimize_sc (ps, g); | ||
1144 | - if (opt_sc_p) | ||
1145 | - stage_count = calculate_stage_count (ps, 0); | ||
1146 | - else | ||
1147 | + set_node_sched_params (g); | ||
1148 | + | ||
1149 | + stage_count = 0; | ||
1150 | + opt_sc_p = false; | ||
1151 | + ps = sms_schedule_by_order (g, mii, maxii, node_order); | ||
1152 | + | ||
1153 | + if (ps) | ||
1154 | { | ||
1155 | - /* Bring the branch to cycle ii-1. */ | ||
1156 | - int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); | ||
1157 | + /* Try to achieve optimized SC by normalizing the partial | ||
1158 | + schedule (having the cycles start from cycle zero). | ||
1159 | + The branch location must be placed in row ii-1 in the | ||
1160 | + final scheduling. If failed, shift all instructions to | ||
1161 | + position the branch in row ii-1. */ | ||
1162 | + opt_sc_p = optimize_sc (ps, g); | ||
1163 | + if (opt_sc_p) | ||
1164 | + stage_count = calculate_stage_count (ps, 0); | ||
1165 | + else | ||
1166 | + { | ||
1167 | + /* Bring the branch to cycle ii-1. */ | ||
1168 | + int amount = (SCHED_TIME (g->closing_branch->cuid) | ||
1169 | + - (ps->ii - 1)); | ||
1170 | |||
1171 | + if (dump_file) | ||
1172 | + fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); | ||
1173 | + | ||
1174 | + stage_count = calculate_stage_count (ps, amount); | ||
1175 | + } | ||
1176 | + | ||
1177 | + gcc_assert (stage_count >= 1); | ||
1178 | + } | ||
1179 | + | ||
1180 | + /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of | ||
1181 | + 1 means that there is no interleaving between iterations thus | ||
1182 | + we let the scheduling passes do the job in this case. */ | ||
1183 | + if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) | ||
1184 | + || (count_init && (loop_count <= stage_count)) | ||
1185 | + || (flag_branch_probabilities && (trip_count <= stage_count))) | ||
1186 | + { | ||
1187 | if (dump_file) | ||
1188 | - fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); | ||
1189 | - | ||
1190 | - stage_count = calculate_stage_count (ps, amount); | ||
1191 | - } | ||
1192 | - | ||
1193 | - gcc_assert (stage_count >= 1); | ||
1194 | - PS_STAGE_COUNT (ps) = stage_count; | ||
1195 | - } | ||
1196 | - | ||
1197 | - /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of | ||
1198 | - 1 means that there is no interleaving between iterations thus | ||
1199 | - we let the scheduling passes do the job in this case. */ | ||
1200 | - if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) | ||
1201 | - || (count_init && (loop_count <= stage_count)) | ||
1202 | - || (flag_branch_probabilities && (trip_count <= stage_count))) | ||
1203 | - { | ||
1204 | - if (dump_file) | ||
1205 | - { | ||
1206 | - fprintf (dump_file, "SMS failed... \n"); | ||
1207 | - fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); | ||
1208 | - fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); | ||
1209 | - fprintf (dump_file, ", trip-count="); | ||
1210 | - fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); | ||
1211 | - fprintf (dump_file, ")\n"); | ||
1212 | - } | ||
1213 | - } | ||
1214 | - else | ||
1215 | - { | ||
1216 | - struct undo_replace_buff_elem *reg_move_replaces; | ||
1217 | + { | ||
1218 | + fprintf (dump_file, "SMS failed... \n"); | ||
1219 | + fprintf (dump_file, "SMS sched-failed (stage-count=%d," | ||
1220 | + " loop-count=", stage_count); | ||
1221 | + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); | ||
1222 | + fprintf (dump_file, ", trip-count="); | ||
1223 | + fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); | ||
1224 | + fprintf (dump_file, ")\n"); | ||
1225 | + } | ||
1226 | + break; | ||
1227 | + } | ||
1228 | |||
1229 | if (!opt_sc_p) | ||
1230 | { | ||
1231 | /* Rotate the partial schedule to have the branch in row ii-1. */ | ||
1232 | - int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1); | ||
1233 | + int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); | ||
1234 | |||
1235 | reset_sched_times (ps, amount); | ||
1236 | rotate_partial_schedule (ps, amount); | ||
1237 | @@ -1493,6 +1659,29 @@ | ||
1238 | |||
1239 | set_columns_for_ps (ps); | ||
1240 | |||
1241 | + min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); | ||
1242 | + if (!schedule_reg_moves (ps)) | ||
1243 | + { | ||
1244 | + mii = ps->ii + 1; | ||
1245 | + free_partial_schedule (ps); | ||
1246 | + continue; | ||
1247 | + } | ||
1248 | + | ||
1249 | + /* Moves that handle incoming values might have been added | ||
1250 | + to a new first stage. Bump the stage count if so. | ||
1251 | + | ||
1252 | + ??? Perhaps we could consider rotating the schedule here | ||
1253 | + instead? */ | ||
1254 | + if (PS_MIN_CYCLE (ps) < min_cycle) | ||
1255 | + { | ||
1256 | + reset_sched_times (ps, 0); | ||
1257 | + stage_count++; | ||
1258 | + } | ||
1259 | + | ||
1260 | + /* The stage count should now be correct without rotation. */ | ||
1261 | + gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); | ||
1262 | + PS_STAGE_COUNT (ps) = stage_count; | ||
1263 | + | ||
1264 | canon_loop (loop); | ||
1265 | |||
1266 | if (dump_file) | ||
1267 | @@ -1531,17 +1720,16 @@ | ||
1268 | /* The life-info is not valid any more. */ | ||
1269 | df_set_bb_dirty (g->bb); | ||
1270 | |||
1271 | - reg_move_replaces = generate_reg_moves (ps, true); | ||
1272 | + apply_reg_moves (ps); | ||
1273 | if (dump_file) | ||
1274 | - print_node_sched_params (dump_file, g->num_nodes, g); | ||
1275 | + print_node_sched_params (dump_file, g->num_nodes, ps); | ||
1276 | /* Generate prolog and epilog. */ | ||
1277 | generate_prolog_epilog (ps, loop, count_reg, count_init); | ||
1278 | - | ||
1279 | - free_undo_replace_buff (reg_move_replaces); | ||
1280 | + break; | ||
1281 | } | ||
1282 | |||
1283 | free_partial_schedule (ps); | ||
1284 | - free (node_sched_params); | ||
1285 | + VEC_free (node_sched_params, heap, node_sched_param_vec); | ||
1286 | free (node_order); | ||
1287 | free_ddg (g); | ||
1288 | } | ||
1289 | @@ -1643,9 +1831,11 @@ | ||
1290 | |||
1291 | static int | ||
1292 | get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, | ||
1293 | - sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) | ||
1294 | + sbitmap sched_nodes, int ii, int *start_p, int *step_p, | ||
1295 | + int *end_p) | ||
1296 | { | ||
1297 | int start, step, end; | ||
1298 | + int early_start, late_start; | ||
1299 | ddg_edge_ptr e; | ||
1300 | sbitmap psp = sbitmap_alloc (ps->g->num_nodes); | ||
1301 | sbitmap pss = sbitmap_alloc (ps->g->num_nodes); | ||
1302 | @@ -1653,6 +1843,8 @@ | ||
1303 | sbitmap u_node_succs = NODE_SUCCESSORS (u_node); | ||
1304 | int psp_not_empty; | ||
1305 | int pss_not_empty; | ||
1306 | + int count_preds; | ||
1307 | + int count_succs; | ||
1308 | |||
1309 | /* 1. compute sched window for u (start, end, step). */ | ||
1310 | sbitmap_zero (psp); | ||
1311 | @@ -1660,214 +1852,119 @@ | ||
1312 | psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); | ||
1313 | pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); | ||
1314 | |||
1315 | - if (psp_not_empty && !pss_not_empty) | ||
1316 | - { | ||
1317 | - int early_start = INT_MIN; | ||
1318 | - | ||
1319 | - end = INT_MAX; | ||
1320 | - for (e = u_node->in; e != 0; e = e->next_in) | ||
1321 | - { | ||
1322 | - ddg_node_ptr v_node = e->src; | ||
1323 | - | ||
1324 | - if (dump_file) | ||
1325 | - { | ||
1326 | - fprintf (dump_file, "\nProcessing edge: "); | ||
1327 | - print_ddg_edge (dump_file, e); | ||
1328 | - fprintf (dump_file, | ||
1329 | - "\nScheduling %d (%d) in psp_not_empty," | ||
1330 | - " checking p %d (%d): ", u_node->cuid, | ||
1331 | - INSN_UID (u_node->insn), v_node->cuid, INSN_UID | ||
1332 | - (v_node->insn)); | ||
1333 | - } | ||
1334 | - | ||
1335 | - if (TEST_BIT (sched_nodes, v_node->cuid)) | ||
1336 | - { | ||
1337 | - int p_st = SCHED_TIME (v_node); | ||
1338 | - | ||
1339 | - early_start = | ||
1340 | - MAX (early_start, p_st + e->latency - (e->distance * ii)); | ||
1341 | - | ||
1342 | - if (dump_file) | ||
1343 | - fprintf (dump_file, | ||
1344 | - "pred st = %d; early_start = %d; latency: %d", | ||
1345 | - p_st, early_start, e->latency); | ||
1346 | - | ||
1347 | - if (e->data_type == MEM_DEP) | ||
1348 | - end = MIN (end, SCHED_TIME (v_node) + ii - 1); | ||
1349 | - } | ||
1350 | - else if (dump_file) | ||
1351 | - fprintf (dump_file, "the node is not scheduled\n"); | ||
1352 | - } | ||
1353 | - start = early_start; | ||
1354 | - end = MIN (end, early_start + ii); | ||
1355 | - /* Schedule the node close to it's predecessors. */ | ||
1356 | - step = 1; | ||
1357 | - | ||
1358 | - if (dump_file) | ||
1359 | - fprintf (dump_file, | ||
1360 | - "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", | ||
1361 | - u_node->cuid, INSN_UID (u_node->insn), start, end, step); | ||
1362 | - } | ||
1363 | - | ||
1364 | - else if (!psp_not_empty && pss_not_empty) | ||
1365 | - { | ||
1366 | - int late_start = INT_MAX; | ||
1367 | - | ||
1368 | - end = INT_MIN; | ||
1369 | - for (e = u_node->out; e != 0; e = e->next_out) | ||
1370 | - { | ||
1371 | - ddg_node_ptr v_node = e->dest; | ||
1372 | - | ||
1373 | - if (dump_file) | ||
1374 | - { | ||
1375 | - fprintf (dump_file, "\nProcessing edge:"); | ||
1376 | - print_ddg_edge (dump_file, e); | ||
1377 | - fprintf (dump_file, | ||
1378 | - "\nScheduling %d (%d) in pss_not_empty," | ||
1379 | - " checking s %d (%d): ", u_node->cuid, | ||
1380 | - INSN_UID (u_node->insn), v_node->cuid, INSN_UID | ||
1381 | - (v_node->insn)); | ||
1382 | - } | ||
1383 | - | ||
1384 | - if (TEST_BIT (sched_nodes, v_node->cuid)) | ||
1385 | - { | ||
1386 | - int s_st = SCHED_TIME (v_node); | ||
1387 | - | ||
1388 | - late_start = MIN (late_start, | ||
1389 | - s_st - e->latency + (e->distance * ii)); | ||
1390 | - | ||
1391 | - if (dump_file) | ||
1392 | - fprintf (dump_file, | ||
1393 | - "succ st = %d; late_start = %d; latency = %d", | ||
1394 | - s_st, late_start, e->latency); | ||
1395 | - | ||
1396 | - if (e->data_type == MEM_DEP) | ||
1397 | - end = MAX (end, SCHED_TIME (v_node) - ii + 1); | ||
1398 | - if (dump_file) | ||
1399 | - fprintf (dump_file, "end = %d\n", end); | ||
1400 | - | ||
1401 | - } | ||
1402 | - else if (dump_file) | ||
1403 | - fprintf (dump_file, "the node is not scheduled\n"); | ||
1404 | - | ||
1405 | - } | ||
1406 | - start = late_start; | ||
1407 | - end = MAX (end, late_start - ii); | ||
1408 | - /* Schedule the node close to it's successors. */ | ||
1409 | + /* We first compute a forward range (start <= end), then decide whether | ||
1410 | + to reverse it. */ | ||
1411 | + early_start = INT_MIN; | ||
1412 | + late_start = INT_MAX; | ||
1413 | + start = INT_MIN; | ||
1414 | + end = INT_MAX; | ||
1415 | + step = 1; | ||
1416 | + | ||
1417 | + count_preds = 0; | ||
1418 | + count_succs = 0; | ||
1419 | + | ||
1420 | + if (dump_file && (psp_not_empty || pss_not_empty)) | ||
1421 | + { | ||
1422 | + fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" | ||
1423 | + "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); | ||
1424 | + fprintf (dump_file, "%11s %11s %11s %11s %5s\n", | ||
1425 | + "start", "early start", "late start", "end", "time"); | ||
1426 | + fprintf (dump_file, "=========== =========== =========== ===========" | ||
1427 | + " =====\n"); | ||
1428 | + } | ||
1429 | + /* Calculate early_start and limit end. Both bounds are inclusive. */ | ||
1430 | + if (psp_not_empty) | ||
1431 | + for (e = u_node->in; e != 0; e = e->next_in) | ||
1432 | + { | ||
1433 | + int v = e->src->cuid; | ||
1434 | + | ||
1435 | + if (TEST_BIT (sched_nodes, v)) | ||
1436 | + { | ||
1437 | + int p_st = SCHED_TIME (v); | ||
1438 | + int earliest = p_st + e->latency - (e->distance * ii); | ||
1439 | + int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); | ||
1440 | + | ||
1441 | + if (dump_file) | ||
1442 | + { | ||
1443 | + fprintf (dump_file, "%11s %11d %11s %11d %5d", | ||
1444 | + "", earliest, "", latest, p_st); | ||
1445 | + print_ddg_edge (dump_file, e); | ||
1446 | + fprintf (dump_file, "\n"); | ||
1447 | + } | ||
1448 | + | ||
1449 | + early_start = MAX (early_start, earliest); | ||
1450 | + end = MIN (end, latest); | ||
1451 | + | ||
1452 | + if (e->type == TRUE_DEP && e->data_type == REG_DEP) | ||
1453 | + count_preds++; | ||
1454 | + } | ||
1455 | + } | ||
1456 | + | ||
1457 | + /* Calculate late_start and limit start. Both bounds are inclusive. */ | ||
1458 | + if (pss_not_empty) | ||
1459 | + for (e = u_node->out; e != 0; e = e->next_out) | ||
1460 | + { | ||
1461 | + int v = e->dest->cuid; | ||
1462 | + | ||
1463 | + if (TEST_BIT (sched_nodes, v)) | ||
1464 | + { | ||
1465 | + int s_st = SCHED_TIME (v); | ||
1466 | + int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); | ||
1467 | + int latest = s_st - e->latency + (e->distance * ii); | ||
1468 | + | ||
1469 | + if (dump_file) | ||
1470 | + { | ||
1471 | + fprintf (dump_file, "%11d %11s %11d %11s %5d", | ||
1472 | + earliest, "", latest, "", s_st); | ||
1473 | + print_ddg_edge (dump_file, e); | ||
1474 | + fprintf (dump_file, "\n"); | ||
1475 | + } | ||
1476 | + | ||
1477 | + start = MAX (start, earliest); | ||
1478 | + late_start = MIN (late_start, latest); | ||
1479 | + | ||
1480 | + if (e->type == TRUE_DEP && e->data_type == REG_DEP) | ||
1481 | + count_succs++; | ||
1482 | + } | ||
1483 | + } | ||
1484 | + | ||
1485 | + if (dump_file && (psp_not_empty || pss_not_empty)) | ||
1486 | + { | ||
1487 | + fprintf (dump_file, "----------- ----------- ----------- -----------" | ||
1488 | + " -----\n"); | ||
1489 | + fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", | ||
1490 | + start, early_start, late_start, end, "", | ||
1491 | + "(max, max, min, min)"); | ||
1492 | + } | ||
1493 | + | ||
1494 | + /* Get a target scheduling window no bigger than ii. */ | ||
1495 | + if (early_start == INT_MIN && late_start == INT_MAX) | ||
1496 | + early_start = NODE_ASAP (u_node); | ||
1497 | + else if (early_start == INT_MIN) | ||
1498 | + early_start = late_start - (ii - 1); | ||
1499 | + late_start = MIN (late_start, early_start + (ii - 1)); | ||
1500 | + | ||
1501 | + /* Apply memory dependence limits. */ | ||
1502 | + start = MAX (start, early_start); | ||
1503 | + end = MIN (end, late_start); | ||
1504 | + | ||
1505 | + if (dump_file && (psp_not_empty || pss_not_empty)) | ||
1506 | + fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", | ||
1507 | + "", start, end, "", ""); | ||
1508 | + | ||
1509 | + /* If there are at least as many successors as predecessors, schedule the | ||
1510 | + node close to its successors. */ | ||
1511 | + if (pss_not_empty && count_succs >= count_preds) | ||
1512 | + { | ||
1513 | + int tmp = end; | ||
1514 | + end = start; | ||
1515 | + start = tmp; | ||
1516 | step = -1; | ||
1517 | - | ||
1518 | - if (dump_file) | ||
1519 | - fprintf (dump_file, | ||
1520 | - "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", | ||
1521 | - u_node->cuid, INSN_UID (u_node->insn), start, end, step); | ||
1522 | - | ||
1523 | - } | ||
1524 | - | ||
1525 | - else if (psp_not_empty && pss_not_empty) | ||
1526 | - { | ||
1527 | - int early_start = INT_MIN; | ||
1528 | - int late_start = INT_MAX; | ||
1529 | - int count_preds = 0; | ||
1530 | - int count_succs = 0; | ||
1531 | - | ||
1532 | - start = INT_MIN; | ||
1533 | - end = INT_MAX; | ||
1534 | - for (e = u_node->in; e != 0; e = e->next_in) | ||
1535 | - { | ||
1536 | - ddg_node_ptr v_node = e->src; | ||
1537 | - | ||
1538 | - if (dump_file) | ||
1539 | - { | ||
1540 | - fprintf (dump_file, "\nProcessing edge:"); | ||
1541 | - print_ddg_edge (dump_file, e); | ||
1542 | - fprintf (dump_file, | ||
1543 | - "\nScheduling %d (%d) in psp_pss_not_empty," | ||
1544 | - " checking p %d (%d): ", u_node->cuid, INSN_UID | ||
1545 | - (u_node->insn), v_node->cuid, INSN_UID | ||
1546 | - (v_node->insn)); | ||
1547 | - } | ||
1548 | - | ||
1549 | - if (TEST_BIT (sched_nodes, v_node->cuid)) | ||
1550 | - { | ||
1551 | - int p_st = SCHED_TIME (v_node); | ||
1552 | - | ||
1553 | - early_start = MAX (early_start, | ||
1554 | - p_st + e->latency | ||
1555 | - - (e->distance * ii)); | ||
1556 | - | ||
1557 | - if (dump_file) | ||
1558 | - fprintf (dump_file, | ||
1559 | - "pred st = %d; early_start = %d; latency = %d", | ||
1560 | - p_st, early_start, e->latency); | ||
1561 | - | ||
1562 | - if (e->type == TRUE_DEP && e->data_type == REG_DEP) | ||
1563 | - count_preds++; | ||
1564 | - | ||
1565 | - if (e->data_type == MEM_DEP) | ||
1566 | - end = MIN (end, SCHED_TIME (v_node) + ii - 1); | ||
1567 | - } | ||
1568 | - else if (dump_file) | ||
1569 | - fprintf (dump_file, "the node is not scheduled\n"); | ||
1570 | - | ||
1571 | - } | ||
1572 | - for (e = u_node->out; e != 0; e = e->next_out) | ||
1573 | - { | ||
1574 | - ddg_node_ptr v_node = e->dest; | ||
1575 | - | ||
1576 | - if (dump_file) | ||
1577 | - { | ||
1578 | - fprintf (dump_file, "\nProcessing edge:"); | ||
1579 | - print_ddg_edge (dump_file, e); | ||
1580 | - fprintf (dump_file, | ||
1581 | - "\nScheduling %d (%d) in psp_pss_not_empty," | ||
1582 | - " checking s %d (%d): ", u_node->cuid, INSN_UID | ||
1583 | - (u_node->insn), v_node->cuid, INSN_UID | ||
1584 | - (v_node->insn)); | ||
1585 | - } | ||
1586 | - | ||
1587 | - if (TEST_BIT (sched_nodes, v_node->cuid)) | ||
1588 | - { | ||
1589 | - int s_st = SCHED_TIME (v_node); | ||
1590 | - | ||
1591 | - late_start = MIN (late_start, | ||
1592 | - s_st - e->latency | ||
1593 | - + (e->distance * ii)); | ||
1594 | - | ||
1595 | - if (dump_file) | ||
1596 | - fprintf (dump_file, | ||
1597 | - "succ st = %d; late_start = %d; latency = %d", | ||
1598 | - s_st, late_start, e->latency); | ||
1599 | - | ||
1600 | - if (e->type == TRUE_DEP && e->data_type == REG_DEP) | ||
1601 | - count_succs++; | ||
1602 | - | ||
1603 | - if (e->data_type == MEM_DEP) | ||
1604 | - start = MAX (start, SCHED_TIME (v_node) - ii + 1); | ||
1605 | - } | ||
1606 | - else if (dump_file) | ||
1607 | - fprintf (dump_file, "the node is not scheduled\n"); | ||
1608 | - | ||
1609 | - } | ||
1610 | - start = MAX (start, early_start); | ||
1611 | - end = MIN (end, MIN (early_start + ii, late_start + 1)); | ||
1612 | - step = 1; | ||
1613 | - /* If there are more successors than predecessors schedule the | ||
1614 | - node close to it's successors. */ | ||
1615 | - if (count_succs >= count_preds) | ||
1616 | - { | ||
1617 | - int old_start = start; | ||
1618 | - | ||
1619 | - start = end - 1; | ||
1620 | - end = old_start - 1; | ||
1621 | - step = -1; | ||
1622 | - } | ||
1623 | - } | ||
1624 | - else /* psp is empty && pss is empty. */ | ||
1625 | - { | ||
1626 | - start = SCHED_ASAP (u_node); | ||
1627 | - end = start + ii; | ||
1628 | - step = 1; | ||
1629 | - } | ||
1630 | + } | ||
1631 | + | ||
1632 | + /* Now that we've finalized the window, make END an exclusive rather | ||
1633 | + than an inclusive bound. */ | ||
1634 | + end += step; | ||
1635 | |||
1636 | *start_p = start; | ||
1637 | *step_p = step; | ||
1638 | @@ -1880,10 +1977,10 @@ | ||
1639 | if (dump_file) | ||
1640 | fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", | ||
1641 | start, end, step); | ||
1642 | - return -1; | ||
1643 | + return -1; | ||
1644 | } | ||
1645 | |||
1646 | - return 0; | ||
1647 | + return 0; | ||
1648 | } | ||
1649 | |||
1650 | /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the | ||
1651 | @@ -1939,7 +2036,7 @@ | ||
1652 | SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ | ||
1653 | for (e = u_node->in; e != 0; e = e->next_in) | ||
1654 | if (TEST_BIT (sched_nodes, e->src->cuid) | ||
1655 | - && ((SCHED_TIME (e->src) - (e->distance * ii)) == | ||
1656 | + && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == | ||
1657 | first_cycle_in_window)) | ||
1658 | { | ||
1659 | if (dump_file) | ||
1660 | @@ -1964,7 +2061,7 @@ | ||
1661 | SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ | ||
1662 | for (e = u_node->out; e != 0; e = e->next_out) | ||
1663 | if (TEST_BIT (sched_nodes, e->dest->cuid) | ||
1664 | - && ((SCHED_TIME (e->dest) + (e->distance * ii)) == | ||
1665 | + && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == | ||
1666 | last_cycle_in_window)) | ||
1667 | { | ||
1668 | if (dump_file) | ||
1669 | @@ -1988,7 +2085,7 @@ | ||
1670 | last row of the scheduling window) */ | ||
1671 | |||
1672 | static bool | ||
1673 | -try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, | ||
1674 | +try_scheduling_node_in_cycle (partial_schedule_ptr ps, | ||
1675 | int u, int cycle, sbitmap sched_nodes, | ||
1676 | int *num_splits, sbitmap must_precede, | ||
1677 | sbitmap must_follow) | ||
1678 | @@ -1997,11 +2094,10 @@ | ||
1679 | bool success = 0; | ||
1680 | |||
1681 | verify_partial_schedule (ps, sched_nodes); | ||
1682 | - psi = ps_add_node_check_conflicts (ps, u_node, cycle, | ||
1683 | - must_precede, must_follow); | ||
1684 | + psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); | ||
1685 | if (psi) | ||
1686 | { | ||
1687 | - SCHED_TIME (u_node) = cycle; | ||
1688 | + SCHED_TIME (u) = cycle; | ||
1689 | SET_BIT (sched_nodes, u); | ||
1690 | success = 1; | ||
1691 | *num_splits = 0; | ||
1692 | @@ -2062,8 +2158,8 @@ | ||
1693 | &step, &end) == 0) | ||
1694 | { | ||
1695 | if (dump_file) | ||
1696 | - fprintf (dump_file, "\nTrying to schedule node %d \ | ||
1697 | - INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID | ||
1698 | + fprintf (dump_file, "\nTrying to schedule node %d " | ||
1699 | + "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID | ||
1700 | (g->nodes[u].insn)), start, end, step); | ||
1701 | |||
1702 | gcc_assert ((step > 0 && start < end) | ||
1703 | @@ -2081,7 +2177,7 @@ | ||
1704 | &tmp_precede, must_precede, | ||
1705 | c, start, end, step); | ||
1706 | success = | ||
1707 | - try_scheduling_node_in_cycle (ps, u_node, u, c, | ||
1708 | + try_scheduling_node_in_cycle (ps, u, c, | ||
1709 | sched_nodes, | ||
1710 | &num_splits, tmp_precede, | ||
1711 | tmp_follow); | ||
1712 | @@ -2181,7 +2277,7 @@ | ||
1713 | for (crr_insn = rows_new[row]; | ||
1714 | crr_insn; crr_insn = crr_insn->next_in_row) | ||
1715 | { | ||
1716 | - ddg_node_ptr u = crr_insn->node; | ||
1717 | + int u = crr_insn->id; | ||
1718 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); | ||
1719 | |||
1720 | SCHED_TIME (u) = new_time; | ||
1721 | @@ -2202,7 +2298,7 @@ | ||
1722 | for (crr_insn = rows_new[row + 1]; | ||
1723 | crr_insn; crr_insn = crr_insn->next_in_row) | ||
1724 | { | ||
1725 | - ddg_node_ptr u = crr_insn->node; | ||
1726 | + int u = crr_insn->id; | ||
1727 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; | ||
1728 | |||
1729 | SCHED_TIME (u) = new_time; | ||
1730 | @@ -2242,24 +2338,24 @@ | ||
1731 | { | ||
1732 | ddg_edge_ptr e; | ||
1733 | int lower = INT_MIN, upper = INT_MAX; | ||
1734 | - ddg_node_ptr crit_pred = NULL; | ||
1735 | - ddg_node_ptr crit_succ = NULL; | ||
1736 | + int crit_pred = -1; | ||
1737 | + int crit_succ = -1; | ||
1738 | int crit_cycle; | ||
1739 | |||
1740 | for (e = u_node->in; e != 0; e = e->next_in) | ||
1741 | { | ||
1742 | - ddg_node_ptr v_node = e->src; | ||
1743 | + int v = e->src->cuid; | ||
1744 | |||
1745 | - if (TEST_BIT (sched_nodes, v_node->cuid) | ||
1746 | - && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) | ||
1747 | - if (SCHED_TIME (v_node) > lower) | ||
1748 | + if (TEST_BIT (sched_nodes, v) | ||
1749 | + && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) | ||
1750 | + if (SCHED_TIME (v) > lower) | ||
1751 | { | ||
1752 | - crit_pred = v_node; | ||
1753 | - lower = SCHED_TIME (v_node); | ||
1754 | + crit_pred = v; | ||
1755 | + lower = SCHED_TIME (v); | ||
1756 | } | ||
1757 | } | ||
1758 | |||
1759 | - if (crit_pred != NULL) | ||
1760 | + if (crit_pred >= 0) | ||
1761 | { | ||
1762 | crit_cycle = SCHED_TIME (crit_pred) + 1; | ||
1763 | return SMODULO (crit_cycle, ii); | ||
1764 | @@ -2267,17 +2363,18 @@ | ||
1765 | |||
1766 | for (e = u_node->out; e != 0; e = e->next_out) | ||
1767 | { | ||
1768 | - ddg_node_ptr v_node = e->dest; | ||
1769 | - if (TEST_BIT (sched_nodes, v_node->cuid) | ||
1770 | - && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) | ||
1771 | - if (SCHED_TIME (v_node) < upper) | ||
1772 | + int v = e->dest->cuid; | ||
1773 | + | ||
1774 | + if (TEST_BIT (sched_nodes, v) | ||
1775 | + && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) | ||
1776 | + if (SCHED_TIME (v) < upper) | ||
1777 | { | ||
1778 | - crit_succ = v_node; | ||
1779 | - upper = SCHED_TIME (v_node); | ||
1780 | + crit_succ = v; | ||
1781 | + upper = SCHED_TIME (v); | ||
1782 | } | ||
1783 | } | ||
1784 | |||
1785 | - if (crit_succ != NULL) | ||
1786 | + if (crit_succ >= 0) | ||
1787 | { | ||
1788 | crit_cycle = SCHED_TIME (crit_succ); | ||
1789 | return SMODULO (crit_cycle, ii); | ||
1790 | @@ -2301,10 +2398,10 @@ | ||
1791 | |||
1792 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | ||
1793 | { | ||
1794 | - ddg_node_ptr u = crr_insn->node; | ||
1795 | + int u = crr_insn->id; | ||
1796 | |||
1797 | length++; | ||
1798 | - gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | ||
1799 | + gcc_assert (TEST_BIT (sched_nodes, u)); | ||
1800 | /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | ||
1801 | popcount (sched_nodes) == number of insns in ps. */ | ||
1802 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | ||
1803 | @@ -2719,6 +2816,7 @@ | ||
1804 | partial_schedule_ptr ps = XNEW (struct partial_schedule); | ||
1805 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); | ||
1806 | ps->rows_length = (int *) xcalloc (ii, sizeof (int)); | ||
1807 | + ps->reg_moves = NULL; | ||
1808 | ps->ii = ii; | ||
1809 | ps->history = history; | ||
1810 | ps->min_cycle = INT_MAX; | ||
1811 | @@ -2753,8 +2851,16 @@ | ||
1812 | static void | ||
1813 | free_partial_schedule (partial_schedule_ptr ps) | ||
1814 | { | ||
1815 | + ps_reg_move_info *move; | ||
1816 | + unsigned int i; | ||
1817 | + | ||
1818 | if (!ps) | ||
1819 | return; | ||
1820 | + | ||
1821 | + FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) | ||
1822 | + sbitmap_free (move->uses); | ||
1823 | + VEC_free (ps_reg_move_info, heap, ps->reg_moves); | ||
1824 | + | ||
1825 | free_ps_insns (ps); | ||
1826 | free (ps->rows); | ||
1827 | free (ps->rows_length); | ||
1828 | @@ -2796,12 +2902,12 @@ | ||
1829 | fprintf (dump, "\n[ROW %d ]: ", i); | ||
1830 | while (ps_i) | ||
1831 | { | ||
1832 | - if (JUMP_P (ps_i->node->insn)) | ||
1833 | - fprintf (dump, "%d (branch), ", | ||
1834 | - INSN_UID (ps_i->node->insn)); | ||
1835 | + rtx insn = ps_rtl_insn (ps, ps_i->id); | ||
1836 | + | ||
1837 | + if (JUMP_P (insn)) | ||
1838 | + fprintf (dump, "%d (branch), ", INSN_UID (insn)); | ||
1839 | else | ||
1840 | - fprintf (dump, "%d, ", | ||
1841 | - INSN_UID (ps_i->node->insn)); | ||
1842 | + fprintf (dump, "%d, ", INSN_UID (insn)); | ||
1843 | |||
1844 | ps_i = ps_i->next_in_row; | ||
1845 | } | ||
1846 | @@ -2810,11 +2916,11 @@ | ||
1847 | |||
1848 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ | ||
1849 | static ps_insn_ptr | ||
1850 | -create_ps_insn (ddg_node_ptr node, int cycle) | ||
1851 | +create_ps_insn (int id, int cycle) | ||
1852 | { | ||
1853 | ps_insn_ptr ps_i = XNEW (struct ps_insn); | ||
1854 | |||
1855 | - ps_i->node = node; | ||
1856 | + ps_i->id = id; | ||
1857 | ps_i->next_in_row = NULL; | ||
1858 | ps_i->prev_in_row = NULL; | ||
1859 | ps_i->cycle = cycle; | ||
1860 | @@ -2879,10 +2985,11 @@ | ||
1861 | next_ps_i; | ||
1862 | next_ps_i = next_ps_i->next_in_row) | ||
1863 | { | ||
1864 | - if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) | ||
1865 | + if (must_follow | ||
1866 | + && TEST_BIT (must_follow, next_ps_i->id) | ||
1867 | && ! first_must_follow) | ||
1868 | first_must_follow = next_ps_i; | ||
1869 | - if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) | ||
1870 | + if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) | ||
1871 | { | ||
1872 | /* If we have already met a node that must follow, then | ||
1873 | there is no possible column. */ | ||
1874 | @@ -2893,8 +3000,8 @@ | ||
1875 | } | ||
1876 | /* The closing branch must be the last in the row. */ | ||
1877 | if (must_precede | ||
1878 | - && TEST_BIT (must_precede, next_ps_i->node->cuid) | ||
1879 | - && JUMP_P (next_ps_i->node->insn)) | ||
1880 | + && TEST_BIT (must_precede, next_ps_i->id) | ||
1881 | + && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) | ||
1882 | return false; | ||
1883 | |||
1884 | last_in_row = next_ps_i; | ||
1885 | @@ -2903,7 +3010,7 @@ | ||
1886 | /* The closing branch is scheduled as well. Make sure there is no | ||
1887 | dependent instruction after it as the branch should be the last | ||
1888 | instruction in the row. */ | ||
1889 | - if (JUMP_P (ps_i->node->insn)) | ||
1890 | + if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) | ||
1891 | { | ||
1892 | if (first_must_follow) | ||
1893 | return false; | ||
1894 | @@ -2954,7 +3061,6 @@ | ||
1895 | { | ||
1896 | ps_insn_ptr prev, next; | ||
1897 | int row; | ||
1898 | - ddg_node_ptr next_node; | ||
1899 | |||
1900 | if (!ps || !ps_i) | ||
1901 | return false; | ||
1902 | @@ -2964,11 +3070,9 @@ | ||
1903 | if (! ps_i->next_in_row) | ||
1904 | return false; | ||
1905 | |||
1906 | - next_node = ps_i->next_in_row->node; | ||
1907 | - | ||
1908 | /* Check if next_in_row is dependent on ps_i, both having same sched | ||
1909 | times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ | ||
1910 | - if (must_follow && TEST_BIT (must_follow, next_node->cuid)) | ||
1911 | + if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) | ||
1912 | return false; | ||
1913 | |||
1914 | /* Advance PS_I over its next_in_row in the doubly linked list. */ | ||
1915 | @@ -2999,7 +3103,7 @@ | ||
1916 | before/after (respectively) the node pointed to by PS_I when scheduled | ||
1917 | in the same cycle. */ | ||
1918 | static ps_insn_ptr | ||
1919 | -add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, | ||
1920 | +add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, | ||
1921 | sbitmap must_precede, sbitmap must_follow) | ||
1922 | { | ||
1923 | ps_insn_ptr ps_i; | ||
1924 | @@ -3008,7 +3112,7 @@ | ||
1925 | if (ps->rows_length[row] >= issue_rate) | ||
1926 | return NULL; | ||
1927 | |||
1928 | - ps_i = create_ps_insn (node, cycle); | ||
1929 | + ps_i = create_ps_insn (id, cycle); | ||
1930 | |||
1931 | /* Finds and inserts PS_I according to MUST_FOLLOW and | ||
1932 | MUST_PRECEDE. */ | ||
1933 | @@ -3060,7 +3164,7 @@ | ||
1934 | crr_insn; | ||
1935 | crr_insn = crr_insn->next_in_row) | ||
1936 | { | ||
1937 | - rtx insn = crr_insn->node->insn; | ||
1938 | + rtx insn = ps_rtl_insn (ps, crr_insn->id); | ||
1939 | |||
1940 | if (!NONDEBUG_INSN_P (insn)) | ||
1941 | continue; | ||
1942 | @@ -3097,7 +3201,7 @@ | ||
1943 | cuid N must be come before/after (respectively) the node pointed to by | ||
1944 | PS_I when scheduled in the same cycle. */ | ||
1945 | ps_insn_ptr | ||
1946 | -ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, | ||
1947 | +ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, | ||
1948 | int c, sbitmap must_precede, | ||
1949 | sbitmap must_follow) | ||
1950 | { | ||
1951 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch new file mode 100644 index 000000000..02f8e5177 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106829.patch | |||
@@ -0,0 +1,147 @@ | |||
1 | 2011-10-19 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF: | ||
4 | |||
5 | 2011-09-09 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/arm-cores.def (generic-armv7-a): New architecture. | ||
9 | * config/arm/arm-tables.opt: Regenerate. | ||
10 | * config/arm/arm-tune.md: Regenerate. | ||
11 | * config/arm/arm.c (arm_file_start): Output .arch directive when | ||
12 | user passes -mcpu=generic-*. | ||
13 | (arm_issue_rate): Add genericv7a support. | ||
14 | * config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec. | ||
15 | (ASM_CPU_SPEC): New define. | ||
16 | * config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec). | ||
17 | * config/arm/semi.h (ASM_SPEC): Likewise. | ||
18 | * doc/invoke.texi (ARM Options): Document -mcpu=generic-* | ||
19 | and -mtune=generic-*. | ||
20 | |||
21 | === modified file 'gcc/config/arm/arm-cores.def' | ||
22 | --- old/gcc/config/arm/arm-cores.def 2011-06-14 16:00:30 +0000 | ||
23 | +++ new/gcc/config/arm/arm-cores.def 2011-10-19 16:46:51 +0000 | ||
24 | @@ -124,6 +124,7 @@ | ||
25 | ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) | ||
26 | ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) | ||
27 | ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) | ||
28 | +ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) | ||
29 | ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) | ||
30 | ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) | ||
31 | ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) | ||
32 | @@ -135,3 +136,4 @@ | ||
33 | ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) | ||
34 | ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) | ||
35 | ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) | ||
36 | + | ||
37 | |||
38 | === modified file 'gcc/config/arm/arm-tune.md' | ||
39 | --- old/gcc/config/arm/arm-tune.md 2011-06-14 14:37:30 +0000 | ||
40 | +++ new/gcc/config/arm/arm-tune.md 2011-10-19 16:46:51 +0000 | ||
41 | @@ -1,5 +1,5 @@ | ||
42 | ;; -*- buffer-read-only: t -*- | ||
43 | ;; Generated automatically by gentune.sh from arm-cores.def | ||
44 | (define_attr "tune" | ||
45 | - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" | ||
46 | + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" | ||
47 | (const (symbol_ref "((enum attr_tune) arm_tune)"))) | ||
48 | |||
49 | === modified file 'gcc/config/arm/arm.c' | ||
50 | --- old/gcc/config/arm/arm.c 2011-10-11 02:31:01 +0000 | ||
51 | +++ new/gcc/config/arm/arm.c 2011-10-19 16:46:51 +0000 | ||
52 | @@ -22185,6 +22185,8 @@ | ||
53 | const char *fpu_name; | ||
54 | if (arm_selected_arch) | ||
55 | asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); | ||
56 | + else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) | ||
57 | + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); | ||
58 | else | ||
59 | asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); | ||
60 | |||
61 | @@ -23717,6 +23719,7 @@ | ||
62 | case cortexr4: | ||
63 | case cortexr4f: | ||
64 | case cortexr5: | ||
65 | + case genericv7a: | ||
66 | case cortexa5: | ||
67 | case cortexa8: | ||
68 | case cortexa9: | ||
69 | |||
70 | === modified file 'gcc/config/arm/arm.h' | ||
71 | --- old/gcc/config/arm/arm.h 2011-09-05 14:32:11 +0000 | ||
72 | +++ new/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 | ||
73 | @@ -198,6 +198,7 @@ | ||
74 | Do not define this macro if it does not need to do anything. */ | ||
75 | #define EXTRA_SPECS \ | ||
76 | { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ | ||
77 | + { "asm_cpu_spec", ASM_CPU_SPEC }, \ | ||
78 | SUBTARGET_EXTRA_SPECS | ||
79 | |||
80 | #ifndef SUBTARGET_EXTRA_SPECS | ||
81 | @@ -2278,4 +2279,8 @@ | ||
82 | instruction. */ | ||
83 | #define MAX_LDM_STM_OPS 4 | ||
84 | |||
85 | +#define ASM_CPU_SPEC \ | ||
86 | + " %{mcpu=generic-*:-march=%*;" \ | ||
87 | + " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" | ||
88 | + | ||
89 | #endif /* ! GCC_ARM_H */ | ||
90 | |||
91 | === modified file 'gcc/config/arm/elf.h' | ||
92 | --- old/gcc/config/arm/elf.h 2009-06-21 19:48:15 +0000 | ||
93 | +++ new/gcc/config/arm/elf.h 2011-10-19 16:46:51 +0000 | ||
94 | @@ -56,8 +56,7 @@ | ||
95 | #define ASM_SPEC "\ | ||
96 | %{mbig-endian:-EB} \ | ||
97 | %{mlittle-endian:-EL} \ | ||
98 | -%{mcpu=*:-mcpu=%*} \ | ||
99 | -%{march=*:-march=%*} \ | ||
100 | +%(asm_cpu_spec) \ | ||
101 | %{mapcs-*:-mapcs-%*} \ | ||
102 | %(subtarget_asm_float_spec) \ | ||
103 | %{mthumb-interwork:-mthumb-interwork} \ | ||
104 | |||
105 | === modified file 'gcc/config/arm/semi.h' | ||
106 | --- old/gcc/config/arm/semi.h 2007-08-02 09:49:31 +0000 | ||
107 | +++ new/gcc/config/arm/semi.h 2011-10-19 16:46:51 +0000 | ||
108 | @@ -65,8 +65,7 @@ | ||
109 | #define ASM_SPEC "\ | ||
110 | %{fpic|fpie: -k} %{fPIC|fPIE: -k} \ | ||
111 | %{mbig-endian:-EB} \ | ||
112 | -%{mcpu=*:-mcpu=%*} \ | ||
113 | -%{march=*:-march=%*} \ | ||
114 | +%(asm_cpu_spec) \ | ||
115 | %{mapcs-float:-mfloat} \ | ||
116 | %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ | ||
117 | %{mfloat-abi=*} %{mfpu=*} \ | ||
118 | |||
119 | === modified file 'gcc/doc/invoke.texi' | ||
120 | --- old/gcc/doc/invoke.texi 2011-08-13 08:32:32 +0000 | ||
121 | +++ new/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 | ||
122 | @@ -10215,6 +10215,10 @@ | ||
123 | @samp{cortex-m0}, | ||
124 | @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. | ||
125 | |||
126 | +@option{-mcpu=generic-@var{arch}} is also permissible, and is | ||
127 | +equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. | ||
128 | +See @option{-mtune} for more information. | ||
129 | + | ||
130 | @item -mtune=@var{name} | ||
131 | @opindex mtune | ||
132 | This option is very similar to the @option{-mcpu=} option, except that | ||
133 | @@ -10226,6 +10230,13 @@ | ||
134 | For some ARM implementations better performance can be obtained by using | ||
135 | this option. | ||
136 | |||
137 | +@option{-mtune=generic-@var{arch}} specifies that GCC should tune the | ||
138 | +performance for a blend of processors within architecture @var{arch}. | ||
139 | +The aim is to generate code that run well on the current most popular | ||
140 | +processors, balancing between optimizations that benefit some CPUs in the | ||
141 | +range, and avoiding performance pitfalls of other CPUs. The effects of | ||
142 | +this option may change in future GCC versions as CPU models come and go. | ||
143 | + | ||
144 | @item -march=@var{name} | ||
145 | @opindex march | ||
146 | This specifies the name of the target ARM architecture. GCC uses this | ||
147 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch new file mode 100644 index 000000000..695aa8559 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106830.patch | |||
@@ -0,0 +1,304 @@ | |||
1 | 2011-10-19 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF: | ||
4 | |||
5 | 2011-10-18 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | * config/arm/driver-arm.c (host_detect_local_cpu): Close the file | ||
8 | before exiting. | ||
9 | |||
10 | 2011-10-18 Andrew Stubbs <ams@codesourcery.com> | ||
11 | |||
12 | gcc/ | ||
13 | * config.host (arm*-*-linux*): Add driver-arm.o and x-arm. | ||
14 | * config/arm/arm.opt: Add 'native' processor_type and | ||
15 | arm_arch enum values. | ||
16 | * config/arm/arm.h (host_detect_local_cpu): New prototype. | ||
17 | (EXTRA_SPEC_FUNCTIONS): New define. | ||
18 | (MCPU_MTUNE_NATIVE_SPECS): New define. | ||
19 | (DRIVER_SELF_SPECS): New define. | ||
20 | * config/arm/driver-arm.c: New file. | ||
21 | * config/arm/x-arm: New file. | ||
22 | * doc/invoke.texi (ARM Options): Document -mcpu=native, | ||
23 | -mtune=native and -march=native. | ||
24 | |||
25 | === modified file 'gcc/config.host' | ||
26 | --- old/gcc/config.host 2011-02-15 09:49:14 +0000 | ||
27 | +++ new/gcc/config.host 2011-10-19 17:01:50 +0000 | ||
28 | @@ -100,6 +100,14 @@ | ||
29 | esac | ||
30 | |||
31 | case ${host} in | ||
32 | + arm*-*-linux*) | ||
33 | + case ${target} in | ||
34 | + arm*-*-*) | ||
35 | + host_extra_gcc_objs="driver-arm.o" | ||
36 | + host_xmake_file="${host_xmake_file} arm/x-arm" | ||
37 | + ;; | ||
38 | + esac | ||
39 | + ;; | ||
40 | alpha*-*-linux*) | ||
41 | case ${target} in | ||
42 | alpha*-*-linux*) | ||
43 | |||
44 | === modified file 'gcc/config/arm/arm.h' | ||
45 | --- old/gcc/config/arm/arm.h 2011-10-19 16:46:51 +0000 | ||
46 | +++ new/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 | ||
47 | @@ -2283,4 +2283,21 @@ | ||
48 | " %{mcpu=generic-*:-march=%*;" \ | ||
49 | " :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}" | ||
50 | |||
51 | +/* -mcpu=native handling only makes sense with compiler running on | ||
52 | + an ARM chip. */ | ||
53 | +#if defined(__arm__) | ||
54 | +extern const char *host_detect_local_cpu (int argc, const char **argv); | ||
55 | +# define EXTRA_SPEC_FUNCTIONS \ | ||
56 | + { "local_cpu_detect", host_detect_local_cpu }, | ||
57 | + | ||
58 | +# define MCPU_MTUNE_NATIVE_SPECS \ | ||
59 | + " %{march=native:%<march=native %:local_cpu_detect(arch)}" \ | ||
60 | + " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \ | ||
61 | + " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" | ||
62 | +#else | ||
63 | +# define MCPU_MTUNE_NATIVE_SPECS "" | ||
64 | +#endif | ||
65 | + | ||
66 | +#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS | ||
67 | + | ||
68 | #endif /* ! GCC_ARM_H */ | ||
69 | |||
70 | === modified file 'gcc/config/arm/arm.opt' | ||
71 | --- old/gcc/config/arm/arm.opt 2011-10-11 02:31:01 +0000 | ||
72 | +++ new/gcc/config/arm/arm.opt 2011-10-19 17:01:50 +0000 | ||
73 | @@ -48,6 +48,11 @@ | ||
74 | Target RejectNegative Joined | ||
75 | Specify the name of the target architecture | ||
76 | |||
77 | +; Other arm_arch values are loaded from arm-tables.opt | ||
78 | +; but that is a generated file and this is an odd-one-out. | ||
79 | +EnumValue | ||
80 | +Enum(arm_arch) String(native) Value(-1) DriverOnly | ||
81 | + | ||
82 | marm | ||
83 | Target RejectNegative InverseMask(THUMB) Undocumented | ||
84 | |||
85 | @@ -153,6 +158,11 @@ | ||
86 | Target RejectNegative Joined | ||
87 | Tune code for the given processor | ||
88 | |||
89 | +; Other processor_type values are loaded from arm-tables.opt | ||
90 | +; but that is a generated file and this is an odd-one-out. | ||
91 | +EnumValue | ||
92 | +Enum(processor_type) String(native) Value(-1) DriverOnly | ||
93 | + | ||
94 | mwords-little-endian | ||
95 | Target Report RejectNegative Mask(LITTLE_WORDS) | ||
96 | Assume big endian bytes, little endian words | ||
97 | |||
98 | === added file 'gcc/config/arm/driver-arm.c' | ||
99 | --- old/gcc/config/arm/driver-arm.c 1970-01-01 00:00:00 +0000 | ||
100 | +++ new/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 | ||
101 | @@ -0,0 +1,149 @@ | ||
102 | +/* Subroutines for the gcc driver. | ||
103 | + Copyright (C) 2011 Free Software Foundation, Inc. | ||
104 | + | ||
105 | +This file is part of GCC. | ||
106 | + | ||
107 | +GCC is free software; you can redistribute it and/or modify | ||
108 | +it under the terms of the GNU General Public License as published by | ||
109 | +the Free Software Foundation; either version 3, or (at your option) | ||
110 | +any later version. | ||
111 | + | ||
112 | +GCC is distributed in the hope that it will be useful, | ||
113 | +but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
114 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
115 | +GNU General Public License for more details. | ||
116 | + | ||
117 | +You should have received a copy of the GNU General Public License | ||
118 | +along with GCC; see the file COPYING3. If not see | ||
119 | +<http://www.gnu.org/licenses/>. */ | ||
120 | + | ||
121 | +#include "config.h" | ||
122 | +#include "system.h" | ||
123 | +#include "coretypes.h" | ||
124 | +#include "tm.h" | ||
125 | +#include "configargs.h" | ||
126 | + | ||
127 | +struct vendor_cpu { | ||
128 | + const char *part_no; | ||
129 | + const char *arch_name; | ||
130 | + const char *cpu_name; | ||
131 | +}; | ||
132 | + | ||
133 | +static struct vendor_cpu arm_cpu_table[] = { | ||
134 | + {"0x926", "armv5te", "arm926ej-s"}, | ||
135 | + {"0xa26", "armv5te", "arm1026ej-s"}, | ||
136 | + {"0xb02", "armv6k", "mpcore"}, | ||
137 | + {"0xb36", "armv6j", "arm1136j-s"}, | ||
138 | + {"0xb56", "armv6t2", "arm1156t2-s"}, | ||
139 | + {"0xb76", "armv6zk", "arm1176jz-s"}, | ||
140 | + {"0xc05", "armv7-a", "cortex-a5"}, | ||
141 | + {"0xc08", "armv7-a", "cortex-a8"}, | ||
142 | + {"0xc09", "armv7-a", "cortex-a9"}, | ||
143 | + {"0xc0f", "armv7-a", "cortex-a15"}, | ||
144 | + {"0xc14", "armv7-r", "cortex-r4"}, | ||
145 | + {"0xc15", "armv7-r", "cortex-r5"}, | ||
146 | + {"0xc20", "armv6-m", "cortex-m0"}, | ||
147 | + {"0xc21", "armv6-m", "cortex-m1"}, | ||
148 | + {"0xc23", "armv7-m", "cortex-m3"}, | ||
149 | + {"0xc24", "armv7e-m", "cortex-m4"}, | ||
150 | + {NULL, NULL, NULL} | ||
151 | +}; | ||
152 | + | ||
153 | +struct { | ||
154 | + const char *vendor_no; | ||
155 | + const struct vendor_cpu *vendor_parts; | ||
156 | +} vendors[] = { | ||
157 | + {"0x41", arm_cpu_table}, | ||
158 | + {NULL, NULL} | ||
159 | +}; | ||
160 | + | ||
161 | +/* This will be called by the spec parser in gcc.c when it sees | ||
162 | + a %:local_cpu_detect(args) construct. Currently it will be called | ||
163 | + with either "arch", "cpu" or "tune" as argument depending on if | ||
164 | + -march=native, -mcpu=native or -mtune=native is to be substituted. | ||
165 | + | ||
166 | + It returns a string containing new command line parameters to be | ||
167 | + put in place of the above option, depending on what CPU this is | ||
168 | + executed on. E.g. "-march=armv7-a" on a Cortex-A8 for | ||
169 | + -march=native. If the routine can't detect a known processor, it | ||
170 | + falls back to the configure-time default, else discards the option. | ||
171 | + | ||
172 | + ARGC and ARGV are set depending on the actual arguments given | ||
173 | + in the spec. */ | ||
174 | +const char * | ||
175 | +host_detect_local_cpu (int argc, const char **argv) | ||
176 | +{ | ||
177 | + const char *val = NULL; | ||
178 | + char buf[128]; | ||
179 | + FILE *f = NULL; | ||
180 | + bool arch; | ||
181 | + const struct vendor_cpu *cpu_table = NULL; | ||
182 | + | ||
183 | + if (argc < 1) | ||
184 | + goto not_found; | ||
185 | + | ||
186 | + arch = strcmp (argv[0], "arch") == 0; | ||
187 | + if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) | ||
188 | + goto not_found; | ||
189 | + | ||
190 | + f = fopen ("/proc/cpuinfo", "r"); | ||
191 | + if (f == NULL) | ||
192 | + goto not_found; | ||
193 | + | ||
194 | + while (fgets (buf, sizeof (buf), f) != NULL) | ||
195 | + { | ||
196 | + /* Ensure that CPU implementer is ARM (0x41). */ | ||
197 | + if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) | ||
198 | + { | ||
199 | + int i; | ||
200 | + for (i = 0; vendors[i].vendor_no != NULL; i++) | ||
201 | + if (strstr (buf, vendors[i].vendor_no) != NULL) | ||
202 | + { | ||
203 | + cpu_table = vendors[i].vendor_parts; | ||
204 | + break; | ||
205 | + } | ||
206 | + } | ||
207 | + | ||
208 | + /* Detect arch/cpu. */ | ||
209 | + if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) | ||
210 | + { | ||
211 | + int i; | ||
212 | + | ||
213 | + if (cpu_table == NULL) | ||
214 | + goto not_found; | ||
215 | + | ||
216 | + for (i = 0; cpu_table[i].part_no != NULL; i++) | ||
217 | + if (strstr (buf, cpu_table[i].part_no) != NULL) | ||
218 | + { | ||
219 | + val = arch ? cpu_table[i].arch_name : cpu_table[i].cpu_name; | ||
220 | + break; | ||
221 | + } | ||
222 | + break; | ||
223 | + } | ||
224 | + } | ||
225 | + | ||
226 | + fclose (f); | ||
227 | + | ||
228 | + if (val == NULL) | ||
229 | + goto not_found; | ||
230 | + | ||
231 | + return concat ("-m", argv[0], "=", val, NULL); | ||
232 | + | ||
233 | +not_found: | ||
234 | + { | ||
235 | + unsigned int i; | ||
236 | + unsigned int opt; | ||
237 | + const char *search[] = {NULL, "arch"}; | ||
238 | + | ||
239 | + if (f) | ||
240 | + fclose (f); | ||
241 | + | ||
242 | + search[0] = argv[0]; | ||
243 | + for (opt = 0; opt < ARRAY_SIZE (search); opt++) | ||
244 | + for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) | ||
245 | + if (strcmp (configure_default_options[i].name, search[opt]) == 0) | ||
246 | + return concat ("-m", search[opt], "=", | ||
247 | + configure_default_options[i].value, NULL); | ||
248 | + return NULL; | ||
249 | + } | ||
250 | +} | ||
251 | |||
252 | === added file 'gcc/config/arm/x-arm' | ||
253 | --- old/gcc/config/arm/x-arm 1970-01-01 00:00:00 +0000 | ||
254 | +++ new/gcc/config/arm/x-arm 2011-10-19 17:01:50 +0000 | ||
255 | @@ -0,0 +1,3 @@ | ||
256 | +driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ | ||
257 | + $(CONFIG_H) $(SYSTEM_H) | ||
258 | + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< | ||
259 | |||
260 | === modified file 'gcc/doc/invoke.texi' | ||
261 | --- old/gcc/doc/invoke.texi 2011-10-19 16:46:51 +0000 | ||
262 | +++ new/gcc/doc/invoke.texi 2011-10-19 17:01:50 +0000 | ||
263 | @@ -10215,10 +10215,16 @@ | ||
264 | @samp{cortex-m0}, | ||
265 | @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. | ||
266 | |||
267 | + | ||
268 | @option{-mcpu=generic-@var{arch}} is also permissible, and is | ||
269 | equivalent to @option{-march=@var{arch} -mtune=generic-@var{arch}}. | ||
270 | See @option{-mtune} for more information. | ||
271 | |||
272 | +@option{-mcpu=native} causes the compiler to auto-detect the CPU | ||
273 | +of the build computer. At present, this feature is only supported on | ||
274 | +Linux, and not all architectures are recognised. If the auto-detect is | ||
275 | +unsuccessful the option has no effect. | ||
276 | + | ||
277 | @item -mtune=@var{name} | ||
278 | @opindex mtune | ||
279 | This option is very similar to the @option{-mcpu=} option, except that | ||
280 | @@ -10237,6 +10243,11 @@ | ||
281 | range, and avoiding performance pitfalls of other CPUs. The effects of | ||
282 | this option may change in future GCC versions as CPU models come and go. | ||
283 | |||
284 | +@option{-mtune=native} causes the compiler to auto-detect the CPU | ||
285 | +of the build computer. At present, this feature is only supported on | ||
286 | +Linux, and not all architectures are recognised. If the auto-detect is | ||
287 | +unsuccessful the option has no effect. | ||
288 | + | ||
289 | @item -march=@var{name} | ||
290 | @opindex march | ||
291 | This specifies the name of the target ARM architecture. GCC uses this | ||
292 | @@ -10250,6 +10261,11 @@ | ||
293 | @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, | ||
294 | @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. | ||
295 | |||
296 | +@option{-march=native} causes the compiler to auto-detect the architecture | ||
297 | +of the build computer. At present, this feature is only supported on | ||
298 | +Linux, and not all architectures are recognised. If the auto-detect is | ||
299 | +unsuccessful the option has no effect. | ||
300 | + | ||
301 | @item -mfpu=@var{name} | ||
302 | @itemx -mfpe=@var{number} | ||
303 | @itemx -mfp=@var{number} | ||
304 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch new file mode 100644 index 000000000..ad91d7736 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106831.patch | |||
@@ -0,0 +1,123 @@ | |||
1 | 2011-10-19 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF: | ||
4 | |||
5 | 2011-10-18 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | PR tree-optimization/50717 | ||
8 | |||
9 | gcc/ | ||
10 | * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type' | ||
11 | parameter. Calculate 'type' from stmt. | ||
12 | (convert_mult_to_widen): Update call to is_widening_mult_p. | ||
13 | (convert_plusminus_to_widen): Likewise. | ||
14 | |||
15 | gcc/testsuite/ | ||
16 | * gcc.dg/pr50717-1.c: New file. | ||
17 | * gcc.target/arm/wmul-12.c: Correct types. | ||
18 | * gcc.target/arm/wmul-8.c: Correct types. | ||
19 | |||
20 | === added file 'gcc/testsuite/gcc.dg/pr50717-1.c' | ||
21 | --- old/gcc/testsuite/gcc.dg/pr50717-1.c 1970-01-01 00:00:00 +0000 | ||
22 | +++ new/gcc/testsuite/gcc.dg/pr50717-1.c 2011-10-19 14:42:50 +0000 | ||
23 | @@ -0,0 +1,26 @@ | ||
24 | +/* PR tree-optimization/50717 */ | ||
25 | +/* Ensure that widening multiply-and-accumulate is not used where integer | ||
26 | + type promotion or users' casts should prevent it. */ | ||
27 | + | ||
28 | +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ | ||
29 | + | ||
30 | +long long | ||
31 | +f (unsigned int a, char b, long long c) | ||
32 | +{ | ||
33 | + return (a * b) + c; | ||
34 | +} | ||
35 | + | ||
36 | +int | ||
37 | +g (short a, short b, int c) | ||
38 | +{ | ||
39 | + return (short)(a * b) + c; | ||
40 | +} | ||
41 | + | ||
42 | +int | ||
43 | +h (char a, char b, int c) | ||
44 | +{ | ||
45 | + return (char)(a * b) + c; | ||
46 | +} | ||
47 | + | ||
48 | +/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */ | ||
49 | +/* { dg-final { cleanup-tree-dump "widening_mul" } } */ | ||
50 | |||
51 | === modified file 'gcc/testsuite/gcc.target/arm/wmul-12.c' | ||
52 | --- old/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-07-22 15:46:42 +0000 | ||
53 | +++ new/gcc/testsuite/gcc.target/arm/wmul-12.c 2011-10-19 14:42:50 +0000 | ||
54 | @@ -4,8 +4,8 @@ | ||
55 | long long | ||
56 | foo (int *b, int *c) | ||
57 | { | ||
58 | - int tmp = *b * *c; | ||
59 | - return 10 + (long long)tmp; | ||
60 | + long long tmp = (long long)*b * *c; | ||
61 | + return 10 + tmp; | ||
62 | } | ||
63 | |||
64 | /* { dg-final { scan-assembler "smlal" } } */ | ||
65 | |||
66 | === modified file 'gcc/testsuite/gcc.target/arm/wmul-8.c' | ||
67 | --- old/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-07-15 14:16:54 +0000 | ||
68 | +++ new/gcc/testsuite/gcc.target/arm/wmul-8.c 2011-10-19 14:42:50 +0000 | ||
69 | @@ -4,7 +4,7 @@ | ||
70 | long long | ||
71 | foo (long long a, int *b, int *c) | ||
72 | { | ||
73 | - return a + *b * *c; | ||
74 | + return a + (long long)*b * *c; | ||
75 | } | ||
76 | |||
77 | /* { dg-final { scan-assembler "smlal" } } */ | ||
78 | |||
79 | === modified file 'gcc/tree-ssa-math-opts.c' | ||
80 | --- old/gcc/tree-ssa-math-opts.c 2011-09-08 20:11:43 +0000 | ||
81 | +++ new/gcc/tree-ssa-math-opts.c 2011-10-19 14:42:50 +0000 | ||
82 | @@ -1351,10 +1351,12 @@ | ||
83 | and *TYPE2_OUT would give the operands of the multiplication. */ | ||
84 | |||
85 | static bool | ||
86 | -is_widening_mult_p (tree type, gimple stmt, | ||
87 | +is_widening_mult_p (gimple stmt, | ||
88 | tree *type1_out, tree *rhs1_out, | ||
89 | tree *type2_out, tree *rhs2_out) | ||
90 | { | ||
91 | + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | ||
92 | + | ||
93 | if (TREE_CODE (type) != INTEGER_TYPE | ||
94 | && TREE_CODE (type) != FIXED_POINT_TYPE) | ||
95 | return false; | ||
96 | @@ -1416,7 +1418,7 @@ | ||
97 | if (TREE_CODE (type) != INTEGER_TYPE) | ||
98 | return false; | ||
99 | |||
100 | - if (!is_widening_mult_p (type, stmt, &type1, &rhs1, &type2, &rhs2)) | ||
101 | + if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) | ||
102 | return false; | ||
103 | |||
104 | to_mode = TYPE_MODE (type); | ||
105 | @@ -1592,7 +1594,7 @@ | ||
106 | if (code == PLUS_EXPR | ||
107 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) | ||
108 | { | ||
109 | - if (!is_widening_mult_p (type, rhs1_stmt, &type1, &mult_rhs1, | ||
110 | + if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, | ||
111 | &type2, &mult_rhs2)) | ||
112 | return false; | ||
113 | add_rhs = rhs2; | ||
114 | @@ -1600,7 +1602,7 @@ | ||
115 | } | ||
116 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) | ||
117 | { | ||
118 | - if (!is_widening_mult_p (type, rhs2_stmt, &type1, &mult_rhs1, | ||
119 | + if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, | ||
120 | &type2, &mult_rhs2)) | ||
121 | return false; | ||
122 | add_rhs = rhs1; | ||
123 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch new file mode 100644 index 000000000..843f1cff2 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106832.patch | |||
@@ -0,0 +1,24 @@ | |||
1 | 2011-10-21 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF mainline: | ||
4 | |||
5 | 2011-10-21 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | PR target/50809 | ||
8 | |||
9 | gcc/ | ||
10 | * config/arm/driver-arm.c (vendors): Make static. | ||
11 | |||
12 | === modified file 'gcc/config/arm/driver-arm.c' | ||
13 | --- old/gcc/config/arm/driver-arm.c 2011-10-19 17:07:55 +0000 | ||
14 | +++ new/gcc/config/arm/driver-arm.c 2011-10-21 19:27:47 +0000 | ||
15 | @@ -49,7 +49,7 @@ | ||
16 | {NULL, NULL, NULL} | ||
17 | }; | ||
18 | |||
19 | -struct { | ||
20 | +static struct { | ||
21 | const char *vendor_no; | ||
22 | const struct vendor_cpu *vendor_parts; | ||
23 | } vendors[] = { | ||
24 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch new file mode 100644 index 000000000..1ad48e512 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106833.patch | |||
@@ -0,0 +1,453 @@ | |||
1 | 2011-10-27 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-10-16 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-vect-stmts.c (vectorizable_load): For SLP without permutation | ||
9 | treat the first load of the node as the first element in its | ||
10 | interleaving chain. | ||
11 | * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if | ||
12 | necessary and possible. | ||
13 | (vect_build_slp_tree): Add new argument. Allow load groups of any size | ||
14 | in basic blocks. Keep all the loads for further permutation check. | ||
15 | Use the new argument to determine if there is a permutation. Update | ||
16 | the recursive calls. | ||
17 | (vect_supported_load_permutation_p): Allow subchains of interleaving | ||
18 | chains in basic block vectorization. | ||
19 | (vect_analyze_slp_instance): Update the call to vect_build_slp_tree. | ||
20 | Check load permutation based on the new parameter. | ||
21 | (vect_schedule_slp_instance): Don't start from the first element in | ||
22 | interleaving chain unless the loads are permuted. | ||
23 | |||
24 | gcc/testsuite/ | ||
25 | * gcc.dg/vect/bb-slp-29.c: New test. | ||
26 | |||
27 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-29.c' | ||
28 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 1970-01-01 00:00:00 +0000 | ||
29 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-29.c 2011-10-23 11:29:25 +0000 | ||
30 | @@ -0,0 +1,59 @@ | ||
31 | +/* { dg-require-effective-target vect_int } */ | ||
32 | + | ||
33 | +#include <stdarg.h> | ||
34 | +#include "tree-vect.h" | ||
35 | + | ||
36 | +#define A 3 | ||
37 | +#define B 4 | ||
38 | +#define N 256 | ||
39 | + | ||
40 | +short src[N], dst[N]; | ||
41 | + | ||
42 | +void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) | ||
43 | +{ | ||
44 | + int i; | ||
45 | + h /= 16; | ||
46 | + for (i = 0; i < h; i++) | ||
47 | + { | ||
48 | + dst[0] = A*src[0] + B*src[1]; | ||
49 | + dst[1] = A*src[1] + B*src[2]; | ||
50 | + dst[2] = A*src[2] + B*src[3]; | ||
51 | + dst[3] = A*src[3] + B*src[4]; | ||
52 | + dst[4] = A*src[4] + B*src[5]; | ||
53 | + dst[5] = A*src[5] + B*src[6]; | ||
54 | + dst[6] = A*src[6] + B*src[7]; | ||
55 | + dst[7] = A*src[7] + B*src[8]; | ||
56 | + dst += stride; | ||
57 | + src += stride; | ||
58 | + if (dummy == 32) | ||
59 | + abort (); | ||
60 | + } | ||
61 | +} | ||
62 | + | ||
63 | + | ||
64 | +int main (void) | ||
65 | +{ | ||
66 | + int i; | ||
67 | + | ||
68 | + check_vect (); | ||
69 | + | ||
70 | + for (i = 0; i < N; i++) | ||
71 | + { | ||
72 | + dst[i] = 0; | ||
73 | + src[i] = i; | ||
74 | + } | ||
75 | + | ||
76 | + foo (dst, src, N, 8, 0); | ||
77 | + | ||
78 | + for (i = 0; i < N/2; i++) | ||
79 | + { | ||
80 | + if (dst[i] != A * src[i] + B * src[i+1]) | ||
81 | + abort (); | ||
82 | + } | ||
83 | + | ||
84 | + return 0; | ||
85 | +} | ||
86 | + | ||
87 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && vect_element_align } } } } */ | ||
88 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
89 | + | ||
90 | |||
91 | === modified file 'gcc/tree-vect-slp.c' | ||
92 | --- old/gcc/tree-vect-slp.c 2011-10-06 11:08:08 +0000 | ||
93 | +++ new/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 | ||
94 | @@ -115,13 +115,15 @@ | ||
95 | { | ||
96 | tree oprnd; | ||
97 | unsigned int i, number_of_oprnds; | ||
98 | - tree def; | ||
99 | + tree def[2]; | ||
100 | gimple def_stmt; | ||
101 | enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; | ||
102 | stmt_vec_info stmt_info = | ||
103 | vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); | ||
104 | enum gimple_rhs_class rhs_class; | ||
105 | struct loop *loop = NULL; | ||
106 | + enum tree_code rhs_code; | ||
107 | + bool different_types = false; | ||
108 | |||
109 | if (loop_vinfo) | ||
110 | loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
111 | @@ -133,7 +135,7 @@ | ||
112 | { | ||
113 | oprnd = gimple_op (stmt, i + 1); | ||
114 | |||
115 | - if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, | ||
116 | + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], | ||
117 | &dt[i]) | ||
118 | || (!def_stmt && dt[i] != vect_constant_def)) | ||
119 | { | ||
120 | @@ -188,11 +190,11 @@ | ||
121 | switch (gimple_code (def_stmt)) | ||
122 | { | ||
123 | case GIMPLE_PHI: | ||
124 | - def = gimple_phi_result (def_stmt); | ||
125 | + def[i] = gimple_phi_result (def_stmt); | ||
126 | break; | ||
127 | |||
128 | case GIMPLE_ASSIGN: | ||
129 | - def = gimple_assign_lhs (def_stmt); | ||
130 | + def[i] = gimple_assign_lhs (def_stmt); | ||
131 | break; | ||
132 | |||
133 | default: | ||
134 | @@ -206,8 +208,8 @@ | ||
135 | { | ||
136 | /* op0 of the first stmt of the group - store its info. */ | ||
137 | *first_stmt_dt0 = dt[i]; | ||
138 | - if (def) | ||
139 | - *first_stmt_def0_type = TREE_TYPE (def); | ||
140 | + if (def[i]) | ||
141 | + *first_stmt_def0_type = TREE_TYPE (def[i]); | ||
142 | else | ||
143 | *first_stmt_const_oprnd = oprnd; | ||
144 | |||
145 | @@ -227,8 +229,8 @@ | ||
146 | { | ||
147 | /* op1 of the first stmt of the group - store its info. */ | ||
148 | *first_stmt_dt1 = dt[i]; | ||
149 | - if (def) | ||
150 | - *first_stmt_def1_type = TREE_TYPE (def); | ||
151 | + if (def[i]) | ||
152 | + *first_stmt_def1_type = TREE_TYPE (def[i]); | ||
153 | else | ||
154 | { | ||
155 | /* We assume that the stmt contains only one constant | ||
156 | @@ -249,22 +251,53 @@ | ||
157 | the def-stmt/s of the first stmt. */ | ||
158 | if ((i == 0 | ||
159 | && (*first_stmt_dt0 != dt[i] | ||
160 | - || (*first_stmt_def0_type && def | ||
161 | + || (*first_stmt_def0_type && def[0] | ||
162 | && !types_compatible_p (*first_stmt_def0_type, | ||
163 | - TREE_TYPE (def))))) | ||
164 | + TREE_TYPE (def[0]))))) | ||
165 | || (i == 1 | ||
166 | && (*first_stmt_dt1 != dt[i] | ||
167 | - || (*first_stmt_def1_type && def | ||
168 | + || (*first_stmt_def1_type && def[1] | ||
169 | && !types_compatible_p (*first_stmt_def1_type, | ||
170 | - TREE_TYPE (def))))) | ||
171 | - || (!def | ||
172 | + TREE_TYPE (def[1]))))) | ||
173 | + || (!def[i] | ||
174 | && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), | ||
175 | - TREE_TYPE (oprnd)))) | ||
176 | + TREE_TYPE (oprnd))) | ||
177 | + || different_types) | ||
178 | { | ||
179 | - if (vect_print_dump_info (REPORT_SLP)) | ||
180 | - fprintf (vect_dump, "Build SLP failed: different types "); | ||
181 | + if (i != number_of_oprnds - 1) | ||
182 | + different_types = true; | ||
183 | + else | ||
184 | + { | ||
185 | + if (is_gimple_assign (stmt) | ||
186 | + && (rhs_code = gimple_assign_rhs_code (stmt)) | ||
187 | + && TREE_CODE_CLASS (rhs_code) == tcc_binary | ||
188 | + && commutative_tree_code (rhs_code) | ||
189 | + && *first_stmt_dt0 == dt[1] | ||
190 | + && *first_stmt_dt1 == dt[0] | ||
191 | + && def[0] && def[1] | ||
192 | + && !(*first_stmt_def0_type | ||
193 | + && !types_compatible_p (*first_stmt_def0_type, | ||
194 | + TREE_TYPE (def[1]))) | ||
195 | + && !(*first_stmt_def1_type | ||
196 | + && !types_compatible_p (*first_stmt_def1_type, | ||
197 | + TREE_TYPE (def[0])))) | ||
198 | + { | ||
199 | + if (vect_print_dump_info (REPORT_SLP)) | ||
200 | + { | ||
201 | + fprintf (vect_dump, "Swapping operands of "); | ||
202 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
203 | + } | ||
204 | + swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), | ||
205 | + gimple_assign_rhs2_ptr (stmt)); | ||
206 | + } | ||
207 | + else | ||
208 | + { | ||
209 | + if (vect_print_dump_info (REPORT_SLP)) | ||
210 | + fprintf (vect_dump, "Build SLP failed: different types "); | ||
211 | |||
212 | - return false; | ||
213 | + return false; | ||
214 | + } | ||
215 | + } | ||
216 | } | ||
217 | } | ||
218 | } | ||
219 | @@ -278,10 +311,10 @@ | ||
220 | |||
221 | case vect_internal_def: | ||
222 | case vect_reduction_def: | ||
223 | - if (i == 0) | ||
224 | + if ((i == 0 && !different_types) || (i == 1 && different_types)) | ||
225 | VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); | ||
226 | else | ||
227 | - VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); | ||
228 | + VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); | ||
229 | break; | ||
230 | |||
231 | default: | ||
232 | @@ -289,7 +322,7 @@ | ||
233 | if (vect_print_dump_info (REPORT_SLP)) | ||
234 | { | ||
235 | fprintf (vect_dump, "Build SLP failed: illegal type of def "); | ||
236 | - print_generic_expr (vect_dump, def, TDF_SLIM); | ||
237 | + print_generic_expr (vect_dump, def[i], TDF_SLIM); | ||
238 | } | ||
239 | |||
240 | return false; | ||
241 | @@ -312,7 +345,7 @@ | ||
242 | int ncopies_for_cost, unsigned int *max_nunits, | ||
243 | VEC (int, heap) **load_permutation, | ||
244 | VEC (slp_tree, heap) **loads, | ||
245 | - unsigned int vectorization_factor) | ||
246 | + unsigned int vectorization_factor, bool *loads_permuted) | ||
247 | { | ||
248 | VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); | ||
249 | VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); | ||
250 | @@ -523,7 +556,9 @@ | ||
251 | |||
252 | /* Check that the size of interleaved loads group is not | ||
253 | greater than the SLP group size. */ | ||
254 | - if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) | ||
255 | + if (loop_vinfo | ||
256 | + && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) | ||
257 | + > ncopies * group_size) | ||
258 | { | ||
259 | if (vect_print_dump_info (REPORT_SLP)) | ||
260 | { | ||
261 | @@ -644,19 +679,22 @@ | ||
262 | /* Strided loads were reached - stop the recursion. */ | ||
263 | if (stop_recursion) | ||
264 | { | ||
265 | + VEC_safe_push (slp_tree, heap, *loads, *node); | ||
266 | if (permutation) | ||
267 | { | ||
268 | - VEC_safe_push (slp_tree, heap, *loads, *node); | ||
269 | + | ||
270 | + *loads_permuted = true; | ||
271 | *inside_cost | ||
272 | += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) | ||
273 | * group_size; | ||
274 | } | ||
275 | else | ||
276 | - { | ||
277 | - /* We don't check here complex numbers chains, so we keep them in | ||
278 | - LOADS for further check in vect_supported_load_permutation_p. */ | ||
279 | + { | ||
280 | + /* We don't check here complex numbers chains, so we set | ||
281 | + LOADS_PERMUTED for further check in | ||
282 | + vect_supported_load_permutation_p. */ | ||
283 | if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) | ||
284 | - VEC_safe_push (slp_tree, heap, *loads, *node); | ||
285 | + *loads_permuted = true; | ||
286 | } | ||
287 | |||
288 | return true; | ||
289 | @@ -675,7 +713,7 @@ | ||
290 | if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, | ||
291 | inside_cost, outside_cost, ncopies_for_cost, | ||
292 | max_nunits, load_permutation, loads, | ||
293 | - vectorization_factor)) | ||
294 | + vectorization_factor, loads_permuted)) | ||
295 | return false; | ||
296 | |||
297 | SLP_TREE_LEFT (*node) = left_node; | ||
298 | @@ -693,7 +731,7 @@ | ||
299 | if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, | ||
300 | inside_cost, outside_cost, ncopies_for_cost, | ||
301 | max_nunits, load_permutation, loads, | ||
302 | - vectorization_factor)) | ||
303 | + vectorization_factor, loads_permuted)) | ||
304 | return false; | ||
305 | |||
306 | SLP_TREE_RIGHT (*node) = right_node; | ||
307 | @@ -879,8 +917,10 @@ | ||
308 | bool supported, bad_permutation = false; | ||
309 | sbitmap load_index; | ||
310 | slp_tree node, other_complex_node; | ||
311 | - gimple stmt, first = NULL, other_node_first; | ||
312 | + gimple stmt, first = NULL, other_node_first, load, next_load, first_load; | ||
313 | unsigned complex_numbers = 0; | ||
314 | + struct data_reference *dr; | ||
315 | + bb_vec_info bb_vinfo; | ||
316 | |||
317 | /* FORNOW: permutations are only supported in SLP. */ | ||
318 | if (!slp_instn) | ||
319 | @@ -1040,6 +1080,76 @@ | ||
320 | } | ||
321 | } | ||
322 | |||
323 | + /* In basic block vectorization we allow any subchain of an interleaving | ||
324 | + chain. | ||
325 | + FORNOW: not supported in loop SLP because of realignment complications. */ | ||
326 | + bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); | ||
327 | + bad_permutation = false; | ||
328 | + /* Check that for every node in the instance the loads form a subchain. */ | ||
329 | + if (bb_vinfo) | ||
330 | + { | ||
331 | + FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) | ||
332 | + { | ||
333 | + next_load = NULL; | ||
334 | + first_load = NULL; | ||
335 | + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) | ||
336 | + { | ||
337 | + if (!first_load) | ||
338 | + first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); | ||
339 | + else if (first_load | ||
340 | + != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) | ||
341 | + { | ||
342 | + bad_permutation = true; | ||
343 | + break; | ||
344 | + } | ||
345 | + | ||
346 | + if (j != 0 && next_load != load) | ||
347 | + { | ||
348 | + bad_permutation = true; | ||
349 | + break; | ||
350 | + } | ||
351 | + | ||
352 | + next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); | ||
353 | + } | ||
354 | + | ||
355 | + if (bad_permutation) | ||
356 | + break; | ||
357 | + } | ||
358 | + | ||
359 | + /* Check that the alignment of the first load in every subchain, i.e., | ||
360 | + the first statement in every load node, is supported. */ | ||
361 | + if (!bad_permutation) | ||
362 | + { | ||
363 | + FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) | ||
364 | + { | ||
365 | + first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); | ||
366 | + if (first_load | ||
367 | + != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) | ||
368 | + { | ||
369 | + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); | ||
370 | + if (vect_supportable_dr_alignment (dr, false) | ||
371 | + == dr_unaligned_unsupported) | ||
372 | + { | ||
373 | + if (vect_print_dump_info (REPORT_SLP)) | ||
374 | + { | ||
375 | + fprintf (vect_dump, "unsupported unaligned load "); | ||
376 | + print_gimple_stmt (vect_dump, first_load, 0, | ||
377 | + TDF_SLIM); | ||
378 | + } | ||
379 | + bad_permutation = true; | ||
380 | + break; | ||
381 | + } | ||
382 | + } | ||
383 | + } | ||
384 | + | ||
385 | + if (!bad_permutation) | ||
386 | + { | ||
387 | + VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); | ||
388 | + return true; | ||
389 | + } | ||
390 | + } | ||
391 | + } | ||
392 | + | ||
393 | /* FORNOW: the only supported permutation is 0..01..1.. of length equal to | ||
394 | GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as | ||
395 | well (unless it's reduction). */ | ||
396 | @@ -1149,6 +1259,7 @@ | ||
397 | VEC (int, heap) *load_permutation; | ||
398 | VEC (slp_tree, heap) *loads; | ||
399 | struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); | ||
400 | + bool loads_permuted = false; | ||
401 | |||
402 | if (dr) | ||
403 | { | ||
404 | @@ -1238,7 +1349,7 @@ | ||
405 | if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, | ||
406 | &inside_cost, &outside_cost, ncopies_for_cost, | ||
407 | &max_nunits, &load_permutation, &loads, | ||
408 | - vectorization_factor)) | ||
409 | + vectorization_factor, &loads_permuted)) | ||
410 | { | ||
411 | /* Calculate the unrolling factor based on the smallest type. */ | ||
412 | if (max_nunits > nunits) | ||
413 | @@ -1263,7 +1374,8 @@ | ||
414 | SLP_INSTANCE_LOADS (new_instance) = loads; | ||
415 | SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; | ||
416 | SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; | ||
417 | - if (VEC_length (slp_tree, loads)) | ||
418 | + | ||
419 | + if (loads_permuted) | ||
420 | { | ||
421 | if (!vect_supported_load_permutation_p (new_instance, group_size, | ||
422 | load_permutation)) | ||
423 | @@ -2542,10 +2654,11 @@ | ||
424 | /* Loads should be inserted before the first load. */ | ||
425 | if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) | ||
426 | && STMT_VINFO_STRIDED_ACCESS (stmt_info) | ||
427 | - && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) | ||
428 | + && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) | ||
429 | + && SLP_INSTANCE_LOAD_PERMUTATION (instance)) | ||
430 | si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); | ||
431 | else if (is_pattern_stmt_p (stmt_info)) | ||
432 | - si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
433 | + si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
434 | else | ||
435 | si = gsi_for_stmt (stmt); | ||
436 | |||
437 | |||
438 | === modified file 'gcc/tree-vect-stmts.c' | ||
439 | --- old/gcc/tree-vect-stmts.c 2011-10-16 12:16:07 +0000 | ||
440 | +++ new/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 | ||
441 | @@ -4285,6 +4285,11 @@ | ||
442 | if (strided_load) | ||
443 | { | ||
444 | first_stmt = DR_GROUP_FIRST_DR (stmt_info); | ||
445 | + if (slp | ||
446 | + && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) | ||
447 | + && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) | ||
448 | + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); | ||
449 | + | ||
450 | /* Check if the chain of loads is already vectorized. */ | ||
451 | if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) | ||
452 | { | ||
453 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch new file mode 100644 index 000000000..421a8fe3a --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106834.patch | |||
@@ -0,0 +1,1505 @@ | |||
1 | 2011-10-18 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo, | ||
5 | vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document. | ||
6 | * tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR, | ||
7 | VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. | ||
8 | (op_code_prio): Likewise. | ||
9 | (op_symbol_code): Handle WIDEN_LSHIFT_EXPR. | ||
10 | * optabs.c (optab_for_tree_code): Handle | ||
11 | VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. | ||
12 | (init_optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo. | ||
13 | * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo. | ||
14 | * genopinit.c (optabs): Initialize the new optabs. | ||
15 | * expr.c (expand_expr_real_2): Handle | ||
16 | VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR. | ||
17 | * gimple-pretty-print.c (dump_binary_rhs): Likewise. | ||
18 | * tree-vectorizer.h (NUM_PATTERNS): Increase to 8. | ||
19 | * tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR, | ||
20 | VEC_WIDEN_LSHIFT_LO_EXPR): New. | ||
21 | * cfgexpand.c (expand_debug_expr): Handle new tree codes. | ||
22 | * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add | ||
23 | vect_recog_widen_shift_pattern. | ||
24 | (vect_handle_widen_mult_by_const): Rename... | ||
25 | (vect_handle_widen_op_by_const): ...to this. Handle shifts. | ||
26 | Add a new argument, update documentation. | ||
27 | (vect_recog_widen_mult_pattern): Assume that only second | ||
28 | operand can be constant. Update call to | ||
29 | vect_handle_widen_op_by_const. | ||
30 | (vect_recog_over_widening_pattern): Fix typo. | ||
31 | (vect_recog_widen_shift_pattern): New. | ||
32 | * tree-vect-stmts.c (vectorizable_type_promotion): Handle | ||
33 | widening shifts. | ||
34 | (supportable_widening_operation): Likewise. | ||
35 | * tree-inline.c (estimate_operator_cost): Handle new tree codes. | ||
36 | * tree-vect-generic.c (expand_vector_operations_1): Likewise. | ||
37 | * tree-cfg.c (verify_gimple_assign_binary): Likewise. | ||
38 | * config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New. | ||
39 | (vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>, | ||
40 | vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>): | ||
41 | Likewise. | ||
42 | * config/arm/predicates.md (const_neon_scalar_shift_amount_operand): | ||
43 | New. | ||
44 | * config/arm/iterators.md (V_innermode): New. | ||
45 | * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand | ||
46 | for widening shift. | ||
47 | |||
48 | gcc/testsuite | ||
49 | * testsuite/lib/target-supports.exp | ||
50 | (check_effective_target_vect_widen_shift): New. | ||
51 | * gcc.dg/vect/vect-widen-shift-s16.c: New. | ||
52 | * gcc.dg/vect/vect-widen-shift-s8.c: New. | ||
53 | * gcc.dg/vect/vect-widen-shift-u16.c: New. | ||
54 | * gcc.dg/vect/vect-widen-shift-u8.c: New. | ||
55 | |||
56 | 2011-10-06 Jakub Jelinek <jakub@redhat.com> | ||
57 | |||
58 | gcc/ | ||
59 | * tree-vect-patterns.c (vect_pattern_recog_1): Use | ||
60 | vect_recog_func_ptr typedef for the first argument. | ||
61 | (vect_pattern_recog): Rename vect_recog_func_ptr variable | ||
62 | to vect_recog_func, use vect_recog_func_ptr typedef for it. | ||
63 | |||
64 | 2011-10-16 Ira Rosen <ira.rosen@linaro.org> | ||
65 | |||
66 | gcc/ | ||
67 | PR tree-optimization/50727 | ||
68 | * tree-vect-patterns.c (vect_operation_fits_smaller_type): Add | ||
69 | DEF_STMT to the list of statements to be replaced by the | ||
70 | pattern statements. | ||
71 | |||
72 | 2011-10-09 Ira Rosen <ira.rosen@linaro.org> | ||
73 | |||
74 | gcc/ | ||
75 | PR tree-optimization/50635 | ||
76 | * tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add | ||
77 | DEF_STMT to the list of statements to be replaced by the | ||
78 | pattern statements. | ||
79 | (vect_handle_widen_mult_by_const): Don't check TYPE_OUT. | ||
80 | |||
81 | === modified file 'gcc/cfgexpand.c' | ||
82 | --- old/gcc/cfgexpand.c 2011-07-01 09:19:21 +0000 | ||
83 | +++ new/gcc/cfgexpand.c 2011-10-23 13:33:07 +0000 | ||
84 | @@ -3215,6 +3215,8 @@ | ||
85 | case VEC_UNPACK_LO_EXPR: | ||
86 | case VEC_WIDEN_MULT_HI_EXPR: | ||
87 | case VEC_WIDEN_MULT_LO_EXPR: | ||
88 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
89 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
90 | return NULL; | ||
91 | |||
92 | /* Misc codes. */ | ||
93 | |||
94 | === modified file 'gcc/config/arm/iterators.md' | ||
95 | --- old/gcc/config/arm/iterators.md 2011-09-06 14:29:24 +0000 | ||
96 | +++ new/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 | ||
97 | @@ -388,6 +388,9 @@ | ||
98 | (define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) | ||
99 | (define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) | ||
100 | |||
101 | +;; Mode attribute for vshll. | ||
102 | +(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) | ||
103 | + | ||
104 | ;;---------------------------------------------------------------------------- | ||
105 | ;; Code attributes | ||
106 | ;;---------------------------------------------------------------------------- | ||
107 | |||
108 | === modified file 'gcc/config/arm/neon.md' | ||
109 | --- old/gcc/config/arm/neon.md 2011-10-03 01:32:17 +0000 | ||
110 | +++ new/gcc/config/arm/neon.md 2011-10-23 13:33:07 +0000 | ||
111 | @@ -5316,6 +5316,44 @@ | ||
112 | } | ||
113 | ) | ||
114 | |||
115 | +(define_insn "neon_vec_<US>shiftl_<mode>" | ||
116 | + [(set (match_operand:<V_widen> 0 "register_operand" "=w") | ||
117 | + (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") | ||
118 | + (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] | ||
119 | + "TARGET_NEON" | ||
120 | +{ | ||
121 | + return "vshll.<US><V_sz_elem> %q0, %P1, %2"; | ||
122 | +} | ||
123 | + [(set_attr "neon_type" "neon_shift_1")] | ||
124 | +) | ||
125 | + | ||
126 | +(define_expand "vec_widen_<US>shiftl_lo_<mode>" | ||
127 | + [(match_operand:<V_unpack> 0 "register_operand" "") | ||
128 | + (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) | ||
129 | + (match_operand:SI 2 "immediate_operand" "i")] | ||
130 | + "TARGET_NEON && !BYTES_BIG_ENDIAN" | ||
131 | + { | ||
132 | + emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], | ||
133 | + simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), | ||
134 | + operands[2])); | ||
135 | + DONE; | ||
136 | + } | ||
137 | +) | ||
138 | + | ||
139 | +(define_expand "vec_widen_<US>shiftl_hi_<mode>" | ||
140 | + [(match_operand:<V_unpack> 0 "register_operand" "") | ||
141 | + (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) | ||
142 | + (match_operand:SI 2 "immediate_operand" "i")] | ||
143 | + "TARGET_NEON && !BYTES_BIG_ENDIAN" | ||
144 | + { | ||
145 | + emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], | ||
146 | + simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, | ||
147 | + GET_MODE_SIZE (<V_HALF>mode)), | ||
148 | + operands[2])); | ||
149 | + DONE; | ||
150 | + } | ||
151 | +) | ||
152 | + | ||
153 | ;; Vectorize for non-neon-quad case | ||
154 | (define_insn "neon_unpack<US>_<mode>" | ||
155 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") | ||
156 | @@ -5392,6 +5430,34 @@ | ||
157 | } | ||
158 | ) | ||
159 | |||
160 | +(define_expand "vec_widen_<US>shiftl_hi_<mode>" | ||
161 | + [(match_operand:<V_double_width> 0 "register_operand" "") | ||
162 | + (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) | ||
163 | + (match_operand:SI 2 "immediate_operand" "i")] | ||
164 | + "TARGET_NEON" | ||
165 | + { | ||
166 | + rtx tmpreg = gen_reg_rtx (<V_widen>mode); | ||
167 | + emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); | ||
168 | + emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); | ||
169 | + | ||
170 | + DONE; | ||
171 | + } | ||
172 | +) | ||
173 | + | ||
174 | +(define_expand "vec_widen_<US>shiftl_lo_<mode>" | ||
175 | + [(match_operand:<V_double_width> 0 "register_operand" "") | ||
176 | + (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) | ||
177 | + (match_operand:SI 2 "immediate_operand" "i")] | ||
178 | + "TARGET_NEON" | ||
179 | + { | ||
180 | + rtx tmpreg = gen_reg_rtx (<V_widen>mode); | ||
181 | + emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); | ||
182 | + emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); | ||
183 | + | ||
184 | + DONE; | ||
185 | + } | ||
186 | +) | ||
187 | + | ||
188 | ;; The case when using all quad registers. | ||
189 | (define_insn "vec_pack_trunc_<mode>" | ||
190 | [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") | ||
191 | |||
192 | === modified file 'gcc/config/arm/predicates.md' | ||
193 | --- old/gcc/config/arm/predicates.md 2011-10-10 11:43:28 +0000 | ||
194 | +++ new/gcc/config/arm/predicates.md 2011-10-23 13:33:07 +0000 | ||
195 | @@ -136,6 +136,11 @@ | ||
196 | (match_operand 0 "s_register_operand")) | ||
197 | (match_operand 0 "const_int_operand"))) | ||
198 | |||
199 | +(define_predicate "const_neon_scalar_shift_amount_operand" | ||
200 | + (and (match_code "const_int") | ||
201 | + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) | ||
202 | + && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) | ||
203 | + | ||
204 | (define_predicate "arm_add_operand" | ||
205 | (ior (match_operand 0 "arm_rhs_operand") | ||
206 | (match_operand 0 "arm_neg_immediate_operand"))) | ||
207 | |||
208 | === modified file 'gcc/doc/md.texi' | ||
209 | --- old/gcc/doc/md.texi 2011-08-13 08:32:32 +0000 | ||
210 | +++ new/gcc/doc/md.texi 2011-10-23 13:33:07 +0000 | ||
211 | @@ -4230,6 +4230,17 @@ | ||
212 | elements of the two vectors, and put the N/2 products of size 2*S in the | ||
213 | output vector (operand 0). | ||
214 | |||
215 | +@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern | ||
216 | +@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern | ||
217 | +@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern | ||
218 | +@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern | ||
219 | +@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}} | ||
220 | +@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}} | ||
221 | +Signed/Unsigned widening shift left. The first input (operand 1) is a vector | ||
222 | +with N signed/unsigned elements of size S@. Operand 2 is a constant. Shift | ||
223 | +the high/low elements of operand 1, and put the N/2 results of size 2*S in the | ||
224 | +output vector (operand 0). | ||
225 | + | ||
226 | @cindex @code{mulhisi3} instruction pattern | ||
227 | @item @samp{mulhisi3} | ||
228 | Multiply operands 1 and 2, which have mode @code{HImode}, and store | ||
229 | |||
230 | === modified file 'gcc/expr.c' | ||
231 | --- old/gcc/expr.c 2011-08-25 11:42:09 +0000 | ||
232 | +++ new/gcc/expr.c 2011-10-23 13:33:07 +0000 | ||
233 | @@ -8290,6 +8290,19 @@ | ||
234 | return target; | ||
235 | } | ||
236 | |||
237 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
238 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
239 | + { | ||
240 | + tree oprnd0 = treeop0; | ||
241 | + tree oprnd1 = treeop1; | ||
242 | + | ||
243 | + expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); | ||
244 | + target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX, | ||
245 | + target, unsignedp); | ||
246 | + gcc_assert (target); | ||
247 | + return target; | ||
248 | + } | ||
249 | + | ||
250 | case VEC_PACK_TRUNC_EXPR: | ||
251 | case VEC_PACK_SAT_EXPR: | ||
252 | case VEC_PACK_FIX_TRUNC_EXPR: | ||
253 | |||
254 | === modified file 'gcc/genopinit.c' | ||
255 | --- old/gcc/genopinit.c 2011-07-15 13:06:31 +0000 | ||
256 | +++ new/gcc/genopinit.c 2011-10-23 13:33:07 +0000 | ||
257 | @@ -268,6 +268,10 @@ | ||
258 | "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))", | ||
259 | "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))", | ||
260 | "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))", | ||
261 | + "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))", | ||
262 | + "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))", | ||
263 | + "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))", | ||
264 | + "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))", | ||
265 | "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))", | ||
266 | "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))", | ||
267 | "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))", | ||
268 | |||
269 | === modified file 'gcc/gimple-pretty-print.c' | ||
270 | --- old/gcc/gimple-pretty-print.c 2011-05-05 15:42:22 +0000 | ||
271 | +++ new/gcc/gimple-pretty-print.c 2011-10-23 13:33:07 +0000 | ||
272 | @@ -343,6 +343,8 @@ | ||
273 | case VEC_EXTRACT_ODD_EXPR: | ||
274 | case VEC_INTERLEAVE_HIGH_EXPR: | ||
275 | case VEC_INTERLEAVE_LOW_EXPR: | ||
276 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
277 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
278 | for (p = tree_code_name [(int) code]; *p; p++) | ||
279 | pp_character (buffer, TOUPPER (*p)); | ||
280 | pp_string (buffer, " <"); | ||
281 | |||
282 | === modified file 'gcc/optabs.c' | ||
283 | --- old/gcc/optabs.c 2011-08-11 15:46:01 +0000 | ||
284 | +++ new/gcc/optabs.c 2011-10-23 13:33:07 +0000 | ||
285 | @@ -454,6 +454,14 @@ | ||
286 | return TYPE_UNSIGNED (type) ? | ||
287 | vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; | ||
288 | |||
289 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
290 | + return TYPE_UNSIGNED (type) ? | ||
291 | + vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; | ||
292 | + | ||
293 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
294 | + return TYPE_UNSIGNED (type) ? | ||
295 | + vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; | ||
296 | + | ||
297 | case VEC_UNPACK_HI_EXPR: | ||
298 | return TYPE_UNSIGNED (type) ? | ||
299 | vec_unpacku_hi_optab : vec_unpacks_hi_optab; | ||
300 | @@ -6351,6 +6359,10 @@ | ||
301 | init_optab (vec_widen_umult_lo_optab, UNKNOWN); | ||
302 | init_optab (vec_widen_smult_hi_optab, UNKNOWN); | ||
303 | init_optab (vec_widen_smult_lo_optab, UNKNOWN); | ||
304 | + init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); | ||
305 | + init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); | ||
306 | + init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); | ||
307 | + init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN); | ||
308 | init_optab (vec_unpacks_hi_optab, UNKNOWN); | ||
309 | init_optab (vec_unpacks_lo_optab, UNKNOWN); | ||
310 | init_optab (vec_unpacku_hi_optab, UNKNOWN); | ||
311 | |||
312 | === modified file 'gcc/optabs.h' | ||
313 | --- old/gcc/optabs.h 2011-07-27 14:12:45 +0000 | ||
314 | +++ new/gcc/optabs.h 2011-10-23 13:33:07 +0000 | ||
315 | @@ -350,6 +350,12 @@ | ||
316 | OTI_vec_widen_umult_lo, | ||
317 | OTI_vec_widen_smult_hi, | ||
318 | OTI_vec_widen_smult_lo, | ||
319 | + /* Widening shift left. | ||
320 | + The high/low part of the resulting vector is returned. */ | ||
321 | + OTI_vec_widen_ushiftl_hi, | ||
322 | + OTI_vec_widen_ushiftl_lo, | ||
323 | + OTI_vec_widen_sshiftl_hi, | ||
324 | + OTI_vec_widen_sshiftl_lo, | ||
325 | /* Extract and widen the high/low part of a vector of signed or | ||
326 | floating point elements. */ | ||
327 | OTI_vec_unpacks_hi, | ||
328 | @@ -542,6 +548,10 @@ | ||
329 | #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) | ||
330 | #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) | ||
331 | #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) | ||
332 | +#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) | ||
333 | +#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) | ||
334 | +#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) | ||
335 | +#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) | ||
336 | #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) | ||
337 | #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) | ||
338 | #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) | ||
339 | |||
340 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c' | ||
341 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 1970-01-01 00:00:00 +0000 | ||
342 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c 2011-10-23 13:33:07 +0000 | ||
343 | @@ -0,0 +1,107 @@ | ||
344 | +/* { dg-require-effective-target vect_int } */ | ||
345 | +/* { dg-require-effective-target vect_shift } */ | ||
346 | + | ||
347 | +#include <stdarg.h> | ||
348 | +#include "tree-vect.h" | ||
349 | + | ||
350 | +#define N 64 | ||
351 | +#define C 16 | ||
352 | + | ||
353 | +__attribute__ ((noinline)) void | ||
354 | +foo (short *src, int *dst) | ||
355 | +{ | ||
356 | + int i; | ||
357 | + short b, b0, b1, b2, b3, *s = src; | ||
358 | + int *d = dst; | ||
359 | + | ||
360 | + for (i = 0; i < N/4; i++) | ||
361 | + { | ||
362 | + b0 = *s++; | ||
363 | + b1 = *s++; | ||
364 | + b2 = *s++; | ||
365 | + b3 = *s++; | ||
366 | + *d = b0 << C; | ||
367 | + d++; | ||
368 | + *d = b1 << C; | ||
369 | + d++; | ||
370 | + *d = b2 << C; | ||
371 | + d++; | ||
372 | + *d = b3 << C; | ||
373 | + d++; | ||
374 | + } | ||
375 | + | ||
376 | + s = src; | ||
377 | + d = dst; | ||
378 | + for (i = 0; i < N; i++) | ||
379 | + { | ||
380 | + b = *s++; | ||
381 | + if (*d != b << C) | ||
382 | + abort (); | ||
383 | + d++; | ||
384 | + } | ||
385 | + | ||
386 | + s = src; | ||
387 | + d = dst; | ||
388 | + for (i = 0; i < N/4; i++) | ||
389 | + { | ||
390 | + b0 = *s++; | ||
391 | + b1 = *s++; | ||
392 | + b2 = *s++; | ||
393 | + b3 = *s++; | ||
394 | + *d = b0 << C; | ||
395 | + d++; | ||
396 | + *d = b1 << C; | ||
397 | + d++; | ||
398 | + *d = b2 << C; | ||
399 | + d++; | ||
400 | + *d = b3 << 6; | ||
401 | + d++; | ||
402 | + } | ||
403 | + | ||
404 | + s = src; | ||
405 | + d = dst; | ||
406 | + for (i = 0; i < N/4; i++) | ||
407 | + { | ||
408 | + b = *s++; | ||
409 | + if (*d != b << C) | ||
410 | + abort (); | ||
411 | + d++; | ||
412 | + b = *s++; | ||
413 | + if (*d != b << C) | ||
414 | + abort (); | ||
415 | + d++; | ||
416 | + b = *s++; | ||
417 | + if (*d != b << C) | ||
418 | + abort (); | ||
419 | + d++; | ||
420 | + b = *s++; | ||
421 | + if (*d != b << 6) | ||
422 | + abort (); | ||
423 | + d++; | ||
424 | + } | ||
425 | +} | ||
426 | + | ||
427 | +int main (void) | ||
428 | +{ | ||
429 | + int i; | ||
430 | + short in[N]; | ||
431 | + int out[N]; | ||
432 | + | ||
433 | + check_vect (); | ||
434 | + | ||
435 | + for (i = 0; i < N; i++) | ||
436 | + { | ||
437 | + in[i] = i; | ||
438 | + out[i] = 255; | ||
439 | + __asm__ volatile (""); | ||
440 | + } | ||
441 | + | ||
442 | + foo (in, out); | ||
443 | + | ||
444 | + return 0; | ||
445 | +} | ||
446 | + | ||
447 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */ | ||
448 | +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ | ||
449 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
450 | + | ||
451 | |||
452 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c' | ||
453 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 1970-01-01 00:00:00 +0000 | ||
454 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c 2011-10-23 13:33:07 +0000 | ||
455 | @@ -0,0 +1,58 @@ | ||
456 | +/* { dg-require-effective-target vect_int } */ | ||
457 | +/* { dg-require-effective-target vect_shift } */ | ||
458 | + | ||
459 | +#include <stdarg.h> | ||
460 | +#include "tree-vect.h" | ||
461 | + | ||
462 | +#define N 64 | ||
463 | +#define C 12 | ||
464 | + | ||
465 | +__attribute__ ((noinline)) void | ||
466 | +foo (char *src, int *dst) | ||
467 | +{ | ||
468 | + int i; | ||
469 | + char b, *s = src; | ||
470 | + int *d = dst; | ||
471 | + | ||
472 | + for (i = 0; i < N; i++) | ||
473 | + { | ||
474 | + b = *s++; | ||
475 | + *d = b << C; | ||
476 | + d++; | ||
477 | + } | ||
478 | + | ||
479 | + s = src; | ||
480 | + d = dst; | ||
481 | + for (i = 0; i < N; i++) | ||
482 | + { | ||
483 | + b = *s++; | ||
484 | + if (*d != b << C) | ||
485 | + abort (); | ||
486 | + d++; | ||
487 | + } | ||
488 | +} | ||
489 | + | ||
490 | +int main (void) | ||
491 | +{ | ||
492 | + int i; | ||
493 | + char in[N]; | ||
494 | + int out[N]; | ||
495 | + | ||
496 | + check_vect (); | ||
497 | + | ||
498 | + for (i = 0; i < N; i++) | ||
499 | + { | ||
500 | + in[i] = i; | ||
501 | + out[i] = 255; | ||
502 | + __asm__ volatile (""); | ||
503 | + } | ||
504 | + | ||
505 | + foo (in, out); | ||
506 | + | ||
507 | + return 0; | ||
508 | +} | ||
509 | + | ||
510 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ | ||
511 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
512 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
513 | + | ||
514 | |||
515 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c' | ||
516 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 1970-01-01 00:00:00 +0000 | ||
517 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c 2011-10-23 13:33:07 +0000 | ||
518 | @@ -0,0 +1,58 @@ | ||
519 | +/* { dg-require-effective-target vect_int } */ | ||
520 | +/* { dg-require-effective-target vect_shift } */ | ||
521 | + | ||
522 | +#include <stdarg.h> | ||
523 | +#include "tree-vect.h" | ||
524 | + | ||
525 | +#define N 64 | ||
526 | +#define C 7 | ||
527 | + | ||
528 | +__attribute__ ((noinline)) void | ||
529 | +foo (unsigned short *src, unsigned int *dst) | ||
530 | +{ | ||
531 | + int i; | ||
532 | + unsigned short b, *s = src; | ||
533 | + unsigned int *d = dst; | ||
534 | + | ||
535 | + for (i = 0; i < N; i++) | ||
536 | + { | ||
537 | + b = *s++; | ||
538 | + *d = b << C; | ||
539 | + d++; | ||
540 | + } | ||
541 | + | ||
542 | + s = src; | ||
543 | + d = dst; | ||
544 | + for (i = 0; i < N; i++) | ||
545 | + { | ||
546 | + b = *s++; | ||
547 | + if (*d != b << C) | ||
548 | + abort (); | ||
549 | + d++; | ||
550 | + } | ||
551 | +} | ||
552 | + | ||
553 | +int main (void) | ||
554 | +{ | ||
555 | + int i; | ||
556 | + unsigned short in[N]; | ||
557 | + unsigned int out[N]; | ||
558 | + | ||
559 | + check_vect (); | ||
560 | + | ||
561 | + for (i = 0; i < N; i++) | ||
562 | + { | ||
563 | + in[i] = i; | ||
564 | + out[i] = 255; | ||
565 | + __asm__ volatile (""); | ||
566 | + } | ||
567 | + | ||
568 | + foo (in, out); | ||
569 | + | ||
570 | + return 0; | ||
571 | +} | ||
572 | + | ||
573 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ | ||
574 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
575 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
576 | + | ||
577 | |||
578 | === added file 'gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c' | ||
579 | --- old/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 1970-01-01 00:00:00 +0000 | ||
580 | +++ new/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c 2011-10-23 13:33:07 +0000 | ||
581 | @@ -0,0 +1,65 @@ | ||
582 | +/* { dg-require-effective-target vect_int } */ | ||
583 | +/* { dg-require-effective-target vect_shift } */ | ||
584 | + | ||
585 | +#include <stdarg.h> | ||
586 | +#include "tree-vect.h" | ||
587 | + | ||
588 | +#define N 64 | ||
589 | +#define C1 10 | ||
590 | +#define C2 5 | ||
591 | + | ||
592 | +__attribute__ ((noinline)) void | ||
593 | +foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2) | ||
594 | +{ | ||
595 | + int i; | ||
596 | + unsigned char b, *s = src; | ||
597 | + unsigned int *d1 = dst1, *d2 = dst2; | ||
598 | + | ||
599 | + for (i = 0; i < N; i++) | ||
600 | + { | ||
601 | + b = *s++; | ||
602 | + *d1 = b << C1; | ||
603 | + d1++; | ||
604 | + *d2 = b << C2; | ||
605 | + d2++; | ||
606 | + } | ||
607 | + | ||
608 | + s = src; | ||
609 | + d1 = dst1; | ||
610 | + d2 = dst2; | ||
611 | + for (i = 0; i < N; i++) | ||
612 | + { | ||
613 | + b = *s++; | ||
614 | + if (*d1 != b << C1 || *d2 != b << C2) | ||
615 | + abort (); | ||
616 | + d1++; | ||
617 | + d2++; | ||
618 | + } | ||
619 | +} | ||
620 | + | ||
621 | +int main (void) | ||
622 | +{ | ||
623 | + int i; | ||
624 | + unsigned char in[N]; | ||
625 | + unsigned int out1[N]; | ||
626 | + unsigned int out2[N]; | ||
627 | + | ||
628 | + check_vect (); | ||
629 | + | ||
630 | + for (i = 0; i < N; i++) | ||
631 | + { | ||
632 | + in[i] = i; | ||
633 | + out1[i] = 255; | ||
634 | + out2[i] = 255; | ||
635 | + __asm__ volatile (""); | ||
636 | + } | ||
637 | + | ||
638 | + foo (in, out1, out2); | ||
639 | + | ||
640 | + return 0; | ||
641 | +} | ||
642 | + | ||
643 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ | ||
644 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ | ||
645 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
646 | + | ||
647 | |||
648 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
649 | --- old/gcc/testsuite/lib/target-supports.exp 2011-10-06 11:08:08 +0000 | ||
650 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 | ||
651 | @@ -2783,6 +2783,26 @@ | ||
652 | } | ||
653 | |||
654 | # Return 1 if the target plus current options supports a vector | ||
655 | +# widening shift, 0 otherwise. | ||
656 | +# | ||
657 | +# This won't change for different subtargets so cache the result. | ||
658 | + | ||
659 | +proc check_effective_target_vect_widen_shift { } { | ||
660 | + global et_vect_widen_shift_saved | ||
661 | + | ||
662 | + if [info exists et_vect_widen_shift_saved] { | ||
663 | + verbose "check_effective_target_vect_widen_shift: using cached result" 2 | ||
664 | + } else { | ||
665 | + set et_vect_widen_shift_saved 0 | ||
666 | + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { | ||
667 | + set et_vect_widen_shift_saved 1 | ||
668 | + } | ||
669 | + } | ||
670 | + verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 | ||
671 | + return $et_vect_widen_shift_saved | ||
672 | +} | ||
673 | + | ||
674 | +# Return 1 if the target plus current options supports a vector | ||
675 | # dot-product of signed chars, 0 otherwise. | ||
676 | # | ||
677 | # This won't change for different subtargets so cache the result. | ||
678 | |||
679 | === modified file 'gcc/tree-cfg.c' | ||
680 | --- old/gcc/tree-cfg.c 2011-07-15 13:44:50 +0000 | ||
681 | +++ new/gcc/tree-cfg.c 2011-10-23 13:33:07 +0000 | ||
682 | @@ -3473,6 +3473,44 @@ | ||
683 | return false; | ||
684 | } | ||
685 | |||
686 | + case WIDEN_LSHIFT_EXPR: | ||
687 | + { | ||
688 | + if (!INTEGRAL_TYPE_P (lhs_type) | ||
689 | + || !INTEGRAL_TYPE_P (rhs1_type) | ||
690 | + || TREE_CODE (rhs2) != INTEGER_CST | ||
691 | + || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) | ||
692 | + { | ||
693 | + error ("type mismatch in widening vector shift expression"); | ||
694 | + debug_generic_expr (lhs_type); | ||
695 | + debug_generic_expr (rhs1_type); | ||
696 | + debug_generic_expr (rhs2_type); | ||
697 | + return true; | ||
698 | + } | ||
699 | + | ||
700 | + return false; | ||
701 | + } | ||
702 | + | ||
703 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
704 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
705 | + { | ||
706 | + if (TREE_CODE (rhs1_type) != VECTOR_TYPE | ||
707 | + || TREE_CODE (lhs_type) != VECTOR_TYPE | ||
708 | + || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) | ||
709 | + || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) | ||
710 | + || TREE_CODE (rhs2) != INTEGER_CST | ||
711 | + || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) | ||
712 | + > TYPE_PRECISION (TREE_TYPE (lhs_type)))) | ||
713 | + { | ||
714 | + error ("type mismatch in widening vector shift expression"); | ||
715 | + debug_generic_expr (lhs_type); | ||
716 | + debug_generic_expr (rhs1_type); | ||
717 | + debug_generic_expr (rhs2_type); | ||
718 | + return true; | ||
719 | + } | ||
720 | + | ||
721 | + return false; | ||
722 | + } | ||
723 | + | ||
724 | case PLUS_EXPR: | ||
725 | case MINUS_EXPR: | ||
726 | { | ||
727 | |||
728 | === modified file 'gcc/tree-inline.c' | ||
729 | --- old/gcc/tree-inline.c 2011-08-13 08:32:32 +0000 | ||
730 | +++ new/gcc/tree-inline.c 2011-10-23 13:33:07 +0000 | ||
731 | @@ -3343,6 +3343,7 @@ | ||
732 | case DOT_PROD_EXPR: | ||
733 | case WIDEN_MULT_PLUS_EXPR: | ||
734 | case WIDEN_MULT_MINUS_EXPR: | ||
735 | + case WIDEN_LSHIFT_EXPR: | ||
736 | |||
737 | case VEC_WIDEN_MULT_HI_EXPR: | ||
738 | case VEC_WIDEN_MULT_LO_EXPR: | ||
739 | @@ -3357,6 +3358,8 @@ | ||
740 | case VEC_EXTRACT_ODD_EXPR: | ||
741 | case VEC_INTERLEAVE_HIGH_EXPR: | ||
742 | case VEC_INTERLEAVE_LOW_EXPR: | ||
743 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
744 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
745 | |||
746 | return 1; | ||
747 | |||
748 | |||
749 | === modified file 'gcc/tree-pretty-print.c' | ||
750 | --- old/gcc/tree-pretty-print.c 2010-11-05 09:00:50 +0000 | ||
751 | +++ new/gcc/tree-pretty-print.c 2011-10-23 13:33:07 +0000 | ||
752 | @@ -1539,6 +1539,7 @@ | ||
753 | case RROTATE_EXPR: | ||
754 | case VEC_LSHIFT_EXPR: | ||
755 | case VEC_RSHIFT_EXPR: | ||
756 | + case WIDEN_LSHIFT_EXPR: | ||
757 | case BIT_IOR_EXPR: | ||
758 | case BIT_XOR_EXPR: | ||
759 | case BIT_AND_EXPR: | ||
760 | @@ -2209,6 +2210,22 @@ | ||
761 | pp_string (buffer, " > "); | ||
762 | break; | ||
763 | |||
764 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
765 | + pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); | ||
766 | + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); | ||
767 | + pp_string (buffer, ", "); | ||
768 | + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); | ||
769 | + pp_string (buffer, " > "); | ||
770 | + break; | ||
771 | + | ||
772 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
773 | + pp_string (buffer, " VEC_WIDEN_LSHIFT_LO_EXPR < "); | ||
774 | + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); | ||
775 | + pp_string (buffer, ", "); | ||
776 | + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); | ||
777 | + pp_string (buffer, " > "); | ||
778 | + break; | ||
779 | + | ||
780 | case VEC_UNPACK_HI_EXPR: | ||
781 | pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); | ||
782 | dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); | ||
783 | @@ -2531,6 +2548,9 @@ | ||
784 | case RSHIFT_EXPR: | ||
785 | case LROTATE_EXPR: | ||
786 | case RROTATE_EXPR: | ||
787 | + case VEC_WIDEN_LSHIFT_HI_EXPR: | ||
788 | + case VEC_WIDEN_LSHIFT_LO_EXPR: | ||
789 | + case WIDEN_LSHIFT_EXPR: | ||
790 | return 11; | ||
791 | |||
792 | case WIDEN_SUM_EXPR: | ||
793 | @@ -2706,6 +2726,9 @@ | ||
794 | case VEC_RSHIFT_EXPR: | ||
795 | return "v>>"; | ||
796 | |||
797 | + case WIDEN_LSHIFT_EXPR: | ||
798 | + return "w<<"; | ||
799 | + | ||
800 | case POINTER_PLUS_EXPR: | ||
801 | return "+"; | ||
802 | |||
803 | |||
804 | === modified file 'gcc/tree-vect-generic.c' | ||
805 | --- old/gcc/tree-vect-generic.c 2011-02-08 14:16:50 +0000 | ||
806 | +++ new/gcc/tree-vect-generic.c 2011-10-23 13:33:07 +0000 | ||
807 | @@ -552,7 +552,9 @@ | ||
808 | || code == VEC_UNPACK_LO_EXPR | ||
809 | || code == VEC_PACK_TRUNC_EXPR | ||
810 | || code == VEC_PACK_SAT_EXPR | ||
811 | - || code == VEC_PACK_FIX_TRUNC_EXPR) | ||
812 | + || code == VEC_PACK_FIX_TRUNC_EXPR | ||
813 | + || code == VEC_WIDEN_LSHIFT_HI_EXPR | ||
814 | + || code == VEC_WIDEN_LSHIFT_LO_EXPR) | ||
815 | type = TREE_TYPE (rhs1); | ||
816 | |||
817 | /* Optabs will try converting a negation into a subtraction, so | ||
818 | |||
819 | === modified file 'gcc/tree-vect-patterns.c' | ||
820 | --- old/gcc/tree-vect-patterns.c 2011-09-05 06:23:37 +0000 | ||
821 | +++ new/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000 | ||
822 | @@ -48,12 +48,15 @@ | ||
823 | static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); | ||
824 | static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, | ||
825 | tree *); | ||
826 | +static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, | ||
827 | + tree *, tree *); | ||
828 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
829 | vect_recog_widen_mult_pattern, | ||
830 | vect_recog_widen_sum_pattern, | ||
831 | vect_recog_dot_prod_pattern, | ||
832 | vect_recog_pow_pattern, | ||
833 | - vect_recog_over_widening_pattern}; | ||
834 | + vect_recog_over_widening_pattern, | ||
835 | + vect_recog_widen_shift_pattern}; | ||
836 | |||
837 | |||
838 | /* Function widened_name_p | ||
839 | @@ -331,27 +334,38 @@ | ||
840 | return pattern_stmt; | ||
841 | } | ||
842 | |||
843 | -/* Handle two cases of multiplication by a constant. The first one is when | ||
844 | - the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second | ||
845 | - operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to | ||
846 | - TYPE. | ||
847 | + | ||
848 | +/* Handle widening operation by a constant. At the moment we support MULT_EXPR | ||
849 | + and LSHIFT_EXPR. | ||
850 | + | ||
851 | + For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR | ||
852 | + we check that CONST_OPRND is less than or equal to the size of HALF_TYPE. | |
853 | |||
854 | Otherwise, if the type of the result (TYPE) is at least 4 times bigger than | ||
855 | - HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than | ||
856 | - TYPE), we can perform widen-mult from the intermediate type to TYPE and | ||
857 | - replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */ | ||
858 | + HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) | ||
859 | + that satisfies the above restrictions, we can perform a widening operation | |
860 | + from the intermediate type to TYPE and replace a_T = (TYPE) a_t; | ||
861 | + with a_it = (interm_type) a_t; */ | ||
862 | |||
863 | static bool | ||
864 | -vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd, | ||
865 | - VEC (gimple, heap) **stmts, tree type, | ||
866 | - tree *half_type, gimple def_stmt) | ||
867 | +vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, | ||
868 | + tree const_oprnd, tree *oprnd, | ||
869 | + VEC (gimple, heap) **stmts, tree type, | ||
870 | + tree *half_type, gimple def_stmt) | ||
871 | { | ||
872 | tree new_type, new_oprnd, tmp; | ||
873 | gimple new_stmt; | ||
874 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
875 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
876 | |||
877 | - if (int_fits_type_p (const_oprnd, *half_type)) | ||
878 | + if (code != MULT_EXPR && code != LSHIFT_EXPR) | ||
879 | + return false; | ||
880 | + | ||
881 | + if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) | ||
882 | + || (code == LSHIFT_EXPR | ||
883 | + && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) | ||
884 | + != 1)) | ||
885 | + && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) | ||
886 | { | ||
887 | /* CONST_OPRND is a constant of HALF_TYPE. */ | ||
888 | *oprnd = gimple_assign_rhs1 (def_stmt); | ||
889 | @@ -364,14 +378,16 @@ | ||
890 | || !vinfo_for_stmt (def_stmt)) | ||
891 | return false; | ||
892 | |||
893 | - /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for | ||
894 | + /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for | ||
895 | a type 2 times bigger than HALF_TYPE. */ | ||
896 | new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, | ||
897 | TYPE_UNSIGNED (type)); | ||
898 | - if (!int_fits_type_p (const_oprnd, new_type)) | ||
899 | + if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) | ||
900 | + || (code == LSHIFT_EXPR | ||
901 | + && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) | ||
902 | return false; | ||
903 | |||
904 | - /* Use NEW_TYPE for widen_mult. */ | ||
905 | + /* Use NEW_TYPE for widening operation. */ | ||
906 | if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) | ||
907 | { | ||
908 | new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
909 | @@ -381,6 +397,7 @@ | ||
910 | || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) | ||
911 | return false; | ||
912 | |||
913 | + VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
914 | *oprnd = gimple_assign_lhs (new_stmt); | ||
915 | } | ||
916 | else | ||
917 | @@ -392,7 +409,6 @@ | ||
918 | new_oprnd = make_ssa_name (tmp, NULL); | ||
919 | new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, | ||
920 | NULL_TREE); | ||
921 | - SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; | ||
922 | STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; | ||
923 | VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
924 | *oprnd = new_oprnd; | ||
925 | @@ -402,7 +418,6 @@ | ||
926 | return true; | ||
927 | } | ||
928 | |||
929 | - | ||
930 | /* Function vect_recog_widen_mult_pattern | ||
931 | |||
932 | Try to find the following pattern: | ||
933 | @@ -491,7 +506,7 @@ | ||
934 | enum tree_code dummy_code; | ||
935 | int dummy_int; | ||
936 | VEC (tree, heap) *dummy_vec; | ||
937 | - bool op0_ok, op1_ok; | ||
938 | + bool op1_ok; | ||
939 | |||
940 | if (!is_gimple_assign (last_stmt)) | ||
941 | return NULL; | ||
942 | @@ -511,38 +526,23 @@ | ||
943 | return NULL; | ||
944 | |||
945 | /* Check argument 0. */ | ||
946 | - op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); | ||
947 | + if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) | ||
948 | + return NULL; | ||
949 | /* Check argument 1. */ | ||
950 | op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); | ||
951 | |||
952 | - /* In case of multiplication by a constant one of the operands may not match | ||
953 | - the pattern, but not both. */ | ||
954 | - if (!op0_ok && !op1_ok) | ||
955 | - return NULL; | ||
956 | - | ||
957 | - if (op0_ok && op1_ok) | ||
958 | + if (op1_ok) | ||
959 | { | ||
960 | oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
961 | oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
962 | } | ||
963 | - else if (!op0_ok) | ||
964 | - { | ||
965 | - if (TREE_CODE (oprnd0) == INTEGER_CST | ||
966 | - && TREE_CODE (half_type1) == INTEGER_TYPE | ||
967 | - && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1, | ||
968 | - stmts, type, | ||
969 | - &half_type1, def_stmt1)) | ||
970 | - half_type0 = half_type1; | ||
971 | - else | ||
972 | - return NULL; | ||
973 | - } | ||
974 | - else if (!op1_ok) | ||
975 | + else | ||
976 | { | ||
977 | if (TREE_CODE (oprnd1) == INTEGER_CST | ||
978 | && TREE_CODE (half_type0) == INTEGER_TYPE | ||
979 | - && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0, | ||
980 | - stmts, type, | ||
981 | - &half_type0, def_stmt0)) | ||
982 | + && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, | ||
983 | + &oprnd0, stmts, type, | ||
984 | + &half_type0, def_stmt0)) | ||
985 | half_type1 = half_type0; | ||
986 | else | ||
987 | return NULL; | ||
988 | @@ -998,6 +998,7 @@ | ||
989 | || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) | ||
990 | return false; | ||
991 | |||
992 | + VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
993 | oprnd = gimple_assign_lhs (new_stmt); | ||
994 | } | ||
995 | else | ||
996 | @@ -1128,7 +1129,7 @@ | ||
997 | statetments, except for the case when the last statement in the | ||
998 | sequence doesn't have a corresponding pattern statement. In such | ||
999 | case we associate the last pattern statement with the last statement | ||
1000 | - in the sequence. Therefore, we only add an original statetement to | ||
1001 | + in the sequence. Therefore, we only add the original statement to | ||
1002 | the list if we know that it is not the last. */ | ||
1003 | if (prev_stmt) | ||
1004 | VEC_safe_push (gimple, heap, *stmts, prev_stmt); | ||
1005 | @@ -1215,6 +1216,231 @@ | ||
1006 | } | ||
1007 | |||
1008 | |||
1009 | +/* Detect widening shift pattern: | ||
1010 | + | ||
1011 | + type a_t; | ||
1012 | + TYPE a_T, res_T; | ||
1013 | + | ||
1014 | + S1 a_t = ; | ||
1015 | + S2 a_T = (TYPE) a_t; | ||
1016 | + S3 res_T = a_T << CONST; | ||
1017 | + | ||
1018 | + where type 'TYPE' is at least double the size of type 'type'. | ||
1019 | + | ||
1020 | + Also detect unsigned cases: | ||
1021 | + | ||
1022 | + unsigned type a_t; | ||
1023 | + unsigned TYPE u_res_T; | ||
1024 | + TYPE a_T, res_T; | ||
1025 | + | ||
1026 | + S1 a_t = ; | ||
1027 | + S2 a_T = (TYPE) a_t; | ||
1028 | + S3 res_T = a_T << CONST; | ||
1029 | + S4 u_res_T = (unsigned TYPE) res_T; | ||
1030 | + | ||
1031 | + And a case when 'TYPE' is 4 times bigger than 'type'. In that case we | ||
1032 | + create an additional pattern stmt for S2 to create a variable of an | ||
1033 | + intermediate type, and perform widen-shift on the intermediate type: | ||
1034 | + | ||
1035 | + type a_t; | ||
1036 | + interm_type a_it; | ||
1037 | + TYPE a_T, res_T, res_T'; | ||
1038 | + | ||
1039 | + S1 a_t = ; | ||
1040 | + S2 a_T = (TYPE) a_t; | ||
1041 | + '--> a_it = (interm_type) a_t; | ||
1042 | + S3 res_T = a_T << CONST; | ||
1043 | + '--> res_T' = a_it <<* CONST; | ||
1044 | + | ||
1045 | + Input/Output: | ||
1046 | + | ||
1047 | + * STMTS: Contains a stmt from which the pattern search begins. | ||
1048 | + In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 | ||
1049 | + in STMTS. When an intermediate type is used and a pattern statement is | ||
1050 | + created for S2, we also put S2 here (before S3). | ||
1051 | + | ||
1052 | + Output: | ||
1053 | + | ||
1054 | + * TYPE_IN: The type of the input arguments to the pattern. | ||
1055 | + | ||
1056 | + * TYPE_OUT: The type of the output of this pattern. | ||
1057 | + | ||
1058 | + * Return value: A new stmt that will be used to replace the sequence of | ||
1059 | + stmts that constitute the pattern. In this case it will be: | ||
1060 | + WIDEN_LSHIFT_EXPR <a_t, CONST>. */ | ||
1061 | + | ||
1062 | +static gimple | ||
1063 | +vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, | ||
1064 | + tree *type_in, tree *type_out) | ||
1065 | +{ | ||
1066 | + gimple last_stmt = VEC_pop (gimple, *stmts); | ||
1067 | + gimple def_stmt0; | ||
1068 | + tree oprnd0, oprnd1; | ||
1069 | + tree type, half_type0; | ||
1070 | + gimple pattern_stmt, orig_stmt = NULL; | ||
1071 | + tree vectype, vectype_out = NULL_TREE; | ||
1072 | + tree dummy; | ||
1073 | + tree var; | ||
1074 | + enum tree_code dummy_code; | ||
1075 | + int dummy_int; | ||
1076 | + VEC (tree, heap) * dummy_vec; | ||
1077 | + gimple use_stmt = NULL; | ||
1078 | + bool over_widen = false; | ||
1079 | + | ||
1080 | + if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) | ||
1081 | + return NULL; | ||
1082 | + | ||
1083 | + orig_stmt = last_stmt; | ||
1084 | + if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) | ||
1085 | + { | ||
1086 | + /* This statement was also detected as over-widening operation (it can't | ||
1087 | + be any other pattern, because only over-widening detects shifts). | ||
1088 | + LAST_STMT is the final type demotion statement, but its related | ||
1089 | + statement is shift. We analyze the related statement to catch cases: | ||
1090 | + | ||
1091 | + orig code: | ||
1092 | + type a_t; | ||
1093 | + itype res; | ||
1094 | + TYPE a_T, res_T; | ||
1095 | + | ||
1096 | + S1 a_T = (TYPE) a_t; | ||
1097 | + S2 res_T = a_T << CONST; | ||
1098 | + S3 res = (itype)res_T; | ||
1099 | + | ||
1100 | + (size of type * 2 <= size of itype | ||
1101 | + and size of itype * 2 <= size of TYPE) | ||
1102 | + | ||
1103 | + code after over-widening pattern detection: | ||
1104 | + | ||
1105 | + S1 a_T = (TYPE) a_t; | ||
1106 | + --> a_it = (itype) a_t; | ||
1107 | + S2 res_T = a_T << CONST; | ||
1108 | + S3 res = (itype)res_T; <--- LAST_STMT | ||
1109 | + --> res = a_it << CONST; | ||
1110 | + | ||
1111 | + after widen_shift: | ||
1112 | + | ||
1113 | + S1 a_T = (TYPE) a_t; | ||
1114 | + --> a_it = (itype) a_t; - redundant | ||
1115 | + S2 res_T = a_T << CONST; | ||
1116 | + S3 res = (itype)res_T; | ||
1117 | + --> res = a_t w<< CONST; | ||
1118 | + | ||
1119 | + i.e., we replace the three statements with res = a_t w<< CONST. */ | ||
1120 | + last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt)); | ||
1121 | + over_widen = true; | ||
1122 | + } | ||
1123 | + | ||
1124 | + if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) | ||
1125 | + return NULL; | ||
1126 | + | ||
1127 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
1128 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
1129 | + if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) | ||
1130 | + return NULL; | ||
1131 | + | ||
1132 | + /* Check operand 0: it has to be defined by a type promotion. */ | ||
1133 | + if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) | ||
1134 | + return NULL; | ||
1135 | + | ||
1136 | + /* Check operand 1: has to be positive. We check that it fits the type | ||
1137 | + in vect_handle_widen_op_by_const (). */ | ||
1138 | + if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) | ||
1139 | + return NULL; | ||
1140 | + | ||
1141 | + oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
1142 | + type = gimple_expr_type (last_stmt); | ||
1143 | + | ||
1144 | + /* Check if this is a widening operation. */ | |
1145 | + if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, | ||
1146 | + &oprnd0, stmts, | ||
1147 | + type, &half_type0, def_stmt0)) | ||
1148 | + return NULL; | ||
1149 | + | ||
1150 | + /* Handle unsigned case. Look for | ||
1151 | + S4 u_res_T = (unsigned TYPE) res_T; | ||
1152 | + Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */ | ||
1153 | + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) | ||
1154 | + { | ||
1155 | + tree lhs = gimple_assign_lhs (last_stmt), use_lhs; | ||
1156 | + imm_use_iterator imm_iter; | ||
1157 | + use_operand_p use_p; | ||
1158 | + int nuses = 0; | ||
1159 | + tree use_type; | ||
1160 | + | ||
1161 | + if (over_widen) | ||
1162 | + { | ||
1163 | + /* In case of over-widening pattern, S4 should be ORIG_STMT itself. | ||
1164 | + We check here that TYPE is the correct type for the operation, | ||
1165 | + i.e., it's the type of the original result. */ | ||
1166 | + tree orig_type = gimple_expr_type (orig_stmt); | ||
1167 | + if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type)) | ||
1168 | + || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type))) | ||
1169 | + return NULL; | ||
1170 | + } | ||
1171 | + else | ||
1172 | + { | ||
1173 | + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | ||
1174 | + { | ||
1175 | + if (is_gimple_debug (USE_STMT (use_p))) | ||
1176 | + continue; | ||
1177 | + use_stmt = USE_STMT (use_p); | ||
1178 | + nuses++; | ||
1179 | + } | ||
1180 | + | ||
1181 | + if (nuses != 1 || !is_gimple_assign (use_stmt) | ||
1182 | + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) | ||
1183 | + return NULL; | ||
1184 | + | ||
1185 | + use_lhs = gimple_assign_lhs (use_stmt); | ||
1186 | + use_type = TREE_TYPE (use_lhs); | ||
1187 | + | ||
1188 | + if (!INTEGRAL_TYPE_P (use_type) | ||
1189 | + || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) | ||
1190 | + || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) | ||
1191 | + return NULL; | ||
1192 | + | ||
1193 | + type = use_type; | ||
1194 | + } | ||
1195 | + } | ||
1196 | + | ||
1197 | + /* Pattern detected. */ | ||
1198 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1199 | + fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); | ||
1200 | + | ||
1201 | + /* Check target support. */ | ||
1202 | + vectype = get_vectype_for_scalar_type (half_type0); | ||
1203 | + vectype_out = get_vectype_for_scalar_type (type); | ||
1204 | + | ||
1205 | + if (!vectype | ||
1206 | + || !vectype_out | ||
1207 | + || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, | ||
1208 | + vectype_out, vectype, | ||
1209 | + &dummy, &dummy, &dummy_code, | ||
1210 | + &dummy_code, &dummy_int, | ||
1211 | + &dummy_vec)) | ||
1212 | + return NULL; | ||
1213 | + | ||
1214 | + *type_in = vectype; | ||
1215 | + *type_out = vectype_out; | ||
1216 | + | ||
1217 | + /* Pattern supported. Create a stmt to be used to replace the pattern. */ | ||
1218 | + var = vect_recog_temp_ssa_var (type, NULL); | ||
1219 | + pattern_stmt = | ||
1220 | + gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); | ||
1221 | + | ||
1222 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1223 | + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
1224 | + | ||
1225 | + if (use_stmt) | ||
1226 | + last_stmt = use_stmt; | ||
1227 | + else | ||
1228 | + last_stmt = orig_stmt; | ||
1229 | + | ||
1230 | + VEC_safe_push (gimple, heap, *stmts, last_stmt); | ||
1231 | + return pattern_stmt; | ||
1232 | +} | ||
1233 | + | ||
1234 | /* Mark statements that are involved in a pattern. */ | ||
1235 | |||
1236 | static inline void | ||
1237 | @@ -1278,7 +1504,8 @@ | ||
1238 | static void | ||
1239 | vect_pattern_recog_1 ( | ||
1240 | gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), | ||
1241 | - gimple_stmt_iterator si) | ||
1242 | + gimple_stmt_iterator si, | ||
1243 | + VEC (gimple, heap) **stmts_to_replace) | ||
1244 | { | ||
1245 | gimple stmt = gsi_stmt (si), pattern_stmt; | ||
1246 | stmt_vec_info stmt_info; | ||
1247 | @@ -1288,14 +1515,14 @@ | ||
1248 | enum tree_code code; | ||
1249 | int i; | ||
1250 | gimple next; | ||
1251 | - VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); | ||
1252 | |||
1253 | - VEC_quick_push (gimple, stmts_to_replace, stmt); | ||
1254 | - pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); | ||
1255 | + VEC_truncate (gimple, *stmts_to_replace, 0); | ||
1256 | + VEC_quick_push (gimple, *stmts_to_replace, stmt); | ||
1257 | + pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); | ||
1258 | if (!pattern_stmt) | ||
1259 | return; | ||
1260 | |||
1261 | - stmt = VEC_last (gimple, stmts_to_replace); | ||
1262 | + stmt = VEC_last (gimple, *stmts_to_replace); | ||
1263 | stmt_info = vinfo_for_stmt (stmt); | ||
1264 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
1265 | |||
1266 | @@ -1303,8 +1530,6 @@ | ||
1267 | { | ||
1268 | /* No need to check target support (already checked by the pattern | ||
1269 | recognition function). */ | ||
1270 | - if (type_out) | ||
1271 | - gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); | ||
1272 | pattern_vectype = type_out ? type_out : type_in; | ||
1273 | } | ||
1274 | else | ||
1275 | @@ -1360,8 +1585,8 @@ | ||
1276 | /* It is possible that additional pattern stmts are created and inserted in | ||
1277 | STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the | ||
1278 | relevant statements. */ | ||
1279 | - for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) | ||
1280 | - && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); | ||
1281 | + for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) | ||
1282 | + && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); | ||
1283 | i++) | ||
1284 | { | ||
1285 | stmt_info = vinfo_for_stmt (stmt); | ||
1286 | @@ -1374,8 +1599,6 @@ | ||
1287 | |||
1288 | vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); | ||
1289 | } | ||
1290 | - | ||
1291 | - VEC_free (gimple, heap, stmts_to_replace); | ||
1292 | } | ||
1293 | |||
1294 | |||
1295 | @@ -1465,6 +1688,7 @@ | ||
1296 | gimple_stmt_iterator si; | ||
1297 | unsigned int i, j; | ||
1298 | gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
1299 | + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); | ||
1300 | |||
1301 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1302 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | ||
1303 | @@ -1480,8 +1704,11 @@ | ||
1304 | for (j = 0; j < NUM_PATTERNS; j++) | ||
1305 | { | ||
1306 | vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; | ||
1307 | - vect_pattern_recog_1 (vect_recog_func_ptr, si); | ||
1308 | + vect_pattern_recog_1 (vect_recog_func_ptr, si, | ||
1309 | + &stmts_to_replace); | ||
1310 | } | ||
1311 | } | ||
1312 | } | ||
1313 | + | ||
1314 | + VEC_free (gimple, heap, stmts_to_replace); | ||
1315 | } | ||
1316 | |||
1317 | === modified file 'gcc/tree-vect-slp.c' | ||
1318 | --- old/gcc/tree-vect-slp.c 2011-10-23 11:29:25 +0000 | ||
1319 | +++ new/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 | ||
1320 | @@ -480,6 +480,11 @@ | ||
1321 | } | ||
1322 | } | ||
1323 | } | ||
1324 | + else if (rhs_code == WIDEN_LSHIFT_EXPR) | ||
1325 | + { | ||
1326 | + need_same_oprnds = true; | ||
1327 | + first_op1 = gimple_assign_rhs2 (stmt); | ||
1328 | + } | ||
1329 | } | ||
1330 | else | ||
1331 | { | ||
1332 | |||
1333 | === modified file 'gcc/tree-vect-stmts.c' | ||
1334 | --- old/gcc/tree-vect-stmts.c 2011-10-23 11:29:25 +0000 | ||
1335 | +++ new/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 | ||
1336 | @@ -3359,6 +3359,7 @@ | ||
1337 | VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; | ||
1338 | VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; | ||
1339 | bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
1340 | + unsigned int k; | ||
1341 | |||
1342 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) | ||
1343 | return false; | ||
1344 | @@ -3375,7 +3376,8 @@ | ||
1345 | |||
1346 | code = gimple_assign_rhs_code (stmt); | ||
1347 | if (!CONVERT_EXPR_CODE_P (code) | ||
1348 | - && code != WIDEN_MULT_EXPR) | ||
1349 | + && code != WIDEN_MULT_EXPR | ||
1350 | + && code != WIDEN_LSHIFT_EXPR) | ||
1351 | return false; | ||
1352 | |||
1353 | scalar_dest = gimple_assign_lhs (stmt); | ||
1354 | @@ -3403,7 +3405,7 @@ | ||
1355 | bool ok; | ||
1356 | |||
1357 | op1 = gimple_assign_rhs2 (stmt); | ||
1358 | - if (code == WIDEN_MULT_EXPR) | ||
1359 | + if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) | ||
1360 | { | ||
1361 | /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of | ||
1362 | OP1. */ | ||
1363 | @@ -3480,7 +3482,7 @@ | ||
1364 | fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", | ||
1365 | ncopies); | ||
1366 | |||
1367 | - if (code == WIDEN_MULT_EXPR) | ||
1368 | + if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) | ||
1369 | { | ||
1370 | if (CONSTANT_CLASS_P (op0)) | ||
1371 | op0 = fold_convert (TREE_TYPE (op1), op0); | ||
1372 | @@ -3521,6 +3523,8 @@ | ||
1373 | if (op_type == binary_op) | ||
1374 | vec_oprnds1 = VEC_alloc (tree, heap, 1); | ||
1375 | } | ||
1376 | + else if (code == WIDEN_LSHIFT_EXPR) | ||
1377 | + vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); | ||
1378 | |||
1379 | /* In case the vectorization factor (VF) is bigger than the number | ||
1380 | of elements that we can fit in a vectype (nunits), we have to generate | ||
1381 | @@ -3534,15 +3538,33 @@ | ||
1382 | if (j == 0) | ||
1383 | { | ||
1384 | if (slp_node) | ||
1385 | - vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, | ||
1386 | - &vec_oprnds1, -1); | ||
1387 | - else | ||
1388 | + { | ||
1389 | + if (code == WIDEN_LSHIFT_EXPR) | ||
1390 | + { | ||
1391 | + vec_oprnd1 = op1; | ||
1392 | + /* Store vec_oprnd1 for every vector stmt to be created | ||
1393 | + for SLP_NODE. We check during the analysis that all | ||
1394 | + the shift arguments are the same. */ | ||
1395 | + for (k = 0; k < slp_node->vec_stmts_size - 1; k++) | ||
1396 | + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); | ||
1397 | + | ||
1398 | + vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, | ||
1399 | + -1); | ||
1400 | + } | ||
1401 | + else | ||
1402 | + vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, | ||
1403 | + &vec_oprnds1, -1); | ||
1404 | + } | ||
1405 | + else | ||
1406 | { | ||
1407 | vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); | ||
1408 | VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); | ||
1409 | if (op_type == binary_op) | ||
1410 | { | ||
1411 | - vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); | ||
1412 | + if (code == WIDEN_LSHIFT_EXPR) | ||
1413 | + vec_oprnd1 = op1; | ||
1414 | + else | ||
1415 | + vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); | ||
1416 | VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); | ||
1417 | } | ||
1418 | } | ||
1419 | @@ -3553,7 +3575,10 @@ | ||
1420 | VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); | ||
1421 | if (op_type == binary_op) | ||
1422 | { | ||
1423 | - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); | ||
1424 | + if (code == WIDEN_LSHIFT_EXPR) | ||
1425 | + vec_oprnd1 = op1; | ||
1426 | + else | ||
1427 | + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); | ||
1428 | VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); | ||
1429 | } | ||
1430 | } | ||
1431 | @@ -5853,6 +5878,19 @@ | ||
1432 | } | ||
1433 | break; | ||
1434 | |||
1435 | + case WIDEN_LSHIFT_EXPR: | ||
1436 | + if (BYTES_BIG_ENDIAN) | ||
1437 | + { | ||
1438 | + c1 = VEC_WIDEN_LSHIFT_HI_EXPR; | ||
1439 | + c2 = VEC_WIDEN_LSHIFT_LO_EXPR; | ||
1440 | + } | ||
1441 | + else | ||
1442 | + { | ||
1443 | + c2 = VEC_WIDEN_LSHIFT_HI_EXPR; | ||
1444 | + c1 = VEC_WIDEN_LSHIFT_LO_EXPR; | ||
1445 | + } | ||
1446 | + break; | ||
1447 | + | ||
1448 | CASE_CONVERT: | ||
1449 | if (BYTES_BIG_ENDIAN) | ||
1450 | { | ||
1451 | |||
1452 | === modified file 'gcc/tree-vectorizer.h' | ||
1453 | --- old/gcc/tree-vectorizer.h 2011-09-05 06:23:37 +0000 | ||
1454 | +++ new/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 | ||
1455 | @@ -896,7 +896,7 @@ | ||
1456 | Additional pattern recognition functions can (and will) be added | ||
1457 | in the future. */ | ||
1458 | typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
1459 | -#define NUM_PATTERNS 5 | ||
1460 | +#define NUM_PATTERNS 6 | ||
1461 | void vect_pattern_recog (loop_vec_info); | ||
1462 | |||
1463 | /* In tree-vectorizer.c. */ | ||
1464 | |||
1465 | === modified file 'gcc/tree.def' | ||
1466 | --- old/gcc/tree.def 2011-01-21 14:14:12 +0000 | ||
1467 | +++ new/gcc/tree.def 2011-10-23 13:33:07 +0000 | ||
1468 | @@ -1092,6 +1092,19 @@ | ||
1469 | is subtracted from t3. */ | ||
1470 | DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) | ||
1471 | |||
1472 | +/* Widening shift left. | ||
1473 | + The first operand is of type t1. | ||
1474 | + The second operand is the number of bits to shift by; it need not be the | ||
1475 | + same type as the first operand and result. | ||
1476 | + Note that the result is undefined if the second operand is larger | ||
1477 | + than or equal to the first operand's type size. | ||
1478 | + The type of the entire expression is t2, such that t2 is at least twice | ||
1479 | + the size of t1. | ||
1480 | + WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) | ||
1481 | + the first argument from type t1 to type t2, and then shifting it | ||
1482 | + by the second argument. */ | ||
1483 | +DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) | ||
1484 | + | ||
1485 | /* Fused multiply-add. | ||
1486 | All operands and the result are of the same type. No intermediate | ||
1487 | rounding is performed after multiplying operand one with operand two | ||
1488 | @@ -1147,6 +1160,16 @@ | ||
1489 | DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) | ||
1490 | DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) | ||
1491 | |||
1492 | +/* Widening vector shift left in bits. | ||
1493 | + Operand 0 is a vector to be shifted with N elements of size S. | ||
1494 | + Operand 1 is an integer shift amount in bits. | ||
1495 | + The result of the operation is N elements of size 2*S. | ||
1496 | + VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. | ||
1497 | + VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. | ||
1498 | + */ | ||
1499 | +DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) | ||
1500 | +DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) | ||
1501 | + | ||
1502 | /* PREDICT_EXPR. Specify hint for branch prediction. The | ||
1503 | PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the | ||
1504 | outcome (0 for not taken and 1 for taken). Once the profile is guessed | ||
1505 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch new file mode 100644 index 000000000..95b9ea9b8 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106836.patch | |||
@@ -0,0 +1,61 @@ | |||
1 | 2011-11-04 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | Backport from mainline -r180673: | ||
4 | |||
5 | gcc/ | ||
6 | * modulo-sched.c (generate_prolog_epilog): Mark prolog | ||
7 | and epilog as BB_DISABLE_SCHEDULE. | ||
8 | (mark_loop_unsched): New function. | ||
9 | (sms_schedule): Call it. | ||
10 | |||
11 | === modified file 'gcc/modulo-sched.c' | ||
12 | --- old/gcc/modulo-sched.c 2011-10-10 14:35:32 +0000 | ||
13 | +++ new/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 | ||
14 | @@ -1173,6 +1173,8 @@ | ||
15 | /* Put the prolog on the entry edge. */ | ||
16 | e = loop_preheader_edge (loop); | ||
17 | split_edge_and_insert (e, get_insns ()); | ||
18 | + if (!flag_resched_modulo_sched) | ||
19 | + e->dest->flags |= BB_DISABLE_SCHEDULE; | ||
20 | |||
21 | end_sequence (); | ||
22 | |||
23 | @@ -1186,9 +1188,24 @@ | ||
24 | gcc_assert (single_exit (loop)); | ||
25 | e = single_exit (loop); | ||
26 | split_edge_and_insert (e, get_insns ()); | ||
27 | + if (!flag_resched_modulo_sched) | ||
28 | + e->dest->flags |= BB_DISABLE_SCHEDULE; | ||
29 | + | ||
30 | end_sequence (); | ||
31 | } | ||
32 | |||
33 | +/* Mark LOOP as software pipelined so the later | ||
34 | + scheduling passes don't touch it. */ | ||
35 | +static void | ||
36 | +mark_loop_unsched (struct loop *loop) | ||
37 | +{ | ||
38 | + unsigned i; | ||
39 | + basic_block *bbs = get_loop_body (loop); | ||
40 | + | ||
41 | + for (i = 0; i < loop->num_nodes; i++) | ||
42 | + bbs[i]->flags |= BB_DISABLE_SCHEDULE; | ||
43 | +} | ||
44 | + | ||
45 | /* Return true if all the BBs of the loop are empty except the | ||
46 | loop header. */ | ||
47 | static bool | ||
48 | @@ -1714,9 +1731,10 @@ | ||
49 | permute_partial_schedule (ps, g->closing_branch->first_note); | ||
50 | |||
51 | /* Mark this loop as software pipelined so the later | ||
52 | - scheduling passes doesn't touch it. */ | ||
53 | + scheduling passes don't touch it. */ | ||
54 | if (! flag_resched_modulo_sched) | ||
55 | - g->bb->flags |= BB_DISABLE_SCHEDULE; | ||
56 | + mark_loop_unsched (loop); | ||
57 | + | ||
58 | /* The life-info is not valid any more. */ | ||
59 | df_set_bb_dirty (g->bb); | ||
60 | |||
61 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch new file mode 100644 index 000000000..dcffe1945 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106839.patch | |||
@@ -0,0 +1,23 @@ | |||
1 | 2011-11-02 Andrew Stubbs <ams@codesourcery.com> | ||
2 | |||
3 | Backport from FSF mainline: | ||
4 | |||
5 | 2011-11-01 Andrew Stubbs <ams@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7-a tuning. | ||
9 | |||
10 | === modified file 'gcc/config/arm/bpabi.h' | ||
11 | --- old/gcc/config/arm/bpabi.h 2010-12-20 17:48:51 +0000 | ||
12 | +++ new/gcc/config/arm/bpabi.h 2011-11-02 21:02:53 +0000 | ||
13 | @@ -56,7 +56,8 @@ | ||
14 | "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" | ||
15 | |||
16 | #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ | ||
17 | - "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" | ||
18 | + "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ | ||
19 | + ":%{!r:--be8}}}" | ||
20 | |||
21 | /* Tell the assembler to build BPABI binaries. */ | ||
22 | #undef SUBTARGET_EXTRA_ASM_SPEC | ||
23 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch new file mode 100644 index 000000000..70a7bdfa2 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106840.patch | |||
@@ -0,0 +1,1400 @@ | |||
1 | 2011-11-17 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from mainline: | ||
4 | |||
5 | 2011-11-03 Ira Rosen <ira.rosen@linaro.org> | ||
6 | |||
7 | gcc/ | ||
8 | * tree-vectorizer.h (slp_void_p): New. | ||
9 | (struct _slp_tree): Replace left and right with children. Update | ||
10 | documentation. | ||
11 | (struct _slp_oprnd_info): New. | ||
12 | (vect_get_vec_defs): Declare. | ||
13 | (vect_get_slp_defs): Update arguments. | ||
14 | * tree-vect-loop.c (vect_create_epilog_for_reduction): Call | ||
15 | vect_get_vec_defs instead of vect_get_slp_defs. | ||
16 | (vectorizable_reduction): Likewise. | ||
17 | * tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument. | ||
18 | Update call to vect_get_slp_defs. | ||
19 | (vectorizable_conversion): Update call to vect_get_vec_defs. | ||
20 | (vectorizable_assignment, vectorizable_shift, | ||
21 | vectorizable_operation): Likewise. | ||
22 | (vectorizable_type_demotion): Call vect_get_vec_defs instead of | ||
23 | vect_get_slp_defs. | ||
24 | (vectorizable_type_promotion, vectorizable_store): Likewise. | ||
25 | (vect_analyze_stmt): Fix typo. | ||
26 | * tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal. | ||
27 | (vect_print_slp_tree, vect_mark_slp_stmts, | ||
28 | vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts, | ||
29 | vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations, | ||
30 | vect_schedule_slp_instance): Likewise. | ||
31 | (vect_create_new_slp_node): New. | ||
32 | (vect_create_oprnd_info, vect_free_oprnd_info): Likewise. | ||
33 | (vect_get_and_check_slp_defs): Pass information about defs using | ||
34 | oprnds_info, allow any number of operands. | ||
35 | (vect_build_slp_tree): Likewise. Update calls to | ||
36 | vect_get_and_check_slp_defs. Fix comments. | ||
37 | (vect_analyze_slp_instance): Move node creation to | ||
38 | vect_create_new_slp_node. | ||
39 | (vect_get_slp_defs): Allow any number of operands. | ||
40 | |||
41 | 2011-11-11 Jakub Jelinek <jakub@redhat.com> | ||
42 | |||
43 | gcc/ | ||
44 | * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN | ||
45 | vector. | ||
46 | (vect_create_new_slp_node): Don't allocate node before checking stmt | ||
47 | type. | ||
48 | (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always | ||
49 | free def_stmts vectors and additionally free oprnd_info. | ||
50 | (vect_build_slp_tree): Adjust callers. Call it even if | ||
51 | stop_recursion. If vect_create_new_slp_node or | ||
52 | vect_build_slp_tree fails, properly handle freeing memory. | ||
53 | If it succeeded, clear def_stmts in oprnd_info. | ||
54 | |||
55 | === modified file 'gcc/tree-vect-loop.c' | ||
56 | --- old/gcc/tree-vect-loop.c 2011-09-05 06:23:37 +0000 | ||
57 | +++ new/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 | ||
58 | @@ -3282,8 +3282,8 @@ | ||
59 | |||
60 | /* Get the loop-entry arguments. */ | ||
61 | if (slp_node) | ||
62 | - vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, | ||
63 | - NULL, reduc_index); | ||
64 | + vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, | ||
65 | + NULL, slp_node, reduc_index); | ||
66 | else | ||
67 | { | ||
68 | vec_initial_defs = VEC_alloc (tree, heap, 1); | ||
69 | @@ -4451,8 +4451,8 @@ | ||
70 | } | ||
71 | |||
72 | if (slp_node) | ||
73 | - vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, | ||
74 | - -1); | ||
75 | + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, | ||
76 | + slp_node, -1); | ||
77 | else | ||
78 | { | ||
79 | loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], | ||
80 | |||
81 | === modified file 'gcc/tree-vect-slp.c' | ||
82 | --- old/gcc/tree-vect-slp.c 2011-10-27 11:27:59 +0000 | ||
83 | +++ new/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 | ||
84 | @@ -67,15 +67,16 @@ | ||
85 | static void | ||
86 | vect_free_slp_tree (slp_tree node) | ||
87 | { | ||
88 | + int i; | ||
89 | + slp_void_p child; | ||
90 | + | ||
91 | if (!node) | ||
92 | return; | ||
93 | |||
94 | - if (SLP_TREE_LEFT (node)) | ||
95 | - vect_free_slp_tree (SLP_TREE_LEFT (node)); | ||
96 | - | ||
97 | - if (SLP_TREE_RIGHT (node)) | ||
98 | - vect_free_slp_tree (SLP_TREE_RIGHT (node)); | ||
99 | - | ||
100 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
101 | + vect_free_slp_tree ((slp_tree) child); | ||
102 | + | ||
103 | + VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); | ||
104 | VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); | ||
105 | |||
106 | if (SLP_TREE_VEC_STMTS (node)) | ||
107 | @@ -96,48 +97,116 @@ | ||
108 | } | ||
109 | |||
110 | |||
111 | -/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that | ||
112 | - they are of a legal type and that they match the defs of the first stmt of | ||
113 | - the SLP group (stored in FIRST_STMT_...). */ | ||
114 | +/* Create an SLP node for SCALAR_STMTS. */ | ||
115 | + | ||
116 | +static slp_tree | ||
117 | +vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) | ||
118 | +{ | ||
119 | + slp_tree node; | ||
120 | + gimple stmt = VEC_index (gimple, scalar_stmts, 0); | ||
121 | + unsigned int nops; | ||
122 | + | ||
123 | + if (is_gimple_call (stmt)) | ||
124 | + nops = gimple_call_num_args (stmt); | ||
125 | + else if (is_gimple_assign (stmt)) | ||
126 | + nops = gimple_num_ops (stmt) - 1; | ||
127 | + else | ||
128 | + return NULL; | ||
129 | + | ||
130 | + node = XNEW (struct _slp_tree); | ||
131 | + SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; | ||
132 | + SLP_TREE_VEC_STMTS (node) = NULL; | ||
133 | + SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); | ||
134 | + SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; | ||
135 | + SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; | ||
136 | + | ||
137 | + return node; | ||
138 | +} | ||
139 | + | ||
140 | + | ||
141 | +/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each | ||
142 | + operand. */ | ||
143 | +static VEC (slp_oprnd_info, heap) * | ||
144 | +vect_create_oprnd_info (int nops, int group_size) | ||
145 | +{ | ||
146 | + int i; | ||
147 | + slp_oprnd_info oprnd_info; | ||
148 | + VEC (slp_oprnd_info, heap) *oprnds_info; | ||
149 | + | ||
150 | + oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); | ||
151 | + for (i = 0; i < nops; i++) | ||
152 | + { | ||
153 | + oprnd_info = XNEW (struct _slp_oprnd_info); | ||
154 | + oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); | ||
155 | + oprnd_info->first_dt = vect_uninitialized_def; | ||
156 | + oprnd_info->first_def_type = NULL_TREE; | ||
157 | + oprnd_info->first_const_oprnd = NULL_TREE; | ||
158 | + oprnd_info->first_pattern = false; | ||
159 | + VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); | ||
160 | + } | ||
161 | + | ||
162 | + return oprnds_info; | ||
163 | +} | ||
164 | + | ||
165 | + | ||
166 | +/* Free operands info. */ | ||
167 | + | ||
168 | +static void | ||
169 | +vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) | ||
170 | +{ | ||
171 | + int i; | ||
172 | + slp_oprnd_info oprnd_info; | ||
173 | + | ||
174 | + FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) | ||
175 | + { | ||
176 | + VEC_free (gimple, heap, oprnd_info->def_stmts); | ||
177 | + XDELETE (oprnd_info); | ||
178 | + } | ||
179 | + | ||
180 | + VEC_free (slp_oprnd_info, heap, *oprnds_info); | ||
181 | +} | ||
182 | + | ||
183 | + | ||
184 | +/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that | ||
185 | + they are of a valid type and that they match the defs of the first stmt of | ||
186 | + the SLP group (stored in OPRNDS_INFO). */ | ||
187 | |||
188 | static bool | ||
189 | vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, | ||
190 | slp_tree slp_node, gimple stmt, | ||
191 | - VEC (gimple, heap) **def_stmts0, | ||
192 | - VEC (gimple, heap) **def_stmts1, | ||
193 | - enum vect_def_type *first_stmt_dt0, | ||
194 | - enum vect_def_type *first_stmt_dt1, | ||
195 | - tree *first_stmt_def0_type, | ||
196 | - tree *first_stmt_def1_type, | ||
197 | - tree *first_stmt_const_oprnd, | ||
198 | - int ncopies_for_cost, | ||
199 | - bool *pattern0, bool *pattern1) | ||
200 | + int ncopies_for_cost, bool first, | ||
201 | + VEC (slp_oprnd_info, heap) **oprnds_info) | ||
202 | { | ||
203 | tree oprnd; | ||
204 | unsigned int i, number_of_oprnds; | ||
205 | - tree def[2]; | ||
206 | + tree def, def_op0 = NULL_TREE; | ||
207 | gimple def_stmt; | ||
208 | - enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; | ||
209 | - stmt_vec_info stmt_info = | ||
210 | - vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); | ||
211 | - enum gimple_rhs_class rhs_class; | ||
212 | + enum vect_def_type dt = vect_uninitialized_def; | ||
213 | + enum vect_def_type dt_op0 = vect_uninitialized_def; | ||
214 | + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
215 | + tree lhs = gimple_get_lhs (stmt); | ||
216 | struct loop *loop = NULL; | ||
217 | enum tree_code rhs_code; | ||
218 | bool different_types = false; | ||
219 | + bool pattern = false; | ||
220 | + slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; | ||
221 | |||
222 | if (loop_vinfo) | ||
223 | loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
224 | |||
225 | - rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); | ||
226 | - number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ | ||
227 | + if (is_gimple_call (stmt)) | ||
228 | + number_of_oprnds = gimple_call_num_args (stmt); | ||
229 | + else | ||
230 | + number_of_oprnds = gimple_num_ops (stmt) - 1; | ||
231 | |||
232 | for (i = 0; i < number_of_oprnds; i++) | ||
233 | { | ||
234 | oprnd = gimple_op (stmt, i + 1); | ||
235 | + oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); | ||
236 | |||
237 | - if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i], | ||
238 | - &dt[i]) | ||
239 | - || (!def_stmt && dt[i] != vect_constant_def)) | ||
240 | + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, | ||
241 | + &dt) | ||
242 | + || (!def_stmt && dt != vect_constant_def)) | ||
243 | { | ||
244 | if (vect_print_dump_info (REPORT_SLP)) | ||
245 | { | ||
246 | @@ -158,29 +227,24 @@ | ||
247 | && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | ||
248 | && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | ||
249 | { | ||
250 | - if (!*first_stmt_dt0) | ||
251 | - *pattern0 = true; | ||
252 | - else | ||
253 | - { | ||
254 | - if (i == 1 && !*first_stmt_dt1) | ||
255 | - *pattern1 = true; | ||
256 | - else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) | ||
257 | - { | ||
258 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
259 | - { | ||
260 | - fprintf (vect_dump, "Build SLP failed: some of the stmts" | ||
261 | - " are in a pattern, and others are not "); | ||
262 | - print_generic_expr (vect_dump, oprnd, TDF_SLIM); | ||
263 | - } | ||
264 | + pattern = true; | ||
265 | + if (!first && !oprnd_info->first_pattern) | ||
266 | + { | ||
267 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
268 | + { | ||
269 | + fprintf (vect_dump, "Build SLP failed: some of the stmts" | ||
270 | + " are in a pattern, and others are not "); | ||
271 | + print_generic_expr (vect_dump, oprnd, TDF_SLIM); | ||
272 | + } | ||
273 | |||
274 | - return false; | ||
275 | - } | ||
276 | + return false; | ||
277 | } | ||
278 | |||
279 | def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
280 | - dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); | ||
281 | + dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); | ||
282 | |||
283 | - if (*dt == vect_unknown_def_type) | ||
284 | + if (dt == vect_unknown_def_type | ||
285 | + || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) | ||
286 | { | ||
287 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
288 | fprintf (vect_dump, "Unsupported pattern."); | ||
289 | @@ -190,11 +254,11 @@ | ||
290 | switch (gimple_code (def_stmt)) | ||
291 | { | ||
292 | case GIMPLE_PHI: | ||
293 | - def[i] = gimple_phi_result (def_stmt); | ||
294 | + def = gimple_phi_result (def_stmt); | ||
295 | break; | ||
296 | |||
297 | case GIMPLE_ASSIGN: | ||
298 | - def[i] = gimple_assign_lhs (def_stmt); | ||
299 | + def = gimple_assign_lhs (def_stmt); | ||
300 | break; | ||
301 | |||
302 | default: | ||
303 | @@ -204,117 +268,125 @@ | ||
304 | } | ||
305 | } | ||
306 | |||
307 | - if (!*first_stmt_dt0) | ||
308 | + if (first) | ||
309 | { | ||
310 | - /* op0 of the first stmt of the group - store its info. */ | ||
311 | - *first_stmt_dt0 = dt[i]; | ||
312 | - if (def[i]) | ||
313 | - *first_stmt_def0_type = TREE_TYPE (def[i]); | ||
314 | - else | ||
315 | - *first_stmt_const_oprnd = oprnd; | ||
316 | + oprnd_info->first_dt = dt; | ||
317 | + oprnd_info->first_pattern = pattern; | ||
318 | + if (def) | ||
319 | + { | ||
320 | + oprnd_info->first_def_type = TREE_TYPE (def); | ||
321 | + oprnd_info->first_const_oprnd = NULL_TREE; | ||
322 | + } | ||
323 | + else | ||
324 | + { | ||
325 | + oprnd_info->first_def_type = NULL_TREE; | ||
326 | + oprnd_info->first_const_oprnd = oprnd; | ||
327 | + } | ||
328 | |||
329 | - /* Analyze costs (for the first stmt of the group only). */ | ||
330 | - if (rhs_class != GIMPLE_SINGLE_RHS) | ||
331 | - /* Not memory operation (we don't call this functions for loads). */ | ||
332 | - vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); | ||
333 | - else | ||
334 | - /* Store. */ | ||
335 | - vect_model_store_cost (stmt_info, ncopies_for_cost, false, | ||
336 | - dt[0], slp_node); | ||
337 | + if (i == 0) | ||
338 | + { | ||
339 | + def_op0 = def; | ||
340 | + dt_op0 = dt; | ||
341 | + /* Analyze costs (for the first stmt of the group only). */ | ||
342 | + if (REFERENCE_CLASS_P (lhs)) | ||
343 | + /* Store. */ | ||
344 | + vect_model_store_cost (stmt_info, ncopies_for_cost, false, | ||
345 | + dt, slp_node); | ||
346 | + else | ||
347 | + /* Not memory operation (we don't call this function for | ||
348 | + loads). */ | ||
349 | + vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, | ||
350 | + slp_node); | ||
351 | + } | ||
352 | } | ||
353 | |||
354 | else | ||
355 | { | ||
356 | - if (!*first_stmt_dt1 && i == 1) | ||
357 | - { | ||
358 | - /* op1 of the first stmt of the group - store its info. */ | ||
359 | - *first_stmt_dt1 = dt[i]; | ||
360 | - if (def[i]) | ||
361 | - *first_stmt_def1_type = TREE_TYPE (def[i]); | ||
362 | - else | ||
363 | - { | ||
364 | - /* We assume that the stmt contains only one constant | ||
365 | - operand. We fail otherwise, to be on the safe side. */ | ||
366 | - if (*first_stmt_const_oprnd) | ||
367 | - { | ||
368 | - if (vect_print_dump_info (REPORT_SLP)) | ||
369 | - fprintf (vect_dump, "Build SLP failed: two constant " | ||
370 | - "oprnds in stmt"); | ||
371 | - return false; | ||
372 | - } | ||
373 | - *first_stmt_const_oprnd = oprnd; | ||
374 | - } | ||
375 | - } | ||
376 | - else | ||
377 | - { | ||
378 | - /* Not first stmt of the group, check that the def-stmt/s match | ||
379 | - the def-stmt/s of the first stmt. */ | ||
380 | - if ((i == 0 | ||
381 | - && (*first_stmt_dt0 != dt[i] | ||
382 | - || (*first_stmt_def0_type && def[0] | ||
383 | - && !types_compatible_p (*first_stmt_def0_type, | ||
384 | - TREE_TYPE (def[0]))))) | ||
385 | - || (i == 1 | ||
386 | - && (*first_stmt_dt1 != dt[i] | ||
387 | - || (*first_stmt_def1_type && def[1] | ||
388 | - && !types_compatible_p (*first_stmt_def1_type, | ||
389 | - TREE_TYPE (def[1]))))) | ||
390 | - || (!def[i] | ||
391 | - && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), | ||
392 | - TREE_TYPE (oprnd))) | ||
393 | - || different_types) | ||
394 | - { | ||
395 | - if (i != number_of_oprnds - 1) | ||
396 | - different_types = true; | ||
397 | + /* Not first stmt of the group, check that the def-stmt/s match | ||
398 | + the def-stmt/s of the first stmt. Allow different definition | ||
399 | + types for reduction chains: the first stmt must be a | ||
400 | + vect_reduction_def (a phi node), and the rest | ||
401 | + vect_internal_def. */ | ||
402 | + if (((oprnd_info->first_dt != dt | ||
403 | + && !(oprnd_info->first_dt == vect_reduction_def | ||
404 | + && dt == vect_internal_def)) | ||
405 | + || (oprnd_info->first_def_type != NULL_TREE | ||
406 | + && def | ||
407 | + && !types_compatible_p (oprnd_info->first_def_type, | ||
408 | + TREE_TYPE (def)))) | ||
409 | + || (!def | ||
410 | + && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), | ||
411 | + TREE_TYPE (oprnd))) | ||
412 | + || different_types) | ||
413 | + { | ||
414 | + if (number_of_oprnds != 2) | ||
415 | + { | ||
416 | + if (vect_print_dump_info (REPORT_SLP)) | ||
417 | + fprintf (vect_dump, "Build SLP failed: different types "); | ||
418 | + | ||
419 | + return false; | ||
420 | + } | ||
421 | + | ||
422 | + /* Try to swap operands in case of binary operation. */ | ||
423 | + if (i == 0) | ||
424 | + different_types = true; | ||
425 | + else | ||
426 | + { | ||
427 | + oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); | ||
428 | + if (is_gimple_assign (stmt) | ||
429 | + && (rhs_code = gimple_assign_rhs_code (stmt)) | ||
430 | + && TREE_CODE_CLASS (rhs_code) == tcc_binary | ||
431 | + && commutative_tree_code (rhs_code) | ||
432 | + && oprnd0_info->first_dt == dt | ||
433 | + && oprnd_info->first_dt == dt_op0 | ||
434 | + && def_op0 && def | ||
435 | + && !(oprnd0_info->first_def_type | ||
436 | + && !types_compatible_p (oprnd0_info->first_def_type, | ||
437 | + TREE_TYPE (def))) | ||
438 | + && !(oprnd_info->first_def_type | ||
439 | + && !types_compatible_p (oprnd_info->first_def_type, | ||
440 | + TREE_TYPE (def_op0)))) | ||
441 | + { | ||
442 | + if (vect_print_dump_info (REPORT_SLP)) | ||
443 | + { | ||
444 | + fprintf (vect_dump, "Swapping operands of "); | ||
445 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
446 | + } | ||
447 | + | ||
448 | + swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), | ||
449 | + gimple_assign_rhs2_ptr (stmt)); | ||
450 | + } | ||
451 | else | ||
452 | - { | ||
453 | - if (is_gimple_assign (stmt) | ||
454 | - && (rhs_code = gimple_assign_rhs_code (stmt)) | ||
455 | - && TREE_CODE_CLASS (rhs_code) == tcc_binary | ||
456 | - && commutative_tree_code (rhs_code) | ||
457 | - && *first_stmt_dt0 == dt[1] | ||
458 | - && *first_stmt_dt1 == dt[0] | ||
459 | - && def[0] && def[1] | ||
460 | - && !(*first_stmt_def0_type | ||
461 | - && !types_compatible_p (*first_stmt_def0_type, | ||
462 | - TREE_TYPE (def[1]))) | ||
463 | - && !(*first_stmt_def1_type | ||
464 | - && !types_compatible_p (*first_stmt_def1_type, | ||
465 | - TREE_TYPE (def[0])))) | ||
466 | - { | ||
467 | - if (vect_print_dump_info (REPORT_SLP)) | ||
468 | - { | ||
469 | - fprintf (vect_dump, "Swapping operands of "); | ||
470 | - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
471 | - } | ||
472 | - swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), | ||
473 | - gimple_assign_rhs2_ptr (stmt)); | ||
474 | - } | ||
475 | - else | ||
476 | - { | ||
477 | - if (vect_print_dump_info (REPORT_SLP)) | ||
478 | - fprintf (vect_dump, "Build SLP failed: different types "); | ||
479 | - | ||
480 | - return false; | ||
481 | - } | ||
482 | - } | ||
483 | + { | ||
484 | + if (vect_print_dump_info (REPORT_SLP)) | ||
485 | + fprintf (vect_dump, "Build SLP failed: different types "); | ||
486 | + | ||
487 | + return false; | ||
488 | + } | ||
489 | } | ||
490 | } | ||
491 | } | ||
492 | |||
493 | /* Check the types of the definitions. */ | ||
494 | - switch (dt[i]) | ||
495 | + switch (dt) | ||
496 | { | ||
497 | case vect_constant_def: | ||
498 | case vect_external_def: | ||
499 | + case vect_reduction_def: | ||
500 | break; | ||
501 | |||
502 | case vect_internal_def: | ||
503 | - case vect_reduction_def: | ||
504 | - if ((i == 0 && !different_types) || (i == 1 && different_types)) | ||
505 | - VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); | ||
506 | + if (different_types) | ||
507 | + { | ||
508 | + oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); | ||
509 | + oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); | ||
510 | + if (i == 0) | ||
511 | + VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); | ||
512 | + else | ||
513 | + VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); | ||
514 | + } | ||
515 | else | ||
516 | - VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); | ||
517 | + VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); | ||
518 | break; | ||
519 | |||
520 | default: | ||
521 | @@ -322,7 +394,7 @@ | ||
522 | if (vect_print_dump_info (REPORT_SLP)) | ||
523 | { | ||
524 | fprintf (vect_dump, "Build SLP failed: illegal type of def "); | ||
525 | - print_generic_expr (vect_dump, def[i], TDF_SLIM); | ||
526 | + print_generic_expr (vect_dump, def, TDF_SLIM); | ||
527 | } | ||
528 | |||
529 | return false; | ||
530 | @@ -347,15 +419,10 @@ | ||
531 | VEC (slp_tree, heap) **loads, | ||
532 | unsigned int vectorization_factor, bool *loads_permuted) | ||
533 | { | ||
534 | - VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); | ||
535 | - VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); | ||
536 | unsigned int i; | ||
537 | VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); | ||
538 | gimple stmt = VEC_index (gimple, stmts, 0); | ||
539 | - enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; | ||
540 | - enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; | ||
541 | enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; | ||
542 | - tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; | ||
543 | tree lhs; | ||
544 | bool stop_recursion = false, need_same_oprnds = false; | ||
545 | tree vectype, scalar_type, first_op1 = NULL_TREE; | ||
546 | @@ -364,13 +431,21 @@ | ||
547 | int icode; | ||
548 | enum machine_mode optab_op2_mode; | ||
549 | enum machine_mode vec_mode; | ||
550 | - tree first_stmt_const_oprnd = NULL_TREE; | ||
551 | struct data_reference *first_dr; | ||
552 | - bool pattern0 = false, pattern1 = false; | ||
553 | HOST_WIDE_INT dummy; | ||
554 | bool permutation = false; | ||
555 | unsigned int load_place; | ||
556 | gimple first_load, prev_first_load = NULL; | ||
557 | + VEC (slp_oprnd_info, heap) *oprnds_info; | ||
558 | + unsigned int nops; | ||
559 | + slp_oprnd_info oprnd_info; | ||
560 | + | ||
561 | + if (is_gimple_call (stmt)) | ||
562 | + nops = gimple_call_num_args (stmt); | ||
563 | + else | ||
564 | + nops = gimple_num_ops (stmt) - 1; | ||
565 | + | ||
566 | + oprnds_info = vect_create_oprnd_info (nops, group_size); | ||
567 | |||
568 | /* For every stmt in NODE find its def stmt/s. */ | ||
569 | FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) | ||
570 | @@ -391,6 +466,7 @@ | ||
571 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
572 | } | ||
573 | |||
574 | + vect_free_oprnd_info (&oprnds_info); | ||
575 | return false; | ||
576 | } | ||
577 | |||
578 | @@ -400,10 +476,11 @@ | ||
579 | if (vect_print_dump_info (REPORT_SLP)) | ||
580 | { | ||
581 | fprintf (vect_dump, | ||
582 | - "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); | ||
583 | + "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); | ||
584 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
585 | } | ||
586 | |||
587 | + vect_free_oprnd_info (&oprnds_info); | ||
588 | return false; | ||
589 | } | ||
590 | |||
591 | @@ -416,6 +493,8 @@ | ||
592 | fprintf (vect_dump, "Build SLP failed: unsupported data-type "); | ||
593 | print_generic_expr (vect_dump, scalar_type, TDF_SLIM); | ||
594 | } | ||
595 | + | ||
596 | + vect_free_oprnd_info (&oprnds_info); | ||
597 | return false; | ||
598 | } | ||
599 | |||
600 | @@ -462,6 +541,7 @@ | ||
601 | { | ||
602 | if (vect_print_dump_info (REPORT_SLP)) | ||
603 | fprintf (vect_dump, "Build SLP failed: no optab."); | ||
604 | + vect_free_oprnd_info (&oprnds_info); | ||
605 | return false; | ||
606 | } | ||
607 | icode = (int) optab_handler (optab, vec_mode); | ||
608 | @@ -470,6 +550,7 @@ | ||
609 | if (vect_print_dump_info (REPORT_SLP)) | ||
610 | fprintf (vect_dump, "Build SLP failed: " | ||
611 | "op not supported by target."); | ||
612 | + vect_free_oprnd_info (&oprnds_info); | ||
613 | return false; | ||
614 | } | ||
615 | optab_op2_mode = insn_data[icode].operand[2].mode; | ||
616 | @@ -506,6 +587,7 @@ | ||
617 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
618 | } | ||
619 | |||
620 | + vect_free_oprnd_info (&oprnds_info); | ||
621 | return false; | ||
622 | } | ||
623 | |||
624 | @@ -519,6 +601,7 @@ | ||
625 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
626 | } | ||
627 | |||
628 | + vect_free_oprnd_info (&oprnds_info); | ||
629 | return false; | ||
630 | } | ||
631 | } | ||
632 | @@ -530,15 +613,12 @@ | ||
633 | { | ||
634 | /* Store. */ | ||
635 | if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, | ||
636 | - stmt, &def_stmts0, &def_stmts1, | ||
637 | - &first_stmt_dt0, | ||
638 | - &first_stmt_dt1, | ||
639 | - &first_stmt_def0_type, | ||
640 | - &first_stmt_def1_type, | ||
641 | - &first_stmt_const_oprnd, | ||
642 | - ncopies_for_cost, | ||
643 | - &pattern0, &pattern1)) | ||
644 | - return false; | ||
645 | + stmt, ncopies_for_cost, | ||
646 | + (i == 0), &oprnds_info)) | ||
647 | + { | ||
648 | + vect_free_oprnd_info (&oprnds_info); | ||
649 | + return false; | ||
650 | + } | ||
651 | } | ||
652 | else | ||
653 | { | ||
654 | @@ -556,6 +636,7 @@ | ||
655 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
656 | } | ||
657 | |||
658 | + vect_free_oprnd_info (&oprnds_info); | ||
659 | return false; | ||
660 | } | ||
661 | |||
662 | @@ -573,6 +654,7 @@ | ||
663 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
664 | } | ||
665 | |||
666 | + vect_free_oprnd_info (&oprnds_info); | ||
667 | return false; | ||
668 | } | ||
669 | |||
670 | @@ -593,6 +675,7 @@ | ||
671 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
672 | } | ||
673 | |||
674 | + vect_free_oprnd_info (&oprnds_info); | ||
675 | return false; | ||
676 | } | ||
677 | } | ||
678 | @@ -612,6 +695,7 @@ | ||
679 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
680 | } | ||
681 | |||
682 | + vect_free_oprnd_info (&oprnds_info); | ||
683 | return false; | ||
684 | } | ||
685 | |||
686 | @@ -639,7 +723,7 @@ | ||
687 | { | ||
688 | if (TREE_CODE_CLASS (rhs_code) == tcc_reference) | ||
689 | { | ||
690 | - /* Not strided load. */ | ||
691 | + /* Not strided load. */ | ||
692 | if (vect_print_dump_info (REPORT_SLP)) | ||
693 | { | ||
694 | fprintf (vect_dump, "Build SLP failed: not strided load "); | ||
695 | @@ -647,6 +731,7 @@ | ||
696 | } | ||
697 | |||
698 | /* FORNOW: Not strided loads are not supported. */ | ||
699 | + vect_free_oprnd_info (&oprnds_info); | ||
700 | return false; | ||
701 | } | ||
702 | |||
703 | @@ -661,19 +746,18 @@ | ||
704 | print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
705 | } | ||
706 | |||
707 | + vect_free_oprnd_info (&oprnds_info); | ||
708 | return false; | ||
709 | } | ||
710 | |||
711 | /* Find the def-stmts. */ | ||
712 | if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, | ||
713 | - &def_stmts0, &def_stmts1, | ||
714 | - &first_stmt_dt0, &first_stmt_dt1, | ||
715 | - &first_stmt_def0_type, | ||
716 | - &first_stmt_def1_type, | ||
717 | - &first_stmt_const_oprnd, | ||
718 | - ncopies_for_cost, | ||
719 | - &pattern0, &pattern1)) | ||
720 | - return false; | ||
721 | + ncopies_for_cost, (i == 0), | ||
722 | + &oprnds_info)) | ||
723 | + { | ||
724 | + vect_free_oprnd_info (&oprnds_info); | ||
725 | + return false; | ||
726 | + } | ||
727 | } | ||
728 | } | ||
729 | |||
730 | @@ -702,46 +786,37 @@ | ||
731 | *loads_permuted = true; | ||
732 | } | ||
733 | |||
734 | + vect_free_oprnd_info (&oprnds_info); | ||
735 | return true; | ||
736 | } | ||
737 | |||
738 | /* Create SLP_TREE nodes for the definition node/s. */ | ||
739 | - if (first_stmt_dt0 == vect_internal_def) | ||
740 | - { | ||
741 | - slp_tree left_node = XNEW (struct _slp_tree); | ||
742 | - SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; | ||
743 | - SLP_TREE_VEC_STMTS (left_node) = NULL; | ||
744 | - SLP_TREE_LEFT (left_node) = NULL; | ||
745 | - SLP_TREE_RIGHT (left_node) = NULL; | ||
746 | - SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; | ||
747 | - SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; | ||
748 | - if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, | ||
749 | - inside_cost, outside_cost, ncopies_for_cost, | ||
750 | - max_nunits, load_permutation, loads, | ||
751 | - vectorization_factor, loads_permuted)) | ||
752 | - return false; | ||
753 | - | ||
754 | - SLP_TREE_LEFT (*node) = left_node; | ||
755 | - } | ||
756 | - | ||
757 | - if (first_stmt_dt1 == vect_internal_def) | ||
758 | - { | ||
759 | - slp_tree right_node = XNEW (struct _slp_tree); | ||
760 | - SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; | ||
761 | - SLP_TREE_VEC_STMTS (right_node) = NULL; | ||
762 | - SLP_TREE_LEFT (right_node) = NULL; | ||
763 | - SLP_TREE_RIGHT (right_node) = NULL; | ||
764 | - SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; | ||
765 | - SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; | ||
766 | - if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, | ||
767 | - inside_cost, outside_cost, ncopies_for_cost, | ||
768 | - max_nunits, load_permutation, loads, | ||
769 | - vectorization_factor, loads_permuted)) | ||
770 | - return false; | ||
771 | - | ||
772 | - SLP_TREE_RIGHT (*node) = right_node; | ||
773 | - } | ||
774 | - | ||
775 | + FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) | ||
776 | + { | ||
777 | + slp_tree child; | ||
778 | + | ||
779 | + if (oprnd_info->first_dt != vect_internal_def) | ||
780 | + continue; | ||
781 | + | ||
782 | + child = vect_create_new_slp_node (oprnd_info->def_stmts); | ||
783 | + if (!child | ||
784 | + || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, | ||
785 | + inside_cost, outside_cost, ncopies_for_cost, | ||
786 | + max_nunits, load_permutation, loads, | ||
787 | + vectorization_factor, loads_permuted)) | ||
788 | + { | ||
789 | + if (child) | ||
790 | + oprnd_info->def_stmts = NULL; | ||
791 | + vect_free_slp_tree (child); | ||
792 | + vect_free_oprnd_info (&oprnds_info); | ||
793 | + return false; | ||
794 | + } | ||
795 | + | ||
796 | + oprnd_info->def_stmts = NULL; | ||
797 | + VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); | ||
798 | + } | ||
799 | + | ||
800 | + vect_free_oprnd_info (&oprnds_info); | ||
801 | return true; | ||
802 | } | ||
803 | |||
804 | @@ -751,6 +826,7 @@ | ||
805 | { | ||
806 | int i; | ||
807 | gimple stmt; | ||
808 | + slp_void_p child; | ||
809 | |||
810 | if (!node) | ||
811 | return; | ||
812 | @@ -763,8 +839,8 @@ | ||
813 | } | ||
814 | fprintf (vect_dump, "\n"); | ||
815 | |||
816 | - vect_print_slp_tree (SLP_TREE_LEFT (node)); | ||
817 | - vect_print_slp_tree (SLP_TREE_RIGHT (node)); | ||
818 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
819 | + vect_print_slp_tree ((slp_tree) child); | ||
820 | } | ||
821 | |||
822 | |||
823 | @@ -778,6 +854,7 @@ | ||
824 | { | ||
825 | int i; | ||
826 | gimple stmt; | ||
827 | + slp_void_p child; | ||
828 | |||
829 | if (!node) | ||
830 | return; | ||
831 | @@ -786,8 +863,8 @@ | ||
832 | if (j < 0 || i == j) | ||
833 | STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; | ||
834 | |||
835 | - vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); | ||
836 | - vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); | ||
837 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
838 | + vect_mark_slp_stmts ((slp_tree) child, mark, j); | ||
839 | } | ||
840 | |||
841 | |||
842 | @@ -799,6 +876,7 @@ | ||
843 | int i; | ||
844 | gimple stmt; | ||
845 | stmt_vec_info stmt_info; | ||
846 | + slp_void_p child; | ||
847 | |||
848 | if (!node) | ||
849 | return; | ||
850 | @@ -811,8 +889,8 @@ | ||
851 | STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; | ||
852 | } | ||
853 | |||
854 | - vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); | ||
855 | - vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); | ||
856 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
857 | + vect_mark_slp_stmts_relevant ((slp_tree) child); | ||
858 | } | ||
859 | |||
860 | |||
861 | @@ -885,12 +963,13 @@ | ||
862 | gimple stmt; | ||
863 | VEC (gimple, heap) *tmp_stmts; | ||
864 | unsigned int index, i; | ||
865 | + slp_void_p child; | ||
866 | |||
867 | if (!node) | ||
868 | return; | ||
869 | |||
870 | - vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); | ||
871 | - vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); | ||
872 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
873 | + vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); | ||
874 | |||
875 | gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); | ||
876 | tmp_stmts = VEC_alloc (gimple, heap, group_size); | ||
877 | @@ -1253,7 +1332,7 @@ | ||
878 | gimple stmt) | ||
879 | { | ||
880 | slp_instance new_instance; | ||
881 | - slp_tree node = XNEW (struct _slp_tree); | ||
882 | + slp_tree node; | ||
883 | unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); | ||
884 | unsigned int unrolling_factor = 1, nunits; | ||
885 | tree vectype, scalar_type = NULL_TREE; | ||
886 | @@ -1265,6 +1344,7 @@ | ||
887 | VEC (slp_tree, heap) *loads; | ||
888 | struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); | ||
889 | bool loads_permuted = false; | ||
890 | + VEC (gimple, heap) *scalar_stmts; | ||
891 | |||
892 | if (dr) | ||
893 | { | ||
894 | @@ -1308,39 +1388,26 @@ | ||
895 | } | ||
896 | |||
897 | /* Create a node (a root of the SLP tree) for the packed strided stores. */ | ||
898 | - SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); | ||
899 | + scalar_stmts = VEC_alloc (gimple, heap, group_size); | ||
900 | next = stmt; | ||
901 | if (dr) | ||
902 | { | ||
903 | /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ | ||
904 | while (next) | ||
905 | { | ||
906 | - VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); | ||
907 | + VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
908 | next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | ||
909 | } | ||
910 | } | ||
911 | else | ||
912 | { | ||
913 | /* Collect reduction statements. */ | ||
914 | - for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, | ||
915 | - next); | ||
916 | - i++) | ||
917 | - { | ||
918 | - VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); | ||
919 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
920 | - { | ||
921 | - fprintf (vect_dump, "pushing reduction into node: "); | ||
922 | - print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); | ||
923 | - } | ||
924 | - } | ||
925 | + VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); | ||
926 | + for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) | ||
927 | + VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
928 | } | ||
929 | |||
930 | - SLP_TREE_VEC_STMTS (node) = NULL; | ||
931 | - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; | ||
932 | - SLP_TREE_LEFT (node) = NULL; | ||
933 | - SLP_TREE_RIGHT (node) = NULL; | ||
934 | - SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; | ||
935 | - SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; | ||
936 | + node = vect_create_new_slp_node (scalar_stmts); | ||
937 | |||
938 | /* Calculate the number of vector stmts to create based on the unrolling | ||
939 | factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is | ||
940 | @@ -1517,6 +1584,7 @@ | ||
941 | imm_use_iterator imm_iter; | ||
942 | gimple use_stmt; | ||
943 | stmt_vec_info stmt_vinfo; | ||
944 | + slp_void_p child; | ||
945 | |||
946 | if (!node) | ||
947 | return; | ||
948 | @@ -1534,8 +1602,8 @@ | ||
949 | == vect_reduction_def)) | ||
950 | vect_mark_slp_stmts (node, hybrid, i); | ||
951 | |||
952 | - vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); | ||
953 | - vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); | ||
954 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
955 | + vect_detect_hybrid_slp_stmts ((slp_tree) child); | ||
956 | } | ||
957 | |||
958 | |||
959 | @@ -1625,13 +1693,14 @@ | ||
960 | bool dummy; | ||
961 | int i; | ||
962 | gimple stmt; | ||
963 | + slp_void_p child; | ||
964 | |||
965 | if (!node) | ||
966 | return true; | ||
967 | |||
968 | - if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) | ||
969 | - || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) | ||
970 | - return false; | ||
971 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
972 | + if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) | ||
973 | + return false; | ||
974 | |||
975 | FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) | ||
976 | { | ||
977 | @@ -2207,88 +2276,102 @@ | ||
978 | If the scalar definitions are loop invariants or constants, collect them and | ||
979 | call vect_get_constant_vectors() to create vector stmts. | ||
980 | Otherwise, the def-stmts must be already vectorized and the vectorized stmts | ||
981 | - must be stored in the LEFT/RIGHT node of SLP_NODE, and we call | ||
982 | - vect_get_slp_vect_defs() to retrieve them. | ||
983 | - If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from | ||
984 | - the right node. This is used when the second operand must remain scalar. */ | ||
985 | + must be stored in the corresponding child of SLP_NODE, and we call | ||
986 | + vect_get_slp_vect_defs () to retrieve them. */ | ||
987 | |||
988 | void | ||
989 | -vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, | ||
990 | - VEC (tree,heap) **vec_oprnds0, | ||
991 | - VEC (tree,heap) **vec_oprnds1, int reduc_index) | ||
992 | +vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, | ||
993 | + VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) | ||
994 | { | ||
995 | - gimple first_stmt; | ||
996 | - enum tree_code code; | ||
997 | - int number_of_vects; | ||
998 | + gimple first_stmt, first_def; | ||
999 | + int number_of_vects = 0, i; | ||
1000 | + unsigned int child_index = 0; | ||
1001 | HOST_WIDE_INT lhs_size_unit, rhs_size_unit; | ||
1002 | + slp_tree child = NULL; | ||
1003 | + VEC (tree, heap) *vec_defs; | ||
1004 | + tree oprnd, def_lhs; | ||
1005 | + bool vectorized_defs; | ||
1006 | |||
1007 | first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); | ||
1008 | - /* The number of vector defs is determined by the number of vector statements | ||
1009 | - in the node from which we get those statements. */ | ||
1010 | - if (SLP_TREE_LEFT (slp_node)) | ||
1011 | - number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); | ||
1012 | - else | ||
1013 | - { | ||
1014 | - number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | ||
1015 | - /* Number of vector stmts was calculated according to LHS in | ||
1016 | - vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if | ||
1017 | - necessary. See vect_get_smallest_scalar_type () for details. */ | ||
1018 | - vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, | ||
1019 | - &rhs_size_unit); | ||
1020 | - if (rhs_size_unit != lhs_size_unit) | ||
1021 | - { | ||
1022 | - number_of_vects *= rhs_size_unit; | ||
1023 | - number_of_vects /= lhs_size_unit; | ||
1024 | - } | ||
1025 | + FOR_EACH_VEC_ELT (tree, ops, i, oprnd) | ||
1026 | + { | ||
1027 | + /* For each operand we check if it has vectorized definitions in a child | ||
1028 | + node or we need to create them (for invariants and constants). We | ||
1029 | + check if the LHS of the first stmt of the next child matches OPRND. | ||
1030 | + If it does, we found the correct child. Otherwise, we call | ||
1031 | + vect_get_constant_vectors (), and not advance CHILD_INDEX in order | ||
1032 | + to check this child node for the next operand. */ | ||
1033 | + vectorized_defs = false; | ||
1034 | + if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) | ||
1035 | + { | ||
1036 | + child = (slp_tree) VEC_index (slp_void_p, | ||
1037 | + SLP_TREE_CHILDREN (slp_node), | ||
1038 | + child_index); | ||
1039 | + first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); | ||
1040 | + | ||
1041 | + /* In the end of a pattern sequence we have a use of the original stmt, | ||
1042 | + so we need to compare OPRND with the original def. */ | ||
1043 | + if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) | ||
1044 | + && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) | ||
1045 | + && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) | ||
1046 | + first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); | ||
1047 | + | ||
1048 | + if (is_gimple_call (first_def)) | ||
1049 | + def_lhs = gimple_call_lhs (first_def); | ||
1050 | + else | ||
1051 | + def_lhs = gimple_assign_lhs (first_def); | ||
1052 | + | ||
1053 | + if (operand_equal_p (oprnd, def_lhs, 0)) | ||
1054 | + { | ||
1055 | + /* The number of vector defs is determined by the number of | ||
1056 | + vector statements in the node from which we get those | ||
1057 | + statements. */ | ||
1058 | + number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); | ||
1059 | + vectorized_defs = true; | ||
1060 | + child_index++; | ||
1061 | + } | ||
1062 | + } | ||
1063 | + | ||
1064 | + if (!vectorized_defs) | ||
1065 | + { | ||
1066 | + if (i == 0) | ||
1067 | + { | ||
1068 | + number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | ||
1069 | + /* Number of vector stmts was calculated according to LHS in | ||
1070 | + vect_schedule_slp_instance (), fix it by replacing LHS with | ||
1071 | + RHS, if necessary. See vect_get_smallest_scalar_type () for | ||
1072 | + details. */ | ||
1073 | + vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, | ||
1074 | + &rhs_size_unit); | ||
1075 | + if (rhs_size_unit != lhs_size_unit) | ||
1076 | + { | ||
1077 | + number_of_vects *= rhs_size_unit; | ||
1078 | + number_of_vects /= lhs_size_unit; | ||
1079 | + } | ||
1080 | + } | ||
1081 | + } | ||
1082 | + | ||
1083 | + /* Allocate memory for vectorized defs. */ | ||
1084 | + vec_defs = VEC_alloc (tree, heap, number_of_vects); | ||
1085 | + | ||
1086 | + /* For reduction defs we call vect_get_constant_vectors (), since we are | ||
1087 | + looking for initial loop invariant values. */ | ||
1088 | + if (vectorized_defs && reduc_index == -1) | ||
1089 | + /* The defs are already vectorized. */ | ||
1090 | + vect_get_slp_vect_defs (child, &vec_defs); | ||
1091 | + else | ||
1092 | + /* Build vectors from scalar defs. */ | ||
1093 | + vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, | ||
1094 | + number_of_vects, reduc_index); | ||
1095 | + | ||
1096 | + VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); | ||
1097 | + | ||
1098 | + /* For reductions, we only need initial values. */ | ||
1099 | + if (reduc_index != -1) | ||
1100 | + return; | ||
1101 | } | ||
1102 | - | ||
1103 | - /* Allocate memory for vectorized defs. */ | ||
1104 | - *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); | ||
1105 | - | ||
1106 | - /* SLP_NODE corresponds either to a group of stores or to a group of | ||
1107 | - unary/binary operations. We don't call this function for loads. | ||
1108 | - For reduction defs we call vect_get_constant_vectors(), since we are | ||
1109 | - looking for initial loop invariant values. */ | ||
1110 | - if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) | ||
1111 | - /* The defs are already vectorized. */ | ||
1112 | - vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); | ||
1113 | - else | ||
1114 | - /* Build vectors from scalar defs. */ | ||
1115 | - vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, | ||
1116 | - reduc_index); | ||
1117 | - | ||
1118 | - if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) | ||
1119 | - /* Since we don't call this function with loads, this is a group of | ||
1120 | - stores. */ | ||
1121 | - return; | ||
1122 | - | ||
1123 | - /* For reductions, we only need initial values. */ | ||
1124 | - if (reduc_index != -1) | ||
1125 | - return; | ||
1126 | - | ||
1127 | - code = gimple_assign_rhs_code (first_stmt); | ||
1128 | - if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) | ||
1129 | - return; | ||
1130 | - | ||
1131 | - /* The number of vector defs is determined by the number of vector statements | ||
1132 | - in the node from which we get those statements. */ | ||
1133 | - if (SLP_TREE_RIGHT (slp_node)) | ||
1134 | - number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); | ||
1135 | - else | ||
1136 | - number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | ||
1137 | - | ||
1138 | - *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); | ||
1139 | - | ||
1140 | - if (SLP_TREE_RIGHT (slp_node)) | ||
1141 | - /* The defs are already vectorized. */ | ||
1142 | - vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); | ||
1143 | - else | ||
1144 | - /* Build vectors from scalar defs. */ | ||
1145 | - vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, | ||
1146 | - -1); | ||
1147 | } | ||
1148 | |||
1149 | - | ||
1150 | /* Create NCOPIES permutation statements using the mask MASK_BYTES (by | ||
1151 | building a vector of type MASK_TYPE from it) and two input vectors placed in | ||
1152 | DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and | ||
1153 | @@ -2605,14 +2688,14 @@ | ||
1154 | tree vectype; | ||
1155 | int i; | ||
1156 | slp_tree loads_node; | ||
1157 | + slp_void_p child; | ||
1158 | |||
1159 | if (!node) | ||
1160 | return false; | ||
1161 | |||
1162 | - vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, | ||
1163 | - vectorization_factor); | ||
1164 | - vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, | ||
1165 | - vectorization_factor); | ||
1166 | + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) | ||
1167 | + vect_schedule_slp_instance ((slp_tree) child, instance, | ||
1168 | + vectorization_factor); | ||
1169 | |||
1170 | stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); | ||
1171 | stmt_info = vinfo_for_stmt (stmt); | ||
1172 | |||
1173 | === modified file 'gcc/tree-vect-stmts.c' | ||
1174 | --- old/gcc/tree-vect-stmts.c 2011-10-27 11:27:59 +0000 | ||
1175 | +++ new/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000 | ||
1176 | @@ -1419,16 +1419,35 @@ | ||
1177 | } | ||
1178 | |||
1179 | |||
1180 | -/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not | ||
1181 | - NULL. */ | ||
1182 | +/* Get vectorized definitions for OP0 and OP1. | ||
1183 | + REDUC_INDEX is the index of reduction operand in case of reduction, | ||
1184 | + and -1 otherwise. */ | ||
1185 | |||
1186 | -static void | ||
1187 | +void | ||
1188 | vect_get_vec_defs (tree op0, tree op1, gimple stmt, | ||
1189 | - VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, | ||
1190 | - slp_tree slp_node) | ||
1191 | + VEC (tree, heap) **vec_oprnds0, | ||
1192 | + VEC (tree, heap) **vec_oprnds1, | ||
1193 | + slp_tree slp_node, int reduc_index) | ||
1194 | { | ||
1195 | if (slp_node) | ||
1196 | - vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); | ||
1197 | + { | ||
1198 | + int nops = (op1 == NULL_TREE) ? 1 : 2; | ||
1199 | + VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); | ||
1200 | + VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); | ||
1201 | + | ||
1202 | + VEC_quick_push (tree, ops, op0); | ||
1203 | + if (op1) | ||
1204 | + VEC_quick_push (tree, ops, op1); | ||
1205 | + | ||
1206 | + vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); | ||
1207 | + | ||
1208 | + *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); | ||
1209 | + if (op1) | ||
1210 | + *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); | ||
1211 | + | ||
1212 | + VEC_free (tree, heap, ops); | ||
1213 | + VEC_free (slp_void_p, heap, vec_defs); | ||
1214 | + } | ||
1215 | else | ||
1216 | { | ||
1217 | tree vec_oprnd; | ||
1218 | @@ -2016,7 +2035,8 @@ | ||
1219 | for (j = 0; j < ncopies; j++) | ||
1220 | { | ||
1221 | if (j == 0) | ||
1222 | - vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); | ||
1223 | + vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, | ||
1224 | + -1); | ||
1225 | else | ||
1226 | vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); | ||
1227 | |||
1228 | @@ -2221,7 +2241,7 @@ | ||
1229 | { | ||
1230 | /* Handle uses. */ | ||
1231 | if (j == 0) | ||
1232 | - vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); | ||
1233 | + vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); | ||
1234 | else | ||
1235 | vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); | ||
1236 | |||
1237 | @@ -2576,10 +2596,10 @@ | ||
1238 | operand 1 should be of a vector type (the usual case). */ | ||
1239 | if (vec_oprnd1) | ||
1240 | vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, | ||
1241 | - slp_node); | ||
1242 | + slp_node, -1); | ||
1243 | else | ||
1244 | vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, | ||
1245 | - slp_node); | ||
1246 | + slp_node, -1); | ||
1247 | } | ||
1248 | else | ||
1249 | vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); | ||
1250 | @@ -2887,10 +2907,10 @@ | ||
1251 | { | ||
1252 | if (op_type == binary_op || op_type == ternary_op) | ||
1253 | vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, | ||
1254 | - slp_node); | ||
1255 | + slp_node, -1); | ||
1256 | else | ||
1257 | vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, | ||
1258 | - slp_node); | ||
1259 | + slp_node, -1); | ||
1260 | if (op_type == ternary_op) | ||
1261 | { | ||
1262 | vec_oprnds2 = VEC_alloc (tree, heap, 1); | ||
1263 | @@ -3202,7 +3222,8 @@ | ||
1264 | { | ||
1265 | /* Handle uses. */ | ||
1266 | if (slp_node) | ||
1267 | - vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); | ||
1268 | + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, | ||
1269 | + slp_node, -1); | ||
1270 | else | ||
1271 | { | ||
1272 | VEC_free (tree, heap, vec_oprnds0); | ||
1273 | @@ -3548,12 +3569,12 @@ | ||
1274 | for (k = 0; k < slp_node->vec_stmts_size - 1; k++) | ||
1275 | VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); | ||
1276 | |||
1277 | - vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, | ||
1278 | - -1); | ||
1279 | + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, | ||
1280 | + slp_node, -1); | ||
1281 | } | ||
1282 | else | ||
1283 | - vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, | ||
1284 | - &vec_oprnds1, -1); | ||
1285 | + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, | ||
1286 | + &vec_oprnds1, slp_node, -1); | ||
1287 | } | ||
1288 | else | ||
1289 | { | ||
1290 | @@ -3796,6 +3817,7 @@ | ||
1291 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | ||
1292 | first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); | ||
1293 | first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); | ||
1294 | + op = gimple_assign_rhs1 (first_stmt); | ||
1295 | } | ||
1296 | else | ||
1297 | /* VEC_NUM is the number of vect stmts to be created for this | ||
1298 | @@ -3878,8 +3900,8 @@ | ||
1299 | if (slp) | ||
1300 | { | ||
1301 | /* Get vectorized arguments for SLP_NODE. */ | ||
1302 | - vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, | ||
1303 | - NULL, -1); | ||
1304 | + vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, | ||
1305 | + NULL, slp_node, -1); | ||
1306 | |||
1307 | vec_oprnd = VEC_index (tree, vec_oprnds, 0); | ||
1308 | } | ||
1309 | @@ -5040,7 +5062,7 @@ | ||
1310 | In basic blocks we only analyze statements that are a part of some SLP | ||
1311 | instance, therefore, all the statements are relevant. | ||
1312 | |||
1313 | - Pattern statement need to be analyzed instead of the original statement | ||
1314 | + Pattern statement needs to be analyzed instead of the original statement | ||
1315 | if the original statement is not relevant. Otherwise, we analyze both | ||
1316 | statements. */ | ||
1317 | |||
1318 | |||
1319 | === modified file 'gcc/tree-vectorizer.h' | ||
1320 | --- old/gcc/tree-vectorizer.h 2011-10-23 13:33:07 +0000 | ||
1321 | +++ new/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000 | ||
1322 | @@ -73,15 +73,15 @@ | ||
1323 | /************************************************************************ | ||
1324 | SLP | ||
1325 | ************************************************************************/ | ||
1326 | +typedef void *slp_void_p; | ||
1327 | +DEF_VEC_P (slp_void_p); | ||
1328 | +DEF_VEC_ALLOC_P (slp_void_p, heap); | ||
1329 | |||
1330 | -/* A computation tree of an SLP instance. Each node corresponds to a group of | ||
1331 | +/* A computation tree of an SLP instance. Each node corresponds to a group of | ||
1332 | stmts to be packed in a SIMD stmt. */ | ||
1333 | typedef struct _slp_tree { | ||
1334 | - /* Only binary and unary operations are supported. LEFT child corresponds to | ||
1335 | - the first operand and RIGHT child to the second if the operation is | ||
1336 | - binary. */ | ||
1337 | - struct _slp_tree *left; | ||
1338 | - struct _slp_tree *right; | ||
1339 | + /* Nodes that contain def-stmts of this node statements operands. */ | ||
1340 | + VEC (slp_void_p, heap) *children; | ||
1341 | /* A group of scalar stmts to be vectorized together. */ | ||
1342 | VEC (gimple, heap) *stmts; | ||
1343 | /* Vectorized stmt/s. */ | ||
1344 | @@ -146,14 +146,32 @@ | ||
1345 | #define SLP_INSTANCE_LOADS(S) (S)->loads | ||
1346 | #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load | ||
1347 | |||
1348 | -#define SLP_TREE_LEFT(S) (S)->left | ||
1349 | -#define SLP_TREE_RIGHT(S) (S)->right | ||
1350 | +#define SLP_TREE_CHILDREN(S) (S)->children | ||
1351 | #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts | ||
1352 | #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts | ||
1353 | #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size | ||
1354 | #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop | ||
1355 | #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop | ||
1356 | |||
1357 | +/* This structure is used in creation of an SLP tree. Each instance | ||
1358 | + corresponds to the same operand in a group of scalar stmts in an SLP | ||
1359 | + node. */ | ||
1360 | +typedef struct _slp_oprnd_info | ||
1361 | +{ | ||
1362 | + /* Def-stmts for the operands. */ | ||
1363 | + VEC (gimple, heap) *def_stmts; | ||
1364 | + /* Information about the first statement, its vector def-type, type, the | ||
1365 | + operand itself in case it's constant, and an indication if it's a pattern | ||
1366 | + stmt. */ | ||
1367 | + enum vect_def_type first_dt; | ||
1368 | + tree first_def_type; | ||
1369 | + tree first_const_oprnd; | ||
1370 | + bool first_pattern; | ||
1371 | +} *slp_oprnd_info; | ||
1372 | + | ||
1373 | +DEF_VEC_P(slp_oprnd_info); | ||
1374 | +DEF_VEC_ALLOC_P(slp_oprnd_info, heap); | ||
1375 | + | ||
1376 | |||
1377 | typedef struct _vect_peel_info | ||
1378 | { | ||
1379 | @@ -819,6 +837,8 @@ | ||
1380 | unsigned int *, unsigned int *); | ||
1381 | extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); | ||
1382 | extern bool vect_supportable_shift (enum tree_code, tree); | ||
1383 | +extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, | ||
1384 | + VEC (tree, heap) **, slp_tree, int); | ||
1385 | |||
1386 | /* In tree-vect-data-refs.c. */ | ||
1387 | extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); | ||
1388 | @@ -885,8 +905,9 @@ | ||
1389 | extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); | ||
1390 | extern void vect_make_slp_decision (loop_vec_info); | ||
1391 | extern void vect_detect_hybrid_slp (loop_vec_info); | ||
1392 | -extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, | ||
1393 | - VEC (tree,heap) **, int); | ||
1394 | +extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, | ||
1395 | + VEC (slp_void_p, heap) **, int); | ||
1396 | + | ||
1397 | extern LOC find_bb_location (basic_block); | ||
1398 | extern bb_vec_info vect_slp_analyze_bb (basic_block); | ||
1399 | extern void vect_slp_transform_bb (basic_block); | ||
1400 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch new file mode 100644 index 000000000..d72446919 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106841.patch | |||
@@ -0,0 +1,515 @@ | |||
1 | 2011-11-21 Michael Hope <michael.hope@linaro.org> | ||
2 | |||
3 | Backport from mainline r180131: | ||
4 | |||
5 | 2011-10-18 Julian Brown <julian@codesourcery.com> | ||
6 | |||
7 | gcc/ | ||
8 | * config/arm/arm.c (arm_block_move_unaligned_straight) | ||
9 | (arm_adjust_block_mem, arm_block_move_unaligned_loop) | ||
10 | (arm_movmemqi_unaligned): New. | ||
11 | (arm_gen_movmemqi): Support unaligned block copies. | ||
12 | |||
13 | gcc/testsuite/ | ||
14 | * lib/target-supports.exp (check_effective_target_arm_unaligned): New. | ||
15 | * gcc.target/arm/unaligned-memcpy-1.c: New. | ||
16 | * gcc.target/arm/unaligned-memcpy-2.c: New. | ||
17 | * gcc.target/arm/unaligned-memcpy-3.c: New. | ||
18 | * gcc.target/arm/unaligned-memcpy-4.c: New. | ||
19 | |||
20 | 2011-09-15 James Greenhalgh <james.greenhalgh@arm.com> | ||
21 | |||
22 | gcc/ | ||
23 | * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro. | ||
24 | |||
25 | === modified file 'gcc/config/arm/arm.c' | ||
26 | --- old/gcc/config/arm/arm.c 2011-10-26 11:38:30 +0000 | ||
27 | +++ new/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 | ||
28 | @@ -10803,6 +10803,335 @@ | ||
29 | return true; | ||
30 | } | ||
31 | |||
32 | +/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit | ||
33 | + unaligned copies on processors which support unaligned semantics for those | ||
34 | + instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency | ||
35 | + (using more registers) by doing e.g. load/load/store/store for a factor of 2. | ||
36 | + An interleave factor of 1 (the minimum) will perform no interleaving; the maximum supported factor is 4. | ||
37 | + Load/store multiple are used for aligned addresses where possible. */ | ||
38 | + | ||
39 | +static void | ||
40 | +arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, | ||
41 | + HOST_WIDE_INT length, | ||
42 | + unsigned int interleave_factor) | ||
43 | +{ | ||
44 | + rtx *regs = XALLOCAVEC (rtx, interleave_factor); | ||
45 | + int *regnos = XALLOCAVEC (int, interleave_factor); | ||
46 | + HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; | ||
47 | + HOST_WIDE_INT i, j; | ||
48 | + HOST_WIDE_INT remaining = length, words; | ||
49 | + rtx halfword_tmp = NULL, byte_tmp = NULL; | ||
50 | + rtx dst, src; | ||
51 | + bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; | ||
52 | + bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; | ||
53 | + HOST_WIDE_INT srcoffset, dstoffset; | ||
54 | + HOST_WIDE_INT src_autoinc, dst_autoinc; | ||
55 | + rtx mem, addr; | ||
56 | + | ||
57 | + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); | ||
58 | + | ||
59 | + /* Use hard registers if we have aligned source or destination so we can use | ||
60 | + load/store multiple with contiguous registers. */ | ||
61 | + if (dst_aligned || src_aligned) | ||
62 | + for (i = 0; i < interleave_factor; i++) | ||
63 | + regs[i] = gen_rtx_REG (SImode, i); | ||
64 | + else | ||
65 | + for (i = 0; i < interleave_factor; i++) | ||
66 | + regs[i] = gen_reg_rtx (SImode); | ||
67 | + | ||
68 | + dst = copy_addr_to_reg (XEXP (dstbase, 0)); | ||
69 | + src = copy_addr_to_reg (XEXP (srcbase, 0)); | ||
70 | + | ||
71 | + srcoffset = dstoffset = 0; | ||
72 | + | ||
73 | + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. | ||
74 | + For copying the last bytes we want to subtract this offset again. */ | ||
75 | + src_autoinc = dst_autoinc = 0; | ||
76 | + | ||
77 | + for (i = 0; i < interleave_factor; i++) | ||
78 | + regnos[i] = i; | ||
79 | + | ||
80 | + /* Copy BLOCK_SIZE_BYTES chunks. */ | ||
81 | + | ||
82 | + for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) | ||
83 | + { | ||
84 | + /* Load words. */ | ||
85 | + if (src_aligned && interleave_factor > 1) | ||
86 | + { | ||
87 | + emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, | ||
88 | + TRUE, srcbase, &srcoffset)); | ||
89 | + src_autoinc += UNITS_PER_WORD * interleave_factor; | ||
90 | + } | ||
91 | + else | ||
92 | + { | ||
93 | + for (j = 0; j < interleave_factor; j++) | ||
94 | + { | ||
95 | + addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD | ||
96 | + - src_autoinc); | ||
97 | + mem = adjust_automodify_address (srcbase, SImode, addr, | ||
98 | + srcoffset + j * UNITS_PER_WORD); | ||
99 | + emit_insn (gen_unaligned_loadsi (regs[j], mem)); | ||
100 | + } | ||
101 | + srcoffset += block_size_bytes; | ||
102 | + } | ||
103 | + | ||
104 | + /* Store words. */ | ||
105 | + if (dst_aligned && interleave_factor > 1) | ||
106 | + { | ||
107 | + emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, | ||
108 | + TRUE, dstbase, &dstoffset)); | ||
109 | + dst_autoinc += UNITS_PER_WORD * interleave_factor; | ||
110 | + } | ||
111 | + else | ||
112 | + { | ||
113 | + for (j = 0; j < interleave_factor; j++) | ||
114 | + { | ||
115 | + addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD | ||
116 | + - dst_autoinc); | ||
117 | + mem = adjust_automodify_address (dstbase, SImode, addr, | ||
118 | + dstoffset + j * UNITS_PER_WORD); | ||
119 | + emit_insn (gen_unaligned_storesi (mem, regs[j])); | ||
120 | + } | ||
121 | + dstoffset += block_size_bytes; | ||
122 | + } | ||
123 | + | ||
124 | + remaining -= block_size_bytes; | ||
125 | + } | ||
126 | + | ||
127 | + /* Copy any whole words left (note these aren't interleaved with any | ||
128 | + subsequent halfword/byte load/stores in the interests of simplicity). */ | ||
129 | + | ||
130 | + words = remaining / UNITS_PER_WORD; | ||
131 | + | ||
132 | + gcc_assert (words < interleave_factor); | ||
133 | + | ||
134 | + if (src_aligned && words > 1) | ||
135 | + { | ||
136 | + emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, | ||
137 | + &srcoffset)); | ||
138 | + src_autoinc += UNITS_PER_WORD * words; | ||
139 | + } | ||
140 | + else | ||
141 | + { | ||
142 | + for (j = 0; j < words; j++) | ||
143 | + { | ||
144 | + addr = plus_constant (src, | ||
145 | + srcoffset + j * UNITS_PER_WORD - src_autoinc); | ||
146 | + mem = adjust_automodify_address (srcbase, SImode, addr, | ||
147 | + srcoffset + j * UNITS_PER_WORD); | ||
148 | + emit_insn (gen_unaligned_loadsi (regs[j], mem)); | ||
149 | + } | ||
150 | + srcoffset += words * UNITS_PER_WORD; | ||
151 | + } | ||
152 | + | ||
153 | + if (dst_aligned && words > 1) | ||
154 | + { | ||
155 | + emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, | ||
156 | + &dstoffset)); | ||
157 | + dst_autoinc += words * UNITS_PER_WORD; | ||
158 | + } | ||
159 | + else | ||
160 | + { | ||
161 | + for (j = 0; j < words; j++) | ||
162 | + { | ||
163 | + addr = plus_constant (dst, | ||
164 | + dstoffset + j * UNITS_PER_WORD - dst_autoinc); | ||
165 | + mem = adjust_automodify_address (dstbase, SImode, addr, | ||
166 | + dstoffset + j * UNITS_PER_WORD); | ||
167 | + emit_insn (gen_unaligned_storesi (mem, regs[j])); | ||
168 | + } | ||
169 | + dstoffset += words * UNITS_PER_WORD; | ||
170 | + } | ||
171 | + | ||
172 | + remaining -= words * UNITS_PER_WORD; | ||
173 | + | ||
174 | + gcc_assert (remaining < 4); | ||
175 | + | ||
176 | + /* Copy a halfword if necessary. */ | ||
177 | + | ||
178 | + if (remaining >= 2) | ||
179 | + { | ||
180 | + halfword_tmp = gen_reg_rtx (SImode); | ||
181 | + | ||
182 | + addr = plus_constant (src, srcoffset - src_autoinc); | ||
183 | + mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); | ||
184 | + emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); | ||
185 | + | ||
186 | + /* Either write out immediately, or delay until we've loaded the last | ||
187 | + byte, depending on interleave factor. */ | ||
188 | + if (interleave_factor == 1) | ||
189 | + { | ||
190 | + addr = plus_constant (dst, dstoffset - dst_autoinc); | ||
191 | + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); | ||
192 | + emit_insn (gen_unaligned_storehi (mem, | ||
193 | + gen_lowpart (HImode, halfword_tmp))); | ||
194 | + halfword_tmp = NULL; | ||
195 | + dstoffset += 2; | ||
196 | + } | ||
197 | + | ||
198 | + remaining -= 2; | ||
199 | + srcoffset += 2; | ||
200 | + } | ||
201 | + | ||
202 | + gcc_assert (remaining < 2); | ||
203 | + | ||
204 | + /* Copy last byte. */ | ||
205 | + | ||
206 | + if ((remaining & 1) != 0) | ||
207 | + { | ||
208 | + byte_tmp = gen_reg_rtx (SImode); | ||
209 | + | ||
210 | + addr = plus_constant (src, srcoffset - src_autoinc); | ||
211 | + mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); | ||
212 | + emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); | ||
213 | + | ||
214 | + if (interleave_factor == 1) | ||
215 | + { | ||
216 | + addr = plus_constant (dst, dstoffset - dst_autoinc); | ||
217 | + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); | ||
218 | + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); | ||
219 | + byte_tmp = NULL; | ||
220 | + dstoffset++; | ||
221 | + } | ||
222 | + | ||
223 | + remaining--; | ||
224 | + srcoffset++; | ||
225 | + } | ||
226 | + | ||
227 | + /* Store last halfword if we haven't done so already. */ | ||
228 | + | ||
229 | + if (halfword_tmp) | ||
230 | + { | ||
231 | + addr = plus_constant (dst, dstoffset - dst_autoinc); | ||
232 | + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); | ||
233 | + emit_insn (gen_unaligned_storehi (mem, | ||
234 | + gen_lowpart (HImode, halfword_tmp))); | ||
235 | + dstoffset += 2; | ||
236 | + } | ||
237 | + | ||
238 | + /* Likewise for last byte. */ | ||
239 | + | ||
240 | + if (byte_tmp) | ||
241 | + { | ||
242 | + addr = plus_constant (dst, dstoffset - dst_autoinc); | ||
243 | + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); | ||
244 | + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); | ||
245 | + dstoffset++; | ||
246 | + } | ||
247 | + | ||
248 | + gcc_assert (remaining == 0 && srcoffset == dstoffset); | ||
249 | +} | ||
250 | + | ||
251 | +/* From mips_adjust_block_mem: | ||
252 | + | ||
253 | + Helper function for doing a loop-based block operation on memory | ||
254 | + reference MEM. Each iteration of the loop will operate on LENGTH | ||
255 | + bytes of MEM. | ||
256 | + | ||
257 | + Create a new base register for use within the loop and point it to | ||
258 | + the start of MEM. Create a new memory reference that uses this | ||
259 | + register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ | ||
260 | + | ||
261 | +static void | ||
262 | +arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, | ||
263 | + rtx *loop_mem) | ||
264 | +{ | ||
265 | + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); | ||
266 | + | ||
267 | + /* Although the new mem does not refer to a known location, | ||
268 | + it does keep up to LENGTH bytes of alignment. */ | ||
269 | + *loop_mem = change_address (mem, BLKmode, *loop_reg); | ||
270 | + set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); | ||
271 | +} | ||
272 | + | ||
273 | +/* From mips_block_move_loop: | ||
274 | + | ||
275 | + Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER | ||
276 | + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that | ||
277 | + the memory regions do not overlap. */ | ||
278 | + | ||
279 | +static void | ||
280 | +arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, | ||
281 | + unsigned int interleave_factor, | ||
282 | + HOST_WIDE_INT bytes_per_iter) | ||
283 | +{ | ||
284 | + rtx label, src_reg, dest_reg, final_src, test; | ||
285 | + HOST_WIDE_INT leftover; | ||
286 | + | ||
287 | + leftover = length % bytes_per_iter; | ||
288 | + length -= leftover; | ||
289 | + | ||
290 | + /* Create registers and memory references for use within the loop. */ | ||
291 | + arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); | ||
292 | + arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); | ||
293 | + | ||
294 | + /* Calculate the value that SRC_REG should have after the last iteration of | ||
295 | + the loop. */ | ||
296 | + final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), | ||
297 | + 0, 0, OPTAB_WIDEN); | ||
298 | + | ||
299 | + /* Emit the start of the loop. */ | ||
300 | + label = gen_label_rtx (); | ||
301 | + emit_label (label); | ||
302 | + | ||
303 | + /* Emit the loop body. */ | ||
304 | + arm_block_move_unaligned_straight (dest, src, bytes_per_iter, | ||
305 | + interleave_factor); | ||
306 | + | ||
307 | + /* Move on to the next block. */ | ||
308 | + emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter)); | ||
309 | + emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter)); | ||
310 | + | ||
311 | + /* Emit the loop condition. */ | ||
312 | + test = gen_rtx_NE (VOIDmode, src_reg, final_src); | ||
313 | + emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); | ||
314 | + | ||
315 | + /* Mop up any left-over bytes. */ | ||
316 | + if (leftover) | ||
317 | + arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); | ||
318 | +} | ||
319 | + | ||
320 | +/* Emit a block move when either the source or destination is unaligned (not | ||
321 | + aligned to a four-byte boundary). This may need further tuning depending on | ||
322 | + core type, optimize_size setting, etc. Always returns 1. */ | ||
323 | + | ||
324 | +static int | ||
325 | +arm_movmemqi_unaligned (rtx *operands) | ||
326 | +{ | ||
327 | + HOST_WIDE_INT length = INTVAL (operands[2]); | ||
328 | + | ||
329 | + if (optimize_size) | ||
330 | + { | ||
331 | + bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; | ||
332 | + bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; | ||
333 | + /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit | ||
334 | + size of code if optimizing for size. We'll use ldm/stm if src_aligned | ||
335 | + or dst_aligned though: allow more interleaving in those cases since the | ||
336 | + resulting code can be smaller. */ | ||
337 | + unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; | ||
338 | + HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; | ||
339 | + | ||
340 | + if (length > 12) | ||
341 | + arm_block_move_unaligned_loop (operands[0], operands[1], length, | ||
342 | + interleave_factor, bytes_per_iter); | ||
343 | + else | ||
344 | + arm_block_move_unaligned_straight (operands[0], operands[1], length, | ||
345 | + interleave_factor); | ||
346 | + } | ||
347 | + else | ||
348 | + { | ||
349 | + /* Note that the loop created by arm_block_move_unaligned_loop may be | ||
350 | + subject to loop unrolling, which makes tuning this condition a little | ||
351 | + redundant. */ | ||
352 | + if (length > 32) | ||
353 | + arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); | ||
354 | + else | ||
355 | + arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); | ||
356 | + } | ||
357 | + | ||
358 | + return 1; | ||
359 | +} | ||
360 | + | ||
361 | int | ||
362 | arm_gen_movmemqi (rtx *operands) | ||
363 | { | ||
364 | @@ -10815,8 +11144,13 @@ | ||
365 | |||
366 | if (GET_CODE (operands[2]) != CONST_INT | ||
367 | || GET_CODE (operands[3]) != CONST_INT | ||
368 | - || INTVAL (operands[2]) > 64 | ||
369 | - || INTVAL (operands[3]) & 3) | ||
370 | + || INTVAL (operands[2]) > 64) | ||
371 | + return 0; | ||
372 | + | ||
373 | + if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) | ||
374 | + return arm_movmemqi_unaligned (operands); | ||
375 | + | ||
376 | + if (INTVAL (operands[3]) & 3) | ||
377 | return 0; | ||
378 | |||
379 | dstbase = operands[0]; | ||
380 | |||
381 | === modified file 'gcc/config/arm/arm.h' | ||
382 | --- old/gcc/config/arm/arm.h 2011-10-19 17:01:50 +0000 | ||
383 | +++ new/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 | ||
384 | @@ -47,6 +47,8 @@ | ||
385 | { \ | ||
386 | if (TARGET_DSP_MULTIPLY) \ | ||
387 | builtin_define ("__ARM_FEATURE_DSP"); \ | ||
388 | + if (unaligned_access) \ | ||
389 | + builtin_define ("__ARM_FEATURE_UNALIGNED"); \ | ||
390 | /* Define __arm__ even when in thumb mode, for \ | ||
391 | consistency with armcc. */ \ | ||
392 | builtin_define ("__arm__"); \ | ||
393 | |||
394 | === added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c' | ||
395 | --- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 1970-01-01 00:00:00 +0000 | ||
396 | +++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c 2011-10-19 22:56:19 +0000 | ||
397 | @@ -0,0 +1,19 @@ | ||
398 | +/* { dg-do compile } */ | ||
399 | +/* { dg-require-effective-target arm_unaligned } */ | ||
400 | +/* { dg-options "-O2" } */ | ||
401 | + | ||
402 | +#include <string.h> | ||
403 | + | ||
404 | +void unknown_alignment (char *dest, char *src) | ||
405 | +{ | ||
406 | + memcpy (dest, src, 15); | ||
407 | +} | ||
408 | + | ||
409 | +/* We should see three unaligned word load/store pairs, one unaligned | ||
410 | + ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */ | ||
411 | + | ||
412 | +/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ | ||
413 | +/* { dg-final { scan-assembler-times "ldrh" 1 } } */ | ||
414 | +/* { dg-final { scan-assembler-times "strh" 1 } } */ | ||
415 | +/* { dg-final { scan-assembler-times "ldrb" 1 } } */ | ||
416 | +/* { dg-final { scan-assembler-times "strb" 1 } } */ | ||
417 | |||
418 | === added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c' | ||
419 | --- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 1970-01-01 00:00:00 +0000 | ||
420 | +++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c 2011-10-19 22:56:19 +0000 | ||
421 | @@ -0,0 +1,21 @@ | ||
422 | +/* { dg-do compile } */ | ||
423 | +/* { dg-require-effective-target arm_unaligned } */ | ||
424 | +/* { dg-options "-O2" } */ | ||
425 | + | ||
426 | +#include <string.h> | ||
427 | + | ||
428 | +char dest[16]; | ||
429 | + | ||
430 | +void aligned_dest (char *src) | ||
431 | +{ | ||
432 | + memcpy (dest, src, 15); | ||
433 | +} | ||
434 | + | ||
435 | +/* Expect a multi-word store for the main part of the copy, but subword | ||
436 | + loads/stores for the remainder. */ | ||
437 | + | ||
438 | +/* { dg-final { scan-assembler-times "stmia" 1 } } */ | ||
439 | +/* { dg-final { scan-assembler-times "ldrh" 1 } } */ | ||
440 | +/* { dg-final { scan-assembler-times "strh" 1 } } */ | ||
441 | +/* { dg-final { scan-assembler-times "ldrb" 1 } } */ | ||
442 | +/* { dg-final { scan-assembler-times "strb" 1 } } */ | ||
443 | |||
444 | === added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c' | ||
445 | --- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 1970-01-01 00:00:00 +0000 | ||
446 | +++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c 2011-10-19 22:56:19 +0000 | ||
447 | @@ -0,0 +1,21 @@ | ||
448 | +/* { dg-do compile } */ | ||
449 | +/* { dg-require-effective-target arm_unaligned } */ | ||
450 | +/* { dg-options "-O2" } */ | ||
451 | + | ||
452 | +#include <string.h> | ||
453 | + | ||
454 | +char src[16]; | ||
455 | + | ||
456 | +void aligned_src (char *dest) | ||
457 | +{ | ||
458 | + memcpy (dest, src, 15); | ||
459 | +} | ||
460 | + | ||
461 | +/* Expect a multi-word load for the main part of the copy, but subword | ||
462 | + loads/stores for the remainder. */ | ||
463 | + | ||
464 | +/* { dg-final { scan-assembler-times "ldmia" 1 } } */ | ||
465 | +/* { dg-final { scan-assembler-times "ldrh" 1 } } */ | ||
466 | +/* { dg-final { scan-assembler-times "strh" 1 } } */ | ||
467 | +/* { dg-final { scan-assembler-times "ldrb" 1 } } */ | ||
468 | +/* { dg-final { scan-assembler-times "strb" 1 } } */ | ||
469 | |||
470 | === added file 'gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c' | ||
471 | --- old/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 1970-01-01 00:00:00 +0000 | ||
472 | +++ new/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c 2011-10-19 22:56:19 +0000 | ||
473 | @@ -0,0 +1,18 @@ | ||
474 | +/* { dg-do compile } */ | ||
475 | +/* { dg-require-effective-target arm_unaligned } */ | ||
476 | +/* { dg-options "-O2" } */ | ||
477 | + | ||
478 | +#include <string.h> | ||
479 | + | ||
480 | +char src[16]; | ||
481 | +char dest[16]; | ||
482 | + | ||
483 | +void aligned_both (void) | ||
484 | +{ | ||
485 | + memcpy (dest, src, 15); | ||
486 | +} | ||
487 | + | ||
488 | +/* We know both src and dest to be aligned: expect multiword loads/stores. */ | ||
489 | + | ||
490 | +/* { dg-final { scan-assembler-times "ldmia" 1 } } */ | ||
491 | +/* { dg-final { scan-assembler-times "stmia" 1 } } */ | ||
492 | |||
493 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
494 | --- old/gcc/testsuite/lib/target-supports.exp 2011-10-23 13:33:07 +0000 | ||
495 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000 | ||
496 | @@ -1894,6 +1894,18 @@ | ||
497 | }] | ||
498 | } | ||
499 | |||
500 | +# Return 1 if this is an ARM target that supports unaligned word/halfword | ||
501 | +# load/store instructions. | ||
502 | + | ||
503 | +proc check_effective_target_arm_unaligned { } { | ||
504 | + return [check_no_compiler_messages arm_unaligned assembly { | ||
505 | + #ifndef __ARM_FEATURE_UNALIGNED | ||
506 | + #error no unaligned support | ||
507 | + #endif | ||
508 | + int i; | ||
509 | + }] | ||
510 | +} | ||
511 | + | ||
512 | # Add the options needed for NEON. We need either -mfloat-abi=softfp | ||
513 | # or -mfloat-abi=hard, but if one is already specified by the | ||
514 | # multilib, use it. Similarly, if a -mfpu option already enables | ||
515 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc index 73fe5c8b2..fcdccf5d2 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc | |||
@@ -53,4 +53,26 @@ file://linaro/gcc-4.6-linaro-r106805.patch \ | |||
53 | file://linaro/gcc-4.6-linaro-r106806.patch \ | 53 | file://linaro/gcc-4.6-linaro-r106806.patch \ |
54 | file://linaro/gcc-4.6-linaro-r106807.patch \ | 54 | file://linaro/gcc-4.6-linaro-r106807.patch \ |
55 | file://linaro/gcc-4.6-linaro-r106811.patch \ | 55 | file://linaro/gcc-4.6-linaro-r106811.patch \ |
56 | file://linaro/gcc-4.6-linaro-r106814.patch \ | ||
57 | file://linaro/gcc-4.6-linaro-r106815.patch \ | ||
58 | file://linaro/gcc-4.6-linaro-r106816.patch \ | ||
59 | file://linaro/gcc-4.6-linaro-r106817.patch \ | ||
60 | file://linaro/gcc-4.6-linaro-r106818.patch \ | ||
61 | file://linaro/gcc-4.6-linaro-r106819.patch \ | ||
62 | file://linaro/gcc-4.6-linaro-r106820.patch \ | ||
63 | file://linaro/gcc-4.6-linaro-r106821.patch \ | ||
64 | file://linaro/gcc-4.6-linaro-r106825.patch \ | ||
65 | file://linaro/gcc-4.6-linaro-r106826.patch \ | ||
66 | file://linaro/gcc-4.6-linaro-r106827.patch \ | ||
67 | file://linaro/gcc-4.6-linaro-r106828.patch \ | ||
68 | file://linaro/gcc-4.6-linaro-r106829.patch \ | ||
69 | file://linaro/gcc-4.6-linaro-r106830.patch \ | ||
70 | file://linaro/gcc-4.6-linaro-r106831.patch \ | ||
71 | file://linaro/gcc-4.6-linaro-r106832.patch \ | ||
72 | file://linaro/gcc-4.6-linaro-r106833.patch \ | ||
73 | file://linaro/gcc-4.6-linaro-r106834.patch \ | ||
74 | file://linaro/gcc-4.6-linaro-r106836.patch \ | ||
75 | file://linaro/gcc-4.6-linaro-r106839.patch \ | ||
76 | file://linaro/gcc-4.6-linaro-r106840.patch \ | ||
77 | file://linaro/gcc-4.6-linaro-r106841.patch \ | ||
56 | " | 78 | " |
diff --git a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc index 0faf45e93..c12913d92 100644 --- a/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc +++ b/meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc | |||
@@ -1,4 +1,4 @@ | |||
1 | # this will prepend this layer to FILESPATH | 1 | # this will prepend this layer to FILESPATH |
2 | FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" | 2 | FILESEXTRAPATHS := "${THISDIR}/gcc-4.6" |
3 | PRINC = "2" | 3 | PRINC = "3" |
4 | ARM_INSTRUCTION_SET = "arm" | 4 | ARM_INSTRUCTION_SET = "arm" |