diff options
author | Khem Raj <raj.khem@gmail.com> | 2011-06-12 20:56:57 -0700 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2011-06-14 15:29:15 +0100 |
commit | ec9b6d061b8495b9316ae90e5a71a499ef2873ee (patch) | |
tree | dc922c9ddd98f937eb2095b07f6c3b5043e3a491 /meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | |
parent | fbc60cc5ba2284bfcaea8c82001a49b7795cc2a2 (diff) | |
download | poky-ec9b6d061b8495b9316ae90e5a71a499ef2873ee.tar.gz |
gcc-4.6.0: Bring in patches from FSF 4.6 branch
This brings in new patches from the 4.6 release branch and
updates the comment section of the existing branch to not
contain patch numbers.
Tested build on qemu for arm, ppc, mips, x86 and x86_64.
(From OE-Core rev: 3968f33b6542cf20cf63cf49bfbc033bd2486295)
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch new file mode 100644 index 0000000000..199eb6130b --- /dev/null +++ b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | |||
@@ -0,0 +1,367 @@ | |||
1 | From 57c28e9c809ad3f27fe0743eabb030e8ee8b2af4 Mon Sep 17 00:00:00 2001 | ||
2 | From: irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | ||
3 | Date: Sat, 4 Jun 2011 09:20:00 +0000 | ||
4 | Subject: [PATCH] PR tree-optimization/49038 | ||
5 | * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): | ||
6 | Ensure at least one epilogue iteration if required by data | ||
7 | accesses with gaps. | ||
8 | * tree-vectorizer.h (struct _loop_vec_info): Add new field | ||
9 | to mark loops that require peeling for gaps. | ||
10 | * tree-vect-loop.c (new_loop_vec_info): Initialize new field. | ||
11 | (vect_get_known_peeling_cost): Take peeling for gaps into | ||
12 | account. | ||
13 | (vect_transform_loop): Generate epilogue if required by data | ||
14 | access with gaps. | ||
15 | * tree-vect-data-refs.c (vect_analyze_group_access): Mark the | ||
16 | loop as requiring an epilogue if there are gaps in the end of | ||
17 | the strided group. | ||
18 | |||
19 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@174635 138bc75d-0d04-0410-961f-82ee72b054a4 | ||
20 | |||
21 | index 13b7118..8d51590 100644 | ||
22 | new file mode 100644 | ||
23 | index 0000000..91c214f | ||
24 | --- /dev/null | ||
25 | +++ b/gcc/testsuite/gcc.dg/vect/pr49038.c | ||
26 | @@ -0,0 +1,42 @@ | ||
27 | +#include <sys/mman.h> | ||
28 | +#include <stdio.h> | ||
29 | + | ||
30 | +#define COUNT 320 | ||
31 | +#define MMAP_SIZE 0x10000 | ||
32 | +#define ADDRESS 0x1122000000 | ||
33 | +#define TYPE unsigned short | ||
34 | + | ||
35 | +#ifndef MAP_ANONYMOUS | ||
36 | +#define MAP_ANONYMOUS MAP_ANON | ||
37 | +#endif | ||
38 | + | ||
39 | +void __attribute__((noinline)) | ||
40 | +foo (TYPE *__restrict a, TYPE *__restrict b) | ||
41 | +{ | ||
42 | + int n; | ||
43 | + | ||
44 | + for (n = 0; n < COUNT; n++) | ||
45 | + a[n] = b[n * 2]; | ||
46 | +} | ||
47 | + | ||
48 | +int | ||
49 | +main (void) | ||
50 | +{ | ||
51 | + void *x; | ||
52 | + size_t b_offset; | ||
53 | + | ||
54 | + x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE, | ||
55 | + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
56 | + if (x == MAP_FAILED) | ||
57 | + { | ||
58 | + perror ("mmap"); | ||
59 | + return 1; | ||
60 | + } | ||
61 | + | ||
62 | + b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE); | ||
63 | + foo ((unsigned short *) x, | ||
64 | + (unsigned short *) ((char *) x + b_offset)); | ||
65 | + return 0; | ||
66 | +} | ||
67 | + | ||
68 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
69 | diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c | ||
70 | new file mode 100644 | ||
71 | index 0000000..ccbc366 | ||
72 | --- /dev/null | ||
73 | +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c | ||
74 | @@ -0,0 +1,116 @@ | ||
75 | +/* { dg-require-effective-target vect_int } */ | ||
76 | + | ||
77 | +#include <stdarg.h> | ||
78 | +#include <stdio.h> | ||
79 | +#include "tree-vect.h" | ||
80 | + | ||
81 | +#define N 160 | ||
82 | + | ||
83 | +typedef struct { | ||
84 | + unsigned char a; | ||
85 | + unsigned char b; | ||
86 | + unsigned char c; | ||
87 | + unsigned char d; | ||
88 | + unsigned char e; | ||
89 | + unsigned char f; | ||
90 | + unsigned char g; | ||
91 | + unsigned char h; | ||
92 | +} s; | ||
93 | + | ||
94 | +__attribute__ ((noinline)) int | ||
95 | +main1 (s *arr, int n) | ||
96 | +{ | ||
97 | + int i; | ||
98 | + s *ptr = arr; | ||
99 | + s res[N]; | ||
100 | + unsigned char x; | ||
101 | + | ||
102 | + for (i = 0; i < N; i++) | ||
103 | + { | ||
104 | + res[i].a = 0; | ||
105 | + res[i].b = 0; | ||
106 | + res[i].c = 0; | ||
107 | + res[i].d = 0; | ||
108 | + res[i].e = 0; | ||
109 | + res[i].f = 0; | ||
110 | + res[i].g = 0; | ||
111 | + res[i].h = 0; | ||
112 | + __asm__ volatile (""); | ||
113 | + } | ||
114 | + | ||
115 | + /* Check peeling for gaps for unknown loop bound. */ | ||
116 | + for (i = 0; i < n; i++) | ||
117 | + { | ||
118 | + res[i].c = ptr->b + ptr->c; | ||
119 | + x = ptr->c + ptr->f; | ||
120 | + res[i].a = x + ptr->b; | ||
121 | + res[i].d = ptr->b + ptr->c; | ||
122 | + res[i].b = ptr->c; | ||
123 | + res[i].f = ptr->f + ptr->e; | ||
124 | + res[i].e = ptr->b + ptr->e; | ||
125 | + res[i].h = ptr->c; | ||
126 | + res[i].g = ptr->b + ptr->c; | ||
127 | + ptr++; | ||
128 | + } | ||
129 | + | ||
130 | + /* check results: */ | ||
131 | + for (i = 0; i < n; i++) | ||
132 | + { | ||
133 | + if (res[i].c != arr[i].b + arr[i].c | ||
134 | + || res[i].a != arr[i].c + arr[i].f + arr[i].b | ||
135 | + || res[i].d != arr[i].b + arr[i].c | ||
136 | + || res[i].b != arr[i].c | ||
137 | + || res[i].f != arr[i].f + arr[i].e | ||
138 | + || res[i].e != arr[i].b + arr[i].e | ||
139 | + || res[i].h != arr[i].c | ||
140 | + || res[i].g != arr[i].b + arr[i].c) | ||
141 | + abort (); | ||
142 | + } | ||
143 | + | ||
144 | + /* Check also that we don't do more iterations than needed. */ | ||
145 | + for (i = n; i < N; i++) | ||
146 | + { | ||
147 | + if (res[i].c == arr[i].b + arr[i].c | ||
148 | + || res[i].a == arr[i].c + arr[i].f + arr[i].b | ||
149 | + || res[i].d == arr[i].b + arr[i].c | ||
150 | + || res[i].b == arr[i].c | ||
151 | + || res[i].f == arr[i].f + arr[i].e | ||
152 | + || res[i].e == arr[i].b + arr[i].e | ||
153 | + || res[i].h == arr[i].c | ||
154 | + || res[i].g == arr[i].b + arr[i].c) | ||
155 | + abort (); | ||
156 | + } | ||
157 | + | ||
158 | + return 0; | ||
159 | +} | ||
160 | + | ||
161 | + | ||
162 | +int main (void) | ||
163 | +{ | ||
164 | + int i; | ||
165 | + s arr[N]; | ||
166 | + | ||
167 | + check_vect (); | ||
168 | + | ||
169 | + for (i = 0; i < N; i++) | ||
170 | + { | ||
171 | + arr[i].a = 5; | ||
172 | + arr[i].b = 6; | ||
173 | + arr[i].c = 17; | ||
174 | + arr[i].d = 3; | ||
175 | + arr[i].e = 16; | ||
176 | + arr[i].f = 16; | ||
177 | + arr[i].g = 3; | ||
178 | + arr[i].h = 56; | ||
179 | + if (arr[i].a == 178) | ||
180 | + abort(); | ||
181 | + } | ||
182 | + | ||
183 | + main1 (arr, N-2); | ||
184 | + | ||
185 | + return 0; | ||
186 | +} | ||
187 | + | ||
188 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ | ||
189 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
190 | + | ||
191 | diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c | ||
192 | index db5098c..1c60388 100644 | ||
193 | --- a/gcc/tree-vect-data-refs.c | ||
194 | +++ b/gcc/tree-vect-data-refs.c | ||
195 | @@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr) | ||
196 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
197 | bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
198 | HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); | ||
199 | - HOST_WIDE_INT stride; | ||
200 | + HOST_WIDE_INT stride, last_accessed_element = 1; | ||
201 | bool slp_impossible = false; | ||
202 | |||
203 | /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the | ||
204 | @@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *dr) | ||
205 | fprintf (vect_dump, " step "); | ||
206 | print_generic_expr (vect_dump, step, TDF_SLIM); | ||
207 | } | ||
208 | + | ||
209 | + if (loop_vinfo) | ||
210 | + { | ||
211 | + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; | ||
212 | + | ||
213 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
214 | + fprintf (vect_dump, "Data access with gaps requires scalar " | ||
215 | + "epilogue loop"); | ||
216 | + } | ||
217 | + | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | @@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *dr) | ||
222 | next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | ||
223 | continue; | ||
224 | } | ||
225 | + | ||
226 | prev = next; | ||
227 | |||
228 | /* Check that all the accesses have the same STEP. */ | ||
229 | @@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *dr) | ||
230 | gaps += diff - 1; | ||
231 | } | ||
232 | |||
233 | + last_accessed_element += diff; | ||
234 | + | ||
235 | /* Store the gap from the previous member of the group. If there is no | ||
236 | gap in the access, DR_GROUP_GAP is always 1. */ | ||
237 | DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; | ||
238 | @@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *dr) | ||
239 | VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo), | ||
240 | stmt); | ||
241 | } | ||
242 | + | ||
243 | + /* There is a gap in the end of the group. */ | ||
244 | + if (stride - last_accessed_element > 0 && loop_vinfo) | ||
245 | + { | ||
246 | + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; | ||
247 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
248 | + fprintf (vect_dump, "Data access with gaps requires scalar " | ||
249 | + "epilogue loop"); | ||
250 | + } | ||
251 | } | ||
252 | |||
253 | return true; | ||
254 | diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c | ||
255 | index b691cd2..da2c9b7 100644 | ||
256 | --- a/gcc/tree-vect-loop-manip.c | ||
257 | +++ b/gcc/tree-vect-loop-manip.c | ||
258 | @@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, | ||
259 | edge pe; | ||
260 | basic_block new_bb; | ||
261 | gimple_seq stmts; | ||
262 | - tree ni_name; | ||
263 | + tree ni_name, ni_minus_gap_name; | ||
264 | tree var; | ||
265 | tree ratio_name; | ||
266 | tree ratio_mult_vf_name; | ||
267 | @@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, | ||
268 | ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); | ||
269 | log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); | ||
270 | |||
271 | + /* If epilogue loop is required because of data accesses with gaps, we | ||
272 | + subtract one iteration from the total number of iterations here for | ||
273 | + correct calculation of RATIO. */ | ||
274 | + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | ||
275 | + { | ||
276 | + ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), | ||
277 | + ni_name, | ||
278 | + build_one_cst (TREE_TYPE (ni_name))); | ||
279 | + if (!is_gimple_val (ni_minus_gap_name)) | ||
280 | + { | ||
281 | + var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); | ||
282 | + add_referenced_var (var); | ||
283 | + | ||
284 | + stmts = NULL; | ||
285 | + ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, | ||
286 | + true, var); | ||
287 | + if (cond_expr_stmt_list) | ||
288 | + gimple_seq_add_seq (&cond_expr_stmt_list, stmts); | ||
289 | + else | ||
290 | + { | ||
291 | + pe = loop_preheader_edge (loop); | ||
292 | + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | ||
293 | + gcc_assert (!new_bb); | ||
294 | + } | ||
295 | + } | ||
296 | + } | ||
297 | + else | ||
298 | + ni_minus_gap_name = ni_name; | ||
299 | + | ||
300 | /* Create: ratio = ni >> log2(vf) */ | ||
301 | |||
302 | - ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf); | ||
303 | + ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), | ||
304 | + ni_minus_gap_name, log_vf); | ||
305 | if (!is_gimple_val (ratio_name)) | ||
306 | { | ||
307 | var = create_tmp_var (TREE_TYPE (ni), "bnd"); | ||
308 | diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c | ||
309 | index 7692eb8..44c1ecd 100644 | ||
310 | --- a/gcc/tree-vect-loop.c | ||
311 | +++ b/gcc/tree-vect-loop.c | ||
312 | @@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop) | ||
313 | LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10); | ||
314 | LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; | ||
315 | LOOP_VINFO_PEELING_HTAB (res) = NULL; | ||
316 | + LOOP_VINFO_PEELING_FOR_GAPS (res) = false; | ||
317 | |||
318 | return res; | ||
319 | } | ||
320 | @@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, | ||
321 | peel_iters_prologue = niters < peel_iters_prologue ? | ||
322 | niters : peel_iters_prologue; | ||
323 | *peel_iters_epilogue = (niters - peel_iters_prologue) % vf; | ||
324 | + /* If we need to peel for gaps, but no peeling is required, we have to | ||
325 | + peel VF iterations. */ | ||
326 | + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) | ||
327 | + *peel_iters_epilogue = vf; | ||
328 | } | ||
329 | |||
330 | return (peel_iters_prologue * scalar_single_iter_cost) | ||
331 | @@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) | ||
332 | do_peeling_for_loop_bound | ||
333 | = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | ||
334 | || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | ||
335 | - && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)); | ||
336 | + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) | ||
337 | + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); | ||
338 | |||
339 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) | ||
340 | || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) | ||
341 | diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h | ||
342 | index ee8410c..c1ac81c 100644 | ||
343 | --- a/gcc/tree-vectorizer.h | ||
344 | +++ b/gcc/tree-vectorizer.h | ||
345 | @@ -251,6 +251,11 @@ typedef struct _loop_vec_info { | ||
346 | /* Hash table used to choose the best peeling option. */ | ||
347 | htab_t peeling_htab; | ||
348 | |||
349 | + /* When we have strided data accesses with gaps, we may introduce invalid | ||
350 | + memory accesses. We peel the last iteration of the loop to prevent | ||
351 | + this. */ | ||
352 | + bool peeling_for_gaps; | ||
353 | + | ||
354 | } *loop_vec_info; | ||
355 | |||
356 | /* Access Functions. */ | ||
357 | @@ -278,6 +283,7 @@ typedef struct _loop_vec_info { | ||
358 | #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor | ||
359 | #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions | ||
360 | #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab | ||
361 | +#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps | ||
362 | |||
363 | #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ | ||
364 | VEC_length (gimple, (L)->may_misalign_stmts) > 0 | ||
365 | -- | ||
366 | 1.7.0.4 | ||
367 | |||