diff options
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | 367 |
1 file changed, 367 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch new file mode 100644 index 0000000000..199eb6130b --- /dev/null +++ b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | |||
@@ -0,0 +1,367 @@ | |||
1 | From 57c28e9c809ad3f27fe0743eabb030e8ee8b2af4 Mon Sep 17 00:00:00 2001 | ||
2 | From: irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | ||
3 | Date: Sat, 4 Jun 2011 09:20:00 +0000 | ||
4 | Subject: [PATCH] PR tree-optimization/49038 | ||
5 | * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): | ||
6 | Ensure at least one epilogue iteration if required by data | ||
7 | accesses with gaps. | ||
8 | * tree-vectorizer.h (struct _loop_vec_info): Add new field | ||
9 | to mark loops that require peeling for gaps. | ||
10 | * tree-vect-loop.c (new_loop_vec_info): Initialize new field. | ||
11 | (vect_get_known_peeling_cost): Take peeling for gaps into | ||
12 | account. | ||
13 | (vect_transform_loop): Generate epilogue if required by data | ||
14 | access with gaps. | ||
15 | * tree-vect-data-refs.c (vect_analyze_group_access): Mark the | ||
16 | loop as requiring an epilogue if there are gaps at the end of | ||
17 | the strided group. | ||
18 | |||
19 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@174635 138bc75d-0d04-0410-961f-82ee72b054a4 | ||
20 | |||
21 | index 13b7118..8d51590 100644 | ||
22 | new file mode 100644 | ||
23 | index 0000000..91c214f | ||
24 | --- /dev/null | ||
25 | +++ b/gcc/testsuite/gcc.dg/vect/pr49038.c | ||
26 | @@ -0,0 +1,42 @@ | ||
27 | +#include <sys/mman.h> | ||
28 | +#include <stdio.h> | ||
29 | + | ||
30 | +#define COUNT 320 | ||
31 | +#define MMAP_SIZE 0x10000 | ||
32 | +#define ADDRESS 0x1122000000 | ||
33 | +#define TYPE unsigned short | ||
34 | + | ||
35 | +#ifndef MAP_ANONYMOUS | ||
36 | +#define MAP_ANONYMOUS MAP_ANON | ||
37 | +#endif | ||
38 | + | ||
39 | +void __attribute__((noinline)) | ||
40 | +foo (TYPE *__restrict a, TYPE *__restrict b) | ||
41 | +{ | ||
42 | + int n; | ||
43 | + | ||
44 | + for (n = 0; n < COUNT; n++) | ||
45 | + a[n] = b[n * 2]; | ||
46 | +} | ||
47 | + | ||
48 | +int | ||
49 | +main (void) | ||
50 | +{ | ||
51 | + void *x; | ||
52 | + size_t b_offset; | ||
53 | + | ||
54 | + x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE, | ||
55 | + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
56 | + if (x == MAP_FAILED) | ||
57 | + { | ||
58 | + perror ("mmap"); | ||
59 | + return 1; | ||
60 | + } | ||
61 | + | ||
62 | + b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE); | ||
63 | + foo ((unsigned short *) x, | ||
64 | + (unsigned short *) ((char *) x + b_offset)); | ||
65 | + return 0; | ||
66 | +} | ||
67 | + | ||
68 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
69 | diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c | ||
70 | new file mode 100644 | ||
71 | index 0000000..ccbc366 | ||
72 | --- /dev/null | ||
73 | +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c | ||
74 | @@ -0,0 +1,116 @@ | ||
75 | +/* { dg-require-effective-target vect_int } */ | ||
76 | + | ||
77 | +#include <stdarg.h> | ||
78 | +#include <stdio.h> | ||
79 | +#include "tree-vect.h" | ||
80 | + | ||
81 | +#define N 160 | ||
82 | + | ||
83 | +typedef struct { | ||
84 | + unsigned char a; | ||
85 | + unsigned char b; | ||
86 | + unsigned char c; | ||
87 | + unsigned char d; | ||
88 | + unsigned char e; | ||
89 | + unsigned char f; | ||
90 | + unsigned char g; | ||
91 | + unsigned char h; | ||
92 | +} s; | ||
93 | + | ||
94 | +__attribute__ ((noinline)) int | ||
95 | +main1 (s *arr, int n) | ||
96 | +{ | ||
97 | + int i; | ||
98 | + s *ptr = arr; | ||
99 | + s res[N]; | ||
100 | + unsigned char x; | ||
101 | + | ||
102 | + for (i = 0; i < N; i++) | ||
103 | + { | ||
104 | + res[i].a = 0; | ||
105 | + res[i].b = 0; | ||
106 | + res[i].c = 0; | ||
107 | + res[i].d = 0; | ||
108 | + res[i].e = 0; | ||
109 | + res[i].f = 0; | ||
110 | + res[i].g = 0; | ||
111 | + res[i].h = 0; | ||
112 | + __asm__ volatile (""); | ||
113 | + } | ||
114 | + | ||
115 | + /* Check peeling for gaps for unknown loop bound. */ | ||
116 | + for (i = 0; i < n; i++) | ||
117 | + { | ||
118 | + res[i].c = ptr->b + ptr->c; | ||
119 | + x = ptr->c + ptr->f; | ||
120 | + res[i].a = x + ptr->b; | ||
121 | + res[i].d = ptr->b + ptr->c; | ||
122 | + res[i].b = ptr->c; | ||
123 | + res[i].f = ptr->f + ptr->e; | ||
124 | + res[i].e = ptr->b + ptr->e; | ||
125 | + res[i].h = ptr->c; | ||
126 | + res[i].g = ptr->b + ptr->c; | ||
127 | + ptr++; | ||
128 | + } | ||
129 | + | ||
130 | + /* check results: */ | ||
131 | + for (i = 0; i < n; i++) | ||
132 | + { | ||
133 | + if (res[i].c != arr[i].b + arr[i].c | ||
134 | + || res[i].a != arr[i].c + arr[i].f + arr[i].b | ||
135 | + || res[i].d != arr[i].b + arr[i].c | ||
136 | + || res[i].b != arr[i].c | ||
137 | + || res[i].f != arr[i].f + arr[i].e | ||
138 | + || res[i].e != arr[i].b + arr[i].e | ||
139 | + || res[i].h != arr[i].c | ||
140 | + || res[i].g != arr[i].b + arr[i].c) | ||
141 | + abort (); | ||
142 | + } | ||
143 | + | ||
144 | + /* Check also that we don't do more iterations than needed. */ | ||
145 | + for (i = n; i < N; i++) | ||
146 | + { | ||
147 | + if (res[i].c == arr[i].b + arr[i].c | ||
148 | + || res[i].a == arr[i].c + arr[i].f + arr[i].b | ||
149 | + || res[i].d == arr[i].b + arr[i].c | ||
150 | + || res[i].b == arr[i].c | ||
151 | + || res[i].f == arr[i].f + arr[i].e | ||
152 | + || res[i].e == arr[i].b + arr[i].e | ||
153 | + || res[i].h == arr[i].c | ||
154 | + || res[i].g == arr[i].b + arr[i].c) | ||
155 | + abort (); | ||
156 | + } | ||
157 | + | ||
158 | + return 0; | ||
159 | +} | ||
160 | + | ||
161 | + | ||
162 | +int main (void) | ||
163 | +{ | ||
164 | + int i; | ||
165 | + s arr[N]; | ||
166 | + | ||
167 | + check_vect (); | ||
168 | + | ||
169 | + for (i = 0; i < N; i++) | ||
170 | + { | ||
171 | + arr[i].a = 5; | ||
172 | + arr[i].b = 6; | ||
173 | + arr[i].c = 17; | ||
174 | + arr[i].d = 3; | ||
175 | + arr[i].e = 16; | ||
176 | + arr[i].f = 16; | ||
177 | + arr[i].g = 3; | ||
178 | + arr[i].h = 56; | ||
179 | + if (arr[i].a == 178) | ||
180 | + abort(); | ||
181 | + } | ||
182 | + | ||
183 | + main1 (arr, N-2); | ||
184 | + | ||
185 | + return 0; | ||
186 | +} | ||
187 | + | ||
188 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ | ||
189 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
190 | + | ||
191 | diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c | ||
192 | index db5098c..1c60388 100644 | ||
193 | --- a/gcc/tree-vect-data-refs.c | ||
194 | +++ b/gcc/tree-vect-data-refs.c | ||
195 | @@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr) | ||
196 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
197 | bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
198 | HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); | ||
199 | - HOST_WIDE_INT stride; | ||
200 | + HOST_WIDE_INT stride, last_accessed_element = 1; | ||
201 | bool slp_impossible = false; | ||
202 | |||
203 | /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the | ||
204 | @@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *dr) | ||
205 | fprintf (vect_dump, " step "); | ||
206 | print_generic_expr (vect_dump, step, TDF_SLIM); | ||
207 | } | ||
208 | + | ||
209 | + if (loop_vinfo) | ||
210 | + { | ||
211 | + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; | ||
212 | + | ||
213 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
214 | + fprintf (vect_dump, "Data access with gaps requires scalar " | ||
215 | + "epilogue loop"); | ||
216 | + } | ||
217 | + | ||
218 | return true; | ||
219 | } | ||
220 | |||
221 | @@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *dr) | ||
222 | next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | ||
223 | continue; | ||
224 | } | ||
225 | + | ||
226 | prev = next; | ||
227 | |||
228 | /* Check that all the accesses have the same STEP. */ | ||
229 | @@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *dr) | ||
230 | gaps += diff - 1; | ||
231 | } | ||
232 | |||
233 | + last_accessed_element += diff; | ||
234 | + | ||
235 | /* Store the gap from the previous member of the group. If there is no | ||
236 | gap in the access, DR_GROUP_GAP is always 1. */ | ||
237 | DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; | ||
238 | @@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *dr) | ||
239 | VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo), | ||
240 | stmt); | ||
241 | } | ||
242 | + | ||
243 | + /* There is a gap at the end of the group. */ | ||
244 | + if (stride - last_accessed_element > 0 && loop_vinfo) | ||
245 | + { | ||
246 | + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; | ||
247 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
248 | + fprintf (vect_dump, "Data access with gaps requires scalar " | ||
249 | + "epilogue loop"); | ||
250 | + } | ||
251 | } | ||
252 | |||
253 | return true; | ||
254 | diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c | ||
255 | index b691cd2..da2c9b7 100644 | ||
256 | --- a/gcc/tree-vect-loop-manip.c | ||
257 | +++ b/gcc/tree-vect-loop-manip.c | ||
258 | @@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, | ||
259 | edge pe; | ||
260 | basic_block new_bb; | ||
261 | gimple_seq stmts; | ||
262 | - tree ni_name; | ||
263 | + tree ni_name, ni_minus_gap_name; | ||
264 | tree var; | ||
265 | tree ratio_name; | ||
266 | tree ratio_mult_vf_name; | ||
267 | @@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, | ||
268 | ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); | ||
269 | log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); | ||
270 | |||
271 | + /* If epilogue loop is required because of data accesses with gaps, we | ||
272 | + subtract one iteration from the total number of iterations here for | ||
273 | + correct calculation of RATIO. */ | ||
274 | + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | ||
275 | + { | ||
276 | + ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), | ||
277 | + ni_name, | ||
278 | + build_one_cst (TREE_TYPE (ni_name))); | ||
279 | + if (!is_gimple_val (ni_minus_gap_name)) | ||
280 | + { | ||
281 | + var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); | ||
282 | + add_referenced_var (var); | ||
283 | + | ||
284 | + stmts = NULL; | ||
285 | + ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, | ||
286 | + true, var); | ||
287 | + if (cond_expr_stmt_list) | ||
288 | + gimple_seq_add_seq (&cond_expr_stmt_list, stmts); | ||
289 | + else | ||
290 | + { | ||
291 | + pe = loop_preheader_edge (loop); | ||
292 | + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | ||
293 | + gcc_assert (!new_bb); | ||
294 | + } | ||
295 | + } | ||
296 | + } | ||
297 | + else | ||
298 | + ni_minus_gap_name = ni_name; | ||
299 | + | ||
300 | /* Create: ratio = ni >> log2(vf) */ | ||
301 | |||
302 | - ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf); | ||
303 | + ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), | ||
304 | + ni_minus_gap_name, log_vf); | ||
305 | if (!is_gimple_val (ratio_name)) | ||
306 | { | ||
307 | var = create_tmp_var (TREE_TYPE (ni), "bnd"); | ||
308 | diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c | ||
309 | index 7692eb8..44c1ecd 100644 | ||
310 | --- a/gcc/tree-vect-loop.c | ||
311 | +++ b/gcc/tree-vect-loop.c | ||
312 | @@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop) | ||
313 | LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10); | ||
314 | LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; | ||
315 | LOOP_VINFO_PEELING_HTAB (res) = NULL; | ||
316 | + LOOP_VINFO_PEELING_FOR_GAPS (res) = false; | ||
317 | |||
318 | return res; | ||
319 | } | ||
320 | @@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, | ||
321 | peel_iters_prologue = niters < peel_iters_prologue ? | ||
322 | niters : peel_iters_prologue; | ||
323 | *peel_iters_epilogue = (niters - peel_iters_prologue) % vf; | ||
324 | + /* If we need to peel for gaps, but no peeling is otherwise required, we | ||
325 | + have to peel VF iterations. */ | ||
326 | + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) | ||
327 | + *peel_iters_epilogue = vf; | ||
328 | } | ||
329 | |||
330 | return (peel_iters_prologue * scalar_single_iter_cost) | ||
331 | @@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) | ||
332 | do_peeling_for_loop_bound | ||
333 | = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | ||
334 | || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | ||
335 | - && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)); | ||
336 | + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) | ||
337 | + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); | ||
338 | |||
339 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) | ||
340 | || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) | ||
341 | diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h | ||
342 | index ee8410c..c1ac81c 100644 | ||
343 | --- a/gcc/tree-vectorizer.h | ||
344 | +++ b/gcc/tree-vectorizer.h | ||
345 | @@ -251,6 +251,11 @@ typedef struct _loop_vec_info { | ||
346 | /* Hash table used to choose the best peeling option. */ | ||
347 | htab_t peeling_htab; | ||
348 | |||
349 | + /* When we have strided data accesses with gaps, we may introduce invalid | ||
350 | + memory accesses. We peel the last iteration of the loop to prevent | ||
351 | + this. */ | ||
352 | + bool peeling_for_gaps; | ||
353 | + | ||
354 | } *loop_vec_info; | ||
355 | |||
356 | /* Access Functions. */ | ||
357 | @@ -278,6 +283,7 @@ typedef struct _loop_vec_info { | ||
358 | #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor | ||
359 | #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions | ||
360 | #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab | ||
361 | +#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps | ||
362 | |||
363 | #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ | ||
364 | VEC_length (gimple, (L)->may_misalign_stmts) > 0 | ||
365 | -- | ||
366 | 1.7.0.4 | ||
367 | |||