Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch')
-rw-r--r--  meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch  |  367
1 file changed, 367 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
new file mode 100644
index 0000000000..199eb6130b
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
@@ -0,0 +1,367 @@
From 57c28e9c809ad3f27fe0743eabb030e8ee8b2af4 Mon Sep 17 00:00:00 2001
From: irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sat, 4 Jun 2011 09:20:00 +0000
Subject: [PATCH] PR tree-optimization/49038
 * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
 Ensure at least one epilogue iteration if required by data
 accesses with gaps.
 * tree-vectorizer.h (struct _loop_vec_info): Add new field
 to mark loops that require peeling for gaps.
 * tree-vect-loop.c (new_loop_vec_info): Initialize new field.
 (vect_get_known_peeling_cost): Take peeling for gaps into
 account.
 (vect_transform_loop): Generate epilogue if required by data
 access with gaps.
 * tree-vect-data-refs.c (vect_analyze_group_access): Mark the
 loop as requiring an epilogue if there are gaps in the end of
 the strided group.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@174635 138bc75d-0d04-0410-961f-82ee72b054a4

index 13b7118..8d51590 100644
new file mode 100644
index 0000000..91c214f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr49038.c
@@ -0,0 +1,42 @@
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 320
+#define MMAP_SIZE 0x10000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+void __attribute__((noinline))
+foo (TYPE *__restrict a, TYPE *__restrict b)
+{
+  int n;
+
+  for (n = 0; n < COUNT; n++)
+    a[n] = b[n * 2];
+}
+
+int
+main (void)
+{
+  void *x;
+  size_t b_offset;
+
+  x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (x == MAP_FAILED)
+    {
+      perror ("mmap");
+      return 1;
+    }
+
+  b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
+  foo ((unsigned short *) x,
+       (unsigned short *) ((char *) x + b_offset));
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
new file mode 100644
index 0000000..ccbc366
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
@@ -0,0 +1,116 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 160
+
+typedef struct {
+   unsigned char a;
+   unsigned char b;
+   unsigned char c;
+   unsigned char d;
+   unsigned char e;
+   unsigned char f;
+   unsigned char g;
+   unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+  int i;
+  s *ptr = arr;
+  s res[N];
+  unsigned char x;
+
+  for (i = 0; i < N; i++)
+    {
+      res[i].a = 0;
+      res[i].b = 0;
+      res[i].c = 0;
+      res[i].d = 0;
+      res[i].e = 0;
+      res[i].f = 0;
+      res[i].g = 0;
+      res[i].h = 0;
+      __asm__ volatile ("");
+    }
+
+  /* Check peeling for gaps for unknown loop bound.  */
+  for (i = 0; i < n; i++)
+    {
+      res[i].c = ptr->b + ptr->c;
+      x = ptr->c + ptr->f;
+      res[i].a = x + ptr->b;
+      res[i].d = ptr->b + ptr->c;
+      res[i].b = ptr->c;
+      res[i].f = ptr->f + ptr->e;
+      res[i].e = ptr->b + ptr->e;
+      res[i].h = ptr->c;
+      res[i].g = ptr->b + ptr->c;
+      ptr++;
+    }
+
+  /* check results:  */
+  for (i = 0; i < n; i++)
+    {
+      if (res[i].c != arr[i].b + arr[i].c
+          || res[i].a != arr[i].c + arr[i].f + arr[i].b
+          || res[i].d != arr[i].b + arr[i].c
+          || res[i].b != arr[i].c
+          || res[i].f != arr[i].f + arr[i].e
+          || res[i].e != arr[i].b + arr[i].e
+          || res[i].h != arr[i].c
+          || res[i].g != arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  /* Check also that we don't do more iterations than needed.  */
+  for (i = n; i < N; i++)
+    {
+      if (res[i].c == arr[i].b + arr[i].c
+          || res[i].a == arr[i].c + arr[i].f + arr[i].b
+          || res[i].d == arr[i].b + arr[i].c
+          || res[i].b == arr[i].c
+          || res[i].f == arr[i].f + arr[i].e
+          || res[i].e == arr[i].b + arr[i].e
+          || res[i].h == arr[i].c
+          || res[i].g == arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  return 0;
+}
+
+
+int main (void)
+{
+  int i;
+  s arr[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      arr[i].a = 5;
+      arr[i].b = 6;
+      arr[i].c = 17;
+      arr[i].d = 3;
+      arr[i].e = 16;
+      arr[i].f = 16;
+      arr[i].g = 3;
+      arr[i].h = 56;
+      if (arr[i].a == 178)
+         abort();
+    }
+
+  main1 (arr, N-2);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index db5098c..1c60388 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr)
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
-  HOST_WIDE_INT stride;
+  HOST_WIDE_INT stride, last_accessed_element = 1;
   bool slp_impossible = false;
 
   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *dr)
          fprintf (vect_dump, " step ");
          print_generic_expr (vect_dump, step, TDF_SLIM);
        }
+
+      if (loop_vinfo)
+        {
+          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "Data access with gaps requires scalar "
+                                "epilogue loop");
+        }
+
       return true;
     }
 
@@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *dr)
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
          continue;
        }
+
       prev = next;
 
       /* Check that all the accesses have the same STEP.  */
@@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *dr)
              gaps += diff - 1;
            }
 
+          last_accessed_element += diff;
+
          /* Store the gap from the previous member of the group. If there is no
             gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
@@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *dr)
            VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
                           stmt);
        }
+
+      /* There is a gap in the end of the group.  */
+      if (stride - last_accessed_element > 0 && loop_vinfo)
+        {
+          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "Data access with gaps requires scalar "
+                                "epilogue loop");
+        }
     }
 
   return true;
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index b691cd2..da2c9b7 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   edge pe;
   basic_block new_bb;
   gimple_seq stmts;
-  tree ni_name;
+  tree ni_name, ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
@@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
 
+  /* If epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+                                       ni_name,
+                                       build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+        {
+          var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+          add_referenced_var (var);
+
+          stmts = NULL;
+          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+                                                    true, var);
+          if (cond_expr_stmt_list)
+            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+          else
+            {
+              pe = loop_preheader_edge (loop);
+              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+              gcc_assert (!new_bb);
+            }
+        }
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
   /* Create: ratio = ni >> log2(vf) */
 
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+                            ni_minus_gap_name, log_vf);
   if (!is_gimple_val (ratio_name))
     {
       var = create_tmp_var (TREE_TYPE (ni), "bnd");
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 7692eb8..44c1ecd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
   LOOP_VINFO_PEELING_HTAB (res) = NULL;
+  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 
   return res;
 }
@@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       peel_iters_prologue = niters < peel_iters_prologue ?
                             niters : peel_iters_prologue;
       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+      /* If we need to peel for gaps, but no peeling is required, we have to
+         peel VF iterations.  */
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
+        *peel_iters_epilogue = vf;
     }
 
   return (peel_iters_prologue * scalar_single_iter_cost)
@@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   do_peeling_for_loop_bound
     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
 
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ee8410c..c1ac81c 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -251,6 +251,11 @@ typedef struct _loop_vec_info {
   /* Hash table used to choose the best peeling option.  */
   htab_t peeling_htab;
 
+  /* When we have strided data accesses with gaps, we may introduce invalid
+     memory accesses.  We peel the last iteration of the loop to prevent
+     this.  */
+  bool peeling_for_gaps;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -278,6 +283,7 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
 #define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
 #define LOOP_VINFO_PEELING_HTAB(L)         (L)->peeling_htab
+#define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
 VEC_length (gimple, (L)->may_misalign_stmts) > 0
-- 
1.7.0.4
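
For context, a minimal sketch (not part of the backported patch) of the access pattern this change guards against. The helper name copy_even and the use of calloc are illustrative assumptions; the pattern itself mirrors the pr49038.c test above, which places the array right before an unmapped page.

/* Illustrative sketch only.  Only the even elements of B are read, so the
   access group has a gap.  A vectorized loop loads full vectors of B and can
   touch the element after the last one the scalar loop uses; if B ends
   exactly at a page boundary, that over-read can fault.  The patch therefore
   forces at least one scalar epilogue iteration (peeling for gaps).  */
#include <stdlib.h>

#define COUNT 320

static void
copy_even (unsigned short *restrict a, const unsigned short *restrict b)
{
  int n;

  for (n = 0; n < COUNT; n++)
    a[n] = b[n * 2];  /* reads b[0], b[2], ..., b[638]; b[639] is never needed */
}

int
main (void)
{
  /* 2 * COUNT - 1 elements are enough for the scalar loop; without the
     epilogue peel, a vector load could read one element past the end.  */
  unsigned short *b = calloc (2 * COUNT - 1, sizeof *b);
  unsigned short a[COUNT];

  if (!b)
    return 1;
  copy_even (a, b);
  free (b);
  return 0;
}

As a rough sense of the iteration-count adjustment made in vect_generate_tmps_on_preheader: with, say, 320 iterations and a vectorization factor of 8, the ratio is computed from 320 - 1 = 319, so 319 >> 3 = 39 vector iterations cover 312 scalar iterations and the remaining 8 run in the scalar epilogue, rather than 40 vector iterations that would read past the end of B.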