summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
diff options
context:
space:
mode:
author: Khem Raj <raj.khem@gmail.com> 2011-06-12 20:56:57 -0700
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2011-06-14 15:29:15 +0100
commit: ec9b6d061b8495b9316ae90e5a71a499ef2873ee (patch)
tree: dc922c9ddd98f937eb2095b07f6c3b5043e3a491 /meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
parent: fbc60cc5ba2284bfcaea8c82001a49b7795cc2a2 (diff)
download: poky-ec9b6d061b8495b9316ae90e5a71a499ef2873ee.tar.gz
gcc-4.6.0: Bring in patches from FSF 4.6 branch
This brings in new patches from the 4.6 release branch and updates the comment section of the existing branch so that it does not contain patch numbers. Tested builds on qemu for arm, ppc, mips, x86 and x86_64. (From OE-Core rev: 3968f33b6542cf20cf63cf49bfbc033bd2486295) Signed-off-by: Khem Raj <raj.khem@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch')
-rw-r--r-- meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch | 367
1 files changed, 367 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
new file mode 100644
index 0000000000..199eb6130b
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
@@ -0,0 +1,367 @@
1From 57c28e9c809ad3f27fe0743eabb030e8ee8b2af4 Mon Sep 17 00:00:00 2001
2From: irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4>
3Date: Sat, 4 Jun 2011 09:20:00 +0000
4Subject: [PATCH] PR tree-optimization/49038
5 * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
6 Ensure at least one epilogue iteration if required by data
7 accesses with gaps.
8 * tree-vectorizer.h (struct _loop_vec_info): Add new field
9 to mark loops that require peeling for gaps.
10 * tree-vect-loop.c (new_loop_vec_info): Initialize new field.
11 (vect_get_known_peeling_cost): Take peeling for gaps into
12 account.
13 (vect_transform_loop): Generate epilogue if required by data
14 access with gaps.
15 * tree-vect-data-refs.c (vect_analyze_group_access): Mark the
16 loop as requiring an epilogue if there are gaps in the end of
17 the strided group.
18
19git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@174635 138bc75d-0d04-0410-961f-82ee72b054a4
20
21index 13b7118..8d51590 100644
22new file mode 100644
23index 0000000..91c214f
24--- /dev/null
25+++ b/gcc/testsuite/gcc.dg/vect/pr49038.c
26@@ -0,0 +1,42 @@
27+#include <sys/mman.h>
28+#include <stdio.h>
29+
30+#define COUNT 320
31+#define MMAP_SIZE 0x10000
32+#define ADDRESS 0x1122000000
33+#define TYPE unsigned short
34+
35+#ifndef MAP_ANONYMOUS
36+#define MAP_ANONYMOUS MAP_ANON
37+#endif
38+
39+void __attribute__((noinline))
40+foo (TYPE *__restrict a, TYPE *__restrict b)
41+{
42+ int n;
43+
44+ for (n = 0; n < COUNT; n++)
45+ a[n] = b[n * 2];
46+}
47+
48+int
49+main (void)
50+{
51+ void *x;
52+ size_t b_offset;
53+
54+ x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
55+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
56+ if (x == MAP_FAILED)
57+ {
58+ perror ("mmap");
59+ return 1;
60+ }
61+
62+ b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
63+ foo ((unsigned short *) x,
64+ (unsigned short *) ((char *) x + b_offset));
65+ return 0;
66+}
67+
68+/* { dg-final { cleanup-tree-dump "vect" } } */
69diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
70new file mode 100644
71index 0000000..ccbc366
72--- /dev/null
73+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
74@@ -0,0 +1,116 @@
75+/* { dg-require-effective-target vect_int } */
76+
77+#include <stdarg.h>
78+#include <stdio.h>
79+#include "tree-vect.h"
80+
81+#define N 160
82+
83+typedef struct {
84+ unsigned char a;
85+ unsigned char b;
86+ unsigned char c;
87+ unsigned char d;
88+ unsigned char e;
89+ unsigned char f;
90+ unsigned char g;
91+ unsigned char h;
92+} s;
93+
94+__attribute__ ((noinline)) int
95+main1 (s *arr, int n)
96+{
97+ int i;
98+ s *ptr = arr;
99+ s res[N];
100+ unsigned char x;
101+
102+ for (i = 0; i < N; i++)
103+ {
104+ res[i].a = 0;
105+ res[i].b = 0;
106+ res[i].c = 0;
107+ res[i].d = 0;
108+ res[i].e = 0;
109+ res[i].f = 0;
110+ res[i].g = 0;
111+ res[i].h = 0;
112+ __asm__ volatile ("");
113+ }
114+
115+ /* Check peeling for gaps for unknown loop bound. */
116+ for (i = 0; i < n; i++)
117+ {
118+ res[i].c = ptr->b + ptr->c;
119+ x = ptr->c + ptr->f;
120+ res[i].a = x + ptr->b;
121+ res[i].d = ptr->b + ptr->c;
122+ res[i].b = ptr->c;
123+ res[i].f = ptr->f + ptr->e;
124+ res[i].e = ptr->b + ptr->e;
125+ res[i].h = ptr->c;
126+ res[i].g = ptr->b + ptr->c;
127+ ptr++;
128+ }
129+
130+ /* check results: */
131+ for (i = 0; i < n; i++)
132+ {
133+ if (res[i].c != arr[i].b + arr[i].c
134+ || res[i].a != arr[i].c + arr[i].f + arr[i].b
135+ || res[i].d != arr[i].b + arr[i].c
136+ || res[i].b != arr[i].c
137+ || res[i].f != arr[i].f + arr[i].e
138+ || res[i].e != arr[i].b + arr[i].e
139+ || res[i].h != arr[i].c
140+ || res[i].g != arr[i].b + arr[i].c)
141+ abort ();
142+ }
143+
144+ /* Check also that we don't do more iterations than needed. */
145+ for (i = n; i < N; i++)
146+ {
147+ if (res[i].c == arr[i].b + arr[i].c
148+ || res[i].a == arr[i].c + arr[i].f + arr[i].b
149+ || res[i].d == arr[i].b + arr[i].c
150+ || res[i].b == arr[i].c
151+ || res[i].f == arr[i].f + arr[i].e
152+ || res[i].e == arr[i].b + arr[i].e
153+ || res[i].h == arr[i].c
154+ || res[i].g == arr[i].b + arr[i].c)
155+ abort ();
156+ }
157+
158+ return 0;
159+}
160+
161+
162+int main (void)
163+{
164+ int i;
165+ s arr[N];
166+
167+ check_vect ();
168+
169+ for (i = 0; i < N; i++)
170+ {
171+ arr[i].a = 5;
172+ arr[i].b = 6;
173+ arr[i].c = 17;
174+ arr[i].d = 3;
175+ arr[i].e = 16;
176+ arr[i].f = 16;
177+ arr[i].g = 3;
178+ arr[i].h = 56;
179+ if (arr[i].a == 178)
180+ abort();
181+ }
182+
183+ main1 (arr, N-2);
184+
185+ return 0;
186+}
187+
188+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
189+/* { dg-final { cleanup-tree-dump "vect" } } */
190+
191diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
192index db5098c..1c60388 100644
193--- a/gcc/tree-vect-data-refs.c
194+++ b/gcc/tree-vect-data-refs.c
195@@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr)
196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
198 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
199- HOST_WIDE_INT stride;
200+ HOST_WIDE_INT stride, last_accessed_element = 1;
201 bool slp_impossible = false;
202
203 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
204@@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *dr)
205 fprintf (vect_dump, " step ");
206 print_generic_expr (vect_dump, step, TDF_SLIM);
207 }
208+
209+ if (loop_vinfo)
210+ {
211+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
212+
213+ if (vect_print_dump_info (REPORT_DETAILS))
214+ fprintf (vect_dump, "Data access with gaps requires scalar "
215+ "epilogue loop");
216+ }
217+
218 return true;
219 }
220
221@@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *dr)
222 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
223 continue;
224 }
225+
226 prev = next;
227
228 /* Check that all the accesses have the same STEP. */
229@@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *dr)
230 gaps += diff - 1;
231 }
232
233+ last_accessed_element += diff;
234+
235 /* Store the gap from the previous member of the group. If there is no
236 gap in the access, DR_GROUP_GAP is always 1. */
237 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
238@@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *dr)
239 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
240 stmt);
241 }
242+
243+ /* There is a gap in the end of the group. */
244+ if (stride - last_accessed_element > 0 && loop_vinfo)
245+ {
246+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
247+ if (vect_print_dump_info (REPORT_DETAILS))
248+ fprintf (vect_dump, "Data access with gaps requires scalar "
249+ "epilogue loop");
250+ }
251 }
252
253 return true;
254diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
255index b691cd2..da2c9b7 100644
256--- a/gcc/tree-vect-loop-manip.c
257+++ b/gcc/tree-vect-loop-manip.c
258@@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
259 edge pe;
260 basic_block new_bb;
261 gimple_seq stmts;
262- tree ni_name;
263+ tree ni_name, ni_minus_gap_name;
264 tree var;
265 tree ratio_name;
266 tree ratio_mult_vf_name;
267@@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
268 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
269 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
270
271+ /* If epilogue loop is required because of data accesses with gaps, we
272+ subtract one iteration from the total number of iterations here for
273+ correct calculation of RATIO. */
274+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
275+ {
276+ ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
277+ ni_name,
278+ build_one_cst (TREE_TYPE (ni_name)));
279+ if (!is_gimple_val (ni_minus_gap_name))
280+ {
281+ var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
282+ add_referenced_var (var);
283+
284+ stmts = NULL;
285+ ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
286+ true, var);
287+ if (cond_expr_stmt_list)
288+ gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
289+ else
290+ {
291+ pe = loop_preheader_edge (loop);
292+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
293+ gcc_assert (!new_bb);
294+ }
295+ }
296+ }
297+ else
298+ ni_minus_gap_name = ni_name;
299+
300 /* Create: ratio = ni >> log2(vf) */
301
302- ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
303+ ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
304+ ni_minus_gap_name, log_vf);
305 if (!is_gimple_val (ratio_name))
306 {
307 var = create_tmp_var (TREE_TYPE (ni), "bnd");
308diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
309index 7692eb8..44c1ecd 100644
310--- a/gcc/tree-vect-loop.c
311+++ b/gcc/tree-vect-loop.c
312@@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop)
313 LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
314 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
315 LOOP_VINFO_PEELING_HTAB (res) = NULL;
316+ LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
317
318 return res;
319 }
320@@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
321 peel_iters_prologue = niters < peel_iters_prologue ?
322 niters : peel_iters_prologue;
323 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
324+ /* If we need to peel for gaps, but no peeling is required, we have to
325+ peel VF iterations. */
326+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
327+ *peel_iters_epilogue = vf;
328 }
329
330 return (peel_iters_prologue * scalar_single_iter_cost)
331@@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
332 do_peeling_for_loop_bound
333 = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
334 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
335- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
336+ && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
337+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
338
339 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
340 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
341diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
342index ee8410c..c1ac81c 100644
343--- a/gcc/tree-vectorizer.h
344+++ b/gcc/tree-vectorizer.h
345@@ -251,6 +251,11 @@ typedef struct _loop_vec_info {
346 /* Hash table used to choose the best peeling option. */
347 htab_t peeling_htab;
348
349+ /* When we have strided data accesses with gaps, we may introduce invalid
350+ memory accesses. We peel the last iteration of the loop to prevent
351+ this. */
352+ bool peeling_for_gaps;
353+
354 } *loop_vec_info;
355
356 /* Access Functions. */
357@@ -278,6 +283,7 @@ typedef struct _loop_vec_info {
358 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
359 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
360 #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
361+#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
362
363 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
364 VEC_length (gimple, (L)->may_misalign_stmts) > 0
365--
3661.7.0.4
367