summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0384-PR-tree-optimization-49038.patch
blob: 199eb6130bd8f3f3c288272d7f93e4dfe5636827 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
From 57c28e9c809ad3f27fe0743eabb030e8ee8b2af4 Mon Sep 17 00:00:00 2001
From: irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sat, 4 Jun 2011 09:20:00 +0000
Subject: [PATCH]         PR tree-optimization/49038
         * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
         Ensure at least one epilogue iteration if required by data
         accesses with gaps.
         * tree-vectorizer.h (struct _loop_vec_info): Add new field
         to mark loops that require peeling for gaps.
         * tree-vect-loop.c (new_loop_vec_info): Initialize new field.
         (vect_get_known_peeling_cost): Take peeling for gaps into
         account.
         (vect_transform_loop): Generate epilogue if required by data
         access with gaps.
         * tree-vect-data-refs.c (vect_analyze_group_access): Mark the
         loop as requiring an epilogue if there are gaps in the end of
         the strided group.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@174635 138bc75d-0d04-0410-961f-82ee72b054a4

index 13b7118..8d51590 100644
new file mode 100644
index 0000000..91c214f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr49038.c
@@ -0,0 +1,42 @@
+#include <sys/mman.h>
+#include <stdio.h>
+
+#define COUNT 320
+#define MMAP_SIZE 0x10000
+#define ADDRESS 0x1122000000
+#define TYPE unsigned short
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+void __attribute__((noinline))
+foo (TYPE *__restrict a, TYPE *__restrict b)
+{
+  int n;
+
+  for (n = 0; n < COUNT; n++)
+    a[n] = b[n * 2];
+}
+
+int
+main (void)
+{
+  void *x;
+  size_t b_offset;
+
+  x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE,
+	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (x == MAP_FAILED)
+    {
+      perror ("mmap");
+      return 1;
+    }
+
+  b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE);
+  foo ((unsigned short *) x,
+       (unsigned short *) ((char *) x + b_offset));
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
new file mode 100644
index 0000000..ccbc366
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
@@ -0,0 +1,116 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 160 
+
+typedef struct {
+   unsigned char a;
+   unsigned char b;
+   unsigned char c;
+   unsigned char d;
+   unsigned char e;
+   unsigned char f;
+   unsigned char g;
+   unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+  int i;
+  s *ptr = arr;
+  s res[N];
+  unsigned char x;
+
+  for (i = 0; i < N; i++)
+    {
+      res[i].a = 0;
+      res[i].b = 0;
+      res[i].c = 0;
+      res[i].d = 0;
+      res[i].e = 0;
+      res[i].f = 0;
+      res[i].g = 0;
+      res[i].h = 0;
+      __asm__ volatile ("");
+    }
+
+  /* Check peeling for gaps for unknown loop bound.  */
+  for (i = 0; i < n; i++)
+    {
+      res[i].c = ptr->b + ptr->c;
+      x = ptr->c + ptr->f;
+      res[i].a = x + ptr->b;
+      res[i].d = ptr->b + ptr->c;
+      res[i].b = ptr->c;
+      res[i].f = ptr->f + ptr->e;
+      res[i].e = ptr->b + ptr->e; 
+      res[i].h = ptr->c;   
+      res[i].g = ptr->b + ptr->c;
+      ptr++; 
+    } 
+   
+  /* check results:  */
+  for (i = 0; i < n; i++)
+    { 
+      if (res[i].c != arr[i].b + arr[i].c
+          || res[i].a != arr[i].c + arr[i].f + arr[i].b
+          || res[i].d != arr[i].b + arr[i].c
+          || res[i].b != arr[i].c
+          || res[i].f != arr[i].f + arr[i].e
+          || res[i].e != arr[i].b + arr[i].e
+          || res[i].h != arr[i].c
+          || res[i].g != arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  /* Check also that we don't do more iterations than needed.  */
+  for (i = n; i < N; i++)
+    {
+      if (res[i].c == arr[i].b + arr[i].c
+          || res[i].a == arr[i].c + arr[i].f + arr[i].b
+          || res[i].d == arr[i].b + arr[i].c
+          || res[i].b == arr[i].c
+          || res[i].f == arr[i].f + arr[i].e
+          || res[i].e == arr[i].b + arr[i].e
+          || res[i].h == arr[i].c
+          || res[i].g == arr[i].b + arr[i].c)
+        abort ();
+   }
+
+  return 0;
+}
+
+
+int main (void)
+{
+  int i;
+  s arr[N];
+  
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    { 
+      arr[i].a = 5;
+      arr[i].b = 6;
+      arr[i].c = 17;
+      arr[i].d = 3;
+      arr[i].e = 16;
+      arr[i].f = 16;
+      arr[i].g = 3;
+      arr[i].h = 56;
+      if (arr[i].a == 178)
+         abort(); 
+    } 
+
+  main1 (arr, N-2);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd }  } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+  
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index db5098c..1c60388 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2045,7 +2045,7 @@ vect_analyze_group_access (struct data_reference *dr)
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
-  HOST_WIDE_INT stride;
+  HOST_WIDE_INT stride, last_accessed_element = 1;
   bool slp_impossible = false;
 
   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
@@ -2074,6 +2074,16 @@ vect_analyze_group_access (struct data_reference *dr)
 	      fprintf (vect_dump, " step ");
 	      print_generic_expr (vect_dump, step, TDF_SLIM);
 	    }
+
+	  if (loop_vinfo)
+	    {
+	      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+
+	      if (vect_print_dump_info (REPORT_DETAILS))
+		fprintf (vect_dump, "Data access with gaps requires scalar "
+				    "epilogue loop");
+	    }
+
 	  return true;
 	}
 
@@ -2139,6 +2149,7 @@ vect_analyze_group_access (struct data_reference *dr)
               next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
               continue;
             }
+
           prev = next;
 
           /* Check that all the accesses have the same STEP.  */
@@ -2169,6 +2180,8 @@ vect_analyze_group_access (struct data_reference *dr)
               gaps += diff - 1;
 	    }
 
+	  last_accessed_element += diff;
+
           /* Store the gap from the previous member of the group. If there is no
              gap in the access, DR_GROUP_GAP is always 1.  */
           DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
@@ -2260,6 +2273,15 @@ vect_analyze_group_access (struct data_reference *dr)
             VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
                            stmt);
         }
+
+      /* There is a gap in the end of the group.  */
+      if (stride - last_accessed_element > 0 && loop_vinfo)
+	{
+	  LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "Data access with gaps requires scalar "
+				"epilogue loop");
+	}
     }
 
   return true;
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index b691cd2..da2c9b7 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   edge pe;
   basic_block new_bb;
   gimple_seq stmts;
-  tree ni_name;
+  tree ni_name, ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
@@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
 
+  /* If epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+				       ni_name,
+			               build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+	{
+	  var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+          add_referenced_var (var);
+
+          stmts = NULL;
+          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+						    true, var);
+          if (cond_expr_stmt_list)
+            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+          else
+            {
+              pe = loop_preheader_edge (loop);
+              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+              gcc_assert (!new_bb);
+            }
+        }
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
   /* Create: ratio = ni >> log2(vf) */
 
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+			    ni_minus_gap_name, log_vf);
   if (!is_gimple_val (ratio_name))
     {
       var = create_tmp_var (TREE_TYPE (ni), "bnd");
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 7692eb8..44c1ecd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -760,6 +760,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
   LOOP_VINFO_PEELING_HTAB (res) = NULL;
+  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 
   return res;
 }
@@ -2149,6 +2150,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       peel_iters_prologue = niters < peel_iters_prologue ?
                             niters : peel_iters_prologue;
       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+      /* If we need to peel for gaps, but no peeling is required, we have to
+	 peel VF iterations.  */
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
+        *peel_iters_epilogue = vf;
     }
 
    return (peel_iters_prologue * scalar_single_iter_cost)
@@ -4721,7 +4726,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   do_peeling_for_loop_bound
     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
 
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ee8410c..c1ac81c 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -251,6 +251,11 @@ typedef struct _loop_vec_info {
   /* Hash table used to choose the best peeling option.  */
   htab_t peeling_htab;
 
+  /* When we have strided data accesses with gaps, we may introduce invalid
+     memory accesses.  We peel the last iteration of the loop to prevent
+     this.  */
+  bool peeling_for_gaps;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -278,6 +283,7 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
 #define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
 #define LOOP_VINFO_PEELING_HTAB(L)         (L)->peeling_htab
+#define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
 VEC_length (gimple, (L)->may_misalign_stmts) > 0
-- 
1.7.0.4