diff options
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch')
-rw-r--r-- | toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch | 1281 |
1 files changed, 0 insertions, 1281 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch deleted file mode 100644 index 3a149231f..000000000 --- a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch +++ /dev/null | |||
@@ -1,1281 +0,0 @@ | |||
1 | 2011-07-11 Ira Rosen <ira.rosen@linaro.org> | ||
2 | |||
3 | Backport from FSF: | ||
4 | 2011-06-16 Ira Rosen <ira.rosen@linaro.org> | ||
5 | |||
6 | gcc/ | ||
7 | * tree-vectorizer.h (vect_recog_func_ptr): Change the first | ||
8 | argument to be a VEC of statements. | ||
9 | * tree-vect-loop.c (vect_determine_vectorization_factor): Remove the | ||
10 | assert that pattern statements have to have their vector type set. | ||
11 | * tree-vect-patterns.c (vect_recog_widen_sum_pattern): | ||
12 | Change the first argument to be a VEC of statements. Update | ||
13 | documentation. | ||
14 | (vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise. | ||
15 | (vect_handle_widen_mult_by_const): New function. | ||
16 | (vect_recog_widen_mult_pattern): Change the first argument to be a | ||
17 | VEC of statements. Update documentation. Check that the constant is | ||
18 | INTEGER_CST. Support multiplication by a constant that fits an | ||
19 | intermediate type - call vect_handle_widen_mult_by_const. | ||
20 | (vect_pattern_recog_1): Update vect_recog_func_ptr and its | ||
21 | call. Handle additional pattern statements if necessary. | ||
22 | |||
23 | gcc/testsuite/ | ||
24 | * gcc.dg/vect/vect-widen-mult-half-u8.c: New test. | ||
25 | |||
26 | and | ||
27 | 2011-06-30 Ira Rosen <ira.rosen@linaro.org> | ||
28 | |||
29 | gcc/ | ||
30 | * tree-vect-loop.c (vect_determine_vectorization_factor): Handle | ||
31 | both pattern and original statements if necessary. | ||
32 | (vect_transform_loop): Likewise. | ||
33 | * tree-vect-patterns.c (vect_pattern_recog): Update documentation. | ||
34 | * tree-vect-stmts.c (vect_mark_relevant): Add new argument. | ||
35 | Mark the pattern statement only if the original statement doesn't | ||
36 | have its own uses. | ||
37 | (process_use): Call vect_mark_relevant with additional parameter. | ||
38 | (vect_mark_stmts_to_be_vectorized): Likewise. | ||
39 | (vect_get_vec_def_for_operand): Use vectorized pattern statement. | ||
40 | (vect_analyze_stmt): Handle both pattern and original statements | ||
41 | if necessary. | ||
42 | (vect_transform_stmt): Don't store vectorized pattern statement | ||
43 | in the original statement. | ||
44 | (vect_is_simple_use_1): Use related pattern statement only if the | ||
45 | original statement is irrelevant. | ||
46 | * tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise. | ||
47 | |||
48 | gcc/testsuite/ | ||
49 | * gcc.dg/vect/slp-widen-mult-half.c: New test. | ||
50 | * gcc.dg/vect/vect-widen-mult-half.c: New test. | ||
51 | |||
52 | === added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c' | ||
53 | Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c | ||
54 | =================================================================== | ||
55 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | ||
56 | +++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800 | ||
57 | @@ -0,0 +1,52 @@ | ||
58 | +/* { dg-require-effective-target vect_int } */ | ||
59 | + | ||
60 | +#include "tree-vect.h" | ||
61 | +#include <stdlib.h> | ||
62 | + | ||
63 | +#define N 32 | ||
64 | +#define COEF 32470 | ||
65 | +#define COEF2 324700 | ||
66 | + | ||
67 | +unsigned char in[N]; | ||
68 | +int out[N]; | ||
69 | +int out2[N]; | ||
70 | + | ||
71 | +__attribute__ ((noinline)) void | ||
72 | +foo () | ||
73 | +{ | ||
74 | + int i; | ||
75 | + | ||
76 | + for (i = 0; i < N/2; i++) | ||
77 | + { | ||
78 | + out[2*i] = in[2*i] * COEF; | ||
79 | + out2[2*i] = in[2*i] + COEF2; | ||
80 | + out[2*i+1] = in[2*i+1] * COEF; | ||
81 | + out2[2*i+1] = in[2*i+1] + COEF2; | ||
82 | + } | ||
83 | +} | ||
84 | + | ||
85 | +int main (void) | ||
86 | +{ | ||
87 | + int i; | ||
88 | + | ||
89 | + for (i = 0; i < N; i++) | ||
90 | + { | ||
91 | + in[i] = i; | ||
92 | + __asm__ volatile (""); | ||
93 | + } | ||
94 | + | ||
95 | + foo (); | ||
96 | + | ||
97 | + for (i = 0; i < N; i++) | ||
98 | + if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) | ||
99 | + abort (); | ||
100 | + | ||
101 | + return 0; | ||
102 | +} | ||
103 | + | ||
104 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
105 | +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
106 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
107 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
108 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
109 | + | ||
110 | Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c | ||
111 | =================================================================== | ||
112 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | ||
113 | +++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800 | ||
114 | @@ -0,0 +1,59 @@ | ||
115 | +/* { dg-require-effective-target vect_int } */ | ||
116 | + | ||
117 | +#include "tree-vect.h" | ||
118 | +#include <stdlib.h> | ||
119 | + | ||
120 | +#define N 32 | ||
121 | +#define COEF 32470 | ||
122 | + | ||
123 | +unsigned char in[N]; | ||
124 | +int out[N]; | ||
125 | + | ||
126 | +__attribute__ ((noinline)) void | ||
127 | +foo () | ||
128 | +{ | ||
129 | + int i; | ||
130 | + | ||
131 | + for (i = 0; i < N; i++) | ||
132 | + out[i] = in[i] * COEF; | ||
133 | +} | ||
134 | + | ||
135 | +__attribute__ ((noinline)) void | ||
136 | +bar () | ||
137 | +{ | ||
138 | + int i; | ||
139 | + | ||
140 | + for (i = 0; i < N; i++) | ||
141 | + out[i] = COEF * in[i]; | ||
142 | +} | ||
143 | + | ||
144 | +int main (void) | ||
145 | +{ | ||
146 | + int i; | ||
147 | + | ||
148 | + for (i = 0; i < N; i++) | ||
149 | + { | ||
150 | + in[i] = i; | ||
151 | + __asm__ volatile (""); | ||
152 | + } | ||
153 | + | ||
154 | + foo (); | ||
155 | + | ||
156 | + for (i = 0; i < N; i++) | ||
157 | + if (out[i] != in[i] * COEF) | ||
158 | + abort (); | ||
159 | + | ||
160 | + bar (); | ||
161 | + | ||
162 | + for (i = 0; i < N; i++) | ||
163 | + if (out[i] != in[i] * COEF) | ||
164 | + abort (); | ||
165 | + | ||
166 | + return 0; | ||
167 | +} | ||
168 | + | ||
169 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
170 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
171 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
172 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
173 | + | ||
174 | Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c | ||
175 | =================================================================== | ||
176 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 | ||
177 | +++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800 | ||
178 | @@ -0,0 +1,49 @@ | ||
179 | +/* { dg-require-effective-target vect_int } */ | ||
180 | + | ||
181 | +#include "tree-vect.h" | ||
182 | +#include <stdlib.h> | ||
183 | + | ||
184 | +#define N 32 | ||
185 | +#define COEF 32470 | ||
186 | +#define COEF2 324700 | ||
187 | + | ||
188 | +unsigned char in[N]; | ||
189 | +int out[N]; | ||
190 | +int out2[N]; | ||
191 | + | ||
192 | +__attribute__ ((noinline)) void | ||
193 | +foo (int a) | ||
194 | +{ | ||
195 | + int i; | ||
196 | + | ||
197 | + for (i = 0; i < N; i++) | ||
198 | + { | ||
199 | + out[i] = in[i] * COEF; | ||
200 | + out2[i] = in[i] + a; | ||
201 | + } | ||
202 | +} | ||
203 | + | ||
204 | +int main (void) | ||
205 | +{ | ||
206 | + int i; | ||
207 | + | ||
208 | + for (i = 0; i < N; i++) | ||
209 | + { | ||
210 | + in[i] = i; | ||
211 | + __asm__ volatile (""); | ||
212 | + } | ||
213 | + | ||
214 | + foo (COEF2); | ||
215 | + | ||
216 | + for (i = 0; i < N; i++) | ||
217 | + if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) | ||
218 | + abort (); | ||
219 | + | ||
220 | + return 0; | ||
221 | +} | ||
222 | + | ||
223 | +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ | ||
224 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
225 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
226 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
227 | + | ||
228 | Index: gcc-4_6-branch/gcc/tree-vect-loop.c | ||
229 | =================================================================== | ||
230 | --- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800 | ||
231 | +++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800 | ||
232 | @@ -181,6 +181,8 @@ | ||
233 | stmt_vec_info stmt_info; | ||
234 | int i; | ||
235 | HOST_WIDE_INT dummy; | ||
236 | + gimple stmt, pattern_stmt = NULL; | ||
237 | + bool analyze_pattern_stmt = false; | ||
238 | |||
239 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
240 | fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); | ||
241 | @@ -241,12 +243,20 @@ | ||
242 | } | ||
243 | } | ||
244 | |||
245 | - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
246 | + for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) | ||
247 | { | ||
248 | - tree vf_vectype; | ||
249 | - gimple stmt = gsi_stmt (si), pattern_stmt; | ||
250 | - stmt_info = vinfo_for_stmt (stmt); | ||
251 | + tree vf_vectype; | ||
252 | + | ||
253 | + if (analyze_pattern_stmt) | ||
254 | + { | ||
255 | + stmt = pattern_stmt; | ||
256 | + analyze_pattern_stmt = false; | ||
257 | + } | ||
258 | + else | ||
259 | + stmt = gsi_stmt (si); | ||
260 | |||
261 | + stmt_info = vinfo_for_stmt (stmt); | ||
262 | + | ||
263 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
264 | { | ||
265 | fprintf (vect_dump, "==> examining statement: "); | ||
266 | @@ -276,10 +286,17 @@ | ||
267 | { | ||
268 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
269 | fprintf (vect_dump, "skip."); | ||
270 | + gsi_next (&si); | ||
271 | continue; | ||
272 | } | ||
273 | } | ||
274 | |||
275 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
276 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
277 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
278 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
279 | + analyze_pattern_stmt = true; | ||
280 | + | ||
281 | if (gimple_get_lhs (stmt) == NULL_TREE) | ||
282 | { | ||
283 | if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) | ||
284 | @@ -311,9 +328,7 @@ | ||
285 | } | ||
286 | else | ||
287 | { | ||
288 | - gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) | ||
289 | - && !is_pattern_stmt_p (stmt_info)); | ||
290 | - | ||
291 | + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); | ||
292 | scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); | ||
293 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
294 | { | ||
295 | @@ -385,6 +400,9 @@ | ||
296 | if (!vectorization_factor | ||
297 | || (nunits > vectorization_factor)) | ||
298 | vectorization_factor = nunits; | ||
299 | + | ||
300 | + if (!analyze_pattern_stmt) | ||
301 | + gsi_next (&si); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | @@ -4740,6 +4758,8 @@ | ||
306 | tree cond_expr = NULL_TREE; | ||
307 | gimple_seq cond_expr_stmt_list = NULL; | ||
308 | bool do_peeling_for_loop_bound; | ||
309 | + gimple stmt, pattern_stmt; | ||
310 | + bool transform_pattern_stmt = false; | ||
311 | |||
312 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
313 | fprintf (vect_dump, "=== vec_transform_loop ==="); | ||
314 | @@ -4827,11 +4847,19 @@ | ||
315 | } | ||
316 | } | ||
317 | |||
318 | - for (si = gsi_start_bb (bb); !gsi_end_p (si);) | ||
319 | + pattern_stmt = NULL; | ||
320 | + for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) | ||
321 | { | ||
322 | - gimple stmt = gsi_stmt (si), pattern_stmt; | ||
323 | bool is_store; | ||
324 | |||
325 | + if (transform_pattern_stmt) | ||
326 | + { | ||
327 | + stmt = pattern_stmt; | ||
328 | + transform_pattern_stmt = false; | ||
329 | + } | ||
330 | + else | ||
331 | + stmt = gsi_stmt (si); | ||
332 | + | ||
333 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
334 | { | ||
335 | fprintf (vect_dump, "------>vectorizing statement: "); | ||
336 | @@ -4869,6 +4897,11 @@ | ||
337 | continue; | ||
338 | } | ||
339 | } | ||
340 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
341 | + && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | ||
342 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
343 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
344 | + transform_pattern_stmt = true; | ||
345 | |||
346 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); | ||
347 | nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( | ||
348 | @@ -4897,8 +4930,9 @@ | ||
349 | /* Hybrid SLP stmts must be vectorized in addition to SLP. */ | ||
350 | if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) | ||
351 | { | ||
352 | - gsi_next (&si); | ||
353 | - continue; | ||
354 | + if (!transform_pattern_stmt) | ||
355 | + gsi_next (&si); | ||
356 | + continue; | ||
357 | } | ||
358 | } | ||
359 | |||
360 | @@ -4917,7 +4951,7 @@ | ||
361 | the chain. */ | ||
362 | vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); | ||
363 | gsi_remove (&si, true); | ||
364 | - continue; | ||
365 | + continue; | ||
366 | } | ||
367 | else | ||
368 | { | ||
369 | @@ -4927,7 +4961,9 @@ | ||
370 | continue; | ||
371 | } | ||
372 | } | ||
373 | - gsi_next (&si); | ||
374 | + | ||
375 | + if (!transform_pattern_stmt) | ||
376 | + gsi_next (&si); | ||
377 | } /* stmts in BB */ | ||
378 | } /* BBs in loop */ | ||
379 | |||
380 | Index: gcc-4_6-branch/gcc/tree-vect-patterns.c | ||
381 | =================================================================== | ||
382 | --- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800 | ||
383 | +++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800 | ||
384 | @@ -39,10 +39,13 @@ | ||
385 | #include "diagnostic-core.h" | ||
386 | |||
387 | /* Pattern recognition functions */ | ||
388 | -static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); | ||
389 | -static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); | ||
390 | -static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); | ||
391 | -static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); | ||
392 | +static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, | ||
393 | + tree *); | ||
394 | +static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, | ||
395 | + tree *); | ||
396 | +static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, | ||
397 | + tree *); | ||
398 | +static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); | ||
399 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
400 | vect_recog_widen_mult_pattern, | ||
401 | vect_recog_widen_sum_pattern, | ||
402 | @@ -142,9 +145,9 @@ | ||
403 | |||
404 | Input: | ||
405 | |||
406 | - * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
407 | - when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be | ||
408 | - detected. | ||
409 | + * STMTS: Contains a stmt from which the pattern search begins. In the | ||
410 | + example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} | ||
411 | + will be detected. | ||
412 | |||
413 | Output: | ||
414 | |||
415 | @@ -165,12 +168,13 @@ | ||
416 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
417 | |||
418 | static gimple | ||
419 | -vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
420 | +vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, | ||
421 | + tree *type_out) | ||
422 | { | ||
423 | - gimple stmt; | ||
424 | + gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); | ||
425 | tree oprnd0, oprnd1; | ||
426 | tree oprnd00, oprnd01; | ||
427 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
428 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
429 | tree type, half_type; | ||
430 | gimple pattern_stmt; | ||
431 | tree prod_type; | ||
432 | @@ -178,10 +182,10 @@ | ||
433 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
434 | tree var, rhs; | ||
435 | |||
436 | - if (!is_gimple_assign (*last_stmt)) | ||
437 | + if (!is_gimple_assign (last_stmt)) | ||
438 | return NULL; | ||
439 | |||
440 | - type = gimple_expr_type (*last_stmt); | ||
441 | + type = gimple_expr_type (last_stmt); | ||
442 | |||
443 | /* Look for the following pattern | ||
444 | DX = (TYPE1) X; | ||
445 | @@ -207,7 +211,7 @@ | ||
446 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
447 | of the above pattern. */ | ||
448 | |||
449 | - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
450 | + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
451 | return NULL; | ||
452 | |||
453 | if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | ||
454 | @@ -228,12 +232,12 @@ | ||
455 | |||
456 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
457 | return NULL; | ||
458 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
459 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
460 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
461 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
462 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
463 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
464 | return NULL; | ||
465 | - stmt = *last_stmt; | ||
466 | + stmt = last_stmt; | ||
467 | |||
468 | if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) | ||
469 | { | ||
470 | @@ -319,11 +323,79 @@ | ||
471 | |||
472 | /* We don't allow changing the order of the computation in the inner-loop | ||
473 | when doing outer-loop vectorization. */ | ||
474 | - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
475 | + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
476 | |||
477 | return pattern_stmt; | ||
478 | } | ||
479 | |||
480 | +/* Handle two cases of multiplication by a constant. The first one is when | ||
481 | + the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second | ||
482 | + operand (OPRND). In that case, we can perform widen-mult from HALF_TYPE to | ||
483 | + TYPE. | ||
484 | + | ||
485 | + Otherwise, if the type of the result (TYPE) is at least 4 times bigger than | ||
486 | + HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than | ||
487 | + TYPE), we can perform widen-mult from the intermediate type to TYPE and | ||
488 | + replace a_T = (TYPE) a_t; with a_it = (interm_type) a_t; */ | ||
489 | + | ||
490 | +static bool | ||
491 | +vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd, | ||
492 | + VEC (gimple, heap) **stmts, tree type, | ||
493 | + tree *half_type, gimple def_stmt) | ||
494 | +{ | ||
495 | + tree new_type, new_oprnd, tmp; | ||
496 | + gimple new_stmt; | ||
497 | + | ||
498 | + if (int_fits_type_p (const_oprnd, *half_type)) | ||
499 | + { | ||
500 | + /* CONST_OPRND is a constant of HALF_TYPE. */ | ||
501 | + *oprnd = gimple_assign_rhs1 (def_stmt); | ||
502 | + return true; | ||
503 | + } | ||
504 | + | ||
505 | + if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) | ||
506 | + || !vinfo_for_stmt (def_stmt)) | ||
507 | + return false; | ||
508 | + | ||
509 | + /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for | ||
510 | + a type 2 times bigger than HALF_TYPE. */ | ||
511 | + new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, | ||
512 | + TYPE_UNSIGNED (type)); | ||
513 | + if (!int_fits_type_p (const_oprnd, new_type)) | ||
514 | + return false; | ||
515 | + | ||
516 | + /* Use NEW_TYPE for widen_mult. */ | ||
517 | + if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) | ||
518 | + { | ||
519 | + new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
520 | + /* Check if the already created pattern stmt is what we need. */ | ||
521 | + if (!is_gimple_assign (new_stmt) | ||
522 | + || gimple_assign_rhs_code (new_stmt) != NOP_EXPR | ||
523 | + || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) | ||
524 | + return false; | ||
525 | + | ||
526 | + *oprnd = gimple_assign_lhs (new_stmt); | ||
527 | + } | ||
528 | + else | ||
529 | + { | ||
530 | + /* Create a_it = (NEW_TYPE) a_t; */ | ||
531 | + *oprnd = gimple_assign_rhs1 (def_stmt); | ||
532 | + tmp = create_tmp_var (new_type, NULL); | ||
533 | + add_referenced_var (tmp); | ||
534 | + new_oprnd = make_ssa_name (tmp, NULL); | ||
535 | + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, | ||
536 | + NULL_TREE); | ||
537 | + SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; | ||
538 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; | ||
539 | + VEC_safe_push (gimple, heap, *stmts, def_stmt); | ||
540 | + *oprnd = new_oprnd; | ||
541 | + } | ||
542 | + | ||
543 | + *half_type = new_type; | ||
544 | + return true; | ||
545 | +} | ||
546 | + | ||
547 | + | ||
548 | /* Function vect_recog_widen_mult_pattern | ||
549 | |||
550 | Try to find the following pattern: | ||
551 | @@ -361,28 +433,47 @@ | ||
552 | S3 a_T = (TYPE) a_t; | ||
553 | S5 prod_T = a_T * CONST; | ||
554 | |||
555 | - Input: | ||
556 | + A special case of multiplication by constants is when 'TYPE' is 4 times | ||
557 | + bigger than 'type', but CONST fits an intermediate type 2 times smaller | ||
558 | + than 'TYPE'. In that case we create an additional pattern stmt for S3 | ||
559 | + to create a variable of the intermediate type, and perform widen-mult | ||
560 | + on the intermediate type as well: | ||
561 | + | ||
562 | + type a_t; | ||
563 | + interm_type a_it; | ||
564 | + TYPE a_T, prod_T, prod_T'; | ||
565 | + | ||
566 | + S1 a_t = ; | ||
567 | + S3 a_T = (TYPE) a_t; | ||
568 | + '--> a_it = (interm_type) a_t; | ||
569 | + S5 prod_T = a_T * CONST; | ||
570 | + '--> prod_T' = a_it w* CONST; | ||
571 | + | ||
572 | + Input/Output: | ||
573 | |||
574 | - * LAST_STMT: A stmt from which the pattern search begins. In the example, | ||
575 | - when this function is called with S5, the pattern {S3,S4,S5,(S6)} is | ||
576 | - detected. | ||
577 | + * STMTS: Contains a stmt from which the pattern search begins. In the | ||
578 | + example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} | ||
579 | + is detected. In case of unsigned widen-mult, the original stmt (S5) is | ||
580 | + replaced with S6 in STMTS. In case of multiplication by a constant | ||
581 | + of an intermediate type (the last case above), STMTS also contains S3 | ||
582 | + (inserted before S5). | ||
583 | |||
584 | - Output: | ||
585 | + Output: | ||
586 | |||
587 | - * TYPE_IN: The type of the input arguments to the pattern. | ||
588 | + * TYPE_IN: The type of the input arguments to the pattern. | ||
589 | |||
590 | - * TYPE_OUT: The type of the output of this pattern. | ||
591 | + * TYPE_OUT: The type of the output of this pattern. | ||
592 | |||
593 | - * Return value: A new stmt that will be used to replace the sequence of | ||
594 | - stmts that constitute the pattern. In this case it will be: | ||
595 | - WIDEN_MULT <a_t, b_t> | ||
596 | - */ | ||
597 | + * Return value: A new stmt that will be used to replace the sequence of | ||
598 | + stmts that constitute the pattern. In this case it will be: | ||
599 | + WIDEN_MULT <a_t, b_t> | ||
600 | +*/ | ||
601 | |||
602 | static gimple | ||
603 | -vect_recog_widen_mult_pattern (gimple *last_stmt, | ||
604 | - tree *type_in, | ||
605 | - tree *type_out) | ||
606 | +vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, | ||
607 | + tree *type_in, tree *type_out) | ||
608 | { | ||
609 | + gimple last_stmt = VEC_pop (gimple, *stmts); | ||
610 | gimple def_stmt0, def_stmt1; | ||
611 | tree oprnd0, oprnd1; | ||
612 | tree type, half_type0, half_type1; | ||
613 | @@ -395,27 +486,27 @@ | ||
614 | VEC (tree, heap) *dummy_vec; | ||
615 | bool op0_ok, op1_ok; | ||
616 | |||
617 | - if (!is_gimple_assign (*last_stmt)) | ||
618 | + if (!is_gimple_assign (last_stmt)) | ||
619 | return NULL; | ||
620 | |||
621 | - type = gimple_expr_type (*last_stmt); | ||
622 | + type = gimple_expr_type (last_stmt); | ||
623 | |||
624 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
625 | of the above pattern. */ | ||
626 | |||
627 | - if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) | ||
628 | + if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) | ||
629 | return NULL; | ||
630 | |||
631 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
632 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
633 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
634 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
635 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
636 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
637 | return NULL; | ||
638 | |||
639 | /* Check argument 0. */ | ||
640 | - op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); | ||
641 | + op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false); | ||
642 | /* Check argument 1. */ | ||
643 | - op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); | ||
644 | + op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); | ||
645 | |||
646 | /* In case of multiplication by a constant one of the operands may not match | ||
647 | the pattern, but not both. */ | ||
648 | @@ -429,29 +520,21 @@ | ||
649 | } | ||
650 | else if (!op0_ok) | ||
651 | { | ||
652 | - if (CONSTANT_CLASS_P (oprnd0) | ||
653 | - && TREE_CODE (half_type1) == INTEGER_TYPE | ||
654 | - && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) | ||
655 | - && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) | ||
656 | - { | ||
657 | - /* OPRND0 is a constant of HALF_TYPE1. */ | ||
658 | - half_type0 = half_type1; | ||
659 | - oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
660 | - } | ||
661 | + if (TREE_CODE (oprnd0) == INTEGER_CST | ||
662 | + && TREE_CODE (half_type1) == INTEGER_TYPE | ||
663 | + && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type, | ||
664 | + &half_type1, def_stmt1)) | ||
665 | + half_type0 = half_type1; | ||
666 | else | ||
667 | return NULL; | ||
668 | } | ||
669 | else if (!op1_ok) | ||
670 | { | ||
671 | - if (CONSTANT_CLASS_P (oprnd1) | ||
672 | + if (TREE_CODE (oprnd1) == INTEGER_CST | ||
673 | && TREE_CODE (half_type0) == INTEGER_TYPE | ||
674 | - && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) | ||
675 | - && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) | ||
676 | - { | ||
677 | - /* OPRND1 is a constant of HALF_TYPE0. */ | ||
678 | - half_type1 = half_type0; | ||
679 | - oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
680 | - } | ||
681 | + && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type, | ||
682 | + &half_type0, def_stmt0)) | ||
683 | + half_type1 = half_type0; | ||
684 | else | ||
685 | return NULL; | ||
686 | } | ||
687 | @@ -461,7 +544,7 @@ | ||
688 | Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ | ||
689 | if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) | ||
690 | { | ||
691 | - tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; | ||
692 | + tree lhs = gimple_assign_lhs (last_stmt), use_lhs; | ||
693 | imm_use_iterator imm_iter; | ||
694 | use_operand_p use_p; | ||
695 | int nuses = 0; | ||
696 | @@ -491,7 +574,7 @@ | ||
697 | return NULL; | ||
698 | |||
699 | type = use_type; | ||
700 | - *last_stmt = use_stmt; | ||
701 | + last_stmt = use_stmt; | ||
702 | } | ||
703 | |||
704 | if (!types_compatible_p (half_type0, half_type1)) | ||
705 | @@ -506,7 +589,7 @@ | ||
706 | vectype_out = get_vectype_for_scalar_type (type); | ||
707 | if (!vectype | ||
708 | || !vectype_out | ||
709 | - || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, | ||
710 | + || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, | ||
711 | vectype_out, vectype, | ||
712 | &dummy, &dummy, &dummy_code, | ||
713 | &dummy_code, &dummy_int, &dummy_vec)) | ||
714 | @@ -524,6 +607,7 @@ | ||
715 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
716 | print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
717 | |||
718 | + VEC_safe_push (gimple, heap, *stmts, last_stmt); | ||
719 | return pattern_stmt; | ||
720 | } | ||
721 | |||
722 | @@ -555,16 +639,17 @@ | ||
723 | */ | ||
724 | |||
725 | static gimple | ||
726 | -vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
727 | +vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) | ||
728 | { | ||
729 | + gimple last_stmt = VEC_index (gimple, *stmts, 0); | ||
730 | tree fn, base, exp = NULL; | ||
731 | gimple stmt; | ||
732 | tree var; | ||
733 | |||
734 | - if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) | ||
735 | + if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) | ||
736 | return NULL; | ||
737 | |||
738 | - fn = gimple_call_fndecl (*last_stmt); | ||
739 | + fn = gimple_call_fndecl (last_stmt); | ||
740 | if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) | ||
741 | return NULL; | ||
742 | |||
743 | @@ -574,8 +659,8 @@ | ||
744 | case BUILT_IN_POWI: | ||
745 | case BUILT_IN_POWF: | ||
746 | case BUILT_IN_POW: | ||
747 | - base = gimple_call_arg (*last_stmt, 0); | ||
748 | - exp = gimple_call_arg (*last_stmt, 1); | ||
749 | + base = gimple_call_arg (last_stmt, 0); | ||
750 | + exp = gimple_call_arg (last_stmt, 1); | ||
751 | if (TREE_CODE (exp) != REAL_CST | ||
752 | && TREE_CODE (exp) != INTEGER_CST) | ||
753 | return NULL; | ||
754 | @@ -667,21 +752,23 @@ | ||
755 | inner-loop nested in an outer-loop that us being vectorized). */ | ||
756 | |||
757 | static gimple | ||
758 | -vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) | ||
759 | +vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, | ||
760 | + tree *type_out) | ||
761 | { | ||
762 | + gimple last_stmt = VEC_index (gimple, *stmts, 0); | ||
763 | gimple stmt; | ||
764 | tree oprnd0, oprnd1; | ||
765 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); | ||
766 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | ||
767 | tree type, half_type; | ||
768 | gimple pattern_stmt; | ||
769 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
770 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
771 | tree var; | ||
772 | |||
773 | - if (!is_gimple_assign (*last_stmt)) | ||
774 | + if (!is_gimple_assign (last_stmt)) | ||
775 | return NULL; | ||
776 | |||
777 | - type = gimple_expr_type (*last_stmt); | ||
778 | + type = gimple_expr_type (last_stmt); | ||
779 | |||
780 | /* Look for the following pattern | ||
781 | DX = (TYPE) X; | ||
782 | @@ -693,25 +780,25 @@ | ||
783 | /* Starting from LAST_STMT, follow the defs of its uses in search | ||
784 | of the above pattern. */ | ||
785 | |||
786 | - if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) | ||
787 | + if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) | ||
788 | return NULL; | ||
789 | |||
790 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | ||
791 | return NULL; | ||
792 | |||
793 | - oprnd0 = gimple_assign_rhs1 (*last_stmt); | ||
794 | - oprnd1 = gimple_assign_rhs2 (*last_stmt); | ||
795 | + oprnd0 = gimple_assign_rhs1 (last_stmt); | ||
796 | + oprnd1 = gimple_assign_rhs2 (last_stmt); | ||
797 | if (!types_compatible_p (TREE_TYPE (oprnd0), type) | ||
798 | || !types_compatible_p (TREE_TYPE (oprnd1), type)) | ||
799 | return NULL; | ||
800 | |||
801 | - /* So far so good. Since *last_stmt was detected as a (summation) reduction, | ||
802 | + /* So far so good. Since last_stmt was detected as a (summation) reduction, | ||
803 | we know that oprnd1 is the reduction variable (defined by a loop-header | ||
804 | phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | ||
805 | Left to check that oprnd0 is defined by a cast from type 'type' to type | ||
806 | 'TYPE'. */ | ||
807 | |||
808 | - if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) | ||
809 | + if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) | ||
810 | return NULL; | ||
811 | |||
812 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
813 | @@ -732,8 +819,9 @@ | ||
814 | |||
815 | /* We don't allow changing the order of the computation in the inner-loop | ||
816 | when doing outer-loop vectorization. */ | ||
817 | - gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); | ||
818 | + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); | ||
819 | |||
820 | + VEC_safe_push (gimple, heap, *stmts, last_stmt); | ||
821 | return pattern_stmt; | ||
822 | } | ||
823 | |||
824 | @@ -762,7 +850,7 @@ | ||
825 | |||
826 | static void | ||
827 | vect_pattern_recog_1 ( | ||
828 | - gimple (* vect_recog_func) (gimple *, tree *, tree *), | ||
829 | + gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), | ||
830 | gimple_stmt_iterator si) | ||
831 | { | ||
832 | gimple stmt = gsi_stmt (si), pattern_stmt; | ||
833 | @@ -774,12 +862,14 @@ | ||
834 | enum tree_code code; | ||
835 | int i; | ||
836 | gimple next; | ||
837 | + VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); | ||
838 | |||
839 | - pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); | ||
840 | + VEC_quick_push (gimple, stmts_to_replace, stmt); | ||
841 | + pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out); | ||
842 | if (!pattern_stmt) | ||
843 | return; | ||
844 | |||
845 | - si = gsi_for_stmt (stmt); | ||
846 | + stmt = VEC_last (gimple, stmts_to_replace); | ||
847 | stmt_info = vinfo_for_stmt (stmt); | ||
848 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | ||
849 | |||
850 | @@ -849,6 +939,35 @@ | ||
851 | FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) | ||
852 | if (next == stmt) | ||
853 | VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); | ||
854 | + | ||
855 | + /* In case of widen-mult by a constant, it is possible that an additional | ||
856 | + pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a | ||
857 | + stmt_info for it, and mark the relevant statements. */ | ||
858 | + for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) | ||
859 | + && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); | ||
860 | + i++) | ||
861 | + { | ||
862 | + stmt_info = vinfo_for_stmt (stmt); | ||
863 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
864 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
865 | + { | ||
866 | + fprintf (vect_dump, "additional pattern stmt: "); | ||
867 | + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
868 | + } | ||
869 | + | ||
870 | + set_vinfo_for_stmt (pattern_stmt, | ||
871 | + new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
872 | + gimple_set_bb (pattern_stmt, gimple_bb (stmt)); | ||
873 | + pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
874 | + | ||
875 | + STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | ||
876 | + STMT_VINFO_DEF_TYPE (pattern_stmt_info) | ||
877 | + = STMT_VINFO_DEF_TYPE (stmt_info); | ||
878 | + STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info); | ||
879 | + STMT_VINFO_IN_PATTERN_P (stmt_info) = true; | ||
880 | + } | ||
881 | + | ||
882 | + VEC_free (gimple, heap, stmts_to_replace); | ||
883 | } | ||
884 | |||
885 | |||
886 | @@ -896,10 +1015,8 @@ | ||
887 | |||
888 | If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} | ||
889 | (because they are marked as irrelevant). It will vectorize S6, and record | ||
890 | - a pointer to the new vector stmt VS6 both from S6 (as usual), and also | ||
891 | - from S4. We do that so that when we get to vectorizing stmts that use the | ||
892 | - def of S4 (like S5 that uses a_0), we'll know where to take the relevant | ||
893 | - vector-def from. S4 will be skipped, and S5 will be vectorized as usual: | ||
894 | + a pointer to the new vector stmt VS6 from S6 (as usual). | ||
895 | + S4 will be skipped, and S5 will be vectorized as usual: | ||
896 | |||
897 | in_pattern_p related_stmt vec_stmt | ||
898 | S1: a_i = .... - - - | ||
899 | @@ -915,7 +1032,21 @@ | ||
900 | elsewhere), and we'll end up with: | ||
901 | |||
902 | VS6: va_new = .... | ||
903 | - VS5: ... = ..vuse(va_new).. */ | ||
904 | + VS5: ... = ..vuse(va_new).. | ||
905 | + | ||
906 | + In case of more than one pattern statement, e.g., widen-mult with | ||
907 | + intermediate type: | ||
908 | + | ||
909 | + S1 a_t = ; | ||
910 | + S2 a_T = (TYPE) a_t; | ||
911 | + '--> S3: a_it = (interm_type) a_t; | ||
912 | + S4 prod_T = a_T * CONST; | ||
913 | + '--> S5: prod_T' = a_it w* CONST; | ||
914 | + | ||
915 | + there may be other users of a_T outside the pattern. In that case S2 will | ||
916 | + be marked as relevant (as well as S3), and both S2 and S3 will be analyzed | ||
917 | + and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will | ||
918 | + be recorded in S3. */ | ||
919 | |||
920 | void | ||
921 | vect_pattern_recog (loop_vec_info loop_vinfo) | ||
922 | @@ -925,7 +1056,7 @@ | ||
923 | unsigned int nbbs = loop->num_nodes; | ||
924 | gimple_stmt_iterator si; | ||
925 | unsigned int i, j; | ||
926 | - gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
927 | + gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
928 | |||
929 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
930 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | ||
931 | Index: gcc-4_6-branch/gcc/tree-vect-slp.c | ||
932 | =================================================================== | ||
933 | --- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800 | ||
934 | +++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800 | ||
935 | @@ -152,7 +152,9 @@ | ||
936 | if (loop && def_stmt && gimple_bb (def_stmt) | ||
937 | && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
938 | && vinfo_for_stmt (def_stmt) | ||
939 | - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) | ||
940 | + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) | ||
941 | + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | ||
942 | + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | ||
943 | { | ||
944 | if (!*first_stmt_dt0) | ||
945 | *pattern0 = true; | ||
946 | Index: gcc-4_6-branch/gcc/tree-vect-stmts.c | ||
947 | =================================================================== | ||
948 | --- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800 | ||
949 | +++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800 | ||
950 | @@ -126,33 +126,72 @@ | ||
951 | |||
952 | static void | ||
953 | vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, | ||
954 | - enum vect_relevant relevant, bool live_p) | ||
955 | + enum vect_relevant relevant, bool live_p, | ||
956 | + bool used_in_pattern) | ||
957 | { | ||
958 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
959 | enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
960 | bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
961 | + gimple pattern_stmt; | ||
962 | |||
963 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
964 | fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); | ||
965 | |||
966 | + /* If this stmt is an original stmt in a pattern, we might need to mark its | ||
967 | + related pattern stmt instead of the original stmt. However, such stmts | ||
968 | + may have their own uses that are not in any pattern; in such cases the | ||
969 | + stmt itself should be marked. */ | ||
970 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | ||
971 | { | ||
972 | - gimple pattern_stmt; | ||
973 | + bool found = false; | ||
974 | + if (!used_in_pattern) | ||
975 | + { | ||
976 | + imm_use_iterator imm_iter; | ||
977 | + use_operand_p use_p; | ||
978 | + gimple use_stmt; | ||
979 | + tree lhs; | ||
980 | + | ||
981 | + if (is_gimple_assign (stmt)) | ||
982 | + lhs = gimple_assign_lhs (stmt); | ||
983 | + else | ||
984 | + lhs = gimple_call_lhs (stmt); | ||
985 | + | ||
986 | + /* This use is an out-of-pattern use; if LHS has other uses that are | ||
987 | + pattern uses, we should mark the stmt itself, and not the pattern | ||
988 | + stmt. */ | ||
989 | + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) | ||
990 | + { | ||
991 | + if (is_gimple_debug (USE_STMT (use_p))) | ||
992 | + continue; | ||
993 | + use_stmt = USE_STMT (use_p); | ||
994 | + | ||
995 | + if (vinfo_for_stmt (use_stmt) | ||
996 | + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) | ||
997 | + { | ||
998 | + found = true; | ||
999 | + break; | ||
1000 | + } | ||
1001 | + } | ||
1002 | + } | ||
1003 | + | ||
1004 | + if (!found) | ||
1005 | + { | ||
1006 | + /* This is the last stmt in a sequence that was detected as a | ||
1007 | + pattern that can potentially be vectorized. Don't mark the stmt | ||
1008 | + as relevant/live because it's not going to be vectorized. | ||
1009 | + Instead mark the pattern-stmt that replaces it. */ | ||
1010 | + | ||
1011 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1012 | |||
1013 | - /* This is the last stmt in a sequence that was detected as a | ||
1014 | - pattern that can potentially be vectorized. Don't mark the stmt | ||
1015 | - as relevant/live because it's not going to be vectorized. | ||
1016 | - Instead mark the pattern-stmt that replaces it. */ | ||
1017 | - | ||
1018 | - pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1019 | - | ||
1020 | - if (vect_print_dump_info (REPORT_DETAILS)) | ||
1021 | - fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); | ||
1022 | - stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1023 | - gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); | ||
1024 | - save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
1025 | - save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
1026 | - stmt = pattern_stmt; | ||
1027 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1028 | + fprintf (vect_dump, "last stmt in pattern. don't mark" | ||
1029 | + " relevant/live."); | ||
1030 | + stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1031 | + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); | ||
1032 | + save_relevant = STMT_VINFO_RELEVANT (stmt_info); | ||
1033 | + save_live_p = STMT_VINFO_LIVE_P (stmt_info); | ||
1034 | + stmt = pattern_stmt; | ||
1035 | + } | ||
1036 | } | ||
1037 | |||
1038 | STMT_VINFO_LIVE_P (stmt_info) |= live_p; | ||
1039 | @@ -437,7 +476,8 @@ | ||
1040 | } | ||
1041 | } | ||
1042 | |||
1043 | - vect_mark_relevant (worklist, def_stmt, relevant, live_p); | ||
1044 | + vect_mark_relevant (worklist, def_stmt, relevant, live_p, | ||
1045 | + is_pattern_stmt_p (stmt_vinfo)); | ||
1046 | return true; | ||
1047 | } | ||
1048 | |||
1049 | @@ -494,7 +534,7 @@ | ||
1050 | } | ||
1051 | |||
1052 | if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) | ||
1053 | - vect_mark_relevant (&worklist, phi, relevant, live_p); | ||
1054 | + vect_mark_relevant (&worklist, phi, relevant, live_p, false); | ||
1055 | } | ||
1056 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
1057 | { | ||
1058 | @@ -506,7 +546,7 @@ | ||
1059 | } | ||
1060 | |||
1061 | if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) | ||
1062 | - vect_mark_relevant (&worklist, stmt, relevant, live_p); | ||
1063 | + vect_mark_relevant (&worklist, stmt, relevant, live_p, false); | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1067 | @@ -613,42 +653,55 @@ | ||
1068 | if (is_gimple_assign (stmt)) | ||
1069 | { | ||
1070 | tree rhs = gimple_assign_rhs1 (stmt); | ||
1071 | - if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1072 | - == GIMPLE_SINGLE_RHS) | ||
1073 | + unsigned int op_num; | ||
1074 | + tree op; | ||
1075 | + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) | ||
1076 | { | ||
1077 | - unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 | ||
1078 | - (stmt)); | ||
1079 | - for (i = 0; i < op_num; i++) | ||
1080 | - { | ||
1081 | - tree op = TREE_OPERAND (rhs, i); | ||
1082 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1083 | - &worklist)) | ||
1084 | + case GIMPLE_SINGLE_RHS: | ||
1085 | + op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); | ||
1086 | + for (i = 0; i < op_num; i++) | ||
1087 | { | ||
1088 | - VEC_free (gimple, heap, worklist); | ||
1089 | - return false; | ||
1090 | + op = TREE_OPERAND (rhs, i); | ||
1091 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1092 | + &worklist)) | ||
1093 | + { | ||
1094 | + VEC_free (gimple, heap, worklist); | ||
1095 | + return false; | ||
1096 | + } | ||
1097 | } | ||
1098 | - } | ||
1099 | - } | ||
1100 | - else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | ||
1101 | - == GIMPLE_BINARY_RHS) | ||
1102 | - { | ||
1103 | - tree op = gimple_assign_rhs1 (stmt); | ||
1104 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1105 | - &worklist)) | ||
1106 | - { | ||
1107 | - VEC_free (gimple, heap, worklist); | ||
1108 | - return false; | ||
1109 | - } | ||
1110 | - op = gimple_assign_rhs2 (stmt); | ||
1111 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1112 | - &worklist)) | ||
1113 | - { | ||
1114 | - VEC_free (gimple, heap, worklist); | ||
1115 | - return false; | ||
1116 | - } | ||
1117 | + break; | ||
1118 | + | ||
1119 | + case GIMPLE_BINARY_RHS: | ||
1120 | + op = gimple_assign_rhs1 (stmt); | ||
1121 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1122 | + &worklist)) | ||
1123 | + { | ||
1124 | + VEC_free (gimple, heap, worklist); | ||
1125 | + return false; | ||
1126 | + } | ||
1127 | + op = gimple_assign_rhs2 (stmt); | ||
1128 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1129 | + &worklist)) | ||
1130 | + { | ||
1131 | + VEC_free (gimple, heap, worklist); | ||
1132 | + return false; | ||
1133 | + } | ||
1134 | + break; | ||
1135 | + | ||
1136 | + case GIMPLE_UNARY_RHS: | ||
1137 | + op = gimple_assign_rhs1 (stmt); | ||
1138 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
1139 | + &worklist)) | ||
1140 | + { | ||
1141 | + VEC_free (gimple, heap, worklist); | ||
1142 | + return false; | ||
1143 | + } | ||
1144 | + | ||
1145 | + break; | ||
1146 | + | ||
1147 | + default: | ||
1148 | + return false; | ||
1149 | } | ||
1150 | - else | ||
1151 | - return false; | ||
1152 | } | ||
1153 | else if (is_gimple_call (stmt)) | ||
1154 | { | ||
1155 | @@ -1210,7 +1263,14 @@ | ||
1156 | |||
1157 | /* Get the def from the vectorized stmt. */ | ||
1158 | def_stmt_info = vinfo_for_stmt (def_stmt); | ||
1159 | + | ||
1160 | vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); | ||
1161 | + /* Get vectorized pattern statement. */ | ||
1162 | + if (!vec_stmt | ||
1163 | + && STMT_VINFO_IN_PATTERN_P (def_stmt_info) | ||
1164 | + && !STMT_VINFO_RELEVANT (def_stmt_info)) | ||
1165 | + vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( | ||
1166 | + STMT_VINFO_RELATED_STMT (def_stmt_info))); | ||
1167 | gcc_assert (vec_stmt); | ||
1168 | if (gimple_code (vec_stmt) == GIMPLE_PHI) | ||
1169 | vec_oprnd = PHI_RESULT (vec_stmt); | ||
1170 | @@ -4894,6 +4954,7 @@ | ||
1171 | enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); | ||
1172 | bool ok; | ||
1173 | tree scalar_type, vectype; | ||
1174 | + gimple pattern_stmt; | ||
1175 | |||
1176 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1177 | { | ||
1178 | @@ -4915,16 +4976,22 @@ | ||
1179 | - any LABEL_EXPRs in the loop | ||
1180 | - computations that are used only for array indexing or loop control. | ||
1181 | In basic blocks we only analyze statements that are a part of some SLP | ||
1182 | - instance, therefore, all the statements are relevant. */ | ||
1183 | + instance, therefore, all the statements are relevant. | ||
1184 | + | ||
1185 | + A pattern statement needs to be analyzed instead of the original statement | ||
1186 | + if the original statement is not relevant. Otherwise, we analyze both | ||
1187 | + statements. */ | ||
1188 | |||
1189 | + pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1190 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | ||
1191 | && !STMT_VINFO_LIVE_P (stmt_info)) | ||
1192 | { | ||
1193 | - gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1194 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1195 | + && pattern_stmt | ||
1196 | && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
1197 | || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
1198 | { | ||
1199 | + /* Analyze PATTERN_STMT instead of the original stmt. */ | ||
1200 | stmt = pattern_stmt; | ||
1201 | stmt_info = vinfo_for_stmt (pattern_stmt); | ||
1202 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
1203 | @@ -4941,6 +5008,21 @@ | ||
1204 | return true; | ||
1205 | } | ||
1206 | } | ||
1207 | + else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1208 | + && pattern_stmt | ||
1209 | + && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | ||
1210 | + || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | ||
1211 | + { | ||
1212 | + /* Analyze PATTERN_STMT too. */ | ||
1213 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
1214 | + { | ||
1215 | + fprintf (vect_dump, "==> examining pattern statement: "); | ||
1216 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
1217 | + } | ||
1218 | + | ||
1219 | + if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) | ||
1220 | + return false; | ||
1221 | + } | ||
1222 | |||
1223 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) | ||
1224 | { | ||
1225 | @@ -5074,7 +5156,6 @@ | ||
1226 | bool is_store = false; | ||
1227 | gimple vec_stmt = NULL; | ||
1228 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | ||
1229 | - gimple orig_stmt_in_pattern; | ||
1230 | bool done; | ||
1231 | |||
1232 | switch (STMT_VINFO_TYPE (stmt_info)) | ||
1233 | @@ -5213,21 +5294,7 @@ | ||
1234 | } | ||
1235 | |||
1236 | if (vec_stmt) | ||
1237 | - { | ||
1238 | STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; | ||
1239 | - orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); | ||
1240 | - if (orig_stmt_in_pattern) | ||
1241 | - { | ||
1242 | - stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); | ||
1243 | - /* STMT was inserted by the vectorizer to replace a computation idiom. | ||
1244 | - ORIG_STMT_IN_PATTERN is a stmt in the original sequence that | ||
1245 | - computed this idiom. We need to record a pointer to VEC_STMT in | ||
1246 | - the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the | ||
1247 | - documentation of vect_pattern_recog. */ | ||
1248 | - if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | ||
1249 | - STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; | ||
1250 | - } | ||
1251 | - } | ||
1252 | |||
1253 | return is_store; | ||
1254 | } | ||
1255 | @@ -5605,8 +5672,12 @@ | ||
1256 | || *dt == vect_nested_cycle) | ||
1257 | { | ||
1258 | stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); | ||
1259 | - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | ||
1260 | + | ||
1261 | + if (STMT_VINFO_IN_PATTERN_P (stmt_info) | ||
1262 | + && !STMT_VINFO_RELEVANT (stmt_info) | ||
1263 | + && !STMT_VINFO_LIVE_P (stmt_info)) | ||
1264 | stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | ||
1265 | + | ||
1266 | *vectype = STMT_VINFO_VECTYPE (stmt_info); | ||
1267 | gcc_assert (*vectype != NULL_TREE); | ||
1268 | } | ||
1269 | Index: gcc-4_6-branch/gcc/tree-vectorizer.h | ||
1270 | =================================================================== | ||
1271 | --- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800 | ||
1272 | +++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800 | ||
1273 | @@ -890,7 +890,7 @@ | ||
1274 | /* Pattern recognition functions. | ||
1275 | Additional pattern recognition functions can (and will) be added | ||
1276 | in the future. */ | ||
1277 | -typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); | ||
1278 | +typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
1279 | #define NUM_PATTERNS 4 | ||
1280 | void vect_pattern_recog (loop_vec_info); | ||
1281 | |||