diff options
| author | Khem Raj <raj.khem@gmail.com> | 2012-01-09 23:44:16 +0000 |
|---|---|---|
| committer | Koen Kooi <koen@dominion.thruhere.net> | 2012-01-10 21:43:59 +0100 |
| commit | 8413bf3c5de39d830969ad181b4dd4e136d91482 (patch) | |
| tree | 2328801dc6da7f78f77a211925ab9b5ba913a2ec /meta-oe/recipes-devtools/gcc/gcc-4.6/linaro | |
| parent | f2179dabaaba99ef5b9fe48e38c73b2178d50390 (diff) | |
| download | meta-openembedded-8413bf3c5de39d830969ad181b4dd4e136d91482.tar.gz | |
gcc-4.6: Update linaro patches past 2011.12 release
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.6/linaro')
9 files changed, 4714 insertions, 0 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch new file mode 100644 index 0000000000..74f139d33a --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106842.patch | |||
| @@ -0,0 +1,388 @@ | |||
| 1 | 2011-11-22 Ira Rosen <ira.rosen@linaro.org> | ||
| 2 | |||
| 3 | Backport from mainline: | ||
| 4 | |||
| 5 | 2011-10-06 Jakub Jelinek <jakub@redhat.com> | ||
| 6 | |||
| 7 | gcc/ | ||
| 8 | PR tree-optimization/50596 | ||
| 9 | * tree-vectorizer.h (vect_is_simple_cond): New prototype. | ||
| 10 | (NUM_PATTERNS): Change to 6. | ||
| 11 | * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New | ||
| 12 | function. | ||
| 13 | (vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern. | ||
| 14 | (vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt | ||
| 15 | if it already has one, and don't set STMT_VINFO_VECTYPE in it | ||
| 16 | if it is already set. | ||
| 17 | * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle | ||
| 18 | COND_EXPR in pattern stmts. | ||
| 19 | (vect_is_simple_cond): No longer static. | ||
| 20 | |||
| 21 | gcc/testsuite: | ||
| 22 | PR tree-optimization/50596 | ||
| 23 | * gcc.dg/vect/vect-cond-8.c: New test. | ||
| 24 | |||
| 25 | 2011-10-07 Jakub Jelinek <jakub@redhat.com> | ||
| 26 | |||
| 27 | gcc/ | ||
| 28 | PR tree-optimization/50650 | ||
| 29 | * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't | ||
| 30 | call vect_is_simple_cond here, instead fail if cond_expr isn't | ||
| 31 | COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL | ||
| 32 | for cond_expr's first operand. | ||
| 33 | * tree-vect-stmts.c (vect_is_simple_cond): Static again. | ||
| 34 | * tree-vectorizer.h (vect_is_simple_cond): Remove prototype. | ||
| 35 | |||
| 36 | |||
| 37 | gcc/ | ||
| 38 | * tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce | ||
| 39 | it to integral types only. | ||
| 40 | |||
| 41 | gcc/testsuite/ | ||
| 42 | * gcc.dg/vect/pr30858.c: Expect the error message twice for targets | ||
| 43 | with multiple vector sizes. | ||
| 44 | * gcc.dg/vect/vect-cond-8.c: Rename to... | ||
| 45 | * gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float | ||
| 46 | to int. | ||
| 47 | * lib/target-supports.exp (check_effective_target_vect_condition): | ||
| 48 | Return true for NEON. | ||
| 49 | |||
| 50 | === modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c' | ||
| 51 | --- old/gcc/testsuite/gcc.dg/vect/pr30858.c 2007-02-22 08:16:18 +0000 | ||
| 52 | +++ new/gcc/testsuite/gcc.dg/vect/pr30858.c 2011-11-20 09:11:09 +0000 | ||
| 53 | @@ -11,5 +11,6 @@ | ||
| 54 | } | ||
| 55 | |||
| 56 | /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ | ||
| 57 | -/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */ | ||
| 58 | +/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" { xfail vect_multiple_sizes } } } */ | ||
| 59 | +/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */ | ||
| 60 | /* { dg-final { cleanup-tree-dump "vect" } } */ | ||
| 61 | |||
| 62 | === added file 'gcc/testsuite/gcc.dg/vect/vect-cond-8a.c' | ||
| 63 | --- old/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 1970-01-01 00:00:00 +0000 | ||
| 64 | +++ new/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2011-11-20 09:11:09 +0000 | ||
| 65 | @@ -0,0 +1,75 @@ | ||
| 66 | +/* { dg-require-effective-target vect_condition } */ | ||
| 67 | + | ||
| 68 | +#include "tree-vect.h" | ||
| 69 | + | ||
| 70 | +#define N 1024 | ||
| 71 | +int a[N], b[N], c[N]; | ||
| 72 | +char d[N], e[N], f[N]; | ||
| 73 | +unsigned char k[N]; | ||
| 74 | + | ||
| 75 | +__attribute__((noinline, noclone)) void | ||
| 76 | +f1 (void) | ||
| 77 | +{ | ||
| 78 | + int i; | ||
| 79 | + for (i = 0; i < N; ++i) | ||
| 80 | + k[i] = a[i] < b[i] ? 17 : 0; | ||
| 81 | +} | ||
| 82 | + | ||
| 83 | +__attribute__((noinline, noclone)) void | ||
| 84 | +f2 (void) | ||
| 85 | +{ | ||
| 86 | + int i; | ||
| 87 | + for (i = 0; i < N; ++i) | ||
| 88 | + k[i] = a[i] < b[i] ? 0 : 24; | ||
| 89 | +} | ||
| 90 | + | ||
| 91 | +__attribute__((noinline, noclone)) void | ||
| 92 | +f3 (void) | ||
| 93 | +{ | ||
| 94 | + int i; | ||
| 95 | + for (i = 0; i < N; ++i) | ||
| 96 | + k[i] = a[i] < b[i] ? 51 : 12; | ||
| 97 | +} | ||
| 98 | + | ||
| 99 | +int | ||
| 100 | +main () | ||
| 101 | +{ | ||
| 102 | + int i; | ||
| 103 | + | ||
| 104 | + check_vect (); | ||
| 105 | + | ||
| 106 | + for (i = 0; i < N; i++) | ||
| 107 | + { | ||
| 108 | + switch (i % 9) | ||
| 109 | + { | ||
| 110 | + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; | ||
| 111 | + case 1: a[i] = 0; b[i] = 0; break; | ||
| 112 | + case 2: a[i] = i + 1; b[i] = - i - 1; break; | ||
| 113 | + case 3: a[i] = i; b[i] = i + 7; break; | ||
| 114 | + case 4: a[i] = i; b[i] = i; break; | ||
| 115 | + case 5: a[i] = i + 16; b[i] = i + 3; break; | ||
| 116 | + case 6: a[i] = - i - 5; b[i] = - i; break; | ||
| 117 | + case 7: a[i] = - i; b[i] = - i; break; | ||
| 118 | + case 8: a[i] = - i; b[i] = - i - 7; break; | ||
| 119 | + } | ||
| 120 | + d[i] = i; | ||
| 121 | + e[i] = 2 * i; | ||
| 122 | + } | ||
| 123 | + f1 (); | ||
| 124 | + for (i = 0; i < N; i++) | ||
| 125 | + if (k[i] != ((i % 3) == 0 ? 17 : 0)) | ||
| 126 | + abort (); | ||
| 127 | + f2 (); | ||
| 128 | + for (i = 0; i < N; i++) | ||
| 129 | + if (k[i] != ((i % 3) == 0 ? 0 : 24)) | ||
| 130 | + abort (); | ||
| 131 | + f3 (); | ||
| 132 | + for (i = 0; i < N; i++) | ||
| 133 | + if (k[i] != ((i % 3) == 0 ? 51 : 12)) | ||
| 134 | + abort (); | ||
| 135 | + | ||
| 136 | + return 0; | ||
| 137 | +} | ||
| 138 | + | ||
| 139 | +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */ | ||
| 140 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
| 141 | |||
| 142 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
| 143 | --- old/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000 | ||
| 144 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-11-22 16:52:23 +0000 | ||
| 145 | @@ -3150,7 +3150,8 @@ | ||
| 146 | || [istarget ia64-*-*] | ||
| 147 | || [istarget i?86-*-*] | ||
| 148 | || [istarget spu-*-*] | ||
| 149 | - || [istarget x86_64-*-*] } { | ||
| 150 | + || [istarget x86_64-*-*] | ||
| 151 | + || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { | ||
| 152 | set et_vect_cond_saved 1 | ||
| 153 | } | ||
| 154 | } | ||
| 155 | |||
| 156 | === modified file 'gcc/tree-vect-patterns.c' | ||
| 157 | --- old/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000 | ||
| 158 | +++ new/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000 | ||
| 159 | @@ -50,13 +50,16 @@ | ||
| 160 | tree *); | ||
| 161 | static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, | ||
| 162 | tree *, tree *); | ||
| 163 | +static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, | ||
| 164 | + tree *, tree *); | ||
| 165 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { | ||
| 166 | vect_recog_widen_mult_pattern, | ||
| 167 | vect_recog_widen_sum_pattern, | ||
| 168 | vect_recog_dot_prod_pattern, | ||
| 169 | vect_recog_pow_pattern, | ||
| 170 | vect_recog_over_widening_pattern, | ||
| 171 | - vect_recog_widen_shift_pattern}; | ||
| 172 | + vect_recog_widen_shift_pattern, | ||
| 173 | + vect_recog_mixed_size_cond_pattern}; | ||
| 174 | |||
| 175 | |||
| 176 | /* Function widened_name_p | ||
| 177 | @@ -1441,6 +1444,118 @@ | ||
| 178 | return pattern_stmt; | ||
| 179 | } | ||
| 180 | |||
| 181 | +/* Function vect_recog_mixed_size_cond_pattern | ||
| 182 | + | ||
| 183 | + Try to find the following pattern: | ||
| 184 | + | ||
| 185 | + type x_t, y_t; | ||
| 186 | + TYPE a_T, b_T, c_T; | ||
| 187 | + loop: | ||
| 188 | + S1 a_T = x_t CMP y_t ? b_T : c_T; | ||
| 189 | + | ||
| 190 | + where type 'TYPE' is an integral type which has different size | ||
| 191 | + from 'type'. b_T and c_T are constants and if 'TYPE' is wider | ||
| 192 | + than 'type', the constants need to fit into an integer type | ||
| 193 | + with the same width as 'type'. | ||
| 194 | + | ||
| 195 | + Input: | ||
| 196 | + | ||
| 197 | + * LAST_STMT: A stmt from which the pattern search begins. | ||
| 198 | + | ||
| 199 | + Output: | ||
| 200 | + | ||
| 201 | + * TYPE_IN: The type of the input arguments to the pattern. | ||
| 202 | + | ||
| 203 | + * TYPE_OUT: The type of the output of this pattern. | ||
| 204 | + | ||
| 205 | + * Return value: A new stmt that will be used to replace the pattern. | ||
| 206 | + Additionally a def_stmt is added. | ||
| 207 | + | ||
| 208 | + a_it = x_t CMP y_t ? b_it : c_it; | ||
| 209 | + a_T = (TYPE) a_it; */ | ||
| 210 | + | ||
| 211 | +static gimple | ||
| 212 | +vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, | ||
| 213 | + tree *type_out) | ||
| 214 | +{ | ||
| 215 | + gimple last_stmt = VEC_index (gimple, *stmts, 0); | ||
| 216 | + tree cond_expr, then_clause, else_clause; | ||
| 217 | + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; | ||
| 218 | + tree type, vectype, comp_vectype, comp_type, op, tmp; | ||
| 219 | + enum machine_mode cmpmode; | ||
| 220 | + gimple pattern_stmt, def_stmt; | ||
| 221 | + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 222 | + | ||
| 223 | + if (!is_gimple_assign (last_stmt) | ||
| 224 | + || gimple_assign_rhs_code (last_stmt) != COND_EXPR | ||
| 225 | + || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) | ||
| 226 | + return NULL; | ||
| 227 | + | ||
| 228 | + op = gimple_assign_rhs1 (last_stmt); | ||
| 229 | + cond_expr = TREE_OPERAND (op, 0); | ||
| 230 | + then_clause = TREE_OPERAND (op, 1); | ||
| 231 | + else_clause = TREE_OPERAND (op, 2); | ||
| 232 | + | ||
| 233 | + if (TREE_CODE (then_clause) != INTEGER_CST | ||
| 234 | + || TREE_CODE (else_clause) != INTEGER_CST) | ||
| 235 | + return NULL; | ||
| 236 | + | ||
| 237 | + if (!COMPARISON_CLASS_P (cond_expr)) | ||
| 238 | + return NULL; | ||
| 239 | + | ||
| 240 | + type = gimple_expr_type (last_stmt); | ||
| 241 | + comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); | ||
| 242 | + if (!INTEGRAL_TYPE_P (comp_type) | ||
| 243 | + || !INTEGRAL_TYPE_P (type)) | ||
| 244 | + return NULL; | ||
| 245 | + | ||
| 246 | + comp_vectype = get_vectype_for_scalar_type (comp_type); | ||
| 247 | + if (comp_vectype == NULL_TREE) | ||
| 248 | + return NULL; | ||
| 249 | + | ||
| 250 | + cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); | ||
| 251 | + | ||
| 252 | + if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) | ||
| 253 | + return NULL; | ||
| 254 | + | ||
| 255 | + vectype = get_vectype_for_scalar_type (type); | ||
| 256 | + if (vectype == NULL_TREE) | ||
| 257 | + return NULL; | ||
| 258 | + | ||
| 259 | + if (types_compatible_p (vectype, comp_vectype)) | ||
| 260 | + return NULL; | ||
| 261 | + | ||
| 262 | + if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) | ||
| 263 | + return NULL; | ||
| 264 | + | ||
| 265 | + if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) | ||
| 266 | + { | ||
| 267 | + if (!int_fits_type_p (then_clause, comp_type) | ||
| 268 | + || !int_fits_type_p (else_clause, comp_type)) | ||
| 269 | + return NULL; | ||
| 270 | + } | ||
| 271 | + | ||
| 272 | + tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), | ||
| 273 | + fold_convert (comp_type, then_clause), | ||
| 274 | + fold_convert (comp_type, else_clause)); | ||
| 275 | + def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); | ||
| 276 | + | ||
| 277 | + pattern_stmt | ||
| 278 | + = gimple_build_assign_with_ops (NOP_EXPR, | ||
| 279 | + vect_recog_temp_ssa_var (type, NULL), | ||
| 280 | + gimple_assign_lhs (def_stmt), NULL_TREE); | ||
| 281 | + | ||
| 282 | + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; | ||
| 283 | + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); | ||
| 284 | + set_vinfo_for_stmt (def_stmt, def_stmt_info); | ||
| 285 | + STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; | ||
| 286 | + *type_in = vectype; | ||
| 287 | + *type_out = vectype; | ||
| 288 | + | ||
| 289 | + return pattern_stmt; | ||
| 290 | +} | ||
| 291 | + | ||
| 292 | + | ||
| 293 | /* Mark statements that are involved in a pattern. */ | ||
| 294 | |||
| 295 | static inline void | ||
| 296 | @@ -1468,14 +1583,18 @@ | ||
| 297 | if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) | ||
| 298 | { | ||
| 299 | def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); | ||
| 300 | - set_vinfo_for_stmt (def_stmt, | ||
| 301 | - new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); | ||
| 302 | + def_stmt_info = vinfo_for_stmt (def_stmt); | ||
| 303 | + if (def_stmt_info == NULL) | ||
| 304 | + { | ||
| 305 | + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); | ||
| 306 | + set_vinfo_for_stmt (def_stmt, def_stmt_info); | ||
| 307 | + } | ||
| 308 | gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); | ||
| 309 | - def_stmt_info = vinfo_for_stmt (def_stmt); | ||
| 310 | STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; | ||
| 311 | STMT_VINFO_DEF_TYPE (def_stmt_info) | ||
| 312 | = STMT_VINFO_DEF_TYPE (orig_stmt_info); | ||
| 313 | - STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; | ||
| 314 | + if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) | ||
| 315 | + STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | |||
| 320 | === modified file 'gcc/tree-vect-stmts.c' | ||
| 321 | --- old/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000 | ||
| 322 | +++ new/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000 | ||
| 323 | @@ -655,20 +655,40 @@ | ||
| 324 | tree rhs = gimple_assign_rhs1 (stmt); | ||
| 325 | unsigned int op_num; | ||
| 326 | tree op; | ||
| 327 | + enum tree_code rhs_code; | ||
| 328 | switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) | ||
| 329 | { | ||
| 330 | case GIMPLE_SINGLE_RHS: | ||
| 331 | - op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); | ||
| 332 | - for (i = 0; i < op_num; i++) | ||
| 333 | - { | ||
| 334 | - op = TREE_OPERAND (rhs, i); | ||
| 335 | - if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
| 336 | - &worklist)) | ||
| 337 | - { | ||
| 338 | - VEC_free (gimple, heap, worklist); | ||
| 339 | - return false; | ||
| 340 | - } | ||
| 341 | - } | ||
| 342 | + op = gimple_assign_rhs1 (stmt); | ||
| 343 | + rhs_code = gimple_assign_rhs_code (stmt); | ||
| 344 | + i = 0; | ||
| 345 | + if (rhs_code == COND_EXPR | ||
| 346 | + && COMPARISON_CLASS_P (TREE_OPERAND (op, 0))) | ||
| 347 | + { | ||
| 348 | + op = TREE_OPERAND (op, 0); | ||
| 349 | + if (!process_use (stmt, TREE_OPERAND (op, 0), | ||
| 350 | + loop_vinfo, | ||
| 351 | + live_p, relevant, &worklist) | ||
| 352 | + || !process_use (stmt, TREE_OPERAND (op, 1), | ||
| 353 | + loop_vinfo, | ||
| 354 | + live_p, relevant, &worklist)) | ||
| 355 | + { | ||
| 356 | + VEC_free (gimple, heap, worklist); | ||
| 357 | + return false; | ||
| 358 | + } | ||
| 359 | + i = 1; | ||
| 360 | + } | ||
| 361 | + op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); | ||
| 362 | + for (i; i < op_num; i++) | ||
| 363 | + { | ||
| 364 | + op = TREE_OPERAND (rhs, i); | ||
| 365 | + if (!process_use (stmt, op, loop_vinfo, live_p, relevant, | ||
| 366 | + &worklist)) | ||
| 367 | + { | ||
| 368 | + VEC_free (gimple, heap, worklist); | ||
| 369 | + return false; | ||
| 370 | + } | ||
| 371 | + } | ||
| 372 | break; | ||
| 373 | |||
| 374 | case GIMPLE_BINARY_RHS: | ||
| 375 | |||
| 376 | === modified file 'gcc/tree-vectorizer.h' | ||
| 377 | --- old/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000 | ||
| 378 | +++ new/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000 | ||
| 379 | @@ -917,7 +917,7 @@ | ||
| 380 | Additional pattern recognition functions can (and will) be added | ||
| 381 | in the future. */ | ||
| 382 | typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
| 383 | -#define NUM_PATTERNS 6 | ||
| 384 | +#define NUM_PATTERNS 7 | ||
| 385 | void vect_pattern_recog (loop_vec_info); | ||
| 386 | |||
| 387 | /* In tree-vectorizer.c. */ | ||
| 388 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch new file mode 100644 index 0000000000..ea42ad6eeb --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106843.patch | |||
| @@ -0,0 +1,805 @@ | |||
| 1 | 2011-11-22 Ira Rosen <ira.rosen@linaro.org> | ||
| 2 | Backport from mainline: | ||
| 3 | |||
| 4 | 2011-11-06 Ira Rosen <ira.rosen@linaro.org> | ||
| 5 | |||
| 6 | gcc/ | ||
| 7 | * tree-vectorizer.h (vectorizable_condition): Add argument. | ||
| 8 | * tree-vect-loop.c (vectorizable_reduction): Fail for condition | ||
| 9 | in SLP. Update calls to vectorizable_condition. | ||
| 10 | * tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to | ||
| 11 | the arguments. Pass it to vect_is_simple_use_1. | ||
| 12 | (vectorizable_condition): Add slp_node to the arguments. Support | ||
| 13 | vectorization of basic blocks. Fail for reduction in SLP. Update | ||
| 14 | calls to vect_is_simple_cond and vect_is_simple_use. Support SLP: | ||
| 15 | call vect_get_slp_defs to get vector operands. | ||
| 16 | (vect_analyze_stmt): Update calls to vectorizable_condition. | ||
| 17 | (vect_transform_stmt): Likewise. | ||
| 18 | * tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR. | ||
| 19 | (vect_get_and_check_slp_defs): Handle COND_EXPR. Allow pattern | ||
| 20 | def stmts. | ||
| 21 | (vect_build_slp_tree): Handle COND_EXPR. | ||
| 22 | (vect_analyze_slp_instance): Push pattern statements to root node. | ||
| 23 | (vect_get_constant_vectors): Fix comments. Handle COND_EXPR. | ||
| 24 | |||
| 25 | gcc/testsuite/ | ||
| 26 | * gcc.dg/vect/bb-slp-cond-1.c: New test. | ||
| 27 | * gcc.dg/vect/slp-cond-1.c: New test. | ||
| 28 | |||
| 29 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c' | ||
| 30 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 1970-01-01 00:00:00 +0000 | ||
| 31 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 2011-11-20 08:24:08 +0000 | ||
| 32 | @@ -0,0 +1,46 @@ | ||
| 33 | +/* { dg-require-effective-target vect_condition } */ | ||
| 34 | + | ||
| 35 | +#include "tree-vect.h" | ||
| 36 | + | ||
| 37 | +#define N 128 | ||
| 38 | + | ||
| 39 | +__attribute__((noinline, noclone)) void | ||
| 40 | +foo (int *a, int stride) | ||
| 41 | +{ | ||
| 42 | + int i; | ||
| 43 | + | ||
| 44 | + for (i = 0; i < N/stride; i++, a += stride) | ||
| 45 | + { | ||
| 46 | + a[0] = a[0] ? 1 : 5; | ||
| 47 | + a[1] = a[1] ? 2 : 6; | ||
| 48 | + a[2] = a[2] ? 3 : 7; | ||
| 49 | + a[3] = a[3] ? 4 : 8; | ||
| 50 | + } | ||
| 51 | +} | ||
| 52 | + | ||
| 53 | + | ||
| 54 | +int a[N]; | ||
| 55 | +int main () | ||
| 56 | +{ | ||
| 57 | + int i; | ||
| 58 | + | ||
| 59 | + check_vect (); | ||
| 60 | + | ||
| 61 | + for (i = 0; i < N; i++) | ||
| 62 | + a[i] = i; | ||
| 63 | + | ||
| 64 | + foo (a, 4); | ||
| 65 | + | ||
| 66 | + for (i = 1; i < N; i++) | ||
| 67 | + if (a[i] != i%4 + 1) | ||
| 68 | + abort (); | ||
| 69 | + | ||
| 70 | + if (a[0] != 5) | ||
| 71 | + abort (); | ||
| 72 | + | ||
| 73 | + return 0; | ||
| 74 | +} | ||
| 75 | + | ||
| 76 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ | ||
| 77 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
| 78 | + | ||
| 79 | |||
| 80 | === added file 'gcc/testsuite/gcc.dg/vect/slp-cond-1.c' | ||
| 81 | --- old/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 1970-01-01 00:00:00 +0000 | ||
| 82 | +++ new/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2011-11-20 08:24:08 +0000 | ||
| 83 | @@ -0,0 +1,126 @@ | ||
| 84 | +/* { dg-require-effective-target vect_condition } */ | ||
| 85 | +#include "tree-vect.h" | ||
| 86 | + | ||
| 87 | +#define N 32 | ||
| 88 | +int a[N], b[N]; | ||
| 89 | +int d[N], e[N]; | ||
| 90 | +int k[N]; | ||
| 91 | + | ||
| 92 | +__attribute__((noinline, noclone)) void | ||
| 93 | +f1 (void) | ||
| 94 | +{ | ||
| 95 | + int i; | ||
| 96 | + for (i = 0; i < N/4; i++) | ||
| 97 | + { | ||
| 98 | + k[4*i] = a[4*i] < b[4*i] ? 17 : 0; | ||
| 99 | + k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0; | ||
| 100 | + k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0; | ||
| 101 | + k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0; | ||
| 102 | + } | ||
| 103 | +} | ||
| 104 | + | ||
| 105 | +__attribute__((noinline, noclone)) void | ||
| 106 | +f2 (void) | ||
| 107 | +{ | ||
| 108 | + int i; | ||
| 109 | + for (i = 0; i < N/2; ++i) | ||
| 110 | + { | ||
| 111 | + k[2*i] = a[2*i] < b[2*i] ? 0 : 24; | ||
| 112 | + k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4; | ||
| 113 | + } | ||
| 114 | +} | ||
| 115 | + | ||
| 116 | +__attribute__((noinline, noclone)) void | ||
| 117 | +f3 (void) | ||
| 118 | +{ | ||
| 119 | + int i; | ||
| 120 | + for (i = 0; i < N/2; ++i) | ||
| 121 | + { | ||
| 122 | + k[2*i] = a[2*i] < b[2*i] ? 51 : 12; | ||
| 123 | + k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12; | ||
| 124 | + } | ||
| 125 | +} | ||
| 126 | + | ||
| 127 | +__attribute__((noinline, noclone)) void | ||
| 128 | +f4 (void) | ||
| 129 | +{ | ||
| 130 | + int i; | ||
| 131 | + for (i = 0; i < N/2; ++i) | ||
| 132 | + { | ||
| 133 | + int d0 = d[2*i], e0 = e[2*i]; | ||
| 134 | + int d1 = d[2*i+1], e1 = e[2*i+1]; | ||
| 135 | + k[2*i] = a[2*i] >= b[2*i] ? d0 : e0; | ||
| 136 | + k[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1; | ||
| 137 | + } | ||
| 138 | +} | ||
| 139 | + | ||
| 140 | +int | ||
| 141 | +main () | ||
| 142 | +{ | ||
| 143 | + int i; | ||
| 144 | + | ||
| 145 | + check_vect (); | ||
| 146 | + | ||
| 147 | + for (i = 0; i < N; i++) | ||
| 148 | + { | ||
| 149 | + switch (i % 9) | ||
| 150 | + { | ||
| 151 | + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; | ||
| 152 | + case 1: a[i] = 0; b[i] = 0; break; | ||
| 153 | + case 2: a[i] = i + 1; b[i] = - i - 1; break; | ||
| 154 | + case 3: a[i] = i; b[i] = i + 7; break; | ||
| 155 | + case 4: a[i] = i; b[i] = i; break; | ||
| 156 | + case 5: a[i] = i + 16; b[i] = i + 3; break; | ||
| 157 | + case 6: a[i] = - i - 5; b[i] = - i; break; | ||
| 158 | + case 7: a[i] = - i; b[i] = - i; break; | ||
| 159 | + case 8: a[i] = - i; b[i] = - i - 7; break; | ||
| 160 | + } | ||
| 161 | + d[i] = i; | ||
| 162 | + e[i] = 2 * i; | ||
| 163 | + } | ||
| 164 | + f1 (); | ||
| 165 | + for (i = 0; i < N; i++) | ||
| 166 | + if (k[i] != ((i % 3) == 0 ? 17 : 0)) | ||
| 167 | + abort (); | ||
| 168 | + | ||
| 169 | + f2 (); | ||
| 170 | + for (i = 0; i < N; i++) | ||
| 171 | + { | ||
| 172 | + switch (i % 9) | ||
| 173 | + { | ||
| 174 | + case 0: | ||
| 175 | + case 6: | ||
| 176 | + if (k[i] != ((i/9 % 2) == 0 ? 0 : 7)) | ||
| 177 | + abort (); | ||
| 178 | + break; | ||
| 179 | + case 1: | ||
| 180 | + case 5: | ||
| 181 | + case 7: | ||
| 182 | + if (k[i] != ((i/9 % 2) == 0 ? 4 : 24)) | ||
| 183 | + abort (); | ||
| 184 | + break; | ||
| 185 | + case 2: | ||
| 186 | + case 4: | ||
| 187 | + case 8: | ||
| 188 | + if (k[i] != ((i/9 % 2) == 0 ? 24 : 4)) | ||
| 189 | + abort (); | ||
| 190 | + break; | ||
| 191 | + case 3: | ||
| 192 | + if (k[i] != ((i/9 % 2) == 0 ? 7 : 0)) | ||
| 193 | + abort (); | ||
| 194 | + break; | ||
| 195 | + } | ||
| 196 | + } | ||
| 197 | + | ||
| 198 | + f3 (); | ||
| 199 | + | ||
| 200 | + f4 (); | ||
| 201 | + for (i = 0; i < N; i++) | ||
| 202 | + if (k[i] != ((i % 3) == 0 ? e[i] : d[i])) | ||
| 203 | + abort (); | ||
| 204 | + | ||
| 205 | + return 0; | ||
| 206 | +} | ||
| 207 | + | ||
| 208 | +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ | ||
| 209 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
| 210 | |||
| 211 | === modified file 'gcc/tree-vect-loop.c' | ||
| 212 | --- old/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000 | ||
| 213 | +++ new/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 | ||
| 214 | @@ -4087,6 +4087,9 @@ | ||
| 215 | gcc_unreachable (); | ||
| 216 | } | ||
| 217 | |||
| 218 | + if (code == COND_EXPR && slp_node) | ||
| 219 | + return false; | ||
| 220 | + | ||
| 221 | scalar_dest = gimple_assign_lhs (stmt); | ||
| 222 | scalar_type = TREE_TYPE (scalar_dest); | ||
| 223 | if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) | ||
| 224 | @@ -4161,7 +4164,7 @@ | ||
| 225 | |||
| 226 | if (code == COND_EXPR) | ||
| 227 | { | ||
| 228 | - if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) | ||
| 229 | + if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) | ||
| 230 | { | ||
| 231 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
| 232 | fprintf (vect_dump, "unsupported condition in reduction"); | ||
| 233 | @@ -4433,7 +4436,7 @@ | ||
| 234 | gcc_assert (!slp_node); | ||
| 235 | vectorizable_condition (stmt, gsi, vec_stmt, | ||
| 236 | PHI_RESULT (VEC_index (gimple, phis, 0)), | ||
| 237 | - reduc_index); | ||
| 238 | + reduc_index, NULL); | ||
| 239 | /* Multiple types are not supported for condition. */ | ||
| 240 | break; | ||
| 241 | } | ||
| 242 | |||
| 243 | === modified file 'gcc/tree-vect-slp.c' | ||
| 244 | --- old/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000 | ||
| 245 | +++ new/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 | ||
| 246 | @@ -109,7 +109,11 @@ | ||
| 247 | if (is_gimple_call (stmt)) | ||
| 248 | nops = gimple_call_num_args (stmt); | ||
| 249 | else if (is_gimple_assign (stmt)) | ||
| 250 | - nops = gimple_num_ops (stmt) - 1; | ||
| 251 | + { | ||
| 252 | + nops = gimple_num_ops (stmt) - 1; | ||
| 253 | + if (gimple_assign_rhs_code (stmt) == COND_EXPR) | ||
| 254 | + nops = 4; | ||
| 255 | + } | ||
| 256 | else | ||
| 257 | return NULL; | ||
| 258 | |||
| 259 | @@ -190,20 +194,51 @@ | ||
| 260 | bool different_types = false; | ||
| 261 | bool pattern = false; | ||
| 262 | slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; | ||
| 263 | + int op_idx = 1; | ||
| 264 | + tree compare_rhs = NULL_TREE, rhs = NULL_TREE; | ||
| 265 | + int cond_idx = -1; | ||
| 266 | |||
| 267 | if (loop_vinfo) | ||
| 268 | loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
| 269 | |||
| 270 | if (is_gimple_call (stmt)) | ||
| 271 | number_of_oprnds = gimple_call_num_args (stmt); | ||
| 272 | + else if (is_gimple_assign (stmt)) | ||
| 273 | + { | ||
| 274 | + number_of_oprnds = gimple_num_ops (stmt) - 1; | ||
| 275 | + if (gimple_assign_rhs_code (stmt) == COND_EXPR) | ||
| 276 | + { | ||
| 277 | + number_of_oprnds = 4; | ||
| 278 | + cond_idx = 0; | ||
| 279 | + rhs = gimple_assign_rhs1 (stmt); | ||
| 280 | + } | ||
| 281 | + } | ||
| 282 | else | ||
| 283 | - number_of_oprnds = gimple_num_ops (stmt) - 1; | ||
| 284 | + return false; | ||
| 285 | |||
| 286 | for (i = 0; i < number_of_oprnds; i++) | ||
| 287 | { | ||
| 288 | - oprnd = gimple_op (stmt, i + 1); | ||
| 289 | + if (compare_rhs) | ||
| 290 | + oprnd = compare_rhs; | ||
| 291 | + else | ||
| 292 | + oprnd = gimple_op (stmt, op_idx++); | ||
| 293 | + | ||
| 294 | oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); | ||
| 295 | |||
| 296 | + if (-1 < cond_idx && cond_idx < 4) | ||
| 297 | + { | ||
| 298 | + if (compare_rhs) | ||
| 299 | + compare_rhs = NULL_TREE; | ||
| 300 | + else | ||
| 301 | + oprnd = TREE_OPERAND (rhs, cond_idx++); | ||
| 302 | + } | ||
| 303 | + | ||
| 304 | + if (COMPARISON_CLASS_P (oprnd)) | ||
| 305 | + { | ||
| 306 | + compare_rhs = TREE_OPERAND (oprnd, 1); | ||
| 307 | + oprnd = TREE_OPERAND (oprnd, 0); | ||
| 308 | + } | ||
| 309 | + | ||
| 310 | if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, | ||
| 311 | &dt) | ||
| 312 | || (!def_stmt && dt != vect_constant_def)) | ||
| 313 | @@ -243,8 +278,7 @@ | ||
| 314 | def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | ||
| 315 | dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); | ||
| 316 | |||
| 317 | - if (dt == vect_unknown_def_type | ||
| 318 | - || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt))) | ||
| 319 | + if (dt == vect_unknown_def_type) | ||
| 320 | { | ||
| 321 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
| 322 | fprintf (vect_dump, "Unsupported pattern."); | ||
| 323 | @@ -423,6 +457,7 @@ | ||
| 324 | VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); | ||
| 325 | gimple stmt = VEC_index (gimple, stmts, 0); | ||
| 326 | enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; | ||
| 327 | + enum tree_code first_cond_code = ERROR_MARK; | ||
| 328 | tree lhs; | ||
| 329 | bool stop_recursion = false, need_same_oprnds = false; | ||
| 330 | tree vectype, scalar_type, first_op1 = NULL_TREE; | ||
| 331 | @@ -439,11 +474,18 @@ | ||
| 332 | VEC (slp_oprnd_info, heap) *oprnds_info; | ||
| 333 | unsigned int nops; | ||
| 334 | slp_oprnd_info oprnd_info; | ||
| 335 | + tree cond; | ||
| 336 | |||
| 337 | if (is_gimple_call (stmt)) | ||
| 338 | nops = gimple_call_num_args (stmt); | ||
| 339 | + else if (is_gimple_assign (stmt)) | ||
| 340 | + { | ||
| 341 | + nops = gimple_num_ops (stmt) - 1; | ||
| 342 | + if (gimple_assign_rhs_code (stmt) == COND_EXPR) | ||
| 343 | + nops = 4; | ||
| 344 | + } | ||
| 345 | else | ||
| 346 | - nops = gimple_num_ops (stmt) - 1; | ||
| 347 | + return false; | ||
| 348 | |||
| 349 | oprnds_info = vect_create_oprnd_info (nops, group_size); | ||
| 350 | |||
| 351 | @@ -484,6 +526,22 @@ | ||
| 352 | return false; | ||
| 353 | } | ||
| 354 | |||
| 355 | + if (is_gimple_assign (stmt) | ||
| 356 | + && gimple_assign_rhs_code (stmt) == COND_EXPR | ||
| 357 | + && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) | ||
| 358 | + && !COMPARISON_CLASS_P (cond)) | ||
| 359 | + { | ||
| 360 | + if (vect_print_dump_info (REPORT_SLP)) | ||
| 361 | + { | ||
| 362 | + fprintf (vect_dump, | ||
| 363 | + "Build SLP failed: condition is not comparison "); | ||
| 364 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
| 365 | + } | ||
| 366 | + | ||
| 367 | + vect_free_oprnd_info (&oprnds_info); | ||
| 368 | + return false; | ||
| 369 | + } | ||
| 370 | + | ||
| 371 | scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); | ||
| 372 | vectype = get_vectype_for_scalar_type (scalar_type); | ||
| 373 | if (!vectype) | ||
| 374 | @@ -737,7 +795,8 @@ | ||
| 375 | |||
| 376 | /* Not memory operation. */ | ||
| 377 | if (TREE_CODE_CLASS (rhs_code) != tcc_binary | ||
| 378 | - && TREE_CODE_CLASS (rhs_code) != tcc_unary) | ||
| 379 | + && TREE_CODE_CLASS (rhs_code) != tcc_unary | ||
| 380 | + && rhs_code != COND_EXPR) | ||
| 381 | { | ||
| 382 | if (vect_print_dump_info (REPORT_SLP)) | ||
| 383 | { | ||
| 384 | @@ -750,6 +809,26 @@ | ||
| 385 | return false; | ||
| 386 | } | ||
| 387 | |||
| 388 | + if (rhs_code == COND_EXPR) | ||
| 389 | + { | ||
| 390 | + tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); | ||
| 391 | + | ||
| 392 | + if (i == 0) | ||
| 393 | + first_cond_code = TREE_CODE (cond_expr); | ||
| 394 | + else if (first_cond_code != TREE_CODE (cond_expr)) | ||
| 395 | + { | ||
| 396 | + if (vect_print_dump_info (REPORT_SLP)) | ||
| 397 | + { | ||
| 398 | + fprintf (vect_dump, "Build SLP failed: different" | ||
| 399 | + " operation"); | ||
| 400 | + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); | ||
| 401 | + } | ||
| 402 | + | ||
| 403 | + vect_free_oprnd_info (&oprnds_info); | ||
| 404 | + return false; | ||
| 405 | + } | ||
| 406 | + } | ||
| 407 | + | ||
| 408 | /* Find the def-stmts. */ | ||
| 409 | if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, | ||
| 410 | ncopies_for_cost, (i == 0), | ||
| 411 | @@ -1395,7 +1474,12 @@ | ||
| 412 | /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ | ||
| 413 | while (next) | ||
| 414 | { | ||
| 415 | - VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
| 416 | + if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) | ||
| 417 | + && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) | ||
| 418 | + VEC_safe_push (gimple, heap, scalar_stmts, | ||
| 419 | + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); | ||
| 420 | + else | ||
| 421 | + VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
| 422 | next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); | ||
| 423 | } | ||
| 424 | } | ||
| 425 | @@ -1404,7 +1488,7 @@ | ||
| 426 | /* Collect reduction statements. */ | ||
| 427 | VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); | ||
| 428 | for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) | ||
| 429 | - VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
| 430 | + VEC_safe_push (gimple, heap, scalar_stmts, next); | ||
| 431 | } | ||
| 432 | |||
| 433 | node = vect_create_new_slp_node (scalar_stmts); | ||
| 434 | @@ -2160,15 +2244,15 @@ | ||
| 435 | |||
| 436 | For example, we have two scalar operands, s1 and s2 (e.g., group of | ||
| 437 | strided accesses of size two), while NUNITS is four (i.e., four scalars | ||
| 438 | - of this type can be packed in a vector). The output vector will contain | ||
| 439 | - two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES | ||
| 440 | + of this type can be packed in a vector). The output vector will contain | ||
| 441 | + two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES | ||
| 442 | will be 2). | ||
| 443 | |||
| 444 | If GROUP_SIZE > NUNITS, the scalars will be split into several vectors | ||
| 445 | containing the operands. | ||
| 446 | |||
| 447 | For example, NUNITS is four as before, and the group size is 8 | ||
| 448 | - (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and | ||
| 449 | + (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and | ||
| 450 | {s5, s6, s7, s8}. */ | ||
| 451 | |||
| 452 | number_of_copies = least_common_multiple (nunits, group_size) / group_size; | ||
| 453 | @@ -2180,8 +2264,18 @@ | ||
| 454 | { | ||
| 455 | if (is_store) | ||
| 456 | op = gimple_assign_rhs1 (stmt); | ||
| 457 | - else | ||
| 458 | + else if (gimple_assign_rhs_code (stmt) != COND_EXPR) | ||
| 459 | op = gimple_op (stmt, op_num + 1); | ||
| 460 | + else | ||
| 461 | + { | ||
| 462 | + if (op_num == 0 || op_num == 1) | ||
| 463 | + { | ||
| 464 | + tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); | ||
| 465 | + op = TREE_OPERAND (cond, op_num); | ||
| 466 | + } | ||
| 467 | + else | ||
| 468 | + op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1); | ||
| 469 | + } | ||
| 470 | |||
| 471 | if (reduc_index != -1) | ||
| 472 | { | ||
| 473 | |||
| 474 | === modified file 'gcc/tree-vect-stmts.c' | ||
| 475 | --- old/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000 | ||
| 476 | +++ new/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 | ||
| 477 | @@ -4816,7 +4816,7 @@ | ||
| 478 | condition operands are supportable using vec_is_simple_use. */ | ||
| 479 | |||
| 480 | static bool | ||
| 481 | -vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) | ||
| 482 | +vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) | ||
| 483 | { | ||
| 484 | tree lhs, rhs; | ||
| 485 | tree def; | ||
| 486 | @@ -4831,7 +4831,7 @@ | ||
| 487 | if (TREE_CODE (lhs) == SSA_NAME) | ||
| 488 | { | ||
| 489 | gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); | ||
| 490 | - if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, | ||
| 491 | + if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def, | ||
| 492 | &dt)) | ||
| 493 | return false; | ||
| 494 | } | ||
| 495 | @@ -4842,7 +4842,7 @@ | ||
| 496 | if (TREE_CODE (rhs) == SSA_NAME) | ||
| 497 | { | ||
| 498 | gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); | ||
| 499 | - if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, | ||
| 500 | + if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def, | ||
| 501 | &dt)) | ||
| 502 | return false; | ||
| 503 | } | ||
| 504 | @@ -4868,7 +4868,8 @@ | ||
| 505 | |||
| 506 | bool | ||
| 507 | vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, | ||
| 508 | - gimple *vec_stmt, tree reduc_def, int reduc_index) | ||
| 509 | + gimple *vec_stmt, tree reduc_def, int reduc_index, | ||
| 510 | + slp_tree slp_node) | ||
| 511 | { | ||
| 512 | tree scalar_dest = NULL_TREE; | ||
| 513 | tree vec_dest = NULL_TREE; | ||
| 514 | @@ -4885,19 +4886,24 @@ | ||
| 515 | tree def; | ||
| 516 | enum vect_def_type dt, dts[4]; | ||
| 517 | int nunits = TYPE_VECTOR_SUBPARTS (vectype); | ||
| 518 | - int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; | ||
| 519 | + int ncopies; | ||
| 520 | enum tree_code code; | ||
| 521 | stmt_vec_info prev_stmt_info = NULL; | ||
| 522 | - int j; | ||
| 523 | + int i, j; | ||
| 524 | + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | ||
| 525 | + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; | ||
| 526 | + VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; | ||
| 527 | |||
| 528 | - /* FORNOW: unsupported in basic block SLP. */ | ||
| 529 | - gcc_assert (loop_vinfo); | ||
| 530 | + if (slp_node || PURE_SLP_STMT (stmt_info)) | ||
| 531 | + ncopies = 1; | ||
| 532 | + else | ||
| 533 | + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; | ||
| 534 | |||
| 535 | gcc_assert (ncopies >= 1); | ||
| 536 | - if (reduc_index && ncopies > 1) | ||
| 537 | + if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info))) | ||
| 538 | return false; /* FORNOW */ | ||
| 539 | |||
| 540 | - if (!STMT_VINFO_RELEVANT_P (stmt_info)) | ||
| 541 | + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) | ||
| 542 | return false; | ||
| 543 | |||
| 544 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def | ||
| 545 | @@ -4905,10 +4911,6 @@ | ||
| 546 | && reduc_def)) | ||
| 547 | return false; | ||
| 548 | |||
| 549 | - /* FORNOW: SLP not supported. */ | ||
| 550 | - if (STMT_SLP_TYPE (stmt_info)) | ||
| 551 | - return false; | ||
| 552 | - | ||
| 553 | /* FORNOW: not yet supported. */ | ||
| 554 | if (STMT_VINFO_LIVE_P (stmt_info)) | ||
| 555 | { | ||
| 556 | @@ -4932,7 +4934,7 @@ | ||
| 557 | then_clause = TREE_OPERAND (op, 1); | ||
| 558 | else_clause = TREE_OPERAND (op, 2); | ||
| 559 | |||
| 560 | - if (!vect_is_simple_cond (cond_expr, loop_vinfo)) | ||
| 561 | + if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo)) | ||
| 562 | return false; | ||
| 563 | |||
| 564 | /* We do not handle two different vector types for the condition | ||
| 565 | @@ -4944,7 +4946,7 @@ | ||
| 566 | if (TREE_CODE (then_clause) == SSA_NAME) | ||
| 567 | { | ||
| 568 | gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); | ||
| 569 | - if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, | ||
| 570 | + if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo, | ||
| 571 | &then_def_stmt, &def, &dt)) | ||
| 572 | return false; | ||
| 573 | } | ||
| 574 | @@ -4956,7 +4958,7 @@ | ||
| 575 | if (TREE_CODE (else_clause) == SSA_NAME) | ||
| 576 | { | ||
| 577 | gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); | ||
| 578 | - if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, | ||
| 579 | + if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo, | ||
| 580 | &else_def_stmt, &def, &dt)) | ||
| 581 | return false; | ||
| 582 | } | ||
| 583 | @@ -4974,7 +4976,15 @@ | ||
| 584 | return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); | ||
| 585 | } | ||
| 586 | |||
| 587 | - /* Transform */ | ||
| 588 | + /* Transform. */ | ||
| 589 | + | ||
| 590 | + if (!slp_node) | ||
| 591 | + { | ||
| 592 | + vec_oprnds0 = VEC_alloc (tree, heap, 1); | ||
| 593 | + vec_oprnds1 = VEC_alloc (tree, heap, 1); | ||
| 594 | + vec_oprnds2 = VEC_alloc (tree, heap, 1); | ||
| 595 | + vec_oprnds3 = VEC_alloc (tree, heap, 1); | ||
| 596 | + } | ||
| 597 | |||
| 598 | /* Handle def. */ | ||
| 599 | scalar_dest = gimple_assign_lhs (stmt); | ||
| 600 | @@ -4983,67 +4993,118 @@ | ||
| 601 | /* Handle cond expr. */ | ||
| 602 | for (j = 0; j < ncopies; j++) | ||
| 603 | { | ||
| 604 | - gimple new_stmt; | ||
| 605 | + gimple new_stmt = NULL; | ||
| 606 | if (j == 0) | ||
| 607 | { | ||
| 608 | - gimple gtemp; | ||
| 609 | - vec_cond_lhs = | ||
| 610 | + if (slp_node) | ||
| 611 | + { | ||
| 612 | + VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); | ||
| 613 | + VEC (slp_void_p, heap) *vec_defs; | ||
| 614 | + | ||
| 615 | + vec_defs = VEC_alloc (slp_void_p, heap, 4); | ||
| 616 | + VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); | ||
| 617 | + VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); | ||
| 618 | + VEC_safe_push (tree, heap, ops, then_clause); | ||
| 619 | + VEC_safe_push (tree, heap, ops, else_clause); | ||
| 620 | + vect_get_slp_defs (ops, slp_node, &vec_defs, -1); | ||
| 621 | + vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); | ||
| 622 | + vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); | ||
| 623 | + vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); | ||
| 624 | + vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); | ||
| 625 | + | ||
| 626 | + VEC_free (tree, heap, ops); | ||
| 627 | + VEC_free (slp_void_p, heap, vec_defs); | ||
| 628 | + } | ||
| 629 | + else | ||
| 630 | + { | ||
| 631 | + gimple gtemp; | ||
| 632 | + vec_cond_lhs = | ||
| 633 | vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), | ||
| 634 | stmt, NULL); | ||
| 635 | - vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, | ||
| 636 | + vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, | ||
| 637 | NULL, &gtemp, &def, &dts[0]); | ||
| 638 | - vec_cond_rhs = | ||
| 639 | - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), | ||
| 640 | - stmt, NULL); | ||
| 641 | - vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, | ||
| 642 | - NULL, &gtemp, &def, &dts[1]); | ||
| 643 | - if (reduc_index == 1) | ||
| 644 | - vec_then_clause = reduc_def; | ||
| 645 | - else | ||
| 646 | - { | ||
| 647 | - vec_then_clause = vect_get_vec_def_for_operand (then_clause, | ||
| 648 | - stmt, NULL); | ||
| 649 | - vect_is_simple_use (then_clause, loop_vinfo, | ||
| 650 | - NULL, &gtemp, &def, &dts[2]); | ||
| 651 | - } | ||
| 652 | - if (reduc_index == 2) | ||
| 653 | - vec_else_clause = reduc_def; | ||
| 654 | - else | ||
| 655 | - { | ||
| 656 | - vec_else_clause = vect_get_vec_def_for_operand (else_clause, | ||
| 657 | - stmt, NULL); | ||
| 658 | - vect_is_simple_use (else_clause, loop_vinfo, | ||
| 659 | + | ||
| 660 | + vec_cond_rhs = | ||
| 661 | + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), | ||
| 662 | + stmt, NULL); | ||
| 663 | + vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, | ||
| 664 | + NULL, &gtemp, &def, &dts[1]); | ||
| 665 | + if (reduc_index == 1) | ||
| 666 | + vec_then_clause = reduc_def; | ||
| 667 | + else | ||
| 668 | + { | ||
| 669 | + vec_then_clause = vect_get_vec_def_for_operand (then_clause, | ||
| 670 | + stmt, NULL); | ||
| 671 | + vect_is_simple_use (then_clause, loop_vinfo, | ||
| 672 | + NULL, &gtemp, &def, &dts[2]); | ||
| 673 | + } | ||
| 674 | + if (reduc_index == 2) | ||
| 675 | + vec_else_clause = reduc_def; | ||
| 676 | + else | ||
| 677 | + { | ||
| 678 | + vec_else_clause = vect_get_vec_def_for_operand (else_clause, | ||
| 679 | + stmt, NULL); | ||
| 680 | + vect_is_simple_use (else_clause, loop_vinfo, | ||
| 681 | NULL, &gtemp, &def, &dts[3]); | ||
| 682 | + } | ||
| 683 | } | ||
| 684 | } | ||
| 685 | else | ||
| 686 | { | ||
| 687 | - vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs); | ||
| 688 | - vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs); | ||
| 689 | + vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], | ||
| 690 | + VEC_pop (tree, vec_oprnds0)); | ||
| 691 | + vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], | ||
| 692 | + VEC_pop (tree, vec_oprnds1)); | ||
| 693 | vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], | ||
| 694 | - vec_then_clause); | ||
| 695 | + VEC_pop (tree, vec_oprnds2)); | ||
| 696 | vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], | ||
| 697 | - vec_else_clause); | ||
| 698 | + VEC_pop (tree, vec_oprnds3)); | ||
| 699 | + } | ||
| 700 | + | ||
| 701 | + if (!slp_node) | ||
| 702 | + { | ||
| 703 | + VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); | ||
| 704 | + VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); | ||
| 705 | + VEC_quick_push (tree, vec_oprnds2, vec_then_clause); | ||
| 706 | + VEC_quick_push (tree, vec_oprnds3, vec_else_clause); | ||
| 707 | } | ||
| 708 | |||
| 709 | /* Arguments are ready. Create the new vector stmt. */ | ||
| 710 | - vec_compare = build2 (TREE_CODE (cond_expr), vectype, | ||
| 711 | - vec_cond_lhs, vec_cond_rhs); | ||
| 712 | - vec_cond_expr = build3 (VEC_COND_EXPR, vectype, | ||
| 713 | - vec_compare, vec_then_clause, vec_else_clause); | ||
| 714 | - | ||
| 715 | - new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); | ||
| 716 | - new_temp = make_ssa_name (vec_dest, new_stmt); | ||
| 717 | - gimple_assign_set_lhs (new_stmt, new_temp); | ||
| 718 | - vect_finish_stmt_generation (stmt, new_stmt, gsi); | ||
| 719 | - if (j == 0) | ||
| 720 | - STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; | ||
| 721 | - else | ||
| 722 | - STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; | ||
| 723 | - | ||
| 724 | - prev_stmt_info = vinfo_for_stmt (new_stmt); | ||
| 725 | + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) | ||
| 726 | + { | ||
| 727 | + vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); | ||
| 728 | + vec_then_clause = VEC_index (tree, vec_oprnds2, i); | ||
| 729 | + vec_else_clause = VEC_index (tree, vec_oprnds3, i); | ||
| 730 | + | ||
| 731 | + vec_compare = build2 (TREE_CODE (cond_expr), vectype, | ||
| 732 | + vec_cond_lhs, vec_cond_rhs); | ||
| 733 | + vec_cond_expr = build3 (VEC_COND_EXPR, vectype, | ||
| 734 | + vec_compare, vec_then_clause, vec_else_clause); | ||
| 735 | + | ||
| 736 | + new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); | ||
| 737 | + new_temp = make_ssa_name (vec_dest, new_stmt); | ||
| 738 | + gimple_assign_set_lhs (new_stmt, new_temp); | ||
| 739 | + vect_finish_stmt_generation (stmt, new_stmt, gsi); | ||
| 740 | + if (slp_node) | ||
| 741 | + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); | ||
| 742 | + } | ||
| 743 | + | ||
| 744 | + if (slp_node) | ||
| 745 | + continue; | ||
| 746 | + | ||
| 747 | + if (j == 0) | ||
| 748 | + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; | ||
| 749 | + else | ||
| 750 | + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; | ||
| 751 | + | ||
| 752 | + prev_stmt_info = vinfo_for_stmt (new_stmt); | ||
| 753 | } | ||
| 754 | |||
| 755 | + VEC_free (tree, heap, vec_oprnds0); | ||
| 756 | + VEC_free (tree, heap, vec_oprnds1); | ||
| 757 | + VEC_free (tree, heap, vec_oprnds2); | ||
| 758 | + VEC_free (tree, heap, vec_oprnds3); | ||
| 759 | + | ||
| 760 | return true; | ||
| 761 | } | ||
| 762 | |||
| 763 | @@ -5217,7 +5278,7 @@ | ||
| 764 | || vectorizable_call (stmt, NULL, NULL) | ||
| 765 | || vectorizable_store (stmt, NULL, NULL, NULL) | ||
| 766 | || vectorizable_reduction (stmt, NULL, NULL, NULL) | ||
| 767 | - || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); | ||
| 768 | + || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); | ||
| 769 | else | ||
| 770 | { | ||
| 771 | if (bb_vinfo) | ||
| 772 | @@ -5227,7 +5288,8 @@ | ||
| 773 | || vectorizable_operation (stmt, NULL, NULL, node) | ||
| 774 | || vectorizable_assignment (stmt, NULL, NULL, node) | ||
| 775 | || vectorizable_load (stmt, NULL, NULL, node, NULL) | ||
| 776 | - || vectorizable_store (stmt, NULL, NULL, node)); | ||
| 777 | + || vectorizable_store (stmt, NULL, NULL, node) | ||
| 778 | + || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); | ||
| 779 | } | ||
| 780 | |||
| 781 | if (!ok) | ||
| 782 | @@ -5343,8 +5405,7 @@ | ||
| 783 | break; | ||
| 784 | |||
| 785 | case condition_vec_info_type: | ||
| 786 | - gcc_assert (!slp_node); | ||
| 787 | - done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); | ||
| 788 | + done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); | ||
| 789 | gcc_assert (done); | ||
| 790 | break; | ||
| 791 | |||
| 792 | |||
| 793 | === modified file 'gcc/tree-vectorizer.h' | ||
| 794 | --- old/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000 | ||
| 795 | +++ new/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 | ||
| 796 | @@ -832,7 +832,7 @@ | ||
| 797 | extern void vect_remove_stores (gimple); | ||
| 798 | extern bool vect_analyze_stmt (gimple, bool *, slp_tree); | ||
| 799 | extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, | ||
| 800 | - tree, int); | ||
| 801 | + tree, int, slp_tree); | ||
| 802 | extern void vect_get_load_cost (struct data_reference *, int, bool, | ||
| 803 | unsigned int *, unsigned int *); | ||
| 804 | extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); | ||
| 805 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch new file mode 100644 index 0000000000..e501959c7a --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106844.patch | |||
| @@ -0,0 +1,495 @@ | |||
| 1 | 2011-11-27 Ira Rosen <ira.rosen@linaro.org> | ||
| 2 | |||
| 3 | gcc/ | ||
| 4 | * tree-vectorizer.h (vect_pattern_recog): Add new argument. | ||
| 5 | * tree-vect-loop.c (vect_analyze_loop_2): Update call to | ||
| 6 | vect_pattern_recog. | ||
| 7 | * tree-vect-patterns.c (widened_name_p): Pass basic block | ||
| 8 | info to vect_is_simple_use. | ||
| 9 | (vect_recog_dot_prod_pattern): Fail for basic blocks. | ||
| 10 | (vect_recog_widen_sum_pattern): Likewise. | ||
| 11 | (vect_handle_widen_op_by_const): Support basic blocks. | ||
| 12 | (vect_operation_fits_smaller_type, | ||
| 13 | vect_recog_over_widening_pattern): Likewise. | ||
| 14 | (vect_recog_mixed_size_cond_pattern): Support basic blocks. | ||
| 15 | Add printing. | ||
| 16 | (vect_mark_pattern_stmts): Update calls to new_stmt_vec_info. | ||
| 17 | (vect_pattern_recog_1): Check for reduction only in loops. | ||
| 18 | (vect_pattern_recog): Add new argument. Support basic blocks. | ||
| 19 | * tree-vect-stmts.c (vectorizable_conversion): Pass basic block | ||
| 20 | info to vect_is_simple_use_1. | ||
| 21 | * tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic | ||
| 22 | blocks. | ||
| 23 | (vect_slp_analyze_bb_1): Call vect_pattern_recog. | ||
| 24 | |||
| 25 | gcc/testsuite/ | ||
| 26 | * gcc.dg/vect/bb-slp-pattern-1.c: New test. | ||
| 27 | * gcc.dg/vect/bb-slp-pattern-2.c: New test. | ||
| 28 | |||
| 29 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c' | ||
| 30 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000 | ||
| 31 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000 | ||
| 32 | @@ -0,0 +1,55 @@ | ||
| 33 | +/* { dg-require-effective-target vect_int } */ | ||
| 34 | + | ||
| 35 | +#include <stdarg.h> | ||
| 36 | +#include "tree-vect.h" | ||
| 37 | + | ||
| 38 | +#define N 8 | ||
| 39 | + | ||
| 40 | +unsigned short X[N]; | ||
| 41 | +unsigned short Y[N]; | ||
| 42 | +unsigned int result[N]; | ||
| 43 | + | ||
| 44 | +/* unsigned short->unsigned int widening-mult. */ | ||
| 45 | +__attribute__ ((noinline, noclone)) void | ||
| 46 | +foo (void) | ||
| 47 | +{ | ||
| 48 | + result[0] = (unsigned int)(X[0] * Y[0]); | ||
| 49 | + result[1] = (unsigned int)(X[1] * Y[1]); | ||
| 50 | + result[2] = (unsigned int)(X[2] * Y[2]); | ||
| 51 | + result[3] = (unsigned int)(X[3] * Y[3]); | ||
| 52 | + result[4] = (unsigned int)(X[4] * Y[4]); | ||
| 53 | + result[5] = (unsigned int)(X[5] * Y[5]); | ||
| 54 | + result[6] = (unsigned int)(X[6] * Y[6]); | ||
| 55 | + result[7] = (unsigned int)(X[7] * Y[7]); | ||
| 56 | +} | ||
| 57 | + | ||
| 58 | +int main (void) | ||
| 59 | +{ | ||
| 60 | + int i, tmp; | ||
| 61 | + | ||
| 62 | + check_vect (); | ||
| 63 | + | ||
| 64 | + for (i = 0; i < N; i++) | ||
| 65 | + { | ||
| 66 | + X[i] = i; | ||
| 67 | + Y[i] = 64-i; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + foo (); | ||
| 71 | + | ||
| 72 | + for (i = 0; i < N; i++) | ||
| 73 | + { | ||
| 74 | + __asm__ volatile (""); | ||
| 75 | + tmp = X[i] * Y[i]; | ||
| 76 | + if (result[i] != tmp) | ||
| 77 | + abort (); | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + return 0; | ||
| 81 | +} | ||
| 82 | + | ||
| 83 | +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ | ||
| 84 | +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
| 85 | +/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */ | ||
| 86 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
| 87 | + | ||
| 88 | |||
| 89 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c' | ||
| 90 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000 | ||
| 91 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000 | ||
| 92 | @@ -0,0 +1,53 @@ | ||
| 93 | +/* { dg-require-effective-target vect_condition } */ | ||
| 94 | + | ||
| 95 | +#include "tree-vect.h" | ||
| 96 | + | ||
| 97 | +#define N 128 | ||
| 98 | + | ||
| 99 | +__attribute__((noinline, noclone)) void | ||
| 100 | +foo (short * __restrict__ a, int * __restrict__ b, int stride) | ||
| 101 | +{ | ||
| 102 | + int i; | ||
| 103 | + | ||
| 104 | + for (i = 0; i < N/stride; i++, a += stride, b += stride) | ||
| 105 | + { | ||
| 106 | + a[0] = b[0] ? 1 : 7; | ||
| 107 | + a[1] = b[1] ? 2 : 0; | ||
| 108 | + a[2] = b[2] ? 3 : 0; | ||
| 109 | + a[3] = b[3] ? 4 : 0; | ||
| 110 | + a[4] = b[4] ? 5 : 0; | ||
| 111 | + a[5] = b[5] ? 6 : 0; | ||
| 112 | + a[6] = b[6] ? 7 : 0; | ||
| 113 | + a[7] = b[7] ? 8 : 0; | ||
| 114 | + } | ||
| 115 | +} | ||
| 116 | + | ||
| 117 | +short a[N]; | ||
| 118 | +int b[N]; | ||
| 119 | +int main () | ||
| 120 | +{ | ||
| 121 | + int i; | ||
| 122 | + | ||
| 123 | + check_vect (); | ||
| 124 | + | ||
| 125 | + for (i = 0; i < N; i++) | ||
| 126 | + { | ||
| 127 | + a[i] = i; | ||
| 128 | + b[i] = -i; | ||
| 129 | + } | ||
| 130 | + | ||
| 131 | + foo (a, b, 8); | ||
| 132 | + | ||
| 133 | + for (i = 1; i < N; i++) | ||
| 134 | + if (a[i] != i%8 + 1) | ||
| 135 | + abort (); | ||
| 136 | + | ||
| 137 | + if (a[0] != 7) | ||
| 138 | + abort (); | ||
| 139 | + | ||
| 140 | + return 0; | ||
| 141 | +} | ||
| 142 | + | ||
| 143 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */ | ||
| 144 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
| 145 | + | ||
| 146 | |||
| 147 | === modified file 'gcc/tree-vect-loop.c' | ||
| 148 | --- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000 | ||
| 149 | +++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000 | ||
| 150 | @@ -1458,7 +1458,7 @@ | ||
| 151 | |||
| 152 | vect_analyze_scalar_cycles (loop_vinfo); | ||
| 153 | |||
| 154 | - vect_pattern_recog (loop_vinfo); | ||
| 155 | + vect_pattern_recog (loop_vinfo, NULL); | ||
| 156 | |||
| 157 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | ||
| 158 | |||
| 159 | |||
| 160 | === modified file 'gcc/tree-vect-patterns.c' | ||
| 161 | --- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000 | ||
| 162 | +++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 | ||
| 163 | @@ -83,11 +83,13 @@ | ||
| 164 | tree oprnd0; | ||
| 165 | enum vect_def_type dt; | ||
| 166 | tree def; | ||
| 167 | + bb_vec_info bb_vinfo; | ||
| 168 | |||
| 169 | stmt_vinfo = vinfo_for_stmt (use_stmt); | ||
| 170 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 171 | + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); | ||
| 172 | |||
| 173 | - if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) | ||
| 174 | + if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) | ||
| 175 | return false; | ||
| 176 | |||
| 177 | if (dt != vect_internal_def | ||
| 178 | @@ -111,7 +113,7 @@ | ||
| 179 | || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) | ||
| 180 | return false; | ||
| 181 | |||
| 182 | - if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, | ||
| 183 | + if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, | ||
| 184 | &dt)) | ||
| 185 | return false; | ||
| 186 | |||
| 187 | @@ -188,9 +190,14 @@ | ||
| 188 | gimple pattern_stmt; | ||
| 189 | tree prod_type; | ||
| 190 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 191 | - struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
| 192 | + struct loop *loop; | ||
| 193 | tree var, rhs; | ||
| 194 | |||
| 195 | + if (!loop_info) | ||
| 196 | + return NULL; | ||
| 197 | + | ||
| 198 | + loop = LOOP_VINFO_LOOP (loop_info); | ||
| 199 | + | ||
| 200 | if (!is_gimple_assign (last_stmt)) | ||
| 201 | return NULL; | ||
| 202 | |||
| 203 | @@ -358,8 +365,16 @@ | ||
| 204 | { | ||
| 205 | tree new_type, new_oprnd, tmp; | ||
| 206 | gimple new_stmt; | ||
| 207 | - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
| 208 | - struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
| 209 | + loop_vec_info loop_vinfo; | ||
| 210 | + struct loop *loop = NULL; | ||
| 211 | + bb_vec_info bb_vinfo; | ||
| 212 | + stmt_vec_info stmt_vinfo; | ||
| 213 | + | ||
| 214 | + stmt_vinfo = vinfo_for_stmt (stmt); | ||
| 215 | + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 216 | + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); | ||
| 217 | + if (loop_vinfo) | ||
| 218 | + loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
| 219 | |||
| 220 | if (code != MULT_EXPR && code != LSHIFT_EXPR) | ||
| 221 | return false; | ||
| 222 | @@ -377,7 +392,9 @@ | ||
| 223 | |||
| 224 | if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) | ||
| 225 | || !gimple_bb (def_stmt) | ||
| 226 | - || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
| 227 | + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) | ||
| 228 | + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo) | ||
| 229 | + && gimple_code (def_stmt) != GIMPLE_PHI) | ||
| 230 | || !vinfo_for_stmt (def_stmt)) | ||
| 231 | return false; | ||
| 232 | |||
| 233 | @@ -774,9 +791,14 @@ | ||
| 234 | tree type, half_type; | ||
| 235 | gimple pattern_stmt; | ||
| 236 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 237 | - struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
| 238 | + struct loop *loop; | ||
| 239 | tree var; | ||
| 240 | |||
| 241 | + if (!loop_info) | ||
| 242 | + return NULL; | ||
| 243 | + | ||
| 244 | + loop = LOOP_VINFO_LOOP (loop_info); | ||
| 245 | + | ||
| 246 | if (!is_gimple_assign (last_stmt)) | ||
| 247 | return NULL; | ||
| 248 | |||
| 249 | @@ -877,7 +899,11 @@ | ||
| 250 | gimple def_stmt, new_stmt; | ||
| 251 | bool first = false; | ||
| 252 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
| 253 | - struct loop *loop = LOOP_VINFO_LOOP (loop_info); | ||
| 254 | + bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); | ||
| 255 | + struct loop *loop = NULL; | ||
| 256 | + | ||
| 257 | + if (loop_info) | ||
| 258 | + loop = LOOP_VINFO_LOOP (loop_info); | ||
| 259 | |||
| 260 | *new_def_stmt = NULL; | ||
| 261 | |||
| 262 | @@ -909,7 +935,9 @@ | ||
| 263 | first = true; | ||
| 264 | if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) | ||
| 265 | || !gimple_bb (def_stmt) | ||
| 266 | - || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
| 267 | + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) | ||
| 268 | + || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) | ||
| 269 | + && gimple_code (def_stmt) != GIMPLE_PHI) | ||
| 270 | || !vinfo_for_stmt (def_stmt)) | ||
| 271 | return false; | ||
| 272 | } | ||
| 273 | @@ -1087,7 +1115,16 @@ | ||
| 274 | int nuses = 0; | ||
| 275 | tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; | ||
| 276 | bool first; | ||
| 277 | - struct loop *loop = (gimple_bb (stmt))->loop_father; | ||
| 278 | + loop_vec_info loop_vinfo; | ||
| 279 | + struct loop *loop = NULL; | ||
| 280 | + bb_vec_info bb_vinfo; | ||
| 281 | + stmt_vec_info stmt_vinfo; | ||
| 282 | + | ||
| 283 | + stmt_vinfo = vinfo_for_stmt (stmt); | ||
| 284 | + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 285 | + bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); | ||
| 286 | + if (loop_vinfo) | ||
| 287 | + loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
| 288 | |||
| 289 | first = true; | ||
| 290 | while (1) | ||
| 291 | @@ -1120,7 +1157,8 @@ | ||
| 292 | |||
| 293 | if (nuses != 1 || !is_gimple_assign (use_stmt) | ||
| 294 | || !gimple_bb (use_stmt) | ||
| 295 | - || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | ||
| 296 | + || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | ||
| 297 | + || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo))) | ||
| 298 | return NULL; | ||
| 299 | |||
| 300 | /* Create pattern statement for STMT. */ | ||
| 301 | @@ -1485,6 +1523,7 @@ | ||
| 302 | enum machine_mode cmpmode; | ||
| 303 | gimple pattern_stmt, def_stmt; | ||
| 304 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 305 | + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); | ||
| 306 | |||
| 307 | if (!is_gimple_assign (last_stmt) | ||
| 308 | || gimple_assign_rhs_code (last_stmt) != COND_EXPR | ||
| 309 | @@ -1538,7 +1577,8 @@ | ||
| 310 | tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), | ||
| 311 | fold_convert (comp_type, then_clause), | ||
| 312 | fold_convert (comp_type, else_clause)); | ||
| 313 | - def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp); | ||
| 314 | + def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), | ||
| 315 | + tmp); | ||
| 316 | |||
| 317 | pattern_stmt | ||
| 318 | = gimple_build_assign_with_ops (NOP_EXPR, | ||
| 319 | @@ -1546,12 +1586,15 @@ | ||
| 320 | gimple_assign_lhs (def_stmt), NULL_TREE); | ||
| 321 | |||
| 322 | STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; | ||
| 323 | - def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); | ||
| 324 | + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); | ||
| 325 | set_vinfo_for_stmt (def_stmt, def_stmt_info); | ||
| 326 | STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; | ||
| 327 | *type_in = vectype; | ||
| 328 | *type_out = vectype; | ||
| 329 | |||
| 330 | + if (vect_print_dump_info (REPORT_DETAILS)) | ||
| 331 | + fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); | ||
| 332 | + | ||
| 333 | return pattern_stmt; | ||
| 334 | } | ||
| 335 | |||
| 336 | @@ -1565,10 +1608,11 @@ | ||
| 337 | stmt_vec_info pattern_stmt_info, def_stmt_info; | ||
| 338 | stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); | ||
| 339 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); | ||
| 340 | + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); | ||
| 341 | gimple def_stmt; | ||
| 342 | |||
| 343 | set_vinfo_for_stmt (pattern_stmt, | ||
| 344 | - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); | ||
| 345 | + new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); | ||
| 346 | gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); | ||
| 347 | pattern_stmt_info = vinfo_for_stmt (pattern_stmt); | ||
| 348 | |||
| 349 | @@ -1586,7 +1630,7 @@ | ||
| 350 | def_stmt_info = vinfo_for_stmt (def_stmt); | ||
| 351 | if (def_stmt_info == NULL) | ||
| 352 | { | ||
| 353 | - def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); | ||
| 354 | + def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); | ||
| 355 | set_vinfo_for_stmt (def_stmt, def_stmt_info); | ||
| 356 | } | ||
| 357 | gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); | ||
| 358 | @@ -1697,9 +1741,10 @@ | ||
| 359 | |||
| 360 | /* Patterns cannot be vectorized using SLP, because they change the order of | ||
| 361 | computation. */ | ||
| 362 | - FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) | ||
| 363 | - if (next == stmt) | ||
| 364 | - VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); | ||
| 365 | + if (loop_vinfo) | ||
| 366 | + FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) | ||
| 367 | + if (next == stmt) | ||
| 368 | + VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); | ||
| 369 | |||
| 370 | /* It is possible that additional pattern stmts are created and inserted in | ||
| 371 | STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the | ||
| 372 | @@ -1799,26 +1844,46 @@ | ||
| 373 | be recorded in S3. */ | ||
| 374 | |||
| 375 | void | ||
| 376 | -vect_pattern_recog (loop_vec_info loop_vinfo) | ||
| 377 | +vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) | ||
| 378 | { | ||
| 379 | - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
| 380 | - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | ||
| 381 | - unsigned int nbbs = loop->num_nodes; | ||
| 382 | + struct loop *loop; | ||
| 383 | + basic_block *bbs, bb; | ||
| 384 | + unsigned int nbbs; | ||
| 385 | gimple_stmt_iterator si; | ||
| 386 | unsigned int i, j; | ||
| 387 | gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
| 388 | VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); | ||
| 389 | + gimple stmt; | ||
| 390 | |||
| 391 | if (vect_print_dump_info (REPORT_DETAILS)) | ||
| 392 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | ||
| 393 | |||
| 394 | - /* Scan through the loop stmts, applying the pattern recognition | ||
| 395 | + if (loop_vinfo) | ||
| 396 | + { | ||
| 397 | + loop = LOOP_VINFO_LOOP (loop_vinfo); | ||
| 398 | + bbs = LOOP_VINFO_BBS (loop_vinfo); | ||
| 399 | + nbbs = loop->num_nodes; | ||
| 400 | + } | ||
| 401 | + else | ||
| 402 | + { | ||
| 403 | + bb = BB_VINFO_BB (bb_vinfo); | ||
| 404 | + nbbs = 1; | ||
| 405 | + bbs = XNEW (basic_block); | ||
| 406 | + bbs[0] = bb; | ||
| 407 | + } | ||
| 408 | + | ||
| 409 | + /* Scan through the stmts, applying the pattern recognition | ||
| 410 | functions starting at each stmt visited: */ | ||
| 411 | for (i = 0; i < nbbs; i++) | ||
| 412 | { | ||
| 413 | basic_block bb = bbs[i]; | ||
| 414 | for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | ||
| 415 | { | ||
| 416 | + if (bb_vinfo && (stmt = gsi_stmt (si)) | ||
| 417 | + && vinfo_for_stmt (stmt) | ||
| 418 | + && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) | ||
| 419 | + continue; | ||
| 420 | + | ||
| 421 | /* Scan over all generic vect_recog_xxx_pattern functions. */ | ||
| 422 | for (j = 0; j < NUM_PATTERNS; j++) | ||
| 423 | { | ||
| 424 | @@ -1830,4 +1895,6 @@ | ||
| 425 | } | ||
| 426 | |||
| 427 | VEC_free (gimple, heap, stmts_to_replace); | ||
| 428 | + if (bb_vinfo) | ||
| 429 | + free (bbs); | ||
| 430 | } | ||
| 431 | |||
| 432 | === modified file 'gcc/tree-vect-slp.c' | ||
| 433 | --- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000 | ||
| 434 | +++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000 | ||
| 435 | @@ -255,12 +255,14 @@ | ||
| 436 | /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt | ||
| 437 | from the pattern. Check that all the stmts of the node are in the | ||
| 438 | pattern. */ | ||
| 439 | - if (loop && def_stmt && gimple_bb (def_stmt) | ||
| 440 | - && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | ||
| 441 | + if (def_stmt && gimple_bb (def_stmt) | ||
| 442 | + && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) | ||
| 443 | + || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) | ||
| 444 | + && gimple_code (def_stmt) != GIMPLE_PHI)) | ||
| 445 | && vinfo_for_stmt (def_stmt) | ||
| 446 | && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) | ||
| 447 | - && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | ||
| 448 | - && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | ||
| 449 | + && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | ||
| 450 | + && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | ||
| 451 | { | ||
| 452 | pattern = true; | ||
| 453 | if (!first && !oprnd_info->first_pattern) | ||
| 454 | @@ -1972,6 +1974,8 @@ | ||
| 455 | return NULL; | ||
| 456 | } | ||
| 457 | |||
| 458 | + vect_pattern_recog (NULL, bb_vinfo); | ||
| 459 | + | ||
| 460 | if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, | ||
| 461 | &data_dependence_in_bb) | ||
| 462 | || min_vf > max_vf | ||
| 463 | |||
| 464 | === modified file 'gcc/tree-vect-stmts.c' | ||
| 465 | --- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000 | ||
| 466 | +++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000 | ||
| 467 | @@ -3451,11 +3451,11 @@ | ||
| 468 | /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of | ||
| 469 | OP1. */ | ||
| 470 | if (CONSTANT_CLASS_P (op0)) | ||
| 471 | - ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, | ||
| 472 | + ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, | ||
| 473 | &def_stmt, &def, &dt[1], &vectype_in); | ||
| 474 | else | ||
| 475 | - ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, | ||
| 476 | - &dt[1]); | ||
| 477 | + ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, | ||
| 478 | + &def, &dt[1]); | ||
| 479 | |||
| 480 | if (!ok) | ||
| 481 | { | ||
| 482 | |||
| 483 | === modified file 'gcc/tree-vectorizer.h' | ||
| 484 | --- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000 | ||
| 485 | +++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000 | ||
| 486 | @@ -918,7 +918,7 @@ | ||
| 487 | in the future. */ | ||
| 488 | typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); | ||
| 489 | #define NUM_PATTERNS 7 | ||
| 490 | -void vect_pattern_recog (loop_vec_info); | ||
| 491 | +void vect_pattern_recog (loop_vec_info, bb_vec_info); | ||
| 492 | |||
| 493 | /* In tree-vectorizer.c. */ | ||
| 494 | unsigned vectorize_loops (void); | ||
| 495 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch new file mode 100644 index 0000000000..17cfd10682 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106845.patch | |||
| @@ -0,0 +1,1818 @@ | |||
| 1 | 2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 2 | |||
| 3 | Backport from mainline (svn r19983): | ||
| 4 | |||
| 5 | 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 6 | |||
| 7 | gcc/testsuite/ | ||
| 8 | * gcc.dg/di-longlong64-sync-1.c: New test. | ||
| 9 | * gcc.dg/di-sync-multithread.c: New test. | ||
| 10 | * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test. | ||
| 11 | * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test. | ||
| 12 | * lib/target-supports.exp: (arm_arch_*_ok): Series of effective-target | ||
| 13 | tests for v5, v6, v6k, and v7-a, and add-options helpers. | ||
| 14 | (check_effective_target_arm_arm_ok): New helper. | ||
| 15 | (check_effective_target_sync_longlong): New helper. | ||
| 16 | |||
| 17 | 2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 18 | |||
| 19 | Backport from mainline (svn r19982): | ||
| 20 | |||
| 21 | 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 22 | |||
| 23 | gcc/ | ||
| 24 | * config/arm/linux-atomic-64bit.c: New (based on linux-atomic.c). | ||
| 25 | * config/arm/linux-atomic.c: Change comment to point to 64bit version. | ||
| 26 | (SYNC_LOCK_RELEASE): Instantiate 64bit version. | ||
| 27 | * config/arm/t-linux-eabi: Pull in linux-atomic-64bit.c. | ||
| 28 | |||
| 29 | 2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 30 | |||
| 31 | Backport from mainline (svn r19981): | ||
| 32 | |||
| 33 | 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 34 | |||
| 35 | gcc/ | ||
| 36 | * config/arm/arm.c (arm_output_ldrex): Support ldrexd. | ||
| 37 | (arm_output_strex): Support strexd. | ||
| 38 | (arm_output_it): New helper to output it in Thumb2 mode only. | ||
| 39 | (arm_output_sync_loop): Support DI mode. Change comment to | ||
| 40 | not support const_int. | ||
| 41 | (arm_expand_sync): Support DI mode. | ||
| 42 | * config/arm/arm.h (TARGET_HAVE_LDREXBHD): Split into LDREXBH | ||
| 43 | and LDREXD. | ||
| 44 | * config/arm/iterators.md (NARROW): move from sync.md. | ||
| 45 | (QHSD): New iterator for all current ARM integer modes. | ||
| 46 | (SIDI): New iterator for SI and DI modes only. | ||
| 47 | * config/arm/sync.md (sync_predtab): New mode_attr. | ||
| 48 | (sync_compare_and_swapsi): Fold into sync_compare_and_swap<mode>. | ||
| 49 | (sync_lock_test_and_setsi): Fold into sync_lock_test_and_set<mode>. | ||
| 50 | (sync_<sync_optab>si): Fold into sync_<sync_optab><mode>. | ||
| 51 | (sync_nandsi): Fold into sync_nand<mode>. | ||
| 52 | (sync_new_<sync_optab>si): Fold into sync_new_<sync_optab><mode>. | ||
| 53 | (sync_new_nandsi): Fold into sync_new_nand<mode>. | ||
| 54 | (sync_old_<sync_optab>si): Fold into sync_old_<sync_optab><mode>. | ||
| 55 | (sync_old_nandsi): Fold into sync_old_nand<mode>. | ||
| 56 | (sync_compare_and_swap<mode>): Support SI & DI. | ||
| 57 | (sync_lock_test_and_set<mode>): Likewise. | ||
| 58 | (sync_<sync_optab><mode>): Likewise. | ||
| 59 | (sync_nand<mode>): Likewise. | ||
| 60 | (sync_new_<sync_optab><mode>): Likewise. | ||
| 61 | (sync_new_nand<mode>): Likewise. | ||
| 62 | (sync_old_<sync_optab><mode>): Likewise. | ||
| 63 | (sync_old_nand<mode>): Likewise. | ||
| 64 | (arm_sync_compare_and_swapsi): Turn into iterator on SI & DI. | ||
| 65 | (arm_sync_lock_test_and_setsi): Likewise. | ||
| 66 | (arm_sync_new_<sync_optab>si): Likewise. | ||
| 67 | (arm_sync_new_nandsi): Likewise. | ||
| 68 | (arm_sync_old_<sync_optab>si): Likewise. | ||
| 69 | (arm_sync_old_nandsi): Likewise. | ||
| 70 | (arm_sync_compare_and_swap<mode> NARROW): use sync_predtab, fix indent. | ||
| 71 | (arm_sync_lock_test_and_set<mode> NARROW): Likewise. | ||
| 72 | (arm_sync_new_<sync_optab><mode> NARROW): Likewise. | ||
| 73 | (arm_sync_new_nand<mode> NARROW): Likewise. | ||
| 74 | (arm_sync_old_<sync_optab><mode> NARROW): Likewise. | ||
| 75 | (arm_sync_old_nand<mode> NARROW): Likewise. | ||
| 76 | |||
| 77 | 2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 78 | |||
| 79 | Backport from mainline (svn r19980): | ||
| 80 | |||
| 81 | 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 82 | |||
| 83 | PR target/48126 | ||
| 84 | |||
| 85 | gcc/ | ||
| 86 | * config/arm/arm.c (arm_output_sync_loop): Move label before barrier. | ||
| 87 | |||
| 88 | 2011-11-28 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 89 | |||
| 90 | Backport from mainline (svn r19979): | ||
| 91 | |||
| 92 | 2011-10-14 David Alan Gilbert <david.gilbert@linaro.org> | ||
| 93 | |||
| 94 | gcc/ | ||
| 95 | * config/arm/arm.h (TARGET_HAVE_DMB_MCR): MCR Not available in Thumb1. | ||
| 96 | |||
| 97 | === modified file 'gcc/config/arm/arm.c' | ||
| 98 | --- old/gcc/config/arm/arm.c 2011-11-21 01:45:54 +0000 | ||
| 99 | +++ new/gcc/config/arm/arm.c 2011-11-28 15:07:01 +0000 | ||
| 100 | @@ -24307,12 +24307,26 @@ | ||
| 101 | rtx target, | ||
| 102 | rtx memory) | ||
| 103 | { | ||
| 104 | - const char *suffix = arm_ldrex_suffix (mode); | ||
| 105 | - rtx operands[2]; | ||
| 106 | + rtx operands[3]; | ||
| 107 | |||
| 108 | operands[0] = target; | ||
| 109 | - operands[1] = memory; | ||
| 110 | - arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); | ||
| 111 | + if (mode != DImode) | ||
| 112 | + { | ||
| 113 | + const char *suffix = arm_ldrex_suffix (mode); | ||
| 114 | + operands[1] = memory; | ||
| 115 | + arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); | ||
| 116 | + } | ||
| 117 | + else | ||
| 118 | + { | ||
| 119 | + /* The restrictions on target registers in ARM mode are that the two | ||
| 120 | + registers are consecutive and the first one is even; Thumb is | ||
| 121 | + actually more flexible, but DI should give us this anyway. | ||
| 122 | + Note that the 1st register always gets the lowest word in memory. */ | ||
| 123 | + gcc_assert ((REGNO (target) & 1) == 0); | ||
| 124 | + operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1); | ||
| 125 | + operands[2] = memory; | ||
| 126 | + arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2"); | ||
| 127 | + } | ||
| 128 | } | ||
| 129 | |||
| 130 | /* Emit a strex{b,h,d, } instruction appropriate for the specified | ||
| 131 | @@ -24325,14 +24339,41 @@ | ||
| 132 | rtx value, | ||
| 133 | rtx memory) | ||
| 134 | { | ||
| 135 | - const char *suffix = arm_ldrex_suffix (mode); | ||
| 136 | - rtx operands[3]; | ||
| 137 | + rtx operands[4]; | ||
| 138 | |||
| 139 | operands[0] = result; | ||
| 140 | operands[1] = value; | ||
| 141 | - operands[2] = memory; | ||
| 142 | - arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix, | ||
| 143 | - cc); | ||
| 144 | + if (mode != DImode) | ||
| 145 | + { | ||
| 146 | + const char *suffix = arm_ldrex_suffix (mode); | ||
| 147 | + operands[2] = memory; | ||
| 148 | + arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", | ||
| 149 | + suffix, cc); | ||
| 150 | + } | ||
| 151 | + else | ||
| 152 | + { | ||
| 153 | + /* The restrictions on target registers in ARM mode are that the two | ||
| 154 | + registers are consecutive and the first one is even; Thumb is | ||
| 155 | + actually more flexible, but DI should give us this anyway. | ||
| 156 | + Note that the 1st register always gets the lowest word in memory. */ | ||
| 157 | + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); | ||
| 158 | + operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); | ||
| 159 | + operands[3] = memory; | ||
| 160 | + arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3", | ||
| 161 | + cc); | ||
| 162 | + } | ||
| 163 | +} | ||
| 164 | + | ||
| 165 | +/* Helper to emit an it instruction in Thumb2 mode only; although the assembler | ||
| 166 | + will ignore it in ARM mode, emitting it will mess up instruction counts we | ||
| 167 | + sometimes keep. 'flags' are the extra t's and e's if it's more than one | ||
| 168 | + instruction that is conditional. */ | ||
| 169 | +static void | ||
| 170 | +arm_output_it (emit_f emit, const char *flags, const char *cond) | ||
| 171 | +{ | ||
| 172 | + rtx operands[1]; /* Don't actually use the operand. */ | ||
| 173 | + if (TARGET_THUMB2) | ||
| 174 | + arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond); | ||
| 175 | } | ||
| 176 | |||
| 177 | /* Helper to emit a two operand instruction. */ | ||
| 178 | @@ -24374,7 +24415,7 @@ | ||
| 179 | |||
| 180 | required_value: | ||
| 181 | |||
| 182 | - RTX register or const_int representing the required old_value for | ||
| 183 | + RTX register representing the required old_value for | ||
| 184 | the modify to continue, if NULL no comparsion is performed. */ | ||
| 185 | static void | ||
| 186 | arm_output_sync_loop (emit_f emit, | ||
| 187 | @@ -24388,7 +24429,13 @@ | ||
| 188 | enum attr_sync_op sync_op, | ||
| 189 | int early_barrier_required) | ||
| 190 | { | ||
| 191 | - rtx operands[1]; | ||
| 192 | + rtx operands[2]; | ||
| 193 | + /* We'll use the lo for the normal rtx in the non-DI case | ||
| 194 | + as well as the least-sig word in the DI case. */ | ||
| 195 | + rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; | ||
| 196 | + rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; | ||
| 197 | + | ||
| 198 | + bool is_di = mode == DImode; | ||
| 199 | |||
| 200 | gcc_assert (t1 != t2); | ||
| 201 | |||
| 202 | @@ -24399,82 +24446,142 @@ | ||
| 203 | |||
| 204 | arm_output_ldrex (emit, mode, old_value, memory); | ||
| 205 | |||
| 206 | + if (is_di) | ||
| 207 | + { | ||
| 208 | + old_value_lo = gen_lowpart (SImode, old_value); | ||
| 209 | + old_value_hi = gen_highpart (SImode, old_value); | ||
| 210 | + if (required_value) | ||
| 211 | + { | ||
| 212 | + required_value_lo = gen_lowpart (SImode, required_value); | ||
| 213 | + required_value_hi = gen_highpart (SImode, required_value); | ||
| 214 | + } | ||
| 215 | + else | ||
| 216 | + { | ||
| 217 | + /* Silence false potentially unused warning. */ | ||
| 218 | + required_value_lo = NULL_RTX; | ||
| 219 | + required_value_hi = NULL_RTX; | ||
| 220 | + } | ||
| 221 | + new_value_lo = gen_lowpart (SImode, new_value); | ||
| 222 | + new_value_hi = gen_highpart (SImode, new_value); | ||
| 223 | + t1_lo = gen_lowpart (SImode, t1); | ||
| 224 | + t1_hi = gen_highpart (SImode, t1); | ||
| 225 | + } | ||
| 226 | + else | ||
| 227 | + { | ||
| 228 | + old_value_lo = old_value; | ||
| 229 | + new_value_lo = new_value; | ||
| 230 | + required_value_lo = required_value; | ||
| 231 | + t1_lo = t1; | ||
| 232 | + | ||
| 233 | + /* Silence false potentially unused warning. */ | ||
| 234 | + t1_hi = NULL_RTX; | ||
| 235 | + new_value_hi = NULL_RTX; | ||
| 236 | + required_value_hi = NULL_RTX; | ||
| 237 | + old_value_hi = NULL_RTX; | ||
| 238 | + } | ||
| 239 | + | ||
| 240 | if (required_value) | ||
| 241 | { | ||
| 242 | - rtx operands[2]; | ||
| 243 | + operands[0] = old_value_lo; | ||
| 244 | + operands[1] = required_value_lo; | ||
| 245 | |||
| 246 | - operands[0] = old_value; | ||
| 247 | - operands[1] = required_value; | ||
| 248 | arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); | ||
| 249 | + if (is_di) | ||
| 250 | + { | ||
| 251 | + arm_output_it (emit, "", "eq"); | ||
| 252 | + arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi); | ||
| 253 | + } | ||
| 254 | arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); | ||
| 255 | } | ||
| 256 | |||
| 257 | switch (sync_op) | ||
| 258 | { | ||
| 259 | case SYNC_OP_ADD: | ||
| 260 | - arm_output_op3 (emit, "add", t1, old_value, new_value); | ||
| 261 | + arm_output_op3 (emit, is_di ? "adds" : "add", | ||
| 262 | + t1_lo, old_value_lo, new_value_lo); | ||
| 263 | + if (is_di) | ||
| 264 | + arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi); | ||
| 265 | break; | ||
| 266 | |||
| 267 | case SYNC_OP_SUB: | ||
| 268 | - arm_output_op3 (emit, "sub", t1, old_value, new_value); | ||
| 269 | + arm_output_op3 (emit, is_di ? "subs" : "sub", | ||
| 270 | + t1_lo, old_value_lo, new_value_lo); | ||
| 271 | + if (is_di) | ||
| 272 | + arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi); | ||
| 273 | break; | ||
| 274 | |||
| 275 | case SYNC_OP_IOR: | ||
| 276 | - arm_output_op3 (emit, "orr", t1, old_value, new_value); | ||
| 277 | + arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo); | ||
| 278 | + if (is_di) | ||
| 279 | + arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi); | ||
| 280 | break; | ||
| 281 | |||
| 282 | case SYNC_OP_XOR: | ||
| 283 | - arm_output_op3 (emit, "eor", t1, old_value, new_value); | ||
| 284 | + arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo); | ||
| 285 | + if (is_di) | ||
| 286 | + arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi); | ||
| 287 | break; | ||
| 288 | |||
| 289 | case SYNC_OP_AND: | ||
| 290 | - arm_output_op3 (emit,"and", t1, old_value, new_value); | ||
| 291 | + arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo); | ||
| 292 | + if (is_di) | ||
| 293 | + arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); | ||
| 294 | break; | ||
| 295 | |||
| 296 | case SYNC_OP_NAND: | ||
| 297 | - arm_output_op3 (emit, "and", t1, old_value, new_value); | ||
| 298 | - arm_output_op2 (emit, "mvn", t1, t1); | ||
| 299 | + arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo); | ||
| 300 | + if (is_di) | ||
| 301 | + arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); | ||
| 302 | + arm_output_op2 (emit, "mvn", t1_lo, t1_lo); | ||
| 303 | + if (is_di) | ||
| 304 | + arm_output_op2 (emit, "mvn", t1_hi, t1_hi); | ||
| 305 | break; | ||
| 306 | |||
| 307 | case SYNC_OP_NONE: | ||
| 308 | t1 = new_value; | ||
| 309 | + t1_lo = new_value_lo; | ||
| 310 | + if (is_di) | ||
| 311 | + t1_hi = new_value_hi; | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | |||
| 315 | + /* Note that the result of strex is a 0/1 flag that's always 1 register. */ | ||
| 316 | if (t2) | ||
| 317 | { | ||
| 318 | - arm_output_strex (emit, mode, "", t2, t1, memory); | ||
| 319 | - operands[0] = t2; | ||
| 320 | - arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); | ||
| 321 | - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", | ||
| 322 | - LOCAL_LABEL_PREFIX); | ||
| 323 | + arm_output_strex (emit, mode, "", t2, t1, memory); | ||
| 324 | + operands[0] = t2; | ||
| 325 | + arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); | ||
| 326 | + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", | ||
| 327 | + LOCAL_LABEL_PREFIX); | ||
| 328 | } | ||
| 329 | else | ||
| 330 | { | ||
| 331 | /* Use old_value for the return value because for some operations | ||
| 332 | the old_value can easily be restored. This saves one register. */ | ||
| 333 | - arm_output_strex (emit, mode, "", old_value, t1, memory); | ||
| 334 | - operands[0] = old_value; | ||
| 335 | + arm_output_strex (emit, mode, "", old_value_lo, t1, memory); | ||
| 336 | + operands[0] = old_value_lo; | ||
| 337 | arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); | ||
| 338 | arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", | ||
| 339 | LOCAL_LABEL_PREFIX); | ||
| 340 | |||
| 341 | + /* Note that we only used the _lo half of old_value as a temporary | ||
| 342 | + so in DI we don't have to restore the _hi part. */ | ||
| 343 | switch (sync_op) | ||
| 344 | { | ||
| 345 | case SYNC_OP_ADD: | ||
| 346 | - arm_output_op3 (emit, "sub", old_value, t1, new_value); | ||
| 347 | + arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo); | ||
| 348 | break; | ||
| 349 | |||
| 350 | case SYNC_OP_SUB: | ||
| 351 | - arm_output_op3 (emit, "add", old_value, t1, new_value); | ||
| 352 | + arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo); | ||
| 353 | break; | ||
| 354 | |||
| 355 | case SYNC_OP_XOR: | ||
| 356 | - arm_output_op3 (emit, "eor", old_value, t1, new_value); | ||
| 357 | + arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo); | ||
| 358 | break; | ||
| 359 | |||
| 360 | case SYNC_OP_NONE: | ||
| 361 | - arm_output_op2 (emit, "mov", old_value, required_value); | ||
| 362 | + arm_output_op2 (emit, "mov", old_value_lo, required_value_lo); | ||
| 363 | break; | ||
| 364 | |||
| 365 | default: | ||
| 366 | @@ -24482,8 +24589,11 @@ | ||
| 367 | } | ||
| 368 | } | ||
| 369 | |||
| 370 | + /* Note: label is before barrier so that in cmp failure case we still get | ||
| 371 | + a barrier to stop subsequent loads floating upwards past the ldrex | ||
| 372 | + PR target/48126. */ | ||
| 373 | + arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); | ||
| 374 | arm_process_output_memory_barrier (emit, NULL); | ||
| 375 | - arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); | ||
| 376 | } | ||
| 377 | |||
| 378 | static rtx | ||
| 379 | @@ -24577,7 +24687,7 @@ | ||
| 380 | target = gen_reg_rtx (mode); | ||
| 381 | |||
| 382 | memory = arm_legitimize_sync_memory (memory); | ||
| 383 | - if (mode != SImode) | ||
| 384 | + if (mode != SImode && mode != DImode) | ||
| 385 | { | ||
| 386 | rtx load_temp = gen_reg_rtx (SImode); | ||
| 387 | |||
| 388 | |||
| 389 | === modified file 'gcc/config/arm/arm.h' | ||
| 390 | --- old/gcc/config/arm/arm.h 2011-11-21 01:45:54 +0000 | ||
| 391 | +++ new/gcc/config/arm/arm.h 2011-11-28 15:07:01 +0000 | ||
| 392 | @@ -300,7 +300,8 @@ | ||
| 393 | #define TARGET_HAVE_DMB (arm_arch7) | ||
| 394 | |||
| 395 | /* Nonzero if this chip implements a memory barrier via CP15. */ | ||
| 396 | -#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB) | ||
| 397 | +#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ | ||
| 398 | + && ! TARGET_THUMB1) | ||
| 399 | |||
| 400 | /* Nonzero if this chip implements a memory barrier instruction. */ | ||
| 401 | #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) | ||
| 402 | @@ -308,8 +309,12 @@ | ||
| 403 | /* Nonzero if this chip supports ldrex and strex */ | ||
| 404 | #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) | ||
| 405 | |||
| 406 | -/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ | ||
| 407 | -#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) | ||
| 408 | +/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ | ||
| 409 | +#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) | ||
| 410 | + | ||
| 411 | +/* Nonzero if this chip supports ldrexd and strexd. */ | ||
| 412 | +#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ | ||
| 413 | + && arm_arch_notm) | ||
| 414 | |||
| 415 | /* Nonzero if integer division instructions supported. */ | ||
| 416 | #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ | ||
| 417 | |||
| 418 | === modified file 'gcc/config/arm/iterators.md' | ||
| 419 | --- old/gcc/config/arm/iterators.md 2011-10-23 13:33:07 +0000 | ||
| 420 | +++ new/gcc/config/arm/iterators.md 2011-11-28 15:07:01 +0000 | ||
| 421 | @@ -33,6 +33,15 @@ | ||
| 422 | ;; A list of integer modes that are up to one word long | ||
| 423 | (define_mode_iterator QHSI [QI HI SI]) | ||
| 424 | |||
| 425 | +;; A list of integer modes that are less than a word | ||
| 426 | +(define_mode_iterator NARROW [QI HI]) | ||
| 427 | + | ||
| 428 | +;; A list of all the integer modes up to 64bit | ||
| 429 | +(define_mode_iterator QHSD [QI HI SI DI]) | ||
| 430 | + | ||
| 431 | +;; A list of the 32bit and 64bit integer modes | ||
| 432 | +(define_mode_iterator SIDI [SI DI]) | ||
| 433 | + | ||
| 434 | ;; Integer element sizes implemented by IWMMXT. | ||
| 435 | (define_mode_iterator VMMX [V2SI V4HI V8QI]) | ||
| 436 | |||
| 437 | |||
| 438 | === added file 'gcc/config/arm/linux-atomic-64bit.c' | ||
| 439 | --- old/gcc/config/arm/linux-atomic-64bit.c 1970-01-01 00:00:00 +0000 | ||
| 440 | +++ new/gcc/config/arm/linux-atomic-64bit.c 2011-10-14 15:50:44 +0000 | ||
| 441 | @@ -0,0 +1,166 @@ | ||
| 442 | +/* 64bit Linux-specific atomic operations for ARM EABI. | ||
| 443 | + Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. | ||
| 444 | + Based on linux-atomic.c | ||
| 445 | + | ||
| 446 | + 64 bit additions david.gilbert@linaro.org | ||
| 447 | + | ||
| 448 | +This file is part of GCC. | ||
| 449 | + | ||
| 450 | +GCC is free software; you can redistribute it and/or modify it under | ||
| 451 | +the terms of the GNU General Public License as published by the Free | ||
| 452 | +Software Foundation; either version 3, or (at your option) any later | ||
| 453 | +version. | ||
| 454 | + | ||
| 455 | +GCC is distributed in the hope that it will be useful, but WITHOUT ANY | ||
| 456 | +WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 457 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
| 458 | +for more details. | ||
| 459 | + | ||
| 460 | +Under Section 7 of GPL version 3, you are granted additional | ||
| 461 | +permissions described in the GCC Runtime Library Exception, version | ||
| 462 | +3.1, as published by the Free Software Foundation. | ||
| 463 | + | ||
| 464 | +You should have received a copy of the GNU General Public License and | ||
| 465 | +a copy of the GCC Runtime Library Exception along with this program; | ||
| 466 | +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | ||
| 467 | +<http://www.gnu.org/licenses/>. */ | ||
| 468 | + | ||
| 469 | +/* 64bit helper functions for atomic operations; the compiler will | ||
| 470 | + call these when the code is compiled for a CPU without ldrexd/strexd. | ||
| 471 | + (If the CPU had those then the compiler inlines the operation). | ||
| 472 | + | ||
| 473 | + These helpers require a kernel helper that's only present on newer | ||
| 474 | + kernels; we check for that in an init section and bail out rather | ||
| 475 | + unceremoniously. */ | ||
| 476 | + | ||
| 477 | +extern unsigned int __write (int fd, const void *buf, unsigned int count); | ||
| 478 | +extern void abort (void); | ||
| 479 | + | ||
| 480 | +/* Kernel helper for compare-and-exchange. */ | ||
| 481 | +typedef int (__kernel_cmpxchg64_t) (const long long* oldval, | ||
| 482 | + const long long* newval, | ||
| 483 | + long long *ptr); | ||
| 484 | +#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60) | ||
| 485 | + | ||
| 486 | +/* Kernel helper page version number. */ | ||
| 487 | +#define __kernel_helper_version (*(unsigned int *)0xffff0ffc) | ||
| 488 | + | ||
| 489 | +/* Check that the kernel has a new enough version at load. */ | ||
| 490 | +static void __check_for_sync8_kernelhelper (void) | ||
| 491 | +{ | ||
| 492 | + if (__kernel_helper_version < 5) | ||
| 493 | + { | ||
| 494 | + const char err[] = "A newer kernel is required to run this binary. " | ||
| 495 | + "(__kernel_cmpxchg64 helper)\n"; | ||
| 496 | + /* At this point we need a way to crash with some information | ||
| 497 | + for the user - I'm not sure I can rely on much else being | ||
| 498 | + available at this point, so do the same as generic-morestack.c | ||
| 499 | + write () and abort (). */ | ||
| 500 | + __write (2 /* stderr. */, err, sizeof (err)); | ||
| 501 | + abort (); | ||
| 502 | + } | ||
| 503 | +}; | ||
| 504 | + | ||
| 505 | +static void (*__sync8_kernelhelper_inithook[]) (void) | ||
| 506 | + __attribute__ ((used, section (".init_array"))) = { | ||
| 507 | + &__check_for_sync8_kernelhelper | ||
| 508 | +}; | ||
| 509 | + | ||
| 510 | +#define HIDDEN __attribute__ ((visibility ("hidden"))) | ||
| 511 | + | ||
| 512 | +#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP) \ | ||
| 513 | + long long HIDDEN \ | ||
| 514 | + __sync_fetch_and_##OP##_8 (long long *ptr, long long val) \ | ||
| 515 | + { \ | ||
| 516 | + int failure; \ | ||
| 517 | + long long tmp,tmp2; \ | ||
| 518 | + \ | ||
| 519 | + do { \ | ||
| 520 | + tmp = *ptr; \ | ||
| 521 | + tmp2 = PFX_OP (tmp INF_OP val); \ | ||
| 522 | + failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ | ||
| 523 | + } while (failure != 0); \ | ||
| 524 | + \ | ||
| 525 | + return tmp; \ | ||
| 526 | + } | ||
| 527 | + | ||
| 528 | +FETCH_AND_OP_WORD64 (add, , +) | ||
| 529 | +FETCH_AND_OP_WORD64 (sub, , -) | ||
| 530 | +FETCH_AND_OP_WORD64 (or, , |) | ||
| 531 | +FETCH_AND_OP_WORD64 (and, , &) | ||
| 532 | +FETCH_AND_OP_WORD64 (xor, , ^) | ||
| 533 | +FETCH_AND_OP_WORD64 (nand, ~, &) | ||
| 534 | + | ||
| 535 | +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH | ||
| 536 | +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH | ||
| 537 | + | ||
| 538 | +/* Implement __sync_<op>_and_fetch for 64bit quantities (the | ||
| 539 | + __sync_fetch_and_<op> variants are defined above). */ | ||
| 540 | + | ||
| 541 | +#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP) \ | ||
| 542 | + long long HIDDEN \ | ||
| 543 | + __sync_##OP##_and_fetch_8 (long long *ptr, long long val) \ | ||
| 544 | + { \ | ||
| 545 | + int failure; \ | ||
| 546 | + long long tmp,tmp2; \ | ||
| 547 | + \ | ||
| 548 | + do { \ | ||
| 549 | + tmp = *ptr; \ | ||
| 550 | + tmp2 = PFX_OP (tmp INF_OP val); \ | ||
| 551 | + failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ | ||
| 552 | + } while (failure != 0); \ | ||
| 553 | + \ | ||
| 554 | + return tmp2; \ | ||
| 555 | + } | ||
| 556 | + | ||
| 557 | +OP_AND_FETCH_WORD64 (add, , +) | ||
| 558 | +OP_AND_FETCH_WORD64 (sub, , -) | ||
| 559 | +OP_AND_FETCH_WORD64 (or, , |) | ||
| 560 | +OP_AND_FETCH_WORD64 (and, , &) | ||
| 561 | +OP_AND_FETCH_WORD64 (xor, , ^) | ||
| 562 | +OP_AND_FETCH_WORD64 (nand, ~, &) | ||
| 563 | + | ||
| 564 | +long long HIDDEN | ||
| 565 | +__sync_val_compare_and_swap_8 (long long *ptr, long long oldval, | ||
| 566 | + long long newval) | ||
| 567 | +{ | ||
| 568 | + int failure; | ||
| 569 | + long long actual_oldval; | ||
| 570 | + | ||
| 571 | + while (1) | ||
| 572 | + { | ||
| 573 | + actual_oldval = *ptr; | ||
| 574 | + | ||
| 575 | + if (__builtin_expect (oldval != actual_oldval, 0)) | ||
| 576 | + return actual_oldval; | ||
| 577 | + | ||
| 578 | + failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr); | ||
| 579 | + | ||
| 580 | + if (__builtin_expect (!failure, 1)) | ||
| 581 | + return oldval; | ||
| 582 | + } | ||
| 583 | +} | ||
| 584 | + | ||
| 585 | +typedef unsigned char bool; | ||
| 586 | + | ||
| 587 | +bool HIDDEN | ||
| 588 | +__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval, | ||
| 589 | + long long newval) | ||
| 590 | +{ | ||
| 591 | + int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr); | ||
| 592 | + return (failure == 0); | ||
| 593 | +} | ||
| 594 | + | ||
| 595 | +long long HIDDEN | ||
| 596 | +__sync_lock_test_and_set_8 (long long *ptr, long long val) | ||
| 597 | +{ | ||
| 598 | + int failure; | ||
| 599 | + long long oldval; | ||
| 600 | + | ||
| 601 | + do { | ||
| 602 | + oldval = *ptr; | ||
| 603 | + failure = __kernel_cmpxchg64 (&oldval, &val, ptr); | ||
| 604 | + } while (failure != 0); | ||
| 605 | + | ||
| 606 | + return oldval; | ||
| 607 | +} | ||
| 608 | |||
| 609 | === modified file 'gcc/config/arm/linux-atomic.c' | ||
| 610 | --- old/gcc/config/arm/linux-atomic.c 2011-01-03 20:52:22 +0000 | ||
| 611 | +++ new/gcc/config/arm/linux-atomic.c 2011-10-14 15:50:44 +0000 | ||
| 612 | @@ -32,8 +32,8 @@ | ||
| 613 | #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) | ||
| 614 | |||
| 615 | /* Note: we implement byte, short and int versions of atomic operations using | ||
| 616 | - the above kernel helpers, but there is no support for "long long" (64-bit) | ||
| 617 | - operations as yet. */ | ||
| 618 | + the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit) | ||
| 619 | + operations. */ | ||
| 620 | |||
| 621 | #define HIDDEN __attribute__ ((visibility ("hidden"))) | ||
| 622 | |||
| 623 | @@ -273,6 +273,7 @@ | ||
| 624 | *ptr = 0; \ | ||
| 625 | } | ||
| 626 | |||
| 627 | +SYNC_LOCK_RELEASE (long long, 8) | ||
| 628 | SYNC_LOCK_RELEASE (int, 4) | ||
| 629 | SYNC_LOCK_RELEASE (short, 2) | ||
| 630 | SYNC_LOCK_RELEASE (char, 1) | ||
| 631 | |||
| 632 | === modified file 'gcc/config/arm/sync.md' | ||
| 633 | --- old/gcc/config/arm/sync.md 2010-12-31 13:25:33 +0000 | ||
| 634 | +++ new/gcc/config/arm/sync.md 2011-10-14 15:47:15 +0000 | ||
| 635 | @@ -1,6 +1,7 @@ | ||
| 636 | ;; Machine description for ARM processor synchronization primitives. | ||
| 637 | ;; Copyright (C) 2010 Free Software Foundation, Inc. | ||
| 638 | ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) | ||
| 639 | +;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) | ||
| 640 | ;; | ||
| 641 | ;; This file is part of GCC. | ||
| 642 | ;; | ||
| 643 | @@ -33,31 +34,24 @@ | ||
| 644 | MEM_VOLATILE_P (operands[0]) = 1; | ||
| 645 | }) | ||
| 646 | |||
| 647 | -(define_expand "sync_compare_and_swapsi" | ||
| 648 | - [(set (match_operand:SI 0 "s_register_operand") | ||
| 649 | - (unspec_volatile:SI [(match_operand:SI 1 "memory_operand") | ||
| 650 | - (match_operand:SI 2 "s_register_operand") | ||
| 651 | - (match_operand:SI 3 "s_register_operand")] | ||
| 652 | - VUNSPEC_SYNC_COMPARE_AND_SWAP))] | ||
| 653 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 654 | - { | ||
| 655 | - struct arm_sync_generator generator; | ||
| 656 | - generator.op = arm_sync_generator_omrn; | ||
| 657 | - generator.u.omrn = gen_arm_sync_compare_and_swapsi; | ||
| 658 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2], | ||
| 659 | - operands[3]); | ||
| 660 | - DONE; | ||
| 661 | - }) | ||
| 662 | |||
| 663 | -(define_mode_iterator NARROW [QI HI]) | ||
| 664 | +(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && | ||
| 665 | + TARGET_HAVE_MEMORY_BARRIER") | ||
| 666 | + (QI "TARGET_HAVE_LDREXBH && | ||
| 667 | + TARGET_HAVE_MEMORY_BARRIER") | ||
| 668 | + (HI "TARGET_HAVE_LDREXBH && | ||
| 669 | + TARGET_HAVE_MEMORY_BARRIER") | ||
| 670 | + (DI "TARGET_HAVE_LDREXD && | ||
| 671 | + ARM_DOUBLEWORD_ALIGN && | ||
| 672 | + TARGET_HAVE_MEMORY_BARRIER")]) | ||
| 673 | |||
| 674 | (define_expand "sync_compare_and_swap<mode>" | ||
| 675 | - [(set (match_operand:NARROW 0 "s_register_operand") | ||
| 676 | - (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand") | ||
| 677 | - (match_operand:NARROW 2 "s_register_operand") | ||
| 678 | - (match_operand:NARROW 3 "s_register_operand")] | ||
| 679 | + [(set (match_operand:QHSD 0 "s_register_operand") | ||
| 680 | + (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand") | ||
| 681 | + (match_operand:QHSD 2 "s_register_operand") | ||
| 682 | + (match_operand:QHSD 3 "s_register_operand")] | ||
| 683 | VUNSPEC_SYNC_COMPARE_AND_SWAP))] | ||
| 684 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 685 | + "<sync_predtab>" | ||
| 686 | { | ||
| 687 | struct arm_sync_generator generator; | ||
| 688 | generator.op = arm_sync_generator_omrn; | ||
| 689 | @@ -67,25 +61,11 @@ | ||
| 690 | DONE; | ||
| 691 | }) | ||
| 692 | |||
| 693 | -(define_expand "sync_lock_test_and_setsi" | ||
| 694 | - [(match_operand:SI 0 "s_register_operand") | ||
| 695 | - (match_operand:SI 1 "memory_operand") | ||
| 696 | - (match_operand:SI 2 "s_register_operand")] | ||
| 697 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 698 | - { | ||
| 699 | - struct arm_sync_generator generator; | ||
| 700 | - generator.op = arm_sync_generator_omn; | ||
| 701 | - generator.u.omn = gen_arm_sync_lock_test_and_setsi; | ||
| 702 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, | ||
| 703 | - operands[2]); | ||
| 704 | - DONE; | ||
| 705 | - }) | ||
| 706 | - | ||
| 707 | (define_expand "sync_lock_test_and_set<mode>" | ||
| 708 | - [(match_operand:NARROW 0 "s_register_operand") | ||
| 709 | - (match_operand:NARROW 1 "memory_operand") | ||
| 710 | - (match_operand:NARROW 2 "s_register_operand")] | ||
| 711 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 712 | + [(match_operand:QHSD 0 "s_register_operand") | ||
| 713 | + (match_operand:QHSD 1 "memory_operand") | ||
| 714 | + (match_operand:QHSD 2 "s_register_operand")] | ||
| 715 | + "<sync_predtab>" | ||
| 716 | { | ||
| 717 | struct arm_sync_generator generator; | ||
| 718 | generator.op = arm_sync_generator_omn; | ||
| 719 | @@ -115,51 +95,25 @@ | ||
| 720 | (plus "*") | ||
| 721 | (minus "*")]) | ||
| 722 | |||
| 723 | -(define_expand "sync_<sync_optab>si" | ||
| 724 | - [(match_operand:SI 0 "memory_operand") | ||
| 725 | - (match_operand:SI 1 "s_register_operand") | ||
| 726 | - (syncop:SI (match_dup 0) (match_dup 1))] | ||
| 727 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 728 | - { | ||
| 729 | - struct arm_sync_generator generator; | ||
| 730 | - generator.op = arm_sync_generator_omn; | ||
| 731 | - generator.u.omn = gen_arm_sync_new_<sync_optab>si; | ||
| 732 | - arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); | ||
| 733 | - DONE; | ||
| 734 | - }) | ||
| 735 | - | ||
| 736 | -(define_expand "sync_nandsi" | ||
| 737 | - [(match_operand:SI 0 "memory_operand") | ||
| 738 | - (match_operand:SI 1 "s_register_operand") | ||
| 739 | - (not:SI (and:SI (match_dup 0) (match_dup 1)))] | ||
| 740 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 741 | - { | ||
| 742 | - struct arm_sync_generator generator; | ||
| 743 | - generator.op = arm_sync_generator_omn; | ||
| 744 | - generator.u.omn = gen_arm_sync_new_nandsi; | ||
| 745 | - arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); | ||
| 746 | - DONE; | ||
| 747 | - }) | ||
| 748 | - | ||
| 749 | (define_expand "sync_<sync_optab><mode>" | ||
| 750 | - [(match_operand:NARROW 0 "memory_operand") | ||
| 751 | - (match_operand:NARROW 1 "s_register_operand") | ||
| 752 | - (syncop:NARROW (match_dup 0) (match_dup 1))] | ||
| 753 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 754 | + [(match_operand:QHSD 0 "memory_operand") | ||
| 755 | + (match_operand:QHSD 1 "s_register_operand") | ||
| 756 | + (syncop:QHSD (match_dup 0) (match_dup 1))] | ||
| 757 | + "<sync_predtab>" | ||
| 758 | { | ||
| 759 | struct arm_sync_generator generator; | ||
| 760 | generator.op = arm_sync_generator_omn; | ||
| 761 | generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; | ||
| 762 | arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL, | ||
| 763 | - operands[1]); | ||
| 764 | + operands[1]); | ||
| 765 | DONE; | ||
| 766 | }) | ||
| 767 | |||
| 768 | (define_expand "sync_nand<mode>" | ||
| 769 | - [(match_operand:NARROW 0 "memory_operand") | ||
| 770 | - (match_operand:NARROW 1 "s_register_operand") | ||
| 771 | - (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))] | ||
| 772 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 773 | + [(match_operand:QHSD 0 "memory_operand") | ||
| 774 | + (match_operand:QHSD 1 "s_register_operand") | ||
| 775 | + (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))] | ||
| 776 | + "<sync_predtab>" | ||
| 777 | { | ||
| 778 | struct arm_sync_generator generator; | ||
| 779 | generator.op = arm_sync_generator_omn; | ||
| 780 | @@ -169,57 +123,27 @@ | ||
| 781 | DONE; | ||
| 782 | }) | ||
| 783 | |||
| 784 | -(define_expand "sync_new_<sync_optab>si" | ||
| 785 | - [(match_operand:SI 0 "s_register_operand") | ||
| 786 | - (match_operand:SI 1 "memory_operand") | ||
| 787 | - (match_operand:SI 2 "s_register_operand") | ||
| 788 | - (syncop:SI (match_dup 1) (match_dup 2))] | ||
| 789 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 790 | - { | ||
| 791 | - struct arm_sync_generator generator; | ||
| 792 | - generator.op = arm_sync_generator_omn; | ||
| 793 | - generator.u.omn = gen_arm_sync_new_<sync_optab>si; | ||
| 794 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, | ||
| 795 | - operands[2]); | ||
| 796 | - DONE; | ||
| 797 | - }) | ||
| 798 | - | ||
| 799 | -(define_expand "sync_new_nandsi" | ||
| 800 | - [(match_operand:SI 0 "s_register_operand") | ||
| 801 | - (match_operand:SI 1 "memory_operand") | ||
| 802 | - (match_operand:SI 2 "s_register_operand") | ||
| 803 | - (not:SI (and:SI (match_dup 1) (match_dup 2)))] | ||
| 804 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 805 | - { | ||
| 806 | - struct arm_sync_generator generator; | ||
| 807 | - generator.op = arm_sync_generator_omn; | ||
| 808 | - generator.u.omn = gen_arm_sync_new_nandsi; | ||
| 809 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, | ||
| 810 | - operands[2]); | ||
| 811 | - DONE; | ||
| 812 | - }) | ||
| 813 | - | ||
| 814 | (define_expand "sync_new_<sync_optab><mode>" | ||
| 815 | - [(match_operand:NARROW 0 "s_register_operand") | ||
| 816 | - (match_operand:NARROW 1 "memory_operand") | ||
| 817 | - (match_operand:NARROW 2 "s_register_operand") | ||
| 818 | - (syncop:NARROW (match_dup 1) (match_dup 2))] | ||
| 819 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 820 | + [(match_operand:QHSD 0 "s_register_operand") | ||
| 821 | + (match_operand:QHSD 1 "memory_operand") | ||
| 822 | + (match_operand:QHSD 2 "s_register_operand") | ||
| 823 | + (syncop:QHSD (match_dup 1) (match_dup 2))] | ||
| 824 | + "<sync_predtab>" | ||
| 825 | { | ||
| 826 | struct arm_sync_generator generator; | ||
| 827 | generator.op = arm_sync_generator_omn; | ||
| 828 | generator.u.omn = gen_arm_sync_new_<sync_optab><mode>; | ||
| 829 | arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], | ||
| 830 | - NULL, operands[2]); | ||
| 831 | + NULL, operands[2]); | ||
| 832 | DONE; | ||
| 833 | }) | ||
| 834 | |||
| 835 | (define_expand "sync_new_nand<mode>" | ||
| 836 | - [(match_operand:NARROW 0 "s_register_operand") | ||
| 837 | - (match_operand:NARROW 1 "memory_operand") | ||
| 838 | - (match_operand:NARROW 2 "s_register_operand") | ||
| 839 | - (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] | ||
| 840 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 841 | + [(match_operand:QHSD 0 "s_register_operand") | ||
| 842 | + (match_operand:QHSD 1 "memory_operand") | ||
| 843 | + (match_operand:QHSD 2 "s_register_operand") | ||
| 844 | + (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] | ||
| 845 | + "<sync_predtab>" | ||
| 846 | { | ||
| 847 | struct arm_sync_generator generator; | ||
| 848 | generator.op = arm_sync_generator_omn; | ||
| 849 | @@ -229,57 +153,27 @@ | ||
| 850 | DONE; | ||
| 851 | }); | ||
| 852 | |||
| 853 | -(define_expand "sync_old_<sync_optab>si" | ||
| 854 | - [(match_operand:SI 0 "s_register_operand") | ||
| 855 | - (match_operand:SI 1 "memory_operand") | ||
| 856 | - (match_operand:SI 2 "s_register_operand") | ||
| 857 | - (syncop:SI (match_dup 1) (match_dup 2))] | ||
| 858 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 859 | - { | ||
| 860 | - struct arm_sync_generator generator; | ||
| 861 | - generator.op = arm_sync_generator_omn; | ||
| 862 | - generator.u.omn = gen_arm_sync_old_<sync_optab>si; | ||
| 863 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, | ||
| 864 | - operands[2]); | ||
| 865 | - DONE; | ||
| 866 | - }) | ||
| 867 | - | ||
| 868 | -(define_expand "sync_old_nandsi" | ||
| 869 | - [(match_operand:SI 0 "s_register_operand") | ||
| 870 | - (match_operand:SI 1 "memory_operand") | ||
| 871 | - (match_operand:SI 2 "s_register_operand") | ||
| 872 | - (not:SI (and:SI (match_dup 1) (match_dup 2)))] | ||
| 873 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 874 | - { | ||
| 875 | - struct arm_sync_generator generator; | ||
| 876 | - generator.op = arm_sync_generator_omn; | ||
| 877 | - generator.u.omn = gen_arm_sync_old_nandsi; | ||
| 878 | - arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, | ||
| 879 | - operands[2]); | ||
| 880 | - DONE; | ||
| 881 | - }) | ||
| 882 | - | ||
| 883 | (define_expand "sync_old_<sync_optab><mode>" | ||
| 884 | - [(match_operand:NARROW 0 "s_register_operand") | ||
| 885 | - (match_operand:NARROW 1 "memory_operand") | ||
| 886 | - (match_operand:NARROW 2 "s_register_operand") | ||
| 887 | - (syncop:NARROW (match_dup 1) (match_dup 2))] | ||
| 888 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 889 | + [(match_operand:QHSD 0 "s_register_operand") | ||
| 890 | + (match_operand:QHSD 1 "memory_operand") | ||
| 891 | + (match_operand:QHSD 2 "s_register_operand") | ||
| 892 | + (syncop:QHSD (match_dup 1) (match_dup 2))] | ||
| 893 | + "<sync_predtab>" | ||
| 894 | { | ||
| 895 | struct arm_sync_generator generator; | ||
| 896 | generator.op = arm_sync_generator_omn; | ||
| 897 | generator.u.omn = gen_arm_sync_old_<sync_optab><mode>; | ||
| 898 | arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], | ||
| 899 | - NULL, operands[2]); | ||
| 900 | + NULL, operands[2]); | ||
| 901 | DONE; | ||
| 902 | }) | ||
| 903 | |||
| 904 | (define_expand "sync_old_nand<mode>" | ||
| 905 | - [(match_operand:NARROW 0 "s_register_operand") | ||
| 906 | - (match_operand:NARROW 1 "memory_operand") | ||
| 907 | - (match_operand:NARROW 2 "s_register_operand") | ||
| 908 | - (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] | ||
| 909 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 910 | + [(match_operand:QHSD 0 "s_register_operand") | ||
| 911 | + (match_operand:QHSD 1 "memory_operand") | ||
| 912 | + (match_operand:QHSD 2 "s_register_operand") | ||
| 913 | + (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] | ||
| 914 | + "<sync_predtab>" | ||
| 915 | { | ||
| 916 | struct arm_sync_generator generator; | ||
| 917 | generator.op = arm_sync_generator_omn; | ||
| 918 | @@ -289,22 +183,22 @@ | ||
| 919 | DONE; | ||
| 920 | }) | ||
| 921 | |||
| 922 | -(define_insn "arm_sync_compare_and_swapsi" | ||
| 923 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 924 | - (unspec_volatile:SI | ||
| 925 | - [(match_operand:SI 1 "arm_sync_memory_operand" "+Q") | ||
| 926 | - (match_operand:SI 2 "s_register_operand" "r") | ||
| 927 | - (match_operand:SI 3 "s_register_operand" "r")] | ||
| 928 | - VUNSPEC_SYNC_COMPARE_AND_SWAP)) | ||
| 929 | - (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)] | ||
| 930 | +(define_insn "arm_sync_compare_and_swap<mode>" | ||
| 931 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 932 | + (unspec_volatile:SIDI | ||
| 933 | + [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") | ||
| 934 | + (match_operand:SIDI 2 "s_register_operand" "r") | ||
| 935 | + (match_operand:SIDI 3 "s_register_operand" "r")] | ||
| 936 | + VUNSPEC_SYNC_COMPARE_AND_SWAP)) | ||
| 937 | + (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)] | ||
| 938 | VUNSPEC_SYNC_COMPARE_AND_SWAP)) | ||
| 939 | (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] | ||
| 940 | VUNSPEC_SYNC_COMPARE_AND_SWAP)) | ||
| 941 | ] | ||
| 942 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 943 | + "<sync_predtab>" | ||
| 944 | { | ||
| 945 | return arm_output_sync_insn (insn, operands); | ||
| 946 | - } | ||
| 947 | + } | ||
| 948 | [(set_attr "sync_result" "0") | ||
| 949 | (set_attr "sync_memory" "1") | ||
| 950 | (set_attr "sync_required_value" "2") | ||
| 951 | @@ -318,7 +212,7 @@ | ||
| 952 | (zero_extend:SI | ||
| 953 | (unspec_volatile:NARROW | ||
| 954 | [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q") | ||
| 955 | - (match_operand:SI 2 "s_register_operand" "r") | ||
| 956 | + (match_operand:SI 2 "s_register_operand" "r") | ||
| 957 | (match_operand:SI 3 "s_register_operand" "r")] | ||
| 958 | VUNSPEC_SYNC_COMPARE_AND_SWAP))) | ||
| 959 | (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)] | ||
| 960 | @@ -326,10 +220,10 @@ | ||
| 961 | (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] | ||
| 962 | VUNSPEC_SYNC_COMPARE_AND_SWAP)) | ||
| 963 | ] | ||
| 964 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 965 | + "<sync_predtab>" | ||
| 966 | { | ||
| 967 | return arm_output_sync_insn (insn, operands); | ||
| 968 | - } | ||
| 969 | + } | ||
| 970 | [(set_attr "sync_result" "0") | ||
| 971 | (set_attr "sync_memory" "1") | ||
| 972 | (set_attr "sync_required_value" "2") | ||
| 973 | @@ -338,18 +232,18 @@ | ||
| 974 | (set_attr "conds" "clob") | ||
| 975 | (set_attr "predicable" "no")]) | ||
| 976 | |||
| 977 | -(define_insn "arm_sync_lock_test_and_setsi" | ||
| 978 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 979 | - (match_operand:SI 1 "arm_sync_memory_operand" "+Q")) | ||
| 980 | +(define_insn "arm_sync_lock_test_and_set<mode>" | ||
| 981 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 982 | + (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")) | ||
| 983 | (set (match_dup 1) | ||
| 984 | - (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")] | ||
| 985 | - VUNSPEC_SYNC_LOCK)) | ||
| 986 | + (unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")] | ||
| 987 | + VUNSPEC_SYNC_LOCK)) | ||
| 988 | (clobber (reg:CC CC_REGNUM)) | ||
| 989 | (clobber (match_scratch:SI 3 "=&r"))] | ||
| 990 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 991 | + "<sync_predtab>" | ||
| 992 | { | ||
| 993 | return arm_output_sync_insn (insn, operands); | ||
| 994 | - } | ||
| 995 | + } | ||
| 996 | [(set_attr "sync_release_barrier" "no") | ||
| 997 | (set_attr "sync_result" "0") | ||
| 998 | (set_attr "sync_memory" "1") | ||
| 999 | @@ -364,10 +258,10 @@ | ||
| 1000 | (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))) | ||
| 1001 | (set (match_dup 1) | ||
| 1002 | (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")] | ||
| 1003 | - VUNSPEC_SYNC_LOCK)) | ||
| 1004 | + VUNSPEC_SYNC_LOCK)) | ||
| 1005 | (clobber (reg:CC CC_REGNUM)) | ||
| 1006 | (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1007 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1008 | + "<sync_predtab>" | ||
| 1009 | { | ||
| 1010 | return arm_output_sync_insn (insn, operands); | ||
| 1011 | } | ||
| 1012 | @@ -380,22 +274,48 @@ | ||
| 1013 | (set_attr "conds" "clob") | ||
| 1014 | (set_attr "predicable" "no")]) | ||
| 1015 | |||
| 1016 | -(define_insn "arm_sync_new_<sync_optab>si" | ||
| 1017 | +(define_insn "arm_sync_new_<sync_optab><mode>" | ||
| 1018 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 1019 | + (unspec_volatile:SIDI [(syncop:SIDI | ||
| 1020 | + (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") | ||
| 1021 | + (match_operand:SIDI 2 "s_register_operand" "r")) | ||
| 1022 | + ] | ||
| 1023 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1024 | + (set (match_dup 1) | ||
| 1025 | + (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] | ||
| 1026 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1027 | + (clobber (reg:CC CC_REGNUM)) | ||
| 1028 | + (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1029 | + "<sync_predtab>" | ||
| 1030 | + { | ||
| 1031 | + return arm_output_sync_insn (insn, operands); | ||
| 1032 | + } | ||
| 1033 | + [(set_attr "sync_result" "0") | ||
| 1034 | + (set_attr "sync_memory" "1") | ||
| 1035 | + (set_attr "sync_new_value" "2") | ||
| 1036 | + (set_attr "sync_t1" "0") | ||
| 1037 | + (set_attr "sync_t2" "3") | ||
| 1038 | + (set_attr "sync_op" "<sync_optab>") | ||
| 1039 | + (set_attr "conds" "clob") | ||
| 1040 | + (set_attr "predicable" "no")]) | ||
| 1041 | + | ||
| 1042 | +(define_insn "arm_sync_new_<sync_optab><mode>" | ||
| 1043 | [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1044 | (unspec_volatile:SI [(syncop:SI | ||
| 1045 | - (match_operand:SI 1 "arm_sync_memory_operand" "+Q") | ||
| 1046 | - (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1047 | - ] | ||
| 1048 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1049 | + (zero_extend:SI | ||
| 1050 | + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1051 | + (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1052 | + ] | ||
| 1053 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1054 | (set (match_dup 1) | ||
| 1055 | - (unspec_volatile:SI [(match_dup 1) (match_dup 2)] | ||
| 1056 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1057 | + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1058 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1059 | (clobber (reg:CC CC_REGNUM)) | ||
| 1060 | (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1061 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1062 | + "<sync_predtab>" | ||
| 1063 | { | ||
| 1064 | return arm_output_sync_insn (insn, operands); | ||
| 1065 | - } | ||
| 1066 | + } | ||
| 1067 | [(set_attr "sync_result" "0") | ||
| 1068 | (set_attr "sync_memory" "1") | ||
| 1069 | (set_attr "sync_new_value" "2") | ||
| 1070 | @@ -405,22 +325,22 @@ | ||
| 1071 | (set_attr "conds" "clob") | ||
| 1072 | (set_attr "predicable" "no")]) | ||
| 1073 | |||
| 1074 | -(define_insn "arm_sync_new_nandsi" | ||
| 1075 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1076 | - (unspec_volatile:SI [(not:SI (and:SI | ||
| 1077 | - (match_operand:SI 1 "arm_sync_memory_operand" "+Q") | ||
| 1078 | - (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1079 | - ] | ||
| 1080 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1081 | +(define_insn "arm_sync_new_nand<mode>" | ||
| 1082 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 1083 | + (unspec_volatile:SIDI [(not:SIDI (and:SIDI | ||
| 1084 | + (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") | ||
| 1085 | + (match_operand:SIDI 2 "s_register_operand" "r"))) | ||
| 1086 | + ] | ||
| 1087 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1088 | (set (match_dup 1) | ||
| 1089 | - (unspec_volatile:SI [(match_dup 1) (match_dup 2)] | ||
| 1090 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1091 | + (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] | ||
| 1092 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1093 | (clobber (reg:CC CC_REGNUM)) | ||
| 1094 | (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1095 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1096 | + "<sync_predtab>" | ||
| 1097 | { | ||
| 1098 | return arm_output_sync_insn (insn, operands); | ||
| 1099 | - } | ||
| 1100 | + } | ||
| 1101 | [(set_attr "sync_result" "0") | ||
| 1102 | (set_attr "sync_memory" "1") | ||
| 1103 | (set_attr "sync_new_value" "2") | ||
| 1104 | @@ -430,50 +350,24 @@ | ||
| 1105 | (set_attr "conds" "clob") | ||
| 1106 | (set_attr "predicable" "no")]) | ||
| 1107 | |||
| 1108 | -(define_insn "arm_sync_new_<sync_optab><mode>" | ||
| 1109 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1110 | - (unspec_volatile:SI [(syncop:SI | ||
| 1111 | - (zero_extend:SI | ||
| 1112 | - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1113 | - (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1114 | - ] | ||
| 1115 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1116 | - (set (match_dup 1) | ||
| 1117 | - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1118 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1119 | - (clobber (reg:CC CC_REGNUM)) | ||
| 1120 | - (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1121 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1122 | - { | ||
| 1123 | - return arm_output_sync_insn (insn, operands); | ||
| 1124 | - } | ||
| 1125 | - [(set_attr "sync_result" "0") | ||
| 1126 | - (set_attr "sync_memory" "1") | ||
| 1127 | - (set_attr "sync_new_value" "2") | ||
| 1128 | - (set_attr "sync_t1" "0") | ||
| 1129 | - (set_attr "sync_t2" "3") | ||
| 1130 | - (set_attr "sync_op" "<sync_optab>") | ||
| 1131 | - (set_attr "conds" "clob") | ||
| 1132 | - (set_attr "predicable" "no")]) | ||
| 1133 | - | ||
| 1134 | (define_insn "arm_sync_new_nand<mode>" | ||
| 1135 | [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1136 | (unspec_volatile:SI | ||
| 1137 | [(not:SI | ||
| 1138 | (and:SI | ||
| 1139 | - (zero_extend:SI | ||
| 1140 | - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1141 | - (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1142 | + (zero_extend:SI | ||
| 1143 | + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1144 | + (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1145 | ] VUNSPEC_SYNC_NEW_OP)) | ||
| 1146 | (set (match_dup 1) | ||
| 1147 | (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1148 | - VUNSPEC_SYNC_NEW_OP)) | ||
| 1149 | + VUNSPEC_SYNC_NEW_OP)) | ||
| 1150 | (clobber (reg:CC CC_REGNUM)) | ||
| 1151 | (clobber (match_scratch:SI 3 "=&r"))] | ||
| 1152 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1153 | + "<sync_predtab>" | ||
| 1154 | { | ||
| 1155 | return arm_output_sync_insn (insn, operands); | ||
| 1156 | - } | ||
| 1157 | + } | ||
| 1158 | [(set_attr "sync_result" "0") | ||
| 1159 | (set_attr "sync_memory" "1") | ||
| 1160 | (set_attr "sync_new_value" "2") | ||
| 1161 | @@ -483,20 +377,20 @@ | ||
| 1162 | (set_attr "conds" "clob") | ||
| 1163 | (set_attr "predicable" "no")]) | ||
| 1164 | |||
| 1165 | -(define_insn "arm_sync_old_<sync_optab>si" | ||
| 1166 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1167 | - (unspec_volatile:SI [(syncop:SI | ||
| 1168 | - (match_operand:SI 1 "arm_sync_memory_operand" "+Q") | ||
| 1169 | - (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1170 | - ] | ||
| 1171 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1172 | +(define_insn "arm_sync_old_<sync_optab><mode>" | ||
| 1173 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 1174 | + (unspec_volatile:SIDI [(syncop:SIDI | ||
| 1175 | + (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") | ||
| 1176 | + (match_operand:SIDI 2 "s_register_operand" "r")) | ||
| 1177 | + ] | ||
| 1178 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1179 | (set (match_dup 1) | ||
| 1180 | - (unspec_volatile:SI [(match_dup 1) (match_dup 2)] | ||
| 1181 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1182 | + (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] | ||
| 1183 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1184 | (clobber (reg:CC CC_REGNUM)) | ||
| 1185 | - (clobber (match_scratch:SI 3 "=&r")) | ||
| 1186 | + (clobber (match_scratch:SIDI 3 "=&r")) | ||
| 1187 | (clobber (match_scratch:SI 4 "<sync_clobber>"))] | ||
| 1188 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1189 | + "<sync_predtab>" | ||
| 1190 | { | ||
| 1191 | return arm_output_sync_insn (insn, operands); | ||
| 1192 | } | ||
| 1193 | @@ -509,47 +403,21 @@ | ||
| 1194 | (set_attr "conds" "clob") | ||
| 1195 | (set_attr "predicable" "no")]) | ||
| 1196 | |||
| 1197 | -(define_insn "arm_sync_old_nandsi" | ||
| 1198 | - [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1199 | - (unspec_volatile:SI [(not:SI (and:SI | ||
| 1200 | - (match_operand:SI 1 "arm_sync_memory_operand" "+Q") | ||
| 1201 | - (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1202 | - ] | ||
| 1203 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1204 | - (set (match_dup 1) | ||
| 1205 | - (unspec_volatile:SI [(match_dup 1) (match_dup 2)] | ||
| 1206 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1207 | - (clobber (reg:CC CC_REGNUM)) | ||
| 1208 | - (clobber (match_scratch:SI 3 "=&r")) | ||
| 1209 | - (clobber (match_scratch:SI 4 "=&r"))] | ||
| 1210 | - "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1211 | - { | ||
| 1212 | - return arm_output_sync_insn (insn, operands); | ||
| 1213 | - } | ||
| 1214 | - [(set_attr "sync_result" "0") | ||
| 1215 | - (set_attr "sync_memory" "1") | ||
| 1216 | - (set_attr "sync_new_value" "2") | ||
| 1217 | - (set_attr "sync_t1" "3") | ||
| 1218 | - (set_attr "sync_t2" "4") | ||
| 1219 | - (set_attr "sync_op" "nand") | ||
| 1220 | - (set_attr "conds" "clob") | ||
| 1221 | - (set_attr "predicable" "no")]) | ||
| 1222 | - | ||
| 1223 | (define_insn "arm_sync_old_<sync_optab><mode>" | ||
| 1224 | [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1225 | (unspec_volatile:SI [(syncop:SI | ||
| 1226 | - (zero_extend:SI | ||
| 1227 | - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1228 | - (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1229 | - ] | ||
| 1230 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1231 | + (zero_extend:SI | ||
| 1232 | + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1233 | + (match_operand:SI 2 "s_register_operand" "r")) | ||
| 1234 | + ] | ||
| 1235 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1236 | (set (match_dup 1) | ||
| 1237 | - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1238 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1239 | + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1240 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1241 | (clobber (reg:CC CC_REGNUM)) | ||
| 1242 | (clobber (match_scratch:SI 3 "=&r")) | ||
| 1243 | (clobber (match_scratch:SI 4 "<sync_clobber>"))] | ||
| 1244 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1245 | + "<sync_predtab>" | ||
| 1246 | { | ||
| 1247 | return arm_output_sync_insn (insn, operands); | ||
| 1248 | } | ||
| 1249 | @@ -563,20 +431,46 @@ | ||
| 1250 | (set_attr "predicable" "no")]) | ||
| 1251 | |||
| 1252 | (define_insn "arm_sync_old_nand<mode>" | ||
| 1253 | + [(set (match_operand:SIDI 0 "s_register_operand" "=&r") | ||
| 1254 | + (unspec_volatile:SIDI [(not:SIDI (and:SIDI | ||
| 1255 | + (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") | ||
| 1256 | + (match_operand:SIDI 2 "s_register_operand" "r"))) | ||
| 1257 | + ] | ||
| 1258 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1259 | + (set (match_dup 1) | ||
| 1260 | + (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] | ||
| 1261 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1262 | + (clobber (reg:CC CC_REGNUM)) | ||
| 1263 | + (clobber (match_scratch:SIDI 3 "=&r")) | ||
| 1264 | + (clobber (match_scratch:SI 4 "=&r"))] | ||
| 1265 | + "<sync_predtab>" | ||
| 1266 | + { | ||
| 1267 | + return arm_output_sync_insn (insn, operands); | ||
| 1268 | + } | ||
| 1269 | + [(set_attr "sync_result" "0") | ||
| 1270 | + (set_attr "sync_memory" "1") | ||
| 1271 | + (set_attr "sync_new_value" "2") | ||
| 1272 | + (set_attr "sync_t1" "3") | ||
| 1273 | + (set_attr "sync_t2" "4") | ||
| 1274 | + (set_attr "sync_op" "nand") | ||
| 1275 | + (set_attr "conds" "clob") | ||
| 1276 | + (set_attr "predicable" "no")]) | ||
| 1277 | + | ||
| 1278 | +(define_insn "arm_sync_old_nand<mode>" | ||
| 1279 | [(set (match_operand:SI 0 "s_register_operand" "=&r") | ||
| 1280 | - (unspec_volatile:SI [(not:SI (and:SI | ||
| 1281 | - (zero_extend:SI | ||
| 1282 | - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1283 | - (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1284 | - ] | ||
| 1285 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1286 | + (unspec_volatile:SI [(not:SI (and:SI | ||
| 1287 | + (zero_extend:SI | ||
| 1288 | + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) | ||
| 1289 | + (match_operand:SI 2 "s_register_operand" "r"))) | ||
| 1290 | + ] | ||
| 1291 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1292 | (set (match_dup 1) | ||
| 1293 | - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1294 | - VUNSPEC_SYNC_OLD_OP)) | ||
| 1295 | + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] | ||
| 1296 | + VUNSPEC_SYNC_OLD_OP)) | ||
| 1297 | (clobber (reg:CC CC_REGNUM)) | ||
| 1298 | (clobber (match_scratch:SI 3 "=&r")) | ||
| 1299 | (clobber (match_scratch:SI 4 "=&r"))] | ||
| 1300 | - "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" | ||
| 1301 | + "<sync_predtab>" | ||
| 1302 | { | ||
| 1303 | return arm_output_sync_insn (insn, operands); | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | === modified file 'gcc/config/arm/t-linux-eabi' | ||
| 1307 | --- old/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22 +0000 | ||
| 1308 | +++ new/gcc/config/arm/t-linux-eabi 2011-10-14 15:50:44 +0000 | ||
| 1309 | @@ -36,3 +36,4 @@ | ||
| 1310 | EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o | ||
| 1311 | |||
| 1312 | LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c | ||
| 1313 | +LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c | ||
| 1314 | |||
| 1315 | === added file 'gcc/testsuite/gcc.dg/di-longlong64-sync-1.c' | ||
| 1316 | --- old/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 1970-01-01 00:00:00 +0000 | ||
| 1317 | +++ new/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c 2011-10-14 15:56:32 +0000 | ||
| 1318 | @@ -0,0 +1,164 @@ | ||
| 1319 | +/* { dg-do run } */ | ||
| 1320 | +/* { dg-require-effective-target sync_longlong } */ | ||
| 1321 | +/* { dg-options "-std=gnu99" } */ | ||
| 1322 | +/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1323 | +/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1324 | + | ||
| 1325 | + | ||
| 1326 | +/* Test basic functionality of the intrinsics. The operations should | ||
| 1327 | + not be optimized away if no one checks the return values. */ | ||
| 1328 | + | ||
| 1329 | +/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long | ||
| 1330 | + (an explicit 64bit type may be a better bet) and 2) Use values that cross | ||
| 1331 | + the 32bit boundary and cause carries since the actual maths are done as | ||
| 1332 | + pairs of 32 bit instructions. */ | ||
| 1333 | + | ||
| 1334 | +/* Note: This file is #included by some of the ARM tests. */ | ||
| 1335 | + | ||
| 1336 | +__extension__ typedef __SIZE_TYPE__ size_t; | ||
| 1337 | + | ||
| 1338 | +extern void abort (void); | ||
| 1339 | +extern void *memcpy (void *, const void *, size_t); | ||
| 1340 | +extern int memcmp (const void *, const void *, size_t); | ||
| 1341 | + | ||
| 1342 | +/* Temporary space where the work actually gets done. */ | ||
| 1343 | +static long long AL[24]; | ||
| 1344 | +/* Values copied into AL before we start. */ | ||
| 1345 | +static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1, | ||
| 1346 | + | ||
| 1347 | + 0x100000002ll, 0x100000002ll, | ||
| 1348 | + 0x100000002ll, 0x100000002ll, | ||
| 1349 | + | ||
| 1350 | + 0, 0x1000e0de0000ll, | ||
| 1351 | + 42 , 0xc001c0de0000ll, | ||
| 1352 | + | ||
| 1353 | + -1ll, 0, 0xff00ff0000ll, -1ll, | ||
| 1354 | + | ||
| 1355 | + 0, 0x1000e0de0000ll, | ||
| 1356 | + 42 , 0xc001c0de0000ll, | ||
| 1357 | + | ||
| 1358 | + -1ll, 0, 0xff00ff0000ll, -1ll}; | ||
| 1359 | +/* This is what should be in AL at the end. */ | ||
| 1360 | +static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0, | ||
| 1361 | + | ||
| 1362 | + 0x100000002ll, 0x100000002ll, | ||
| 1363 | + 0x100000002ll, 0x100000002ll, | ||
| 1364 | + | ||
| 1365 | + 1, 0xc001c0de0000ll, | ||
| 1366 | + 20, 0x1000e0de0000ll, | ||
| 1367 | + | ||
| 1368 | + 0x300000007ll , 0x500000009ll, | ||
| 1369 | + 0xf100ff0001ll, ~0xa00000007ll, | ||
| 1370 | + | ||
| 1371 | + 1, 0xc001c0de0000ll, | ||
| 1372 | + 20, 0x1000e0de0000ll, | ||
| 1373 | + | ||
| 1374 | + 0x300000007ll , 0x500000009ll, | ||
| 1375 | + 0xf100ff0001ll, ~0xa00000007ll }; | ||
| 1376 | + | ||
| 1377 | +/* First check they work in terms of what they do to memory. */ | ||
| 1378 | +static void | ||
| 1379 | +do_noret_di (void) | ||
| 1380 | +{ | ||
| 1381 | + __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll); | ||
| 1382 | + __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll); | ||
| 1383 | + __sync_lock_test_and_set (AL+2, 1); | ||
| 1384 | + __sync_lock_release (AL+3); | ||
| 1385 | + | ||
| 1386 | + /* The following tests should not change the value since the | ||
| 1387 | + original does NOT match. */ | ||
| 1388 | + __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll); | ||
| 1389 | + __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll); | ||
| 1390 | + __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll); | ||
| 1391 | + __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll); | ||
| 1392 | + | ||
| 1393 | + __sync_fetch_and_add (AL+8, 1); | ||
| 1394 | + __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */ | ||
| 1395 | + __sync_fetch_and_sub (AL+10, 22); | ||
| 1396 | + __sync_fetch_and_sub (AL+11, 0xb000e0000000ll); | ||
| 1397 | + | ||
| 1398 | + __sync_fetch_and_and (AL+12, 0x300000007ll); | ||
| 1399 | + __sync_fetch_and_or (AL+13, 0x500000009ll); | ||
| 1400 | + __sync_fetch_and_xor (AL+14, 0xe00000001ll); | ||
| 1401 | + __sync_fetch_and_nand (AL+15, 0xa00000007ll); | ||
| 1402 | + | ||
| 1403 | + /* These should be the same as the fetch_and_* cases except for | ||
| 1404 | + return value. */ | ||
| 1405 | + __sync_add_and_fetch (AL+16, 1); | ||
| 1406 | + /* add to both halves & carry. */ | ||
| 1407 | + __sync_add_and_fetch (AL+17, 0xb000e0000000ll); | ||
| 1408 | + __sync_sub_and_fetch (AL+18, 22); | ||
| 1409 | + __sync_sub_and_fetch (AL+19, 0xb000e0000000ll); | ||
| 1410 | + | ||
| 1411 | + __sync_and_and_fetch (AL+20, 0x300000007ll); | ||
| 1412 | + __sync_or_and_fetch (AL+21, 0x500000009ll); | ||
| 1413 | + __sync_xor_and_fetch (AL+22, 0xe00000001ll); | ||
| 1414 | + __sync_nand_and_fetch (AL+23, 0xa00000007ll); | ||
| 1415 | +} | ||
| 1416 | + | ||
| 1417 | +/* Now check return values. */ | ||
| 1418 | +static void | ||
| 1419 | +do_ret_di (void) | ||
| 1420 | +{ | ||
| 1421 | + if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) != | ||
| 1422 | + 0x100000002ll) abort (); | ||
| 1423 | + if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) != | ||
| 1424 | + 1) abort (); | ||
| 1425 | + if (__sync_lock_test_and_set (AL+2, 1) != 0) abort (); | ||
| 1426 | + __sync_lock_release (AL+3); /* no return value, but keep to match results. */ | ||
| 1427 | + | ||
| 1428 | + /* The following tests should not change the value since the | ||
| 1429 | + original does NOT match. */ | ||
| 1430 | + if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) != | ||
| 1431 | + 0x100000002ll) abort (); | ||
| 1432 | + if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) != | ||
| 1433 | + 0x100000002ll) abort (); | ||
| 1434 | + if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) != | ||
| 1435 | + 0) abort (); | ||
| 1436 | + if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) != | ||
| 1437 | + 0) abort (); | ||
| 1438 | + | ||
| 1439 | + if (__sync_fetch_and_add (AL+8, 1) != 0) abort (); | ||
| 1440 | + if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort (); | ||
| 1441 | + if (__sync_fetch_and_sub (AL+10, 22) != 42) abort (); | ||
| 1442 | + if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll) | ||
| 1443 | + abort (); | ||
| 1444 | + | ||
| 1445 | + if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort (); | ||
| 1446 | + if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort (); | ||
| 1447 | + if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort (); | ||
| 1448 | + if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort (); | ||
| 1449 | + | ||
| 1450 | + /* These should be the same as the fetch_and_* cases except for | ||
| 1451 | + return value. */ | ||
| 1452 | + if (__sync_add_and_fetch (AL+16, 1) != 1) abort (); | ||
| 1453 | + if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll) | ||
| 1454 | + abort (); | ||
| 1455 | + if (__sync_sub_and_fetch (AL+18, 22) != 20) abort (); | ||
| 1456 | + if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll) | ||
| 1457 | + abort (); | ||
| 1458 | + | ||
| 1459 | + if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort (); | ||
| 1460 | + if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort (); | ||
| 1461 | + if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort (); | ||
| 1462 | + if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort (); | ||
| 1463 | +} | ||
| 1464 | + | ||
| 1465 | +int main () | ||
| 1466 | +{ | ||
| 1467 | + memcpy (AL, init_di, sizeof (init_di)); | ||
| 1468 | + | ||
| 1469 | + do_noret_di (); | ||
| 1470 | + | ||
| 1471 | + if (memcmp (AL, test_di, sizeof (test_di))) | ||
| 1472 | + abort (); | ||
| 1473 | + | ||
| 1474 | + memcpy (AL, init_di, sizeof (init_di)); | ||
| 1475 | + | ||
| 1476 | + do_ret_di (); | ||
| 1477 | + | ||
| 1478 | + if (memcmp (AL, test_di, sizeof (test_di))) | ||
| 1479 | + abort (); | ||
| 1480 | + | ||
| 1481 | + return 0; | ||
| 1482 | +} | ||
| 1483 | |||
| 1484 | === added file 'gcc/testsuite/gcc.dg/di-sync-multithread.c' | ||
| 1485 | --- old/gcc/testsuite/gcc.dg/di-sync-multithread.c 1970-01-01 00:00:00 +0000 | ||
| 1486 | +++ new/gcc/testsuite/gcc.dg/di-sync-multithread.c 2011-10-14 15:56:32 +0000 | ||
| 1487 | @@ -0,0 +1,205 @@ | ||
| 1488 | +/* { dg-do run } */ | ||
| 1489 | +/* { dg-require-effective-target sync_longlong } */ | ||
| 1490 | +/* { dg-require-effective-target pthread_h } */ | ||
| 1491 | +/* { dg-require-effective-target pthread } */ | ||
| 1492 | +/* { dg-options "-pthread -std=gnu99" } */ | ||
| 1493 | + | ||
| 1494 | +/* test of long long atomic ops performed in parallel in 3 pthreads | ||
| 1495 | + david.gilbert@linaro.org */ | ||
| 1496 | + | ||
| 1497 | +#include <pthread.h> | ||
| 1498 | +#include <unistd.h> | ||
| 1499 | + | ||
| 1500 | +/*#define DEBUGIT 1 */ | ||
| 1501 | + | ||
| 1502 | +#ifdef DEBUGIT | ||
| 1503 | +#include <stdio.h> | ||
| 1504 | + | ||
| 1505 | +#define DOABORT(x,...) {\ | ||
| 1506 | + fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\ | ||
| 1507 | + } | ||
| 1508 | + | ||
| 1509 | +#else | ||
| 1510 | + | ||
| 1511 | +#define DOABORT(x,...) abort (); | ||
| 1512 | + | ||
| 1513 | +#endif | ||
| 1514 | + | ||
| 1515 | +/* Passed to each thread to describe which bits it is going to work on. */ | ||
| 1516 | +struct threadwork { | ||
| 1517 | + unsigned long long count; /* incremented each time the worker loops. */ | ||
| 1518 | + unsigned int thread; /* ID */ | ||
| 1519 | + unsigned int addlsb; /* 8 bit */ | ||
| 1520 | + unsigned int logic1lsb; /* 5 bit */ | ||
| 1521 | + unsigned int logic2lsb; /* 8 bit */ | ||
| 1522 | +}; | ||
| 1523 | + | ||
| 1524 | +/* The shared word where all the atomic work is done. */ | ||
| 1525 | +static volatile long long workspace; | ||
| 1526 | + | ||
| 1527 | +/* A shared word to tell the workers to quit when non-0. */ | ||
| 1528 | +static long long doquit; | ||
| 1529 | + | ||
| 1530 | +extern void abort (void); | ||
| 1531 | + | ||
| 1532 | +/* Note this test doesn't test the return values much. */ | ||
| 1533 | +void* | ||
| 1534 | +worker (void* data) | ||
| 1535 | +{ | ||
| 1536 | + struct threadwork *tw = (struct threadwork*)data; | ||
| 1537 | + long long add1bit = 1ll << tw->addlsb; | ||
| 1538 | + long long logic1bit = 1ll << tw->logic1lsb; | ||
| 1539 | + long long logic2bit = 1ll << tw->logic2lsb; | ||
| 1540 | + | ||
| 1541 | + /* Clear the bits we use. */ | ||
| 1542 | + __sync_and_and_fetch (&workspace, ~(0xffll * add1bit)); | ||
| 1543 | + __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit)); | ||
| 1544 | + __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit)); | ||
| 1545 | + | ||
| 1546 | + do | ||
| 1547 | + { | ||
| 1548 | + long long tmp1, tmp2, tmp3; | ||
| 1549 | + /* OK, let's try and do some stuff to the workspace - by the end | ||
| 1550 | + of the main loop our area should be the same as it is now - i.e. 0. */ | ||
| 1551 | + | ||
| 1552 | + /* Push the arithmetic section up to 128 - one of the threads will | ||
| 1553 | + cause this to carry across the 32bit boundary. */ | ||
| 1554 | + for (tmp2 = 0; tmp2 < 64; tmp2++) | ||
| 1555 | + { | ||
| 1556 | + /* Add 2 using the two different adds. */ | ||
| 1557 | + tmp1 = __sync_add_and_fetch (&workspace, add1bit); | ||
| 1558 | + tmp3 = __sync_fetch_and_add (&workspace, add1bit); | ||
| 1559 | + | ||
| 1560 | + /* The value should be the intermediate add value in both cases. */ | ||
| 1561 | + if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) | ||
| 1562 | + DOABORT ("Mismatch of add intermediates on thread %d " | ||
| 1563 | + "workspace=0x%llx tmp1=0x%llx " | ||
| 1564 | + "tmp2=0x%llx tmp3=0x%llx\n", | ||
| 1565 | + tw->thread, workspace, tmp1, tmp2, tmp3); | ||
| 1566 | + } | ||
| 1567 | + | ||
| 1568 | + /* Set the logic bits. */ | ||
| 1569 | + tmp2=__sync_or_and_fetch (&workspace, | ||
| 1570 | + 0x1fll * logic1bit | 0xffll * logic2bit); | ||
| 1571 | + | ||
| 1572 | + /* Check the logic bits are set and the arithmetic value is correct. */ | ||
| 1573 | + if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit | ||
| 1574 | + | 0xffll * add1bit)) | ||
| 1575 | + != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)) | ||
| 1576 | + DOABORT ("Midloop check failed on thread %d " | ||
| 1577 | + "workspace=0x%llx tmp2=0x%llx " | ||
| 1578 | + "masktmp2=0x%llx expected=0x%llx\n", | ||
| 1579 | + tw->thread, workspace, tmp2, | ||
| 1580 | + tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit | | ||
| 1581 | + 0xffll * add1bit), | ||
| 1582 | + (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)); | ||
| 1583 | + | ||
| 1584 | + /* Pull the arithmetic set back down to 0 - again this should cause a | ||
| 1585 | + carry across the 32bit boundary in one thread. */ | ||
| 1586 | + | ||
| 1587 | + for (tmp2 = 0; tmp2 < 64; tmp2++) | ||
| 1588 | + { | ||
| 1589 | + /* Subtract 2 using the two different subs. */ | ||
| 1590 | + tmp1=__sync_sub_and_fetch (&workspace, add1bit); | ||
| 1591 | + tmp3=__sync_fetch_and_sub (&workspace, add1bit); | ||
| 1592 | + | ||
| 1593 | + /* The value should be the intermediate sub value in both cases. */ | ||
| 1594 | + if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) | ||
| 1595 | + DOABORT ("Mismatch of sub intermediates on thread %d " | ||
| 1596 | + "workspace=0x%llx tmp1=0x%llx " | ||
| 1597 | + "tmp2=0x%llx tmp3=0x%llx\n", | ||
| 1598 | + tw->thread, workspace, tmp1, tmp2, tmp3); | ||
| 1599 | + } | ||
| 1600 | + | ||
| 1601 | + | ||
| 1602 | + /* Clear the logic bits. */ | ||
| 1603 | + __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit); | ||
| 1604 | + tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit)); | ||
| 1605 | + | ||
| 1606 | + /* The logic bits and the arithmetic bits should be zero again. */ | ||
| 1607 | + if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)) | ||
| 1608 | + DOABORT ("End of worker loop; bits none 0 on thread %d " | ||
| 1609 | + "workspace=0x%llx tmp3=0x%llx " | ||
| 1610 | + "mask=0x%llx maskedtmp3=0x%llx\n", | ||
| 1611 | + tw->thread, workspace, tmp3, (0x1fll * logic1bit | | ||
| 1612 | + 0xffll * logic2bit | 0xffll * add1bit), | ||
| 1613 | + tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit)); | ||
| 1614 | + | ||
| 1615 | + __sync_add_and_fetch (&tw->count, 1); | ||
| 1616 | + } | ||
| 1617 | + while (!__sync_bool_compare_and_swap (&doquit, 1, 1)); | ||
| 1618 | + | ||
| 1619 | + pthread_exit (0); | ||
| 1620 | +} | ||
| 1621 | + | ||
| 1622 | +int | ||
| 1623 | +main () | ||
| 1624 | +{ | ||
| 1625 | + /* We have 3 threads doing three sets of operations, an 8 bit | ||
| 1626 | + arithmetic field, a 5 bit logic field and an 8 bit logic | ||
| 1627 | + field (just to pack them all in). | ||
| 1628 | + | ||
| 1629 | + 6 5 4 4 3 2 1 | ||
| 1630 | + 3 6 8 0 2 4 6 8 0 | ||
| 1631 | + |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,... | ||
| 1632 | + - T0 -- T1 -- T2 --T2 -- T0 -*- T2-- T1-- T1 -***- T0- | ||
| 1633 | + logic2 logic2 arith log2 arith log1 log1 arith log1 | ||
| 1634 | + | ||
| 1635 | + */ | ||
| 1636 | + unsigned int t; | ||
| 1637 | + long long tmp; | ||
| 1638 | + int err; | ||
| 1639 | + | ||
| 1640 | + struct threadwork tw[3]={ | ||
| 1641 | + { 0ll, 0, 27, 0, 56 }, | ||
| 1642 | + { 0ll, 1, 8,16, 48 }, | ||
| 1643 | + { 0ll, 2, 40,21, 35 } | ||
| 1644 | + }; | ||
| 1645 | + | ||
| 1646 | + pthread_t threads[3]; | ||
| 1647 | + | ||
| 1648 | + __sync_lock_release (&doquit); | ||
| 1649 | + | ||
| 1650 | + /* Get the work space into a known value - All 1's. */ | ||
| 1651 | + __sync_lock_release (&workspace); /* Now all 0. */ | ||
| 1652 | + tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll); | ||
| 1653 | + if (tmp!=0) | ||
| 1654 | + DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx " | ||
| 1655 | + "tmp=0x%llx\n", workspace,tmp); | ||
| 1656 | + | ||
| 1657 | + for (t = 0; t < 3; t++) | ||
| 1658 | + { | ||
| 1659 | + err=pthread_create (&threads[t], NULL , worker, &tw[t]); | ||
| 1660 | + if (err) DOABORT ("pthread_create failed on thread %d with error %d\n", | ||
| 1661 | + t, err); | ||
| 1662 | + }; | ||
| 1663 | + | ||
| 1664 | + sleep (5); | ||
| 1665 | + | ||
| 1666 | + /* Stop please. */ | ||
| 1667 | + __sync_lock_test_and_set (&doquit, 1ll); | ||
| 1668 | + | ||
| 1669 | + for (t = 0; t < 3; t++) | ||
| 1670 | + { | ||
| 1671 | + err=pthread_join (threads[t], NULL); | ||
| 1672 | + if (err) | ||
| 1673 | + DOABORT ("pthread_join failed on thread %d with error %d\n", t, err); | ||
| 1674 | + }; | ||
| 1675 | + | ||
| 1676 | + __sync_synchronize (); | ||
| 1677 | + | ||
| 1678 | + /* OK, so all the workers have finished - | ||
| 1679 | + the workers should have zero'd their workspace, the unused areas | ||
| 1680 | + should still be 1. */ | ||
| 1681 | + if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0)) | ||
| 1682 | + DOABORT ("End of run workspace mismatch, got %llx\n", workspace); | ||
| 1683 | + | ||
| 1684 | + /* All the workers should have done some work. */ | ||
| 1685 | + for (t = 0; t < 3; t++) | ||
| 1686 | + { | ||
| 1687 | + if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t); | ||
| 1688 | + }; | ||
| 1689 | + | ||
| 1690 | + return 0; | ||
| 1691 | +} | ||
| 1692 | + | ||
| 1693 | |||
| 1694 | === added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c' | ||
| 1695 | --- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 1970-01-01 00:00:00 +0000 | ||
| 1696 | +++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c 2011-10-14 15:56:32 +0000 | ||
| 1697 | @@ -0,0 +1,14 @@ | ||
| 1698 | +/* { dg-do compile } */ | ||
| 1699 | +/* { dg-require-effective-target arm_arch_v5_ok } */ | ||
| 1700 | +/* { dg-options "-std=gnu99" } */ | ||
| 1701 | +/* { dg-add-options arm_arch_v5 } */ | ||
| 1702 | +/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1703 | +/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1704 | +/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ | ||
| 1705 | + | ||
| 1706 | +#include "../../gcc.dg/di-longlong64-sync-1.c" | ||
| 1707 | + | ||
| 1708 | +/* On an old ARM we have no ldrexd or strexd so we have to use helpers. */ | ||
| 1709 | +/* { dg-final { scan-assembler-not "ldrexd" } } */ | ||
| 1710 | +/* { dg-final { scan-assembler-not "strexd" } } */ | ||
| 1711 | +/* { dg-final { scan-assembler "__sync_" } } */ | ||
| 1712 | |||
| 1713 | === added file 'gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c' | ||
| 1714 | --- old/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 1970-01-01 00:00:00 +0000 | ||
| 1715 | +++ new/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c 2011-10-14 15:56:32 +0000 | ||
| 1716 | @@ -0,0 +1,17 @@ | ||
| 1717 | +/* { dg-do compile } */ | ||
| 1718 | +/* { dg-require-effective-target arm_arm_ok } */ | ||
| 1719 | +/* { dg-options "-marm -std=gnu99" } */ | ||
| 1720 | +/* { dg-require-effective-target arm_arch_v6k_ok } */ | ||
| 1721 | +/* { dg-add-options arm_arch_v6k } */ | ||
| 1722 | +/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1723 | +/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ | ||
| 1724 | +/* { dg-message "file included" "In file included" { target *-*-* } 0 } */ | ||
| 1725 | + | ||
| 1726 | +#include "../../gcc.dg/di-longlong64-sync-1.c" | ||
| 1727 | + | ||
| 1728 | +/* We should be using ldrexd, strexd and no helpers or shorter ldrex. */ | ||
| 1729 | +/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ | ||
| 1730 | +/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ | ||
| 1731 | +/* { dg-final { scan-assembler-not "__sync_" } } */ | ||
| 1732 | +/* { dg-final { scan-assembler-not "ldrex\t" } } */ | ||
| 1733 | +/* { dg-final { scan-assembler-not "strex\t" } } */ | ||
| 1734 | |||
| 1735 | === modified file 'gcc/testsuite/lib/target-supports.exp' | ||
| 1736 | --- old/gcc/testsuite/lib/target-supports.exp 2011-11-22 17:10:17 +0000 | ||
| 1737 | +++ new/gcc/testsuite/lib/target-supports.exp 2011-11-28 15:07:01 +0000 | ||
| 1738 | @@ -2000,6 +2000,47 @@ | ||
| 1739 | check_effective_target_arm_fp16_ok_nocache] | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | +# Creates a series of routines that return 1 if the given architecture | ||
| 1743 | +# can be selected and a routine to give the flags to select that architecture | ||
| 1744 | +# Note: Extra flags may be added to disable options from newer compilers | ||
| 1745 | +# (Thumb in particular - but others may be added in the future) | ||
| 1746 | +# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ | ||
| 1747 | +# /* { dg-add-options arm_arch_v5 } */ | ||
| 1748 | +foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__ | ||
| 1749 | + v6 "-march=armv6" __ARM_ARCH_6__ | ||
| 1750 | + v6k "-march=armv6k" __ARM_ARCH_6K__ | ||
| 1751 | + v7a "-march=armv7-a" __ARM_ARCH_7A__ } { | ||
| 1752 | + eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { | ||
| 1753 | + proc check_effective_target_arm_arch_FUNC_ok { } { | ||
| 1754 | + if { [ string match "*-marm*" "FLAG" ] && | ||
| 1755 | + ![check_effective_target_arm_arm_ok] } { | ||
| 1756 | + return 0 | ||
| 1757 | + } | ||
| 1758 | + return [check_no_compiler_messages arm_arch_FUNC_ok assembly { | ||
| 1759 | + #if !defined (DEF) | ||
| 1760 | + #error FOO | ||
| 1761 | + #endif | ||
| 1762 | + } "FLAG" ] | ||
| 1763 | + } | ||
| 1764 | + | ||
| 1765 | + proc add_options_for_arm_arch_FUNC { flags } { | ||
| 1766 | + return "$flags FLAG" | ||
| 1767 | + } | ||
| 1768 | + }] | ||
| 1769 | +} | ||
| 1770 | + | ||
| 1771 | +# Return 1 if this is an ARM target where -marm causes ARM to be | ||
| 1772 | +# used (not Thumb) | ||
| 1773 | + | ||
| 1774 | +proc check_effective_target_arm_arm_ok { } { | ||
| 1775 | + return [check_no_compiler_messages arm_arm_ok assembly { | ||
| 1776 | + #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__) | ||
| 1777 | + #error FOO | ||
| 1778 | + #endif | ||
| 1779 | + } "-marm"] | ||
| 1780 | +} | ||
| 1781 | + | ||
| 1782 | + | ||
| 1783 | # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be | ||
| 1784 | # used. | ||
| 1785 | |||
| 1786 | @@ -3384,6 +3425,31 @@ | ||
| 1787 | return $et_sync_int_long_saved | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | +# Return 1 if the target supports atomic operations on "long long" and can | ||
| 1791 | +# execute them | ||
| 1792 | +# So far only put checks in for ARM, others may want to add their own | ||
| 1793 | +proc check_effective_target_sync_longlong { } { | ||
| 1794 | + return [check_runtime sync_longlong_runtime { | ||
| 1795 | + #include <stdlib.h> | ||
| 1796 | + int main () | ||
| 1797 | + { | ||
| 1798 | + long long l1; | ||
| 1799 | + | ||
| 1800 | + if (sizeof (long long) != 8) | ||
| 1801 | + exit (1); | ||
| 1802 | + | ||
| 1803 | + #ifdef __arm__ | ||
| 1804 | + /* Just check for native; checking for kernel fallback is tricky. */ | ||
| 1805 | + asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1"); | ||
| 1806 | + #else | ||
| 1807 | + # error "Add other suitable archs here" | ||
| 1808 | + #endif | ||
| 1809 | + | ||
| 1810 | + exit (0); | ||
| 1811 | + } | ||
| 1812 | + } "" ] | ||
| 1813 | +} | ||
| 1814 | + | ||
| 1815 | # Return 1 if the target supports atomic operations on "char" and "short". | ||
| 1816 | |||
| 1817 | proc check_effective_target_sync_char_short { } { | ||
| 1818 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch new file mode 100644 index 0000000000..88d74c72f3 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106846.patch | |||
| @@ -0,0 +1,487 @@ | |||
| 1 | 2011-11-27 Ira Rosen <ira.rosen@linaro.org> | ||
| 2 | |||
| 3 | Needs to be merged upstream: | ||
| 4 | |||
| 5 | gcc/ | ||
| 6 | * tree-vect-patterns.c (widened_name_p): Rename to ... | ||
| 7 | (type_conversion_p): ... this. Add new argument to determine | ||
| 8 | if it's a promotion or demotion operation. Check for | ||
| 9 | CONVERT_EXPR_CODE_P instead of NOP_EXPR. | ||
| 10 | (vect_recog_dot_prod_pattern): Call type_conversion_p instead | ||
| 11 | widened_name_p. | ||
| 12 | (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, | ||
| 13 | vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern): | ||
| 14 | Likewise. | ||
| 15 | (vect_recog_mixed_size_cond_pattern): Likewise and allow | ||
| 16 | non-constant then and else clauses. | ||
| 17 | |||
| 18 | gcc/testsuite/ | ||
| 19 | * gcc.dg/vect/bb-slp-cond-3.c: New test. | ||
| 20 | * gcc.dg/vect/bb-slp-cond-4.c: New test. | ||
| 21 | |||
| 22 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c' | ||
| 23 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 1970-01-01 00:00:00 +0000 | ||
| 24 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 2011-11-27 11:29:32 +0000 | ||
| 25 | @@ -0,0 +1,85 @@ | ||
| 26 | +/* { dg-require-effective-target vect_condition } */ | ||
| 27 | + | ||
| 28 | +#include "tree-vect.h" | ||
| 29 | + | ||
| 30 | +#define N 64 | ||
| 31 | + | ||
| 32 | +/* Comparison in int, then/else and result in unsigned char. */ | ||
| 33 | + | ||
| 34 | +static inline unsigned char | ||
| 35 | +foo (int x, int y, int a, int b) | ||
| 36 | +{ | ||
| 37 | + if (x >= y) | ||
| 38 | + return a; | ||
| 39 | + else | ||
| 40 | + return b; | ||
| 41 | +} | ||
| 42 | + | ||
| 43 | +__attribute__((noinline, noclone)) void | ||
| 44 | +bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b, | ||
| 45 | + unsigned char * __restrict__ c, unsigned char * __restrict__ d, | ||
| 46 | + unsigned char * __restrict__ e, int stride, int w) | ||
| 47 | +{ | ||
| 48 | + int i; | ||
| 49 | + for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, | ||
| 50 | + d += stride, e += stride) | ||
| 51 | + { | ||
| 52 | + e[0] = foo (c[0], d[0], a[0] * w, b[0] * w); | ||
| 53 | + e[1] = foo (c[1], d[1], a[1] * w, b[1] * w); | ||
| 54 | + e[2] = foo (c[2], d[2], a[2] * w, b[2] * w); | ||
| 55 | + e[3] = foo (c[3], d[3], a[3] * w, b[3] * w); | ||
| 56 | + e[4] = foo (c[4], d[4], a[4] * w, b[4] * w); | ||
| 57 | + e[5] = foo (c[5], d[5], a[5] * w, b[5] * w); | ||
| 58 | + e[6] = foo (c[6], d[6], a[6] * w, b[6] * w); | ||
| 59 | + e[7] = foo (c[7], d[7], a[7] * w, b[7] * w); | ||
| 60 | + e[8] = foo (c[8], d[8], a[8] * w, b[8] * w); | ||
| 61 | + e[9] = foo (c[9], d[9], a[9] * w, b[9] * w); | ||
| 62 | + e[10] = foo (c[10], d[10], a[10] * w, b[10] * w); | ||
| 63 | + e[11] = foo (c[11], d[11], a[11] * w, b[11] * w); | ||
| 64 | + e[12] = foo (c[12], d[12], a[12] * w, b[12] * w); | ||
| 65 | + e[13] = foo (c[13], d[13], a[13] * w, b[13] * w); | ||
| 66 | + e[14] = foo (c[14], d[14], a[14] * w, b[14] * w); | ||
| 67 | + e[15] = foo (c[15], d[15], a[15] * w, b[15] * w); | ||
| 68 | + } | ||
| 69 | +} | ||
| 70 | + | ||
| 71 | + | ||
| 72 | +unsigned char a[N], b[N], c[N], d[N], e[N]; | ||
| 73 | + | ||
| 74 | +int main () | ||
| 75 | +{ | ||
| 76 | + int i; | ||
| 77 | + | ||
| 78 | + check_vect (); | ||
| 79 | + | ||
| 80 | + for (i = 0; i < N; i++) | ||
| 81 | + { | ||
| 82 | + a[i] = i; | ||
| 83 | + b[i] = 5; | ||
| 84 | + e[i] = 0; | ||
| 85 | + | ||
| 86 | + switch (i % 9) | ||
| 87 | + { | ||
| 88 | + case 0: asm (""); c[i] = i; d[i] = i + 1; break; | ||
| 89 | + case 1: c[i] = 0; d[i] = 0; break; | ||
| 90 | + case 2: c[i] = i + 1; d[i] = i - 1; break; | ||
| 91 | + case 3: c[i] = i; d[i] = i + 7; break; | ||
| 92 | + case 4: c[i] = i; d[i] = i; break; | ||
| 93 | + case 5: c[i] = i + 16; d[i] = i + 3; break; | ||
| 94 | + case 6: c[i] = i - 5; d[i] = i; break; | ||
| 95 | + case 7: c[i] = i; d[i] = i; break; | ||
| 96 | + case 8: c[i] = i; d[i] = i - 7; break; | ||
| 97 | + } | ||
| 98 | + } | ||
| 99 | + | ||
| 100 | + bar (a, b, c, d, e, 16, 2); | ||
| 101 | + for (i = 0; i < N; i++) | ||
| 102 | + if (e[i] != ((i % 3) == 0 ? 10 : 2 * i)) | ||
| 103 | + abort (); | ||
| 104 | + | ||
| 105 | + return 0; | ||
| 106 | +} | ||
| 107 | + | ||
| 108 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */ | ||
| 109 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
| 110 | + | ||
| 111 | |||
| 112 | === added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c' | ||
| 113 | --- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 1970-01-01 00:00:00 +0000 | ||
| 114 | +++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 2011-11-27 11:29:32 +0000 | ||
| 115 | @@ -0,0 +1,85 @@ | ||
| 116 | +/* { dg-require-effective-target vect_condition } */ | ||
| 117 | + | ||
| 118 | +#include "tree-vect.h" | ||
| 119 | + | ||
| 120 | +#define N 64 | ||
| 121 | + | ||
| 122 | +/* Comparison in short, then/else and result in int. */ | ||
| 123 | +static inline int | ||
| 124 | +foo (short x, short y, int a, int b) | ||
| 125 | +{ | ||
| 126 | + if (x >= y) | ||
| 127 | + return a; | ||
| 128 | + else | ||
| 129 | + return b; | ||
| 130 | +} | ||
| 131 | + | ||
| 132 | +__attribute__((noinline, noclone)) void | ||
| 133 | +bar (short * __restrict__ a, short * __restrict__ b, | ||
| 134 | + short * __restrict__ c, short * __restrict__ d, | ||
| 135 | + int * __restrict__ e, int stride, int w) | ||
| 136 | +{ | ||
| 137 | + int i; | ||
| 138 | + for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, | ||
| 139 | + d += stride, e += stride) | ||
| 140 | + { | ||
| 141 | + e[0] = foo (c[0], d[0], a[0], b[0]); | ||
| 142 | + e[1] = foo (c[1], d[1], a[1], b[1]); | ||
| 143 | + e[2] = foo (c[2], d[2], a[2], b[2]); | ||
| 144 | + e[3] = foo (c[3], d[3], a[3], b[3]); | ||
| 145 | + e[4] = foo (c[4], d[4], a[4], b[4]); | ||
| 146 | + e[5] = foo (c[5], d[5], a[5], b[5]); | ||
| 147 | + e[6] = foo (c[6], d[6], a[6], b[6]); | ||
| 148 | + e[7] = foo (c[7], d[7], a[7], b[7]); | ||
| 149 | + e[8] = foo (c[8], d[8], a[8], b[8]); | ||
| 150 | + e[9] = foo (c[9], d[9], a[9], b[9]); | ||
| 151 | + e[10] = foo (c[10], d[10], a[10], b[10]); | ||
| 152 | + e[11] = foo (c[11], d[11], a[11], b[11]); | ||
| 153 | + e[12] = foo (c[12], d[12], a[12], b[12]); | ||
| 154 | + e[13] = foo (c[13], d[13], a[13], b[13]); | ||
| 155 | + e[14] = foo (c[14], d[14], a[14], b[14]); | ||
| 156 | + e[15] = foo (c[15], d[15], a[15], b[15]); | ||
| 157 | + } | ||
| 158 | +} | ||
| 159 | + | ||
| 160 | + | ||
| 161 | +short a[N], b[N], c[N], d[N]; | ||
| 162 | +int e[N]; | ||
| 163 | + | ||
| 164 | +int main () | ||
| 165 | +{ | ||
| 166 | + int i; | ||
| 167 | + | ||
| 168 | + check_vect (); | ||
| 169 | + | ||
| 170 | + for (i = 0; i < N; i++) | ||
| 171 | + { | ||
| 172 | + a[i] = i; | ||
| 173 | + b[i] = 5; | ||
| 174 | + e[i] = 0; | ||
| 175 | + | ||
| 176 | + switch (i % 9) | ||
| 177 | + { | ||
| 178 | + case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break; | ||
| 179 | + case 1: c[i] = 0; d[i] = 0; break; | ||
| 180 | + case 2: c[i] = i + 1; d[i] = - i - 1; break; | ||
| 181 | + case 3: c[i] = i; d[i] = i + 7; break; | ||
| 182 | + case 4: c[i] = i; d[i] = i; break; | ||
| 183 | + case 5: c[i] = i + 16; d[i] = i + 3; break; | ||
| 184 | + case 6: c[i] = - i - 5; d[i] = - i; break; | ||
| 185 | + case 7: c[i] = - i; d[i] = - i; break; | ||
| 186 | + case 8: c[i] = - i; d[i] = - i - 7; break; | ||
| 187 | + } | ||
| 188 | + } | ||
| 189 | + | ||
| 190 | + bar (a, b, c, d, e, 16, 2); | ||
| 191 | + for (i = 0; i < N; i++) | ||
| 192 | + if (e[i] != ((i % 3) == 0 ? 5 : i)) | ||
| 193 | + abort (); | ||
| 194 | + | ||
| 195 | + return 0; | ||
| 196 | +} | ||
| 197 | + | ||
| 198 | +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */ | ||
| 199 | +/* { dg-final { cleanup-tree-dump "slp" } } */ | ||
| 200 | + | ||
| 201 | |||
| 202 | === modified file 'gcc/tree-vect-patterns.c' | ||
| 203 | --- old/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000 | ||
| 204 | +++ new/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 | ||
| 205 | @@ -62,18 +62,16 @@ | ||
| 206 | vect_recog_mixed_size_cond_pattern}; | ||
| 207 | |||
| 208 | |||
| 209 | -/* Function widened_name_p | ||
| 210 | - | ||
| 211 | - Check whether NAME, an ssa-name used in USE_STMT, | ||
| 212 | - is a result of a type-promotion, such that: | ||
| 213 | - DEF_STMT: NAME = NOP (name0) | ||
| 214 | - where the type of name0 (HALF_TYPE) is smaller than the type of NAME. | ||
| 215 | +/* Check whether NAME, an ssa-name used in USE_STMT, | ||
| 216 | + is a result of a type promotion or demotion, such that: | ||
| 217 | + DEF_STMT: NAME = NOP (name0) | ||
| 218 | + where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME. | ||
| 219 | If CHECK_SIGN is TRUE, check that either both types are signed or both are | ||
| 220 | unsigned. */ | ||
| 221 | |||
| 222 | static bool | ||
| 223 | -widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, | ||
| 224 | - bool check_sign) | ||
| 225 | +type_conversion_p (tree name, gimple use_stmt, bool check_sign, | ||
| 226 | + tree *orig_type, gimple *def_stmt, bool *promotion) | ||
| 227 | { | ||
| 228 | tree dummy; | ||
| 229 | gimple dummy_gimple; | ||
| 230 | @@ -96,21 +94,27 @@ | ||
| 231 | && dt != vect_external_def && dt != vect_constant_def) | ||
| 232 | return false; | ||
| 233 | |||
| 234 | - if (! *def_stmt) | ||
| 235 | + if (!*def_stmt) | ||
| 236 | return false; | ||
| 237 | |||
| 238 | if (!is_gimple_assign (*def_stmt)) | ||
| 239 | return false; | ||
| 240 | |||
| 241 | - if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) | ||
| 242 | + if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) | ||
| 243 | return false; | ||
| 244 | |||
| 245 | oprnd0 = gimple_assign_rhs1 (*def_stmt); | ||
| 246 | |||
| 247 | - *half_type = TREE_TYPE (oprnd0); | ||
| 248 | - if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) | ||
| 249 | - || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) | ||
| 250 | - || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) | ||
| 251 | + *orig_type = TREE_TYPE (oprnd0); | ||
| 252 | + if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) | ||
| 253 | + || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) | ||
| 254 | + return false; | ||
| 255 | + | ||
| 256 | + if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) | ||
| 257 | + *promotion = true; | ||
| 258 | + else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2)) | ||
| 259 | + *promotion = false; | ||
| 260 | + else | ||
| 261 | return false; | ||
| 262 | |||
| 263 | if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, | ||
| 264 | @@ -192,6 +196,7 @@ | ||
| 265 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 266 | struct loop *loop; | ||
| 267 | tree var, rhs; | ||
| 268 | + bool promotion; | ||
| 269 | |||
| 270 | if (!loop_info) | ||
| 271 | return NULL; | ||
| 272 | @@ -255,7 +260,9 @@ | ||
| 273 | return NULL; | ||
| 274 | stmt = last_stmt; | ||
| 275 | |||
| 276 | - if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) | ||
| 277 | + if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, | ||
| 278 | + &promotion) | ||
| 279 | + && promotion) | ||
| 280 | { | ||
| 281 | stmt = def_stmt; | ||
| 282 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
| 283 | @@ -310,10 +317,14 @@ | ||
| 284 | if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) | ||
| 285 | || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) | ||
| 286 | return NULL; | ||
| 287 | - if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) | ||
| 288 | + if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, | ||
| 289 | + &promotion) | ||
| 290 | + || !promotion) | ||
| 291 | return NULL; | ||
| 292 | oprnd00 = gimple_assign_rhs1 (def_stmt); | ||
| 293 | - if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) | ||
| 294 | + if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt, | ||
| 295 | + &promotion) | ||
| 296 | + || !promotion) | ||
| 297 | return NULL; | ||
| 298 | oprnd01 = gimple_assign_rhs1 (def_stmt); | ||
| 299 | if (!types_compatible_p (half_type0, half_type1)) | ||
| 300 | @@ -526,7 +537,7 @@ | ||
| 301 | enum tree_code dummy_code; | ||
| 302 | int dummy_int; | ||
| 303 | VEC (tree, heap) *dummy_vec; | ||
| 304 | - bool op1_ok; | ||
| 305 | + bool op1_ok, promotion; | ||
| 306 | |||
| 307 | if (!is_gimple_assign (last_stmt)) | ||
| 308 | return NULL; | ||
| 309 | @@ -546,12 +557,14 @@ | ||
| 310 | return NULL; | ||
| 311 | |||
| 312 | /* Check argument 0. */ | ||
| 313 | - if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) | ||
| 314 | + if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, | ||
| 315 | + &promotion) | ||
| 316 | + || !promotion) | ||
| 317 | return NULL; | ||
| 318 | - /* Check argument 1. */ | ||
| 319 | - op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); | ||
| 320 | - | ||
| 321 | - if (op1_ok) | ||
| 322 | + /* Check argument 1. */ | ||
| 323 | + op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, | ||
| 324 | + &def_stmt1, &promotion); | ||
| 325 | + if (op1_ok && promotion) | ||
| 326 | { | ||
| 327 | oprnd0 = gimple_assign_rhs1 (def_stmt0); | ||
| 328 | oprnd1 = gimple_assign_rhs1 (def_stmt1); | ||
| 329 | @@ -793,6 +806,7 @@ | ||
| 330 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 331 | struct loop *loop; | ||
| 332 | tree var; | ||
| 333 | + bool promotion; | ||
| 334 | |||
| 335 | if (!loop_info) | ||
| 336 | return NULL; | ||
| 337 | @@ -832,8 +846,10 @@ | ||
| 338 | Left to check that oprnd0 is defined by a cast from type 'type' to type | ||
| 339 | 'TYPE'. */ | ||
| 340 | |||
| 341 | - if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) | ||
| 342 | - return NULL; | ||
| 343 | + if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, | ||
| 344 | + &promotion) | ||
| 345 | + || !promotion) | ||
| 346 | + return NULL; | ||
| 347 | |||
| 348 | oprnd0 = gimple_assign_rhs1 (stmt); | ||
| 349 | *type_in = half_type; | ||
| 350 | @@ -899,6 +915,7 @@ | ||
| 351 | gimple def_stmt, new_stmt; | ||
| 352 | bool first = false; | ||
| 353 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); | ||
| 354 | + bool promotion; | ||
| 355 | bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); | ||
| 356 | struct loop *loop = NULL; | ||
| 357 | |||
| 358 | @@ -933,7 +950,9 @@ | ||
| 359 | else | ||
| 360 | { | ||
| 361 | first = true; | ||
| 362 | - if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) | ||
| 363 | + if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, | ||
| 364 | + &promotion) | ||
| 365 | + || !promotion | ||
| 366 | || !gimple_bb (def_stmt) | ||
| 367 | || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) | ||
| 368 | || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) | ||
| 369 | @@ -1327,6 +1346,7 @@ | ||
| 370 | VEC (tree, heap) * dummy_vec; | ||
| 371 | gimple use_stmt = NULL; | ||
| 372 | bool over_widen = false; | ||
| 373 | + bool promotion; | ||
| 374 | |||
| 375 | if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) | ||
| 376 | return NULL; | ||
| 377 | @@ -1381,8 +1401,10 @@ | ||
| 378 | return NULL; | ||
| 379 | |||
| 380 | /* Check operand 0: it has to be defined by a type promotion. */ | ||
| 381 | - if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) | ||
| 382 | - return NULL; | ||
| 383 | + if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, | ||
| 384 | + &promotion) | ||
| 385 | + || !promotion) | ||
| 386 | + return NULL; | ||
| 387 | |||
| 388 | /* Check operand 1: has to be positive. We check that it fits the type | ||
| 389 | in vect_handle_widen_op_by_const (). */ | ||
| 390 | @@ -1492,9 +1514,9 @@ | ||
| 391 | S1 a_T = x_t CMP y_t ? b_T : c_T; | ||
| 392 | |||
| 393 | where type 'TYPE' is an integral type which has different size | ||
| 394 | - from 'type'. b_T and c_T are constants and if 'TYPE' is wider | ||
| 395 | + from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider | ||
| 396 | than 'type', the constants need to fit into an integer type | ||
| 397 | - with the same width as 'type'. | ||
| 398 | + with the same width as 'type') or results of conversion from 'type'. | ||
| 399 | |||
| 400 | Input: | ||
| 401 | |||
| 402 | @@ -1523,6 +1545,9 @@ | ||
| 403 | enum machine_mode cmpmode; | ||
| 404 | gimple pattern_stmt, def_stmt; | ||
| 405 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 406 | + tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; | ||
| 407 | + gimple def_stmt0 = NULL, def_stmt1 = NULL; | ||
| 408 | + bool promotion; | ||
| 409 | bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); | ||
| 410 | |||
| 411 | if (!is_gimple_assign (last_stmt) | ||
| 412 | @@ -1535,25 +1560,40 @@ | ||
| 413 | then_clause = TREE_OPERAND (op, 1); | ||
| 414 | else_clause = TREE_OPERAND (op, 2); | ||
| 415 | |||
| 416 | - if (TREE_CODE (then_clause) != INTEGER_CST | ||
| 417 | - || TREE_CODE (else_clause) != INTEGER_CST) | ||
| 418 | - return NULL; | ||
| 419 | - | ||
| 420 | if (!COMPARISON_CLASS_P (cond_expr)) | ||
| 421 | return NULL; | ||
| 422 | |||
| 423 | type = gimple_expr_type (last_stmt); | ||
| 424 | comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); | ||
| 425 | - if (!INTEGRAL_TYPE_P (comp_type) | ||
| 426 | - || !INTEGRAL_TYPE_P (type)) | ||
| 427 | - return NULL; | ||
| 428 | - | ||
| 429 | comp_vectype = get_vectype_for_scalar_type (comp_type); | ||
| 430 | if (comp_vectype == NULL_TREE) | ||
| 431 | return NULL; | ||
| 432 | |||
| 433 | + if (types_compatible_p (type, comp_type) | ||
| 434 | + || !INTEGRAL_TYPE_P (comp_type) | ||
| 435 | + || !INTEGRAL_TYPE_P (type)) | ||
| 436 | + return NULL; | ||
| 437 | + | ||
| 438 | + if ((TREE_CODE (then_clause) != INTEGER_CST | ||
| 439 | + && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, | ||
| 440 | + &def_stmt0, &promotion)) | ||
| 441 | + || (TREE_CODE (else_clause) != INTEGER_CST | ||
| 442 | + && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, | ||
| 443 | + &def_stmt1, &promotion))) | ||
| 444 | + return NULL; | ||
| 445 | + | ||
| 446 | + if (orig_type0 && orig_type1 | ||
| 447 | + && (!types_compatible_p (orig_type0, orig_type1) | ||
| 448 | + || !types_compatible_p (orig_type0, comp_type))) | ||
| 449 | + return NULL; | ||
| 450 | + | ||
| 451 | + if (orig_type0) | ||
| 452 | + then_clause = gimple_assign_rhs1 (def_stmt0); | ||
| 453 | + | ||
| 454 | + if (orig_type1) | ||
| 455 | + else_clause = gimple_assign_rhs1 (def_stmt1); | ||
| 456 | + | ||
| 457 | cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); | ||
| 458 | - | ||
| 459 | if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) | ||
| 460 | return NULL; | ||
| 461 | |||
| 462 | @@ -1561,18 +1601,15 @@ | ||
| 463 | if (vectype == NULL_TREE) | ||
| 464 | return NULL; | ||
| 465 | |||
| 466 | - if (types_compatible_p (vectype, comp_vectype)) | ||
| 467 | - return NULL; | ||
| 468 | - | ||
| 469 | if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) | ||
| 470 | return NULL; | ||
| 471 | |||
| 472 | - if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) | ||
| 473 | - { | ||
| 474 | - if (!int_fits_type_p (then_clause, comp_type) | ||
| 475 | - || !int_fits_type_p (else_clause, comp_type)) | ||
| 476 | - return NULL; | ||
| 477 | - } | ||
| 478 | + if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode) | ||
| 479 | + && ((TREE_CODE (then_clause) == INTEGER_CST | ||
| 480 | + && !int_fits_type_p (then_clause, comp_type)) | ||
| 481 | + || (TREE_CODE (else_clause) == INTEGER_CST | ||
| 482 | + && !int_fits_type_p (else_clause, comp_type)))) | ||
| 483 | + return NULL; | ||
| 484 | |||
| 485 | tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), | ||
| 486 | fold_convert (comp_type, then_clause), | ||
| 487 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch new file mode 100644 index 0000000000..43a2a4da96 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106848.patch | |||
| @@ -0,0 +1,276 @@ | |||
| 1 | 2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> | ||
| 2 | |||
| 3 | Backport from mainline -A15 tuning. | ||
| 4 | 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> | ||
| 5 | |||
| 6 | * config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue. | ||
| 7 | * config/arm/arm.md (mul64): New attribute. | ||
| 8 | (generic_sched): Cortex-A15 is not scheduled generically. | ||
| 9 | (cortex-a15.md): Include. | ||
| 10 | * config/arm/cortex-a15.md: New machine description. | ||
| 11 | * config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md. | ||
| 12 | |||
| 13 | 2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> | ||
| 14 | * config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed. | ||
| 15 | |||
| 16 | === modified file 'gcc/config/arm/arm.c' | ||
| 17 | --- old/gcc/config/arm/arm.c 2011-12-05 10:55:48 +0000 | ||
| 18 | +++ new/gcc/config/arm/arm.c 2011-12-05 12:33:25 +0000 | ||
| 19 | @@ -24056,6 +24056,9 @@ | ||
| 20 | { | ||
| 21 | switch (arm_tune) | ||
| 22 | { | ||
| 23 | + case cortexa15: | ||
| 24 | + return 3; | ||
| 25 | + | ||
| 26 | case cortexr4: | ||
| 27 | case cortexr4f: | ||
| 28 | case cortexr5: | ||
| 29 | |||
| 30 | === modified file 'gcc/config/arm/arm.md' | ||
| 31 | --- old/gcc/config/arm/arm.md 2011-10-26 11:38:30 +0000 | ||
| 32 | +++ new/gcc/config/arm/arm.md 2011-12-02 00:38:59 +0000 | ||
| 33 | @@ -345,6 +345,13 @@ | ||
| 34 | (const_string "mult") | ||
| 35 | (const_string "alu"))) | ||
| 36 | |||
| 37 | +; Is this an (integer side) multiply with a 64-bit result? | ||
| 38 | +(define_attr "mul64" "no,yes" | ||
| 39 | + (if_then_else | ||
| 40 | + (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") | ||
| 41 | + (const_string "yes") | ||
| 42 | + (const_string "no"))) | ||
| 43 | + | ||
| 44 | ; Load scheduling, set from the arm_ld_sched variable | ||
| 45 | ; initialized by arm_option_override() | ||
| 46 | (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) | ||
| 47 | @@ -511,7 +518,7 @@ | ||
| 48 | |||
| 49 | (define_attr "generic_sched" "yes,no" | ||
| 50 | (const (if_then_else | ||
| 51 | - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") | ||
| 52 | + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") | ||
| 53 | (eq_attr "tune_cortexr4" "yes")) | ||
| 54 | (const_string "no") | ||
| 55 | (const_string "yes")))) | ||
| 56 | @@ -537,6 +544,7 @@ | ||
| 57 | (include "cortex-a5.md") | ||
| 58 | (include "cortex-a8.md") | ||
| 59 | (include "cortex-a9.md") | ||
| 60 | +(include "cortex-a15.md") | ||
| 61 | (include "cortex-r4.md") | ||
| 62 | (include "cortex-r4f.md") | ||
| 63 | (include "cortex-m4.md") | ||
| 64 | |||
| 65 | === added file 'gcc/config/arm/cortex-a15.md' | ||
| 66 | --- old/gcc/config/arm/cortex-a15.md 1970-01-01 00:00:00 +0000 | ||
| 67 | +++ new/gcc/config/arm/cortex-a15.md 2011-12-02 00:38:59 +0000 | ||
| 68 | @@ -0,0 +1,186 @@ | ||
| 69 | +;; ARM Cortex-A15 pipeline description | ||
| 70 | +;; Copyright (C) 2011 Free Software Foundation, Inc. | ||
| 71 | +;; | ||
| 72 | +;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com> | ||
| 73 | + | ||
| 74 | +;; This file is part of GCC. | ||
| 75 | +;; | ||
| 76 | +;; GCC is free software; you can redistribute it and/or modify it | ||
| 77 | +;; under the terms of the GNU General Public License as published by | ||
| 78 | +;; the Free Software Foundation; either version 3, or (at your option) | ||
| 79 | +;; any later version. | ||
| 80 | +;; | ||
| 81 | +;; GCC is distributed in the hope that it will be useful, but | ||
| 82 | +;; WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 83 | +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 84 | +;; General Public License for more details. | ||
| 85 | +;; | ||
| 86 | +;; You should have received a copy of the GNU General Public License | ||
| 87 | +;; along with GCC; see the file COPYING3. If not see | ||
| 88 | +;; <http://www.gnu.org/licenses/>. | ||
| 89 | + | ||
| 90 | +(define_automaton "cortex_a15") | ||
| 91 | + | ||
| 92 | +;; The Cortex-A15 core is modelled as a triple issue pipeline that has | ||
| 93 | +;; the following dispatch units. | ||
| 94 | +;; 1. Two pipelines for simple integer operations: SX1, SX2 | ||
| 95 | +;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2 | ||
| 96 | +;; 3. One pipeline for branch operations: BX | ||
| 97 | +;; 4. One pipeline for integer multiply and divide operations: MX | ||
| 98 | +;; 5. Two pipelines for load and store operations: LS1, LS2 | ||
| 99 | +;; | ||
| 100 | +;; We can issue into three pipelines per-cycle. | ||
| 101 | +;; | ||
| 102 | +;; We assume that where we have unit pairs xx1 is always filled before xx2. | ||
| 103 | + | ||
| 104 | +;; The three issue units | ||
| 105 | +(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") | ||
| 106 | + | ||
| 107 | +(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") | ||
| 108 | +(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") | ||
| 109 | +(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") | ||
| 110 | +(final_presence_set "ca15_i1" "ca15_i0") | ||
| 111 | +(final_presence_set "ca15_i2" "ca15_i1") | ||
| 112 | + | ||
| 113 | +;; The main dispatch units | ||
| 114 | +(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") | ||
| 115 | +(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15") | ||
| 116 | +(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") | ||
| 117 | +(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") | ||
| 118 | + | ||
| 119 | +(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") | ||
| 120 | + | ||
| 121 | +;; The extended load-store pipeline | ||
| 122 | +(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") | ||
| 123 | + | ||
| 124 | +;; The extended ALU pipeline | ||
| 125 | +(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") | ||
| 126 | +(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") | ||
| 127 | + | ||
| 128 | +;; Simple Execution Unit: | ||
| 129 | +;; | ||
| 130 | +;; Simple ALU without shift | ||
| 131 | +(define_insn_reservation "cortex_a15_alu" 2 | ||
| 132 | + (and (eq_attr "tune" "cortexa15") | ||
| 133 | + (and (eq_attr "type" "alu") | ||
| 134 | + (eq_attr "neon_type" "none"))) | ||
| 135 | + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") | ||
| 136 | + | ||
| 137 | +;; ALU ops with immediate shift | ||
| 138 | +(define_insn_reservation "cortex_a15_alu_shift" 3 | ||
| 139 | + (and (eq_attr "tune" "cortexa15") | ||
| 140 | + (and (eq_attr "type" "alu_shift") | ||
| 141 | + (eq_attr "neon_type" "none"))) | ||
| 142 | + "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ | ||
| 143 | + |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") | ||
| 144 | + | ||
| 145 | +;; ALU ops with register controlled shift | ||
| 146 | +(define_insn_reservation "cortex_a15_alu_shift_reg" 3 | ||
| 147 | + (and (eq_attr "tune" "cortexa15") | ||
| 148 | + (and (eq_attr "type" "alu_shift_reg") | ||
| 149 | + (eq_attr "neon_type" "none"))) | ||
| 150 | + "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ | ||
| 151 | + |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ | ||
| 152 | + |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") | ||
| 153 | + | ||
| 154 | +;; Multiply Execution Unit: | ||
| 155 | +;; | ||
| 156 | +;; 32-bit multiplies | ||
| 157 | +(define_insn_reservation "cortex_a15_mult32" 3 | ||
| 158 | + (and (eq_attr "tune" "cortexa15") | ||
| 159 | + (and (eq_attr "type" "mult") | ||
| 160 | + (and (eq_attr "neon_type" "none") | ||
| 161 | + (eq_attr "mul64" "no")))) | ||
| 162 | + "ca15_issue1,ca15_mx") | ||
| 163 | + | ||
| 164 | +;; 64-bit multiplies | ||
| 165 | +(define_insn_reservation "cortex_a15_mult64" 4 | ||
| 166 | + (and (eq_attr "tune" "cortexa15") | ||
| 167 | + (and (eq_attr "type" "mult") | ||
| 168 | + (and (eq_attr "neon_type" "none") | ||
| 169 | + (eq_attr "mul64" "yes")))) | ||
| 170 | + "ca15_issue1,ca15_mx*2") | ||
| 171 | + | ||
| 172 | +;; Integer divide | ||
| 173 | +(define_insn_reservation "cortex_a15_udiv" 9 | ||
| 174 | + (and (eq_attr "tune" "cortexa15") | ||
| 175 | + (eq_attr "insn" "udiv")) | ||
| 176 | + "ca15_issue1,ca15_mx") | ||
| 177 | + | ||
| 178 | +(define_insn_reservation "cortex_a15_sdiv" 10 | ||
| 179 | + (and (eq_attr "tune" "cortexa15") | ||
| 180 | + (eq_attr "insn" "sdiv")) | ||
| 181 | + "ca15_issue1,ca15_mx") | ||
| 182 | + | ||
| 183 | +;; Block all issue pipes for a cycle | ||
| 184 | +(define_insn_reservation "cortex_a15_block" 1 | ||
| 185 | + (and (eq_attr "tune" "cortexa15") | ||
| 186 | + (and (eq_attr "type" "block") | ||
| 187 | + (eq_attr "neon_type" "none"))) | ||
| 188 | + "ca15_issue3") | ||
| 189 | + | ||
| 190 | +;; Branch execution Unit | ||
| 191 | +;; | ||
| 192 | +;; Branches take one issue slot. | ||
| 193 | +;; No latency as there is no result | ||
| 194 | +(define_insn_reservation "cortex_a15_branch" 0 | ||
| 195 | + (and (eq_attr "tune" "cortexa15") | ||
| 196 | + (and (eq_attr "type" "branch") | ||
| 197 | + (eq_attr "neon_type" "none"))) | ||
| 198 | + "ca15_issue1,ca15_bx") | ||
| 199 | + | ||
| 200 | + | ||
| 201 | +;; We lie with calls. They take up all issue slots, and form a block in the | ||
| 202 | +;; pipeline. The result however is available the next cycle. | ||
| 203 | +;; | ||
| 204 | +;; Addition of new units requires this to be updated. | ||
| 205 | +(define_insn_reservation "cortex_a15_call" 1 | ||
| 206 | + (and (eq_attr "tune" "cortexa15") | ||
| 207 | + (and (eq_attr "type" "call") | ||
| 208 | + (eq_attr "neon_type" "none"))) | ||
| 209 | + "ca15_issue3,\ | ||
| 210 | + ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\ | ||
| 211 | + ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\ | ||
| 212 | + +ca15_sx2_sat+ca15_ldr+ca15_str") | ||
| 213 | + | ||
| 214 | +;; Load-store execution Unit | ||
| 215 | +;; | ||
| 216 | +;; Loads of up to two words. | ||
| 217 | +(define_insn_reservation "cortex_a15_load1" 4 | ||
| 218 | + (and (eq_attr "tune" "cortexa15") | ||
| 219 | + (and (eq_attr "type" "load_byte,load1,load2") | ||
| 220 | + (eq_attr "neon_type" "none"))) | ||
| 221 | + "ca15_issue1,ca15_ls,ca15_ldr,nothing") | ||
| 222 | + | ||
| 223 | +;; Loads of three or four words. | ||
| 224 | +(define_insn_reservation "cortex_a15_load3" 5 | ||
| 225 | + (and (eq_attr "tune" "cortexa15") | ||
| 226 | + (and (eq_attr "type" "load3,load4") | ||
| 227 | + (eq_attr "neon_type" "none"))) | ||
| 228 | + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") | ||
| 229 | + | ||
| 230 | +;; Stores of up to two words. | ||
| 231 | +(define_insn_reservation "cortex_a15_store1" 0 | ||
| 232 | + (and (eq_attr "tune" "cortexa15") | ||
| 233 | + (and (eq_attr "type" "store1,store2") | ||
| 234 | + (eq_attr "neon_type" "none"))) | ||
| 235 | + "ca15_issue1,ca15_ls,ca15_str") | ||
| 236 | + | ||
| 237 | +;; Stores of three or four words. | ||
| 238 | +(define_insn_reservation "cortex_a15_store3" 0 | ||
| 239 | + (and (eq_attr "tune" "cortexa15") | ||
| 240 | + (and (eq_attr "type" "store3,store4") | ||
| 241 | + (eq_attr "neon_type" "none"))) | ||
| 242 | + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") | ||
| 243 | + | ||
| 244 | +;; Simple execution unit bypasses | ||
| 245 | +(define_bypass 1 "cortex_a15_alu" | ||
| 246 | + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") | ||
| 247 | +(define_bypass 2 "cortex_a15_alu_shift" | ||
| 248 | + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") | ||
| 249 | +(define_bypass 2 "cortex_a15_alu_shift_reg" | ||
| 250 | + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") | ||
| 251 | +(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") | ||
| 252 | +(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") | ||
| 253 | +(define_bypass 2 "cortex_a15_alu_shift_reg" | ||
| 254 | + "cortex_a15_load1,cortex_a15_load3") | ||
| 255 | |||
| 256 | === modified file 'gcc/config/arm/t-arm' | ||
| 257 | --- old/gcc/config/arm/t-arm 2011-01-03 20:52:22 +0000 | ||
| 258 | +++ new/gcc/config/arm/t-arm 2011-12-02 00:38:59 +0000 | ||
| 259 | @@ -31,6 +31,16 @@ | ||
| 260 | $(srcdir)/config/arm/fmp626.md \ | ||
| 261 | $(srcdir)/config/arm/fa726te.md \ | ||
| 262 | $(srcdir)/config/arm/arm926ejs.md \ | ||
| 263 | + $(srcdir)/config/arm/cortex-a15.md \ | ||
| 264 | + $(srcdir)/config/arm/cortex-a5.md \ | ||
| 265 | + $(srcdir)/config/arm/cortex-a8.md \ | ||
| 266 | + $(srcdir)/config/arm/cortex-a8-neon.md \ | ||
| 267 | + $(srcdir)/config/arm/cortex-a9.md \ | ||
| 268 | + $(srcdir)/config/arm/cortex-a9-neon.md \ | ||
| 269 | + $(srcdir)/config/arm/cortex-m4-fpu.md \ | ||
| 270 | + $(srcdir)/config/arm/cortex-m4.md \ | ||
| 271 | + $(srcdir)/config/arm/cortex-r4f.md \ | ||
| 272 | + $(srcdir)/config/arm/cortex-r4.md \ | ||
| 273 | $(srcdir)/config/arm/cirrus.md \ | ||
| 274 | $(srcdir)/config/arm/fpa.md \ | ||
| 275 | $(srcdir)/config/arm/vec-common.md \ | ||
| 276 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch new file mode 100644 index 0000000000..8c51c1d1f3 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106853.patch | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | 2011-12-20 Ira Rosen <ira.rosen@linaro.org> | ||
| 2 | |||
| 3 | Backport from mainline: | ||
| 4 | |||
| 5 | 2011-11-29 Ira Rosen <ira.rosen@linaro.org> | ||
| 6 | |||
| 7 | PR tree-optimization/51301 | ||
| 8 | gcc/ | ||
| 9 | * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that | ||
| 10 | the last statement doesn't convert to a bigger type than the original | ||
| 11 | type of the computation. | ||
| 12 | |||
| 13 | gcc/testsuite/ | ||
| 14 | * gcc.dg/vect/pr51301.c: New test. | ||
| 15 | |||
| 16 | === added file 'gcc/testsuite/gcc.dg/vect/pr51301.c' | ||
| 17 | --- old/gcc/testsuite/gcc.dg/vect/pr51301.c 1970-01-01 00:00:00 +0000 | ||
| 18 | +++ new/gcc/testsuite/gcc.dg/vect/pr51301.c 2011-11-30 17:54:51 +0000 | ||
| 19 | @@ -0,0 +1,15 @@ | ||
| 20 | +/* { dg-do compile } */ | ||
| 21 | + | ||
| 22 | +typedef signed char int8_t; | ||
| 23 | +typedef signed long long int64_t; | ||
| 24 | +int64_t | ||
| 25 | +f0a (int8_t * __restrict__ arg1) | ||
| 26 | +{ | ||
| 27 | + int idx; | ||
| 28 | + int64_t result = 0; | ||
| 29 | + for (idx = 0; idx < 416; idx += 1) | ||
| 30 | + result += arg1[idx] << (arg1[idx] == arg1[idx]); | ||
| 31 | + return result; | ||
| 32 | +} | ||
| 33 | + | ||
| 34 | +/* { dg-final { cleanup-tree-dump "vect" } } */ | ||
| 35 | |||
| 36 | === modified file 'gcc/tree-vect-patterns.c' | ||
| 37 | --- old/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000 | ||
| 38 | +++ new/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000 | ||
| 39 | @@ -1138,6 +1138,7 @@ | ||
| 40 | struct loop *loop = NULL; | ||
| 41 | bb_vec_info bb_vinfo; | ||
| 42 | stmt_vec_info stmt_vinfo; | ||
| 43 | + tree type = NULL; | ||
| 44 | |||
| 45 | stmt_vinfo = vinfo_for_stmt (stmt); | ||
| 46 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | ||
| 47 | @@ -1207,6 +1208,7 @@ | ||
| 48 | print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); | ||
| 49 | } | ||
| 50 | |||
| 51 | + type = gimple_expr_type (stmt); | ||
| 52 | prev_stmt = stmt; | ||
| 53 | stmt = use_stmt; | ||
| 54 | |||
| 55 | @@ -1222,9 +1224,11 @@ | ||
| 56 | { | ||
| 57 | use_lhs = gimple_assign_lhs (use_stmt); | ||
| 58 | use_type = TREE_TYPE (use_lhs); | ||
| 59 | - /* Support only type promotion or signedess change. */ | ||
| 60 | + /* Support only type promotion or signedness change. Check that USE_TYPE | ||
| 61 | + is not bigger than the original type. */ | ||
| 62 | if (!INTEGRAL_TYPE_P (use_type) | ||
| 63 | - || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) | ||
| 64 | + || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type) | ||
| 65 | + || TYPE_PRECISION (type) < TYPE_PRECISION (use_type)) | ||
| 66 | return NULL; | ||
| 67 | |||
| 68 | if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) | ||
| 69 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch new file mode 100644 index 0000000000..208ba0e78a --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106854.patch | |||
| @@ -0,0 +1,354 @@ | |||
| 1 | 2011-12-30 Richard Sandiford <richard.sandiford@linaro.org> | ||
| 2 | |||
| 3 | gcc/ | ||
| 4 | Backport from mainline: | ||
| 5 | |||
| 6 | 2011-10-12 Richard Sandiford <richard.sandiford@linaro.org> | ||
| 7 | |||
| 8 | * expr.h (copy_blkmode_to_reg): Declare. | ||
| 9 | * expr.c (copy_blkmode_to_reg): New function. | ||
| 10 | (expand_assignment): Don't expand register RESULT_DECLs before | ||
| 11 | the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a | ||
| 12 | RESULT_DECL register. | ||
| 13 | (expand_expr_real_1): Handle BLKmode decls when looking for promotion. | ||
| 14 | * stmt.c (expand_return): Move BLKmode-to-register code into | ||
| 15 | copy_blkmode_to_reg. | ||
| 16 | |||
| 17 | === modified file 'gcc/expr.c' | ||
| 18 | --- old/gcc/expr.c 2011-10-23 13:33:07 +0000 | ||
| 19 | +++ new/gcc/expr.c 2011-12-30 09:41:30 +0000 | ||
| 20 | @@ -2180,6 +2180,111 @@ | ||
| 21 | return tgtblk; | ||
| 22 | } | ||
| 23 | |||
| 24 | +/* Copy BLKmode value SRC into a register of mode MODE. Return the | ||
| 25 | + register if it contains any data, otherwise return null. | ||
| 26 | + | ||
| 27 | + This is used on targets that return BLKmode values in registers. */ | ||
| 28 | + | ||
| 29 | +rtx | ||
| 30 | +copy_blkmode_to_reg (enum machine_mode mode, tree src) | ||
| 31 | +{ | ||
| 32 | + int i, n_regs; | ||
| 33 | + unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes; | ||
| 34 | + unsigned int bitsize; | ||
| 35 | + rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX; | ||
| 36 | + enum machine_mode dst_mode; | ||
| 37 | + | ||
| 38 | + gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode); | ||
| 39 | + | ||
| 40 | + x = expand_normal (src); | ||
| 41 | + | ||
| 42 | + bytes = int_size_in_bytes (TREE_TYPE (src)); | ||
| 43 | + if (bytes == 0) | ||
| 44 | + return NULL_RTX; | ||
| 45 | + | ||
| 46 | + /* If the structure doesn't take up a whole number of words, see | ||
| 47 | + whether the register value should be padded on the left or on | ||
| 48 | + the right. Set PADDING_CORRECTION to the number of padding | ||
| 49 | + bits needed on the left side. | ||
| 50 | + | ||
| 51 | + In most ABIs, the structure will be returned at the least significant end of | ||
| 52 | + the register, which translates to right padding on little-endian | ||
| 53 | + targets and left padding on big-endian targets. The opposite | ||
| 54 | + holds if the structure is returned at the most significant | ||
| 55 | + end of the register. */ | ||
| 56 | + if (bytes % UNITS_PER_WORD != 0 | ||
| 57 | + && (targetm.calls.return_in_msb (TREE_TYPE (src)) | ||
| 58 | + ? !BYTES_BIG_ENDIAN | ||
| 59 | + : BYTES_BIG_ENDIAN)) | ||
| 60 | + padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) | ||
| 61 | + * BITS_PER_UNIT)); | ||
| 62 | + | ||
| 63 | + n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | ||
| 64 | + dst_words = XALLOCAVEC (rtx, n_regs); | ||
| 65 | + bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD); | ||
| 66 | + | ||
| 67 | + /* Copy the structure BITSIZE bits at a time. */ | ||
| 68 | + for (bitpos = 0, xbitpos = padding_correction; | ||
| 69 | + bitpos < bytes * BITS_PER_UNIT; | ||
| 70 | + bitpos += bitsize, xbitpos += bitsize) | ||
| 71 | + { | ||
| 72 | + /* We need a new destination pseudo each time xbitpos is | ||
| 73 | + on a word boundary and when xbitpos == padding_correction | ||
| 74 | + (the first time through). */ | ||
| 75 | + if (xbitpos % BITS_PER_WORD == 0 | ||
| 76 | + || xbitpos == padding_correction) | ||
| 77 | + { | ||
| 78 | + /* Generate an appropriate register. */ | ||
| 79 | + dst_word = gen_reg_rtx (word_mode); | ||
| 80 | + dst_words[xbitpos / BITS_PER_WORD] = dst_word; | ||
| 81 | + | ||
| 82 | + /* Clear the destination before we move anything into it. */ | ||
| 83 | + emit_move_insn (dst_word, CONST0_RTX (word_mode)); | ||
| 84 | + } | ||
| 85 | + | ||
| 86 | + /* We need a new source operand each time bitpos is on a word | ||
| 87 | + boundary. */ | ||
| 88 | + if (bitpos % BITS_PER_WORD == 0) | ||
| 89 | + src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode); | ||
| 90 | + | ||
| 91 | + /* Use bitpos for the source extraction (left justified) and | ||
| 92 | + xbitpos for the destination store (right justified). */ | ||
| 93 | + store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, word_mode, | ||
| 94 | + extract_bit_field (src_word, bitsize, | ||
| 95 | + bitpos % BITS_PER_WORD, 1, false, | ||
| 96 | + NULL_RTX, word_mode, word_mode)); | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + if (mode == BLKmode) | ||
| 100 | + { | ||
| 101 | + /* Find the smallest integer mode large enough to hold the | ||
| 102 | + entire structure. */ | ||
| 103 | + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); | ||
| 104 | + mode != VOIDmode; | ||
| 105 | + mode = GET_MODE_WIDER_MODE (mode)) | ||
| 106 | + /* Have we found a large enough mode? */ | ||
| 107 | + if (GET_MODE_SIZE (mode) >= bytes) | ||
| 108 | + break; | ||
| 109 | + | ||
| 110 | + /* A suitable mode should have been found. */ | ||
| 111 | + gcc_assert (mode != VOIDmode); | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode)) | ||
| 115 | + dst_mode = word_mode; | ||
| 116 | + else | ||
| 117 | + dst_mode = mode; | ||
| 118 | + dst = gen_reg_rtx (dst_mode); | ||
| 119 | + | ||
| 120 | + for (i = 0; i < n_regs; i++) | ||
| 121 | + emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]); | ||
| 122 | + | ||
| 123 | + if (mode != dst_mode) | ||
| 124 | + dst = gen_lowpart (mode, dst); | ||
| 125 | + | ||
| 126 | + return dst; | ||
| 127 | +} | ||
| 128 | + | ||
| 129 | /* Add a USE expression for REG to the (possibly empty) list pointed | ||
| 130 | to by CALL_FUSAGE. REG must denote a hard register. */ | ||
| 131 | |||
| 132 | @@ -4382,7 +4487,9 @@ | ||
| 133 | if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from) | ||
| 134 | && COMPLETE_TYPE_P (TREE_TYPE (from)) | ||
| 135 | && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST | ||
| 136 | - && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL) | ||
| 137 | + && ! (((TREE_CODE (to) == VAR_DECL | ||
| 138 | + || TREE_CODE (to) == PARM_DECL | ||
| 139 | + || TREE_CODE (to) == RESULT_DECL) | ||
| 140 | && REG_P (DECL_RTL (to))) | ||
| 141 | || TREE_CODE (to) == SSA_NAME)) | ||
| 142 | { | ||
| 143 | @@ -4428,12 +4535,15 @@ | ||
| 144 | rtx temp; | ||
| 145 | |||
| 146 | push_temp_slots (); | ||
| 147 | - temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); | ||
| 148 | + if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode) | ||
| 149 | + temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from); | ||
| 150 | + else | ||
| 151 | + temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); | ||
| 152 | |||
| 153 | if (GET_CODE (to_rtx) == PARALLEL) | ||
| 154 | emit_group_load (to_rtx, temp, TREE_TYPE (from), | ||
| 155 | int_size_in_bytes (TREE_TYPE (from))); | ||
| 156 | - else | ||
| 157 | + else if (temp) | ||
| 158 | emit_move_insn (to_rtx, temp); | ||
| 159 | |||
| 160 | preserve_temp_slots (to_rtx); | ||
| 161 | @@ -8567,10 +8677,15 @@ | ||
| 162 | return temp; | ||
| 163 | } | ||
| 164 | |||
| 165 | - /* If the mode of DECL_RTL does not match that of the decl, it | ||
| 166 | - must be a promoted value. We return a SUBREG of the wanted mode, | ||
| 167 | - but mark it so that we know that it was already extended. */ | ||
| 168 | - if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp)) | ||
| 169 | + /* If the mode of DECL_RTL does not match that of the decl, | ||
| 170 | + there are two cases: we are dealing with a BLKmode value | ||
| 171 | + that is returned in a register, or we are dealing with | ||
| 172 | + a promoted value. In the latter case, return a SUBREG | ||
| 173 | + of the wanted mode, but mark it so that we know that it | ||
| 174 | + was already extended. */ | ||
| 175 | + if (REG_P (decl_rtl) | ||
| 176 | + && DECL_MODE (exp) != BLKmode | ||
| 177 | + && GET_MODE (decl_rtl) != DECL_MODE (exp)) | ||
| 178 | { | ||
| 179 | enum machine_mode pmode; | ||
| 180 | |||
| 181 | |||
| 182 | === modified file 'gcc/expr.h' | ||
| 183 | --- old/gcc/expr.h 2011-03-03 21:56:58 +0000 | ||
| 184 | +++ new/gcc/expr.h 2011-10-12 08:01:43 +0000 | ||
| 185 | @@ -324,6 +324,8 @@ | ||
| 186 | /* Copy BLKmode object from a set of registers. */ | ||
| 187 | extern rtx copy_blkmode_from_reg (rtx, rtx, tree); | ||
| 188 | |||
| 189 | +extern rtx copy_blkmode_to_reg (enum machine_mode, tree); | ||
| 190 | + | ||
| 191 | /* Mark REG as holding a parameter for the next CALL_INSN. */ | ||
| 192 | extern void use_reg (rtx *, rtx); | ||
| 193 | |||
| 194 | |||
| 195 | === modified file 'gcc/stmt.c' | ||
| 196 | --- old/gcc/stmt.c 2011-03-03 21:56:58 +0000 | ||
| 197 | +++ new/gcc/stmt.c 2011-10-12 08:01:43 +0000 | ||
| 198 | @@ -1684,119 +1684,21 @@ | ||
| 199 | expand_value_return (result_rtl); | ||
| 200 | |||
| 201 | /* If the result is an aggregate that is being returned in one (or more) | ||
| 202 | - registers, load the registers here. The compiler currently can't handle | ||
| 203 | - copying a BLKmode value into registers. We could put this code in a | ||
| 204 | - more general area (for use by everyone instead of just function | ||
| 205 | - call/return), but until this feature is generally usable it is kept here | ||
| 206 | - (and in expand_call). */ | ||
| 207 | + registers, load the registers here. */ | ||
| 208 | |||
| 209 | else if (retval_rhs != 0 | ||
| 210 | && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode | ||
| 211 | && REG_P (result_rtl)) | ||
| 212 | { | ||
| 213 | - int i; | ||
| 214 | - unsigned HOST_WIDE_INT bitpos, xbitpos; | ||
| 215 | - unsigned HOST_WIDE_INT padding_correction = 0; | ||
| 216 | - unsigned HOST_WIDE_INT bytes | ||
| 217 | - = int_size_in_bytes (TREE_TYPE (retval_rhs)); | ||
| 218 | - int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | ||
| 219 | - unsigned int bitsize | ||
| 220 | - = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD); | ||
| 221 | - rtx *result_pseudos = XALLOCAVEC (rtx, n_regs); | ||
| 222 | - rtx result_reg, src = NULL_RTX, dst = NULL_RTX; | ||
| 223 | - rtx result_val = expand_normal (retval_rhs); | ||
| 224 | - enum machine_mode tmpmode, result_reg_mode; | ||
| 225 | - | ||
| 226 | - if (bytes == 0) | ||
| 227 | - { | ||
| 228 | - expand_null_return (); | ||
| 229 | - return; | ||
| 230 | - } | ||
| 231 | - | ||
| 232 | - /* If the structure doesn't take up a whole number of words, see | ||
| 233 | - whether the register value should be padded on the left or on | ||
| 234 | - the right. Set PADDING_CORRECTION to the number of padding | ||
| 235 | - bits needed on the left side. | ||
| 236 | - | ||
| 237 | - In most ABIs, the structure will be returned at the least end of | ||
| 238 | - the register, which translates to right padding on little-endian | ||
| 239 | - targets and left padding on big-endian targets. The opposite | ||
| 240 | - holds if the structure is returned at the most significant | ||
| 241 | - end of the register. */ | ||
| 242 | - if (bytes % UNITS_PER_WORD != 0 | ||
| 243 | - && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs)) | ||
| 244 | - ? !BYTES_BIG_ENDIAN | ||
| 245 | - : BYTES_BIG_ENDIAN)) | ||
| 246 | - padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) | ||
| 247 | - * BITS_PER_UNIT)); | ||
| 248 | - | ||
| 249 | - /* Copy the structure BITSIZE bits at a time. */ | ||
| 250 | - for (bitpos = 0, xbitpos = padding_correction; | ||
| 251 | - bitpos < bytes * BITS_PER_UNIT; | ||
| 252 | - bitpos += bitsize, xbitpos += bitsize) | ||
| 253 | - { | ||
| 254 | - /* We need a new destination pseudo each time xbitpos is | ||
| 255 | - on a word boundary and when xbitpos == padding_correction | ||
| 256 | - (the first time through). */ | ||
| 257 | - if (xbitpos % BITS_PER_WORD == 0 | ||
| 258 | - || xbitpos == padding_correction) | ||
| 259 | - { | ||
| 260 | - /* Generate an appropriate register. */ | ||
| 261 | - dst = gen_reg_rtx (word_mode); | ||
| 262 | - result_pseudos[xbitpos / BITS_PER_WORD] = dst; | ||
| 263 | - | ||
| 264 | - /* Clear the destination before we move anything into it. */ | ||
| 265 | - emit_move_insn (dst, CONST0_RTX (GET_MODE (dst))); | ||
| 266 | - } | ||
| 267 | - | ||
| 268 | - /* We need a new source operand each time bitpos is on a word | ||
| 269 | - boundary. */ | ||
| 270 | - if (bitpos % BITS_PER_WORD == 0) | ||
| 271 | - src = operand_subword_force (result_val, | ||
| 272 | - bitpos / BITS_PER_WORD, | ||
| 273 | - BLKmode); | ||
| 274 | - | ||
| 275 | - /* Use bitpos for the source extraction (left justified) and | ||
| 276 | - xbitpos for the destination store (right justified). */ | ||
| 277 | - store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode, | ||
| 278 | - extract_bit_field (src, bitsize, | ||
| 279 | - bitpos % BITS_PER_WORD, 1, false, | ||
| 280 | - NULL_RTX, word_mode, word_mode)); | ||
| 281 | - } | ||
| 282 | - | ||
| 283 | - tmpmode = GET_MODE (result_rtl); | ||
| 284 | - if (tmpmode == BLKmode) | ||
| 285 | - { | ||
| 286 | - /* Find the smallest integer mode large enough to hold the | ||
| 287 | - entire structure and use that mode instead of BLKmode | ||
| 288 | - on the USE insn for the return register. */ | ||
| 289 | - for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT); | ||
| 290 | - tmpmode != VOIDmode; | ||
| 291 | - tmpmode = GET_MODE_WIDER_MODE (tmpmode)) | ||
| 292 | - /* Have we found a large enough mode? */ | ||
| 293 | - if (GET_MODE_SIZE (tmpmode) >= bytes) | ||
| 294 | - break; | ||
| 295 | - | ||
| 296 | - /* A suitable mode should have been found. */ | ||
| 297 | - gcc_assert (tmpmode != VOIDmode); | ||
| 298 | - | ||
| 299 | - PUT_MODE (result_rtl, tmpmode); | ||
| 300 | - } | ||
| 301 | - | ||
| 302 | - if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode)) | ||
| 303 | - result_reg_mode = word_mode; | ||
| 304 | + val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs); | ||
| 305 | + if (val) | ||
| 306 | + { | ||
| 307 | + /* Use the mode of the result value on the return register. */ | ||
| 308 | + PUT_MODE (result_rtl, GET_MODE (val)); | ||
| 309 | + expand_value_return (val); | ||
| 310 | + } | ||
| 311 | else | ||
| 312 | - result_reg_mode = tmpmode; | ||
| 313 | - result_reg = gen_reg_rtx (result_reg_mode); | ||
| 314 | - | ||
| 315 | - for (i = 0; i < n_regs; i++) | ||
| 316 | - emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode), | ||
| 317 | - result_pseudos[i]); | ||
| 318 | - | ||
| 319 | - if (tmpmode != result_reg_mode) | ||
| 320 | - result_reg = gen_lowpart (tmpmode, result_reg); | ||
| 321 | - | ||
| 322 | - expand_value_return (result_reg); | ||
| 323 | + expand_null_return (); | ||
| 324 | } | ||
| 325 | else if (retval_rhs != 0 | ||
| 326 | && !VOID_TYPE_P (TREE_TYPE (retval_rhs)) | ||
| 327 | |||
| 328 | === added file 'gcc/testsuite/g++.dg/pr48660.C' | ||
| 329 | --- old/gcc/testsuite/g++.dg/pr48660.C 1970-01-01 00:00:00 +0000 | ||
| 330 | +++ new/gcc/testsuite/g++.dg/pr48660.C 2011-10-12 08:01:43 +0000 | ||
| 331 | @@ -0,0 +1,22 @@ | ||
| 332 | +template<int N> struct val { char a[N]; }; | ||
| 333 | + | ||
| 334 | +class Base | ||
| 335 | +{ | ||
| 336 | +public: | ||
| 337 | + virtual val<1> get1() const = 0; | ||
| 338 | + virtual val<2> get2() const = 0; | ||
| 339 | + virtual val<3> get3() const = 0; | ||
| 340 | + virtual val<4> get4() const = 0; | ||
| 341 | +}; | ||
| 342 | + | ||
| 343 | +class Derived : public virtual Base | ||
| 344 | +{ | ||
| 345 | +public: | ||
| 346 | + virtual val<1> get1() const { return foo->get1(); } | ||
| 347 | + virtual val<2> get2() const { return foo->get2(); } | ||
| 348 | + virtual val<3> get3() const { return foo->get3(); } | ||
| 349 | + virtual val<4> get4() const { return foo->get4(); } | ||
| 350 | + Base *foo; | ||
| 351 | +}; | ||
| 352 | + | ||
| 353 | +Base* make() { return new Derived; } | ||
| 354 | |||
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch new file mode 100644 index 0000000000..c433fc73f1 --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106855.patch | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | 2012-01-05 Michael Hope <michael.hope@linaro.org> | ||
| 2 | |||
| 3 | Backport from mainline r182271: | ||
| 4 | |||
| 5 | 2011-12-13 Revital Eres <revital.eres@linaro.org> | ||
| 6 | |||
| 7 | gcc/ | ||
| 8 | * modulo-sched.c (mark_loop_unsched): Free bbs. | ||
| 9 | |||
| 10 | === modified file 'gcc/modulo-sched.c' | ||
| 11 | --- old/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000 | ||
| 12 | +++ new/gcc/modulo-sched.c 2012-01-05 02:45:23 +0000 | ||
| 13 | @@ -1204,6 +1204,8 @@ | ||
| 14 | |||
| 15 | for (i = 0; i < loop->num_nodes; i++) | ||
| 16 | bbs[i]->flags |= BB_DISABLE_SCHEDULE; | ||
| 17 | + | ||
| 18 | + free (bbs); | ||
| 19 | } | ||
| 20 | |||
| 21 | /* Return true if all the BBs of the loop are empty except the | ||
| 22 | |||
