diff options
Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch')
-rw-r--r-- | meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch | 784 |
1 files changed, 784 insertions, 0 deletions
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch new file mode 100644 index 000000000..bb866ce8d --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch | |||
@@ -0,0 +1,784 @@ | |||
1 | 2011-03-24 Revital Eres <revital.eres@linaro.org> | ||
2 | |||
3 | gcc/ | ||
4 | * loop-doloop.c (doloop_condition_get): Support new form of | ||
5 | doloop pattern and use prev_nondebug_insn instead of PREV_INSN. | ||
6 | * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*". | ||
7 | (doloop_end): New. | ||
8 | * config/arm/arm.md (*addsi3_compare0): Remove "*". | ||
9 | * ddg.c (check_closing_branch_deps, get_node_of_insn_uid): | ||
10 | New functions. | ||
11 | (create_ddg): Pass sbitmap containing do-loop related | ||
12 | instructions instead of closing_branch_deps parameter and call | ||
13 | check_closing_branch_deps function. | ||
14 | * ddg.h (create_ddg): Adjust the function declaration. | ||
15 | * modulo-sched.c (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT | ||
16 | and redefine. | ||
17 | (doloop_register_get): Handle NONDEBUG_INSN_P. | ||
18 | (stage_count): New field in struct partial_schedule. | ||
19 | (mark_doloop_insns, calculate_stage_count): New functions. | ||
20 | (normalize_sched_times): Rename to reset_sched_times and handle | ||
21 | incrementing the sched time of the nodes by a constant value | ||
22 | passed as parameter. | ||
23 | (duplicate_insns_of_cycles): Skip closing branch. | ||
24 | (sms_schedule_by_order): Schedule closing branch when | ||
25 | closing_branch_deps is true. | ||
26 | (ps_insn_find_column): Handle closing branch. | ||
27 | (sms_schedule): Call reset_sched_times and handle case where | ||
28 | do-loop pattern is not decoupled from the other loop instructions. | ||
29 | Support new form of doloop pattern. | ||
30 | (ps_insert_empty_row): Update calls to normalize_sched_times | ||
31 | and rotate_partial_schedule functions. | ||
32 | |||
33 | === modified file 'gcc/config/arm/arm.md' | ||
34 | --- old/gcc/config/arm/arm.md 2011-03-11 14:26:34 +0000 | ||
35 | +++ new/gcc/config/arm/arm.md 2011-03-24 07:45:38 +0000 | ||
36 | @@ -734,7 +734,7 @@ | ||
37 | "" | ||
38 | ) | ||
39 | |||
40 | -(define_insn "*addsi3_compare0" | ||
41 | +(define_insn "addsi3_compare0" | ||
42 | [(set (reg:CC_NOOV CC_REGNUM) | ||
43 | (compare:CC_NOOV | ||
44 | (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") | ||
45 | |||
46 | === modified file 'gcc/config/arm/thumb2.md' | ||
47 | --- old/gcc/config/arm/thumb2.md 2011-02-08 10:51:58 +0000 | ||
48 | +++ new/gcc/config/arm/thumb2.md 2011-03-24 07:45:38 +0000 | ||
49 | @@ -1194,7 +1194,7 @@ | ||
50 | (set_attr "length" "2")] | ||
51 | ) | ||
52 | |||
53 | -(define_insn "*thumb2_addsi3_compare0" | ||
54 | +(define_insn "thumb2_addsi3_compare0" | ||
55 | [(set (reg:CC_NOOV CC_REGNUM) | ||
56 | (compare:CC_NOOV | ||
57 | (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") | ||
58 | @@ -1445,3 +1445,56 @@ | ||
59 | [(set_attr "length" "4,4,16") | ||
60 | (set_attr "predicable" "yes")] | ||
61 | ) | ||
62 | + | ||
63 | + | ||
64 | +;; Expand doloop_end as a flag-setting add of -1 to the loop pseudo followed by a | ||
65 | +;; jump to the label while the result is nonzero. FAILs unless optimizing with flag_modulo_sched, loop level <= 1 and an SImode pseudo. | ||
66 | +(define_expand "doloop_end" | ||
67 | + [(use (match_operand 0 "" "")) ; loop pseudo | ||
68 | + (use (match_operand 1 "" "")) ; iterations; zero if unknown | ||
69 | + (use (match_operand 2 "" "")) ; max iterations | ||
70 | + (use (match_operand 3 "" "")) ; loop level | ||
71 | + (use (match_operand 4 "" ""))] ; label | ||
72 | + "TARGET_32BIT" | ||
73 | + " | ||
74 | + { | ||
75 | + /* Currently SMS relies on the do-loop pattern to recognize loops | ||
76 | + where (1) the control part consists of all insns defining and/or | ||
77 | + using a certain 'count' register and (2) the loop count can be | ||
78 | + adjusted by modifying this register prior to the loop. | ||
79 | + ??? The possible introduction of a new block to initialize the | ||
80 | + new IV can potentially affect branch optimizations. */ | ||
81 | + if (optimize > 0 && flag_modulo_sched) | ||
82 | + { | ||
83 | + rtx s0; | ||
84 | + rtx bcomp; | ||
85 | + rtx loc_ref; | ||
86 | + rtx cc_reg; | ||
87 | + rtx insn; | ||
88 | + rtx cmp; | ||
89 | + | ||
90 | + /* Only use this on innermost loops. */ | ||
91 | + if (INTVAL (operands[3]) > 1) | ||
92 | + FAIL; | ||
93 | + | ||
94 | + if (GET_MODE (operands[0]) != SImode) | ||
95 | + FAIL; | ||
96 | + | ||
97 | + s0 = operands [0]; | ||
98 | + if (TARGET_THUMB2) | ||
99 | + insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); | ||
100 | + else | ||
101 | + insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); | ||
102 | + | ||
103 | + cmp = XVECEXP (PATTERN (insn), 0, 0); | ||
104 | + cc_reg = SET_DEST (cmp); | ||
105 | + bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); | ||
106 | + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]); | ||
107 | + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, | ||
108 | + gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, | ||
109 | + loc_ref, pc_rtx))); | ||
110 | + DONE; | ||
111 | + }else | ||
112 | + FAIL; | ||
113 | + }") | ||
114 | + | ||
115 | |||
116 | === modified file 'gcc/ddg.c' | ||
117 | --- old/gcc/ddg.c 2010-07-19 08:58:53 +0000 | ||
118 | +++ new/gcc/ddg.c 2011-03-24 07:45:38 +0000 | ||
119 | @@ -60,6 +60,8 @@ | ||
120 | static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type, | ||
121 | dep_data_type, int, int); | ||
122 | static void add_edge_to_ddg (ddg_ptr g, ddg_edge_ptr); | ||
123 | +static ddg_node_ptr get_node_of_insn_uid (ddg_ptr, int); | ||
124 | + | ||
125 | |||
126 | /* Auxiliary variable for mem_read_insn_p/mem_write_insn_p. */ | ||
127 | static bool mem_ref_p; | ||
128 | @@ -450,12 +452,65 @@ | ||
129 | sched_free_deps (head, tail, false); | ||
130 | } | ||
131 | |||
132 | +/* DOLOOP_INSNS is a bitmap, indexed by insn uid, of the insns that form the | ||
133 | + do-loop part. Set G->closing_branch_deps to 1 if some dependence edge of G | ||
134 | + links one of those insns with a non-debug insn outside DOLOOP_INSNS (an output | ||
135 | + dependence from a memory write to the jump does not count); else set it to 0. */ | ||
136 | +static void | ||
137 | +check_closing_branch_deps (ddg_ptr g, sbitmap doloop_insns) | ||
138 | +{ | ||
139 | + sbitmap_iterator sbi; | ||
140 | + unsigned int u = 0; | ||
141 | + | ||
142 | + EXECUTE_IF_SET_IN_SBITMAP (doloop_insns, 0, u, sbi) | ||
143 | + { | ||
144 | + ddg_edge_ptr e; | ||
145 | + ddg_node_ptr u_node = get_node_of_insn_uid (g, u); | ||
146 | + | ||
147 | + gcc_assert (u_node); | ||
148 | + | ||
149 | + for (e = u_node->in; e != 0; e = e->next_in) /* Incoming edges. */ | ||
150 | + { | ||
151 | + ddg_node_ptr v_node = e->src; | ||
152 | + | ||
153 | + if (((unsigned int) INSN_UID (v_node->insn) == u) | ||
154 | + || DEBUG_INSN_P (v_node->insn)) | ||
155 | + continue; | ||
156 | + | ||
157 | + /* Ignore output dependencies from memory writes to the | ||
158 | + jump. */ | ||
159 | + if (JUMP_P (u_node->insn) | ||
160 | + && e->type == OUTPUT_DEP | ||
161 | + && mem_write_insn_p (v_node->insn)) | ||
162 | + continue; | ||
163 | + if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) | ||
164 | + { | ||
165 | + g->closing_branch_deps = 1; | ||
166 | + return; | ||
167 | + } | ||
168 | + } | ||
169 | + for (e = u_node->out; e != 0; e = e->next_out) /* Outgoing edges. */ | ||
170 | + { | ||
171 | + ddg_node_ptr v_node = e->dest; | ||
172 | + | ||
173 | + if (((unsigned int) INSN_UID (v_node->insn) == u) | ||
174 | + || DEBUG_INSN_P (v_node->insn)) | ||
175 | + continue; | ||
176 | + if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) | ||
177 | + { | ||
178 | + g->closing_branch_deps = 1; | ||
179 | + return; | ||
180 | + } | ||
181 | + } | ||
182 | + } | ||
183 | + g->closing_branch_deps = 0; | ||
184 | +} | ||
185 | |||
186 | /* Given a basic block, create its DDG and return a pointer to a variable | ||
187 | of ddg type that represents it. | ||
188 | Initialize the ddg structure fields to the appropriate values. */ | ||
189 | ddg_ptr | ||
190 | -create_ddg (basic_block bb, int closing_branch_deps) | ||
191 | +create_ddg (basic_block bb, sbitmap doloop_insns) | ||
192 | { | ||
193 | ddg_ptr g; | ||
194 | rtx insn, first_note; | ||
195 | @@ -465,7 +520,6 @@ | ||
196 | g = (ddg_ptr) xcalloc (1, sizeof (struct ddg)); | ||
197 | |||
198 | g->bb = bb; | ||
199 | - g->closing_branch_deps = closing_branch_deps; | ||
200 | |||
201 | /* Count the number of insns in the BB. */ | ||
202 | for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); | ||
203 | @@ -538,6 +592,11 @@ | ||
204 | /* Build the data dependency graph. */ | ||
205 | build_intra_loop_deps (g); | ||
206 | build_inter_loop_deps (g); | ||
207 | + | ||
208 | + /* Check whether the do-loop part is decoupled from the other loop | ||
209 | + instructions. */ | ||
210 | + check_closing_branch_deps (g, doloop_insns); | ||
211 | + | ||
212 | return g; | ||
213 | } | ||
214 | |||
215 | @@ -831,6 +890,18 @@ | ||
216 | return NULL; | ||
217 | } | ||
218 | |||
219 | +/* Return the node of G whose insn has uid UID, or NULL if G has no such node. */ | ||
220 | +static ddg_node_ptr | ||
221 | +get_node_of_insn_uid (ddg_ptr g, int uid) | ||
222 | +{ | ||
223 | + int i; | ||
224 | + | ||
225 | + for (i = 0; i < g->num_nodes; i++) | ||
226 | + if (uid == INSN_UID (g->nodes[i].insn)) | ||
227 | + return &g->nodes[i]; | ||
228 | + return NULL; | ||
229 | +} | ||
230 | + | ||
231 | /* Given a set OPS of nodes in the DDG, find the set of their successors | ||
232 | which are not in OPS, and set their bits in SUCC. Bits corresponding to | ||
233 | OPS are cleared from SUCC. Leaves the other bits in SUCC unchanged. */ | ||
234 | |||
235 | === modified file 'gcc/ddg.h' | ||
236 | --- old/gcc/ddg.h 2009-11-25 10:55:54 +0000 | ||
237 | +++ new/gcc/ddg.h 2011-03-24 07:45:38 +0000 | ||
238 | @@ -167,7 +167,7 @@ | ||
239 | }; | ||
240 | |||
241 | |||
242 | -ddg_ptr create_ddg (basic_block, int closing_branch_deps); | ||
243 | +ddg_ptr create_ddg (basic_block, sbitmap); | ||
244 | void free_ddg (ddg_ptr); | ||
245 | |||
246 | void print_ddg (FILE *, ddg_ptr); | ||
247 | |||
248 | === modified file 'gcc/loop-doloop.c' | ||
249 | --- old/gcc/loop-doloop.c 2010-07-19 08:58:53 +0000 | ||
250 | +++ new/gcc/loop-doloop.c 2011-03-24 07:45:38 +0000 | ||
251 | @@ -78,6 +78,8 @@ | ||
252 | rtx inc_src; | ||
253 | rtx condition; | ||
254 | rtx pattern; | ||
255 | + rtx cc_reg = NULL_RTX; | ||
256 | + rtx reg_orig = NULL_RTX; | ||
257 | |||
258 | /* The canonical doloop pattern we expect has one of the following | ||
259 | forms: | ||
260 | @@ -96,7 +98,16 @@ | ||
261 | 2) (set (reg) (plus (reg) (const_int -1)) | ||
262 | (set (pc) (if_then_else (reg != 0) | ||
263 | (label_ref (label)) | ||
264 | - (pc))). */ | ||
265 | + (pc))). | ||
266 | + | ||
267 | + Some targets (ARM) do the comparison before the branch, as in the | ||
268 | + following form: | ||
269 | + | ||
270 | + 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) | ||
271 | + (set (reg) (plus (reg) (const_int -1)))]) | ||
272 | + (set (pc) (if_then_else (cc == NE) | ||
273 | + (label_ref (label)) | ||
274 | + (pc))) */ | ||
275 | |||
276 | pattern = PATTERN (doloop_pat); | ||
277 | |||
278 | @@ -104,19 +115,47 @@ | ||
279 | { | ||
280 | rtx cond; | ||
281 | rtx prev_insn = prev_nondebug_insn (doloop_pat); | ||
282 | + rtx cmp_arg1, cmp_arg2; | ||
283 | + rtx cmp_orig; | ||
284 | |||
285 | - /* We expect the decrement to immediately precede the branch. */ | ||
286 | + /* In case the pattern is not PARALLEL we expect two forms | ||
287 | + of doloop which are cases 2) and 3) above: in case 2) the | ||
288 | + decrement immediately precedes the branch, while in case 3) | ||
289 | + the compare and decrement instructions immediately precede | ||
290 | + the branch. */ | ||
291 | |||
292 | if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) | ||
293 | return 0; | ||
294 | |||
295 | cmp = pattern; | ||
296 | - inc = PATTERN (PREV_INSN (doloop_pat)); | ||
297 | + if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) | ||
298 | + { | ||
299 | + /* The third case: the compare and decrement instructions | ||
300 | + immediately precede the branch. */ | ||
301 | + cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); | ||
302 | + if (GET_CODE (cmp_orig) != SET) | ||
303 | + return 0; | ||
304 | + if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) | ||
305 | + return 0; | ||
306 | + cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); | ||
307 | + cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); | ||
308 | + if (cmp_arg2 != const0_rtx | ||
309 | + || GET_CODE (cmp_arg1) != PLUS) | ||
310 | + return 0; | ||
311 | + reg_orig = XEXP (cmp_arg1, 0); | ||
312 | + if (XEXP (cmp_arg1, 1) != GEN_INT (-1) | ||
313 | + || !REG_P (reg_orig)) | ||
314 | + return 0; | ||
315 | + cc_reg = SET_DEST (cmp_orig); | ||
316 | + | ||
317 | + inc = XVECEXP (PATTERN (prev_insn), 0, 1); | ||
318 | + } | ||
319 | + else | ||
320 | + inc = PATTERN (prev_insn); | ||
321 | /* We expect the condition to be of the form (reg != 0) */ | ||
322 | cond = XEXP (SET_SRC (cmp), 0); | ||
323 | if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) | ||
324 | return 0; | ||
325 | - | ||
326 | } | ||
327 | else | ||
328 | { | ||
329 | @@ -162,11 +201,15 @@ | ||
330 | return 0; | ||
331 | |||
332 | if ((XEXP (condition, 0) == reg) | ||
333 | + /* For the third case: */ | ||
334 | + || ((cc_reg != NULL_RTX) | ||
335 | + && (XEXP (condition, 0) == cc_reg) | ||
336 | + && (reg_orig == reg)) | ||
337 | || (GET_CODE (XEXP (condition, 0)) == PLUS | ||
338 | - && XEXP (XEXP (condition, 0), 0) == reg)) | ||
339 | + && XEXP (XEXP (condition, 0), 0) == reg)) | ||
340 | { | ||
341 | if (GET_CODE (pattern) != PARALLEL) | ||
342 | - /* The second form we expect: | ||
343 | + /* For the second form we expect: | ||
344 | |||
345 | (set (reg) (plus (reg) (const_int -1)) | ||
346 | (set (pc) (if_then_else (reg != 0) | ||
347 | @@ -181,7 +224,24 @@ | ||
348 | (set (reg) (plus (reg) (const_int -1))) | ||
349 | (additional clobbers and uses)]) | ||
350 | |||
351 | - So we return that form instead. | ||
352 | + For the third form we expect: | ||
353 | + | ||
354 | + (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) | ||
355 | + (set (reg) (plus (reg) (const_int -1)))]) | ||
356 | + (set (pc) (if_then_else (cc == NE) | ||
357 | + (label_ref (label)) | ||
358 | + (pc))) | ||
359 | + | ||
360 | + which is equivalent to the following: | ||
361 | + | ||
362 | + (parallel [(set (cc) (compare (reg, 1)) | ||
363 | + (set (reg) (plus (reg) (const_int -1))) | ||
364 | + (set (pc) (if_then_else (NE == cc) | ||
365 | + (label_ref (label)) | ||
366 | + (pc))))]) | ||
367 | + | ||
368 | + So we return the second form instead for the two cases. | ||
369 | + | ||
370 | */ | ||
371 | condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); | ||
372 | |||
373 | |||
374 | === modified file 'gcc/modulo-sched.c' | ||
375 | --- old/gcc/modulo-sched.c 2009-11-25 10:55:54 +0000 | ||
376 | +++ new/gcc/modulo-sched.c 2011-03-24 07:45:38 +0000 | ||
377 | @@ -116,8 +116,10 @@ | ||
378 | |||
379 | /* The number of different iterations the nodes in ps span, assuming | ||
380 | the stage boundaries are placed efficiently. */ | ||
381 | -#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ | ||
382 | - + 1 + (ps)->ii - 1) / (ps)->ii) | ||
383 | +#define CALC_STAGE_COUNT(min_cycle,max_cycle,ii) (((max_cycle) - (min_cycle) \ | ||
384 | + + 1 + (ii) - 1) / (ii)) /* (max - min + 1) / ii, rounded up for a positive span. */ | ||
385 | +/* The stage count recorded in PS; expands to the stage_count field, so it is assignable. */ | ||
386 | +#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) | ||
387 | |||
388 | /* A single instruction in the partial schedule. */ | ||
389 | struct ps_insn | ||
390 | @@ -155,6 +157,8 @@ | ||
391 | int max_cycle; | ||
392 | |||
393 | ddg_ptr g; /* The DDG of the insns in the partial schedule. */ | ||
394 | + | ||
395 | + int stage_count; /* The stage count of the partial schedule. */ | ||
396 | }; | ||
397 | |||
398 | /* We use this to record all the register replacements we do in | ||
399 | @@ -195,6 +199,7 @@ | ||
400 | rtx, rtx); | ||
401 | static void duplicate_insns_of_cycles (partial_schedule_ptr, | ||
402 | int, int, int, rtx); | ||
403 | +static int calculate_stage_count (partial_schedule_ptr ps); | ||
404 | |||
405 | #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) | ||
406 | #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) | ||
407 | @@ -310,10 +315,10 @@ | ||
408 | either a single (parallel) branch-on-count or a (non-parallel) | ||
409 | branch immediately preceded by a single (decrement) insn. */ | ||
410 | first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail | ||
411 | - : PREV_INSN (tail)); | ||
412 | + : prev_nondebug_insn (tail)); | ||
413 | |||
414 | for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) | ||
415 | - if (reg_mentioned_p (reg, insn)) | ||
416 | + if (reg_mentioned_p (reg, insn) && NONDEBUG_INSN_P (insn)) | ||
417 | { | ||
418 | if (dump_file) | ||
419 | { | ||
420 | @@ -332,6 +337,24 @@ | ||
421 | #endif | ||
422 | } | ||
423 | |||
424 | +/* Clear DOLOOP_INSNS, then set the uid bit of each non-debug insn of the do-loop part: | ||
425 | + TAIL alone if its pattern is a PARALLEL, else TAIL and the non-debug insn before it. */ | ||
426 | +static void | ||
427 | +mark_doloop_insns (sbitmap doloop_insns, rtx tail) | ||
428 | +{ | ||
429 | + rtx first_insn_not_to_check, insn; | ||
430 | + | ||
431 | + /* This is the first instruction that belongs to the do-loop part. */ | ||
432 | + first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail | ||
433 | + : prev_nondebug_insn (tail)); | ||
434 | + | ||
435 | + sbitmap_zero (doloop_insns); | ||
436 | + for (insn = first_insn_not_to_check; insn != NEXT_INSN (tail); | ||
437 | + insn = NEXT_INSN (insn)) | ||
438 | + if (NONDEBUG_INSN_P (insn)) | ||
439 | + SET_BIT (doloop_insns, INSN_UID (insn)); | ||
440 | +} | ||
441 | + | ||
442 | /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so | ||
443 | that the number of iterations is a compile-time constant. If so, | ||
444 | return the rtx that sets COUNT_REG to a constant, and set COUNT to | ||
445 | @@ -569,13 +592,12 @@ | ||
446 | } | ||
447 | } | ||
448 | |||
449 | -/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values | ||
450 | - of SCHED_ROW and SCHED_STAGE. */ | ||
451 | +/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of | ||
452 | + SCHED_ROW and SCHED_STAGE. */ | ||
453 | static void | ||
454 | -normalize_sched_times (partial_schedule_ptr ps) | ||
455 | +reset_sched_times (partial_schedule_ptr ps, int amount) | ||
456 | { | ||
457 | int row; | ||
458 | - int amount = PS_MIN_CYCLE (ps); | ||
459 | int ii = ps->ii; | ||
460 | ps_insn_ptr crr_insn; | ||
461 | |||
462 | @@ -584,6 +606,10 @@ | ||
463 | { | ||
464 | ddg_node_ptr u = crr_insn->node; | ||
465 | int normalized_time = SCHED_TIME (u) - amount; | ||
466 | + int new_min_cycle = PS_MIN_CYCLE (ps) - amount; | ||
467 | + /* The first cycle in row zero after the rotation. */ | ||
468 | + int new_first_cycle_in_row_zero = | ||
469 | + new_min_cycle + ii - SMODULO (new_min_cycle, ii); | ||
470 | |||
471 | if (dump_file) | ||
472 | fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ | ||
473 | @@ -592,8 +618,30 @@ | ||
474 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | ||
475 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | ||
476 | SCHED_TIME (u) = normalized_time; | ||
477 | - SCHED_ROW (u) = normalized_time % ii; | ||
478 | - SCHED_STAGE (u) = normalized_time / ii; | ||
479 | + crr_insn->cycle = normalized_time; | ||
480 | + SCHED_ROW (u) = SMODULO (normalized_time, ii); | ||
481 | + | ||
482 | + /* If min_cycle is in row zero after the rotation then | ||
483 | + the stage count can be calculated by dividing the cycle | ||
484 | + with ii. Otherwise, the calculation is done by dividing the | ||
485 | + SMSed kernel into two intervals: | ||
486 | + | ||
487 | + 1) min_cycle <= interval 0 < first_cycle_in_row_zero | ||
488 | + 2) first_cycle_in_row_zero <= interval 1 < max_cycle | ||
489 | + | ||
490 | + Cycles in interval 0 are in stage 0. The stage of cycles | ||
491 | + in interval 1 should be added by 1 to take interval 0 into | ||
492 | + account. */ | ||
493 | + if (SMODULO (new_min_cycle, ii) == 0) | ||
494 | + SCHED_STAGE (u) = normalized_time / ii; | ||
495 | + else | ||
496 | + { | ||
497 | + if (crr_insn->cycle < new_first_cycle_in_row_zero) | ||
498 | + SCHED_STAGE (u) = 0; | ||
499 | + else | ||
500 | + SCHED_STAGE (u) = | ||
501 | + ((SCHED_TIME (u) - new_first_cycle_in_row_zero) / ii) + 1; | ||
502 | + } | ||
503 | } | ||
504 | } | ||
505 | |||
506 | @@ -646,9 +694,12 @@ | ||
507 | |||
508 | /* Do not duplicate any insn which refers to count_reg as it | ||
509 | belongs to the control part. | ||
510 | + If closing_branch_deps is true the closing branch is scheduled | ||
511 | + as well and thus should be ignored. | ||
512 | TODO: This should be done by analyzing the control part of | ||
513 | the loop. */ | ||
514 | - if (reg_mentioned_p (count_reg, u_node->insn)) | ||
515 | + if (reg_mentioned_p (count_reg, u_node->insn) | ||
516 | + || JUMP_P (ps_ij->node->insn)) | ||
517 | continue; | ||
518 | |||
519 | if (for_prolog) | ||
520 | @@ -894,7 +945,8 @@ | ||
521 | basic_block condition_bb = NULL; | ||
522 | edge latch_edge; | ||
523 | gcov_type trip_count = 0; | ||
524 | - | ||
525 | + sbitmap doloop_insns; | ||
526 | + | ||
527 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS | ||
528 | | LOOPS_HAVE_RECORDED_EXITS); | ||
529 | if (number_of_loops () <= 1) | ||
530 | @@ -919,6 +971,7 @@ | ||
531 | setup_sched_infos (); | ||
532 | haifa_sched_init (); | ||
533 | |||
534 | + doloop_insns = sbitmap_alloc (get_max_uid () + 1); | ||
535 | /* Allocate memory to hold the DDG array one entry for each loop. | ||
536 | We use loop->num as index into this array. */ | ||
537 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops ()); | ||
538 | @@ -1009,9 +1062,11 @@ | ||
539 | continue; | ||
540 | } | ||
541 | |||
542 | - /* Don't handle BBs with calls or barriers, or !single_set insns, | ||
543 | - or auto-increment insns (to avoid creating invalid reg-moves | ||
544 | - for the auto-increment insns). | ||
545 | + /* Don't handle BBs with calls or barriers or auto-increment insns | ||
546 | + (to avoid creating invalid reg-moves for the auto-increment insns), | ||
547 | + or !single_set with the exception of instructions that include | ||
548 | + count_reg---these instructions are part of the control part | ||
549 | + that do-loop recognizes. | ||
550 | ??? Should handle auto-increment insns. | ||
551 | ??? Should handle insns defining subregs. */ | ||
552 | for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | ||
553 | @@ -1021,7 +1076,8 @@ | ||
554 | if (CALL_P (insn) | ||
555 | || BARRIER_P (insn) | ||
556 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | ||
557 | - && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) | ||
558 | + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE | ||
559 | + && !reg_mentioned_p (count_reg, insn)) | ||
560 | || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) | ||
561 | || (INSN_P (insn) && (set = single_set (insn)) | ||
562 | && GET_CODE (SET_DEST (set)) == SUBREG)) | ||
563 | @@ -1048,14 +1104,16 @@ | ||
564 | |||
565 | continue; | ||
566 | } | ||
567 | - | ||
568 | - if (! (g = create_ddg (bb, 0))) | ||
569 | + mark_doloop_insns (doloop_insns, tail); | ||
570 | + if (! (g = create_ddg (bb, doloop_insns))) | ||
571 | { | ||
572 | if (dump_file) | ||
573 | fprintf (dump_file, "SMS create_ddg failed\n"); | ||
574 | continue; | ||
575 | } | ||
576 | - | ||
577 | + if (dump_file) | ||
578 | + fprintf (dump_file, "SMS closing_branch_deps: %d\n", | ||
579 | + g->closing_branch_deps); | ||
580 | g_arr[loop->num] = g; | ||
581 | if (dump_file) | ||
582 | fprintf (dump_file, "...OK\n"); | ||
583 | @@ -1157,11 +1215,13 @@ | ||
584 | |||
585 | ps = sms_schedule_by_order (g, mii, maxii, node_order); | ||
586 | |||
587 | - if (ps){ | ||
588 | - stage_count = PS_STAGE_COUNT (ps); | ||
589 | - gcc_assert(stage_count >= 1); | ||
590 | - } | ||
591 | - | ||
592 | + if (ps) | ||
593 | + { | ||
594 | + stage_count = calculate_stage_count (ps); | ||
595 | + gcc_assert(stage_count >= 1); | ||
596 | + PS_STAGE_COUNT(ps) = stage_count; | ||
597 | + } | ||
598 | + | ||
599 | /* Stage count of 1 means that there is no interleaving between | ||
600 | iterations, let the scheduling passes do the job. */ | ||
601 | if (stage_count <= 1 | ||
602 | @@ -1182,17 +1242,7 @@ | ||
603 | else | ||
604 | { | ||
605 | struct undo_replace_buff_elem *reg_move_replaces; | ||
606 | - | ||
607 | - if (dump_file) | ||
608 | - { | ||
609 | - fprintf (dump_file, | ||
610 | - "SMS succeeded %d %d (with ii, sc)\n", ps->ii, | ||
611 | - stage_count); | ||
612 | - print_partial_schedule (ps, dump_file); | ||
613 | - fprintf (dump_file, | ||
614 | - "SMS Branch (%d) will later be scheduled at cycle %d.\n", | ||
615 | - g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); | ||
616 | - } | ||
617 | + int amount; | ||
618 | |||
619 | /* Set the stage boundaries. If the DDG is built with closing_branch_deps, | ||
620 | the closing_branch was scheduled and should appear in the last (ii-1) | ||
621 | @@ -1202,12 +1252,28 @@ | ||
622 | TODO: Revisit the issue of scheduling the insns of the | ||
623 | control part relative to the branch when the control part | ||
624 | has more than one insn. */ | ||
625 | - normalize_sched_times (ps); | ||
626 | - rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | ||
627 | + amount = (g->closing_branch_deps)? SCHED_TIME (g->closing_branch) + 1: | ||
628 | + PS_MIN_CYCLE (ps); | ||
629 | + reset_sched_times (ps, amount); | ||
630 | + rotate_partial_schedule (ps, amount); | ||
631 | + | ||
632 | set_columns_for_ps (ps); | ||
633 | |||
634 | canon_loop (loop); | ||
635 | |||
636 | + if (dump_file) | ||
637 | + { | ||
638 | + fprintf (dump_file, | ||
639 | + "SMS succeeded %d %d (with ii, sc)\n", ps->ii, | ||
640 | + stage_count); | ||
641 | + print_partial_schedule (ps, dump_file); | ||
642 | + if (!g->closing_branch_deps) | ||
643 | + fprintf (dump_file, | ||
644 | + "SMS Branch (%d) will later be scheduled at \ | ||
645 | + cycle %d.\n", | ||
646 | + g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); | ||
647 | + } | ||
648 | + | ||
649 | /* case the BCT count is not known , Do loop-versioning */ | ||
650 | if (count_reg && ! count_init) | ||
651 | { | ||
652 | @@ -1252,6 +1318,7 @@ | ||
653 | } | ||
654 | |||
655 | free (g_arr); | ||
656 | + sbitmap_free (doloop_insns); | ||
657 | |||
658 | /* Release scheduler data, needed until now because of DFA. */ | ||
659 | haifa_sched_finish (); | ||
660 | @@ -1759,8 +1826,9 @@ | ||
661 | RESET_BIT (tobe_scheduled, u); | ||
662 | continue; | ||
663 | } | ||
664 | - | ||
665 | - if (JUMP_P (insn)) /* Closing branch handled later. */ | ||
666 | + /* Closing branch handled later unless closing_branch_deps | ||
667 | + is true. */ | ||
668 | + if (JUMP_P (insn) && !g->closing_branch_deps) | ||
669 | { | ||
670 | RESET_BIT (tobe_scheduled, u); | ||
671 | continue; | ||
672 | @@ -1893,8 +1961,8 @@ | ||
673 | if (dump_file) | ||
674 | fprintf (dump_file, "split_row=%d\n", split_row); | ||
675 | |||
676 | - normalize_sched_times (ps); | ||
677 | - rotate_partial_schedule (ps, ps->min_cycle); | ||
678 | + reset_sched_times (ps, PS_MIN_CYCLE (ps)); | ||
679 | + rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); | ||
680 | |||
681 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | ||
682 | for (row = 0; row < split_row; row++) | ||
683 | @@ -2571,6 +2639,7 @@ | ||
684 | ps_insn_ptr next_ps_i; | ||
685 | ps_insn_ptr first_must_follow = NULL; | ||
686 | ps_insn_ptr last_must_precede = NULL; | ||
687 | + ps_insn_ptr last_in_row = NULL; | ||
688 | int row; | ||
689 | |||
690 | if (! ps_i) | ||
691 | @@ -2597,8 +2666,37 @@ | ||
692 | else | ||
693 | last_must_precede = next_ps_i; | ||
694 | } | ||
695 | + /* The closing branch must be the last in the row. */ | ||
696 | + if (must_precede | ||
697 | + && TEST_BIT (must_precede, next_ps_i->node->cuid) | ||
698 | + && JUMP_P (next_ps_i->node->insn)) | ||
699 | + return false; | ||
700 | + | ||
701 | + last_in_row = next_ps_i; | ||
702 | } | ||
703 | |||
704 | + /* If closing_branch_deps is true we are scheduling the closing | ||
705 | + branch as well. Make sure there is no dependent instruction after | ||
706 | + it as the branch should be the last instruction. */ | ||
707 | + if (JUMP_P (ps_i->node->insn)) | ||
708 | + { | ||
709 | + if (first_must_follow) | ||
710 | + return false; | ||
711 | + if (last_in_row) | ||
712 | + { | ||
713 | + /* Make the branch the last in the row. New instructions | ||
714 | + will be inserted at the beginning of the row or after the | ||
715 | + last must_precede instruction thus the branch is guaranteed | ||
716 | + to remain the last instruction in the row. */ | ||
717 | + last_in_row->next_in_row = ps_i; | ||
718 | + ps_i->prev_in_row = last_in_row; | ||
719 | + ps_i->next_in_row = NULL; | ||
720 | + } | ||
721 | + else | ||
722 | + ps->rows[row] = ps_i; | ||
723 | + return true; | ||
724 | + } | ||
725 | + | ||
726 | /* Now insert the node after INSERT_AFTER_PSI. */ | ||
727 | |||
728 | if (! last_must_precede) | ||
729 | @@ -2820,6 +2918,54 @@ | ||
730 | return ps_i; | ||
731 | } | ||
732 | |||
733 | +/* Return the stage count of the partial schedule PS, allowing for the rotation that is applied when the closing branch is scheduled too. */ | ||
734 | +static int | ||
735 | +calculate_stage_count (partial_schedule_ptr ps) | ||
736 | +{ | ||
737 | + int stage_count; | ||
738 | + | ||
739 | + /* If closing_branch_deps is false then the stage | ||
740 | + boundaries are placed efficiently, meaning that min_cycle will be | ||
741 | + placed at row 0. Otherwise, the closing branch will be placed in | ||
742 | + row ii-1. For the latter case we assume the final SMSed kernel can | ||
743 | + be divided into two intervals. This assumption is used for the | ||
744 | + stage count calculation: | ||
745 | + | ||
746 | + 1) min_cycle <= interval 0 < first_cycle_in_row_zero | ||
747 | + 2) first_cycle_in_row_zero <= interval 1 < max_cycle | ||
748 | + */ | ||
749 | + stage_count = | ||
750 | + CALC_STAGE_COUNT (PS_MIN_CYCLE (ps), PS_MAX_CYCLE (ps), ps->ii); | ||
751 | + if (ps->g->closing_branch_deps) | ||
752 | + { | ||
753 | + int new_min_cycle; | ||
754 | + int new_min_cycle_row; | ||
755 | + int rotation_amount = SCHED_TIME (ps->g->closing_branch) + 1; | ||
756 | + | ||
757 | + /* This is the new value of min_cycle after the final rotation to | ||
758 | + bring closing branch into row ii-1. */ | ||
759 | + new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; | ||
760 | + /* This is the row in which the new min_cycle will be placed. */ | ||
761 | + new_min_cycle_row = SMODULO (new_min_cycle, ps->ii); | ||
762 | + /* If the row of min_cycle is zero then interval 0 is empty. | ||
763 | + Otherwise, count the stages of interval 1 and add one | ||
764 | + to take interval 0 into account. */ | ||
765 | + if (new_min_cycle_row != 0) | ||
766 | + { | ||
767 | + int new_max_cycle, first_cycle_in_row_zero; | ||
768 | + | ||
769 | + new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; | ||
770 | + first_cycle_in_row_zero = | ||
771 | + new_min_cycle + ps->ii - new_min_cycle_row; | ||
772 | + | ||
773 | + stage_count = | ||
774 | + CALC_STAGE_COUNT (first_cycle_in_row_zero, new_max_cycle, | ||
775 | + ps->ii) + 1; | ||
776 | + } | ||
777 | + } | ||
778 | + return stage_count; | ||
779 | +} | ||
780 | + | ||
781 | /* Rotate the rows of PS such that insns scheduled at time | ||
782 | START_CYCLE will appear in row 0. Updates max/min_cycles. */ | ||
783 | void | ||
784 | |||