New target hook to calculate MII
[gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
51
52 #ifdef INSN_SCHEDULING
53
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
86
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises of all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
101 \f
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   NOTE: X and Y are each evaluated more than once -- do not pass
   arguments with side effects.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is the ceiling
   of (max_cycle - min_cycle + 1) divided by ii.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
121
/* A single instruction in the partial schedule.  Instances are linked
   into per-row doubly-linked lists hanging off partial_schedule::rows.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};
139
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
159
/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.  */
struct undo_replace_buff_elem
{
  /* The insn in which a register was replaced.  */
  rtx insn;
  /* The register INSN originally used.  */
  rtx orig_reg;
  /* The fresh register that replaced ORIG_REG in INSN.  */
  rtx new_reg;
  /* Next element of the singly-linked undo list.  */
  struct undo_replace_buff_elem *next;
};
169
170
171
172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
173 static void free_partial_schedule (partial_schedule_ptr);
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
175 void print_partial_schedule (partial_schedule_ptr, FILE *);
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
178 ddg_node_ptr node, int cycle,
179 sbitmap must_precede,
180 sbitmap must_follow);
181 static void rotate_partial_schedule (partial_schedule_ptr, int);
182 void set_row_column_for_ps (partial_schedule_ptr);
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
185
186 \f
187 /* This page defines constants and structures for the modulo scheduling
188 driver. */
189
190 /* As in haifa-sched.c: */
191 /* issue_rate is the number of insns that can be scheduled in the same
192 machine cycle. It can be defined in the config/mach/mach.h file,
193 otherwise we set it to 1. */
194
195 static int issue_rate;
196
197 static int sms_order_nodes (ddg_ptr, int, int * result);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *loop,
202 rtx, rtx);
203 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
204 int from_stage, int to_stage,
205 int is_prolog, rtx count_reg);
206
/* Accessors for the node_sched_params record attached to a ddg node X
   through its aux.info field (set up by set_node_sched_params).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)

/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
  int time;	/* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */
  rtx first_reg_move;

  /* The number of register-move instructions added, immediately preceding
     first_reg_move.  */
  int nreg_moves;

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
240
241 \f
242 /* The following three functions are copied from the current scheduler
243 code in order to use sched_analyze() for computing the dependencies.
244 They are used when initializing the sched_info structure. */
245 static const char *
246 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
247 {
248 static char tmp[80];
249
250 sprintf (tmp, "i%4d", INSN_UID (insn));
251 return tmp;
252 }
253
/* Deliberately-empty implementation of the sched_info hook of the same
   name: SMS does not need to model registers implicitly used or set by
   jumps, so every parameter is ignored.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
261
/* Scheduler hook table used when sched_analyze computes dependencies
   for SMS.  Only print_insn (sms_print_insn) and
   compute_jump_reg_dependencies are supplied; the remaining slots are
   unused here and left NULL/0.
   NOTE(review): the initializers are positional, so each entry's
   meaning depends on the field order of struct sched_info in
   sched-int.h -- verify against that declaration before reordering.  */
static struct sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  compute_jump_reg_dependencies,
  NULL, NULL,
  NULL, NULL,
  0, 0, 0,

  NULL, NULL, NULL, NULL, NULL,
  0
};
279
280
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn;
  bool found = false;

  /* The control part must end in a branch.  */
  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The count register is either the bare first operand of the
     condition, or the register inside a (plus (reg) ...) decrement;
     doloop_condition_get is expected to return only these shapes.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  for (insn = head; insn != PREV_INSN (tail); insn = NEXT_INSN (insn))
    if ((found = reg_mentioned_p (reg, insn)) == true)
      break;
  if (found)
    {
      if (dump_file)
	fprintf (dump_file, "SMS count_reg found outside control\n");

      return NULL_RTX;
    }
  /* One last check in case the do-loop pattern is parallel: then the
     insn just before the branch (skipped by the loop above) must not
     mention the count register either.  */
  if (GET_CODE (PATTERN (tail)) == PARALLEL)
    if (reg_mentioned_p (reg, PREV_INSN (tail)))
      {
	if (dump_file)
	  fprintf (dump_file, "SMS count_reg found outside control\n");

	return NULL_RTX;
      }
  return reg;
#else
  return NULL_RTX;
#endif
}
336
337 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
338 that the number of iterations is a compile-time constant. If so,
339 return the rtx that sets COUNT_REG to a constant, and set COUNT to
340 this constant. Otherwise return 0. */
341 static rtx
342 const_iteration_count (rtx count_reg, basic_block pre_header,
343 HOST_WIDEST_INT * count)
344 {
345 rtx insn;
346 rtx head, tail;
347
348 if (! pre_header)
349 return NULL_RTX;
350
351 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
352
353 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
354 if (INSN_P (insn) && single_set (insn) &&
355 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
356 {
357 rtx pat = single_set (insn);
358
359 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
360 {
361 *count = INTVAL (SET_SRC (pat));
362 return insn;
363 }
364
365 return NULL_RTX;
366 }
367
368 return NULL_RTX;
369 }
370
371 /* A very simple resource-based lower bound on the initiation interval.
372 ??? Improve the accuracy of this bound by considering the
373 utilization of various units. */
374 static int
375 res_MII (ddg_ptr g)
376 {
377 if (targetm.sched.sms_res_mii)
378 return targetm.sched.sms_res_mii (g);
379
380 return (g->num_nodes / issue_rate);
381 }
382
383
384 /* Points to the array that contains the sched data for each node. */
385 static node_sched_params_ptr node_sched_params;
386
387 /* Allocate sched_params for each node and initialize it. Assumes that
388 the aux field of each node contain the asap bound (computed earlier),
389 and copies it into the sched_params field. */
390 static void
391 set_node_sched_params (ddg_ptr g)
392 {
393 int i;
394
395 /* Allocate for each node in the DDG a place to hold the "sched_data". */
396 /* Initialize ASAP/ALAP/HIGHT to zero. */
397 node_sched_params = (node_sched_params_ptr)
398 xcalloc (g->num_nodes,
399 sizeof (struct node_sched_params));
400
401 /* Set the pointer of the general data of the node to point to the
402 appropriate sched_params structure. */
403 for (i = 0; i < g->num_nodes; i++)
404 {
405 /* Watch out for aliasing problems? */
406 node_sched_params[i].asap = g->nodes[i].aux.count;
407 g->nodes[i].aux.info = &node_sched_params[i];
408 }
409 }
410
411 static void
412 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
413 {
414 int i;
415
416 if (! file)
417 return;
418 for (i = 0; i < num_nodes; i++)
419 {
420 node_sched_params_ptr nsp = &node_sched_params[i];
421 rtx reg_move = nsp->first_reg_move;
422 int j;
423
424 fprintf (file, "Node = %d; INSN = %d\n", i,
425 (INSN_UID (g->nodes[i].insn)));
426 fprintf (file, " asap = %d:\n", nsp->asap);
427 fprintf (file, " time = %d:\n", nsp->time);
428 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
429 for (j = 0; j < nsp->nreg_moves; j++)
430 {
431 fprintf (file, " reg_move = ");
432 print_rtl_single (file, reg_move);
433 reg_move = PREV_INSN (reg_move);
434 }
435 }
436 }
437
/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1.  Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.   The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                          { 1 if not.

   PS is the computed partial schedule; RESCAN says whether df should
   rescan each insn whose registers we replace.  Returns a list of the
   replacements performed so the caller can undo them if SMS turns out
   not to be profitable.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A distance-1 dependence reads the value produced one
	       iteration (ii cycles) earlier.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    /* The def needs enough copies to cover its longest-lived use.  */
	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Same computation as nreg_moves4e above: which copy
	       (1-based; 0 means the original def) this use reads.  */
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      last_reg_move = u->insn;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  /* Each move is inserted before the previous one, so the moves
	     end up in a chain immediately preceding u->insn; this is why
	     print/duplicate code walks them via PREV_INSN.  */
	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Redirect the uses of this copy to NEW_REG, recording each
	     replacement so it can be undone later.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
557
558 /* Free memory allocated for the undo buffer. */
559 static void
560 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
561 {
562
563 while (reg_move_replaces)
564 {
565 struct undo_replace_buff_elem *rep = reg_move_replaces;
566
567 reg_move_replaces = reg_move_replaces->next;
568 free (rep);
569 }
570 }
571
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  /* The earliest scheduled cycle; subtracting it makes time 0 the start.  */
  int amount = PS_MIN_CYCLE (ps);
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	ddg_node_ptr u = crr_insn->node;
	int normalized_time = SCHED_TIME (u) - amount;

	if (dump_file)
	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
		   min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
		   (u), ps->min_cycle);
	/* Every scheduled time must lie inside [min_cycle, max_cycle].  */
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
	SCHED_TIME (u) = normalized_time;
	SCHED_ROW (u) = normalized_time % ii;
	SCHED_STAGE (u) = normalized_time / ii;
      }
}
599
600 /* Set SCHED_COLUMN of each node according to its position in PS. */
601 static void
602 set_columns_for_ps (partial_schedule_ptr ps)
603 {
604 int row;
605
606 for (row = 0; row < ps->ii; row++)
607 {
608 ps_insn_ptr cur_insn = ps->rows[row];
609 int column = 0;
610
611 for (; cur_insn; cur_insn = cur_insn->next_in_row)
612 SCHED_COLUMN (cur_insn->node) = column++;
613 }
614 }
615
/* Permute the insns according to their order in PS, from row 0 to
   row ii-1, and position them right before LAST.  This schedules
   the insns of the loop kernel.  */
static void
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
{
  int ii = ps->ii;
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ii ; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      /* Skip the move if the insn is already in place just before LAST.
	 Each insn is moved together with its preceding notes;
	 NOTE(review): the _nobb variant presumably leaves basic-block
	 boundaries untouched -- confirm against emit-rtl.c.  */
      if (PREV_INSN (last) != ps_ij->node->insn)
	reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
			    PREV_INSN (last));
}
632
/* Emit, into the current sequence, copies of the scheduled insns whose
   SCHED_STAGE lies in [FROM_STAGE, TO_STAGE], preceded by the reg_moves
   each occurrence needs.  FOR_PROLOG selects the prolog policy for how
   many reg_moves to emit (otherwise the epilog policy is used); insns
   mentioning COUNT_REG are skipped since they belong to the loop
   control part.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  /* Walk row by row, column order within a row, so the copies appear
     in kernel order.  */
  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit the selected reg_moves, then the insn itself (with its
	   notes) when its stage is in the requested range.  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
698
699
/* Generate the instructions (including reg_moves) for prolog & epilog.
   PS is the computed schedule for LOOP; COUNT_REG is the loop's count
   register and COUNT_INIT, when non-NULL, is the insn that sets it to
   a compile-time constant.  The prolog is inserted on the loop-entry
   edge and the epilog on the single exit edge.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  If loop count is constant
	 (count_init), this constant is adjusted by STAGE_COUNT in
	 generate_prolog_epilog function.
	 NOTE(review): the constant case appears to be handled by the
	 caller rather than here -- confirm in sms_schedule.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
				     count_reg, GEN_INT (last_stage),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  /* Stage i of the prolog duplicates stages 0..i of the kernel.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  /* Stage i of the epilog duplicates stages i+1..last of the kernel.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
749
750 /* Return true if all the BBs of the loop are empty except the
751 loop header. */
752 static bool
753 loop_single_full_bb_p (struct loop *loop)
754 {
755 unsigned i;
756 basic_block *bbs = get_loop_body (loop);
757
758 for (i = 0; i < loop->num_nodes ; i++)
759 {
760 rtx head, tail;
761 bool empty_bb = true;
762
763 if (bbs[i] == loop->header)
764 continue;
765
766 /* Make sure that basic blocks other than the header
767 have only notes labels or jumps. */
768 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
769 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
770 {
771 if (NOTE_P (head) || LABEL_P (head)
772 || (INSN_P (head) && JUMP_P (head)))
773 continue;
774 empty_bb = false;
775 break;
776 }
777
778 if (! empty_bb)
779 {
780 free (bbs);
781 return false;
782 }
783 }
784 free (bbs);
785 return true;
786 }
787
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch, where
   the latch has exactly one predecessor and one successor.
   NOTE: LOOP is evaluated several times; pass a side-effect-free
   expression.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
				  && (EDGE_COUNT (loop->latch->preds) == 1) \
                                  && (EDGE_COUNT (loop->latch->succs) == 1))
793
/* Return true if the loop is in its canonical form and false if not.
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
static bool
loop_canon_p (struct loop *loop)
{

  /* Only innermost, non-outermost loops are candidates.  */
  if (loop->inner || !loop_outer (loop))
    return false;

  /* Exactly one exit edge is required.  */
  if (!single_exit (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many exits ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  /* The body must be a single full BB, or header+simple-latch.  */
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many BBs. ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  return true;
}
831
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* Give the loop a dedicated latch: if the latch coincides with the
     header or has multiple successors, split the latch->header edge
     (found by scanning the header's predecessors).  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
856
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii (MaxII, the II at which the
   scheduling attempt gives up).  */
#define MAXII_FACTOR 2
863
864 /* Main entry point, perform SMS scheduling on the loops of the function
865 that consist of single basic blocks. */
866 static void
867 sms_schedule (void)
868 {
869 rtx insn;
870 ddg_ptr *g_arr, g;
871 int * node_order;
872 int maxii;
873 loop_iterator li;
874 partial_schedule_ptr ps;
875 basic_block bb = NULL;
876 struct loop *loop;
877 basic_block condition_bb = NULL;
878 edge latch_edge;
879 gcov_type trip_count = 0;
880
881 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
882 | LOOPS_HAVE_RECORDED_EXITS);
883 if (number_of_loops () <= 1)
884 {
885 loop_optimizer_finalize ();
886 return; /* There are no loops to schedule. */
887 }
888
889 /* Initialize issue_rate. */
890 if (targetm.sched.issue_rate)
891 {
892 int temp = reload_completed;
893
894 reload_completed = 1;
895 issue_rate = targetm.sched.issue_rate ();
896 reload_completed = temp;
897 }
898 else
899 issue_rate = 1;
900
901 /* Initialize the scheduler. */
902 current_sched_info = &sms_sched_info;
903
904 /* Init Data Flow analysis, to be used in interloop dep calculation. */
905 df_set_flags (DF_LR_RUN_DCE);
906 df_rd_add_problem ();
907 df_note_add_problem ();
908 df_chain_add_problem (DF_DU_CHAIN);
909 df_analyze ();
910 regstat_compute_calls_crossed ();
911 sched_init ();
912
913 /* Allocate memory to hold the DDG array one entry for each loop.
914 We use loop->num as index into this array. */
915 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
916
917 /* Build DDGs for all the relevant loops and hold them in G_ARR
918 indexed by the loop index. */
919 FOR_EACH_LOOP (li, loop, 0)
920 {
921 rtx head, tail;
922 rtx count_reg;
923
924 /* For debugging. */
925 if (dbg_cnt (sms_sched_loop) == false)
926 {
927 if (dump_file)
928 fprintf (dump_file, "SMS reached max limit... \n");
929
930 break;
931 }
932
933 if (! loop_canon_p (loop))
934 continue;
935
936 if (! loop_single_full_bb_p (loop))
937 continue;
938
939 bb = loop->header;
940
941 get_ebb_head_tail (bb, bb, &head, &tail);
942 latch_edge = loop_latch_edge (loop);
943 gcc_assert (single_exit (loop));
944 if (single_exit (loop)->count)
945 trip_count = latch_edge->count / single_exit (loop)->count;
946
947       /* Perform SMS only on loops whose average iteration count is above the threshold.  */
948
949 if ( latch_edge->count
950 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
951 {
952 if (dump_file)
953 {
954 fprintf (dump_file, " %s %d (file, line)\n",
955 insn_file (tail), insn_line (tail));
956 fprintf (dump_file, "SMS single-bb-loop\n");
957 if (profile_info && flag_branch_probabilities)
958 {
959 fprintf (dump_file, "SMS loop-count ");
960 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
961 (HOST_WIDEST_INT) bb->count);
962 fprintf (dump_file, "\n");
963 fprintf (dump_file, "SMS trip-count ");
964 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
965 (HOST_WIDEST_INT) trip_count);
966 fprintf (dump_file, "\n");
967 fprintf (dump_file, "SMS profile-sum-max ");
968 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
969 (HOST_WIDEST_INT) profile_info->sum_max);
970 fprintf (dump_file, "\n");
971 }
972 }
973 continue;
974 }
975
976 /* Make sure this is a doloop. */
977 if ( !(count_reg = doloop_register_get (head, tail)))
978 continue;
979
980 /* Don't handle BBs with calls or barriers, or !single_set insns,
981 or auto-increment insns (to avoid creating invalid reg-moves
982 for the auto-increment insns).
983 ??? Should handle auto-increment insns.
984 ??? Should handle insns defining subregs. */
985 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
986 {
987 rtx set;
988
989 if (CALL_P (insn)
990 || BARRIER_P (insn)
991 || (INSN_P (insn) && !JUMP_P (insn)
992 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
993 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
994 || (INSN_P (insn) && (set = single_set (insn))
995 && GET_CODE (SET_DEST (set)) == SUBREG))
996 break;
997 }
998
999 if (insn != NEXT_INSN (tail))
1000 {
1001 if (dump_file)
1002 {
1003 if (CALL_P (insn))
1004 fprintf (dump_file, "SMS loop-with-call\n");
1005 else if (BARRIER_P (insn))
1006 fprintf (dump_file, "SMS loop-with-barrier\n");
1007 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1008 fprintf (dump_file, "SMS reg inc\n");
1009 else if ((INSN_P (insn) && !JUMP_P (insn)
1010 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1011 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1012 else
1013 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1014 print_rtl_single (dump_file, insn);
1015 }
1016
1017 continue;
1018 }
1019
1020 if (! (g = create_ddg (bb, 0)))
1021 {
1022 if (dump_file)
1023 fprintf (dump_file, "SMS doloop\n");
1024 continue;
1025 }
1026
1027 g_arr[loop->num] = g;
1028 }
1029
1030 /* We don't want to perform SMS on new loops - created by versioning. */
1031 FOR_EACH_LOOP (li, loop, 0)
1032 {
1033 rtx head, tail;
1034 rtx count_reg, count_init;
1035 int mii, rec_mii;
1036 unsigned stage_count = 0;
1037 HOST_WIDEST_INT loop_count = 0;
1038
1039 if (! (g = g_arr[loop->num]))
1040 continue;
1041
1042 if (dump_file)
1043 print_ddg (dump_file, g);
1044
1045 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1046
1047 latch_edge = loop_latch_edge (loop);
1048 gcc_assert (single_exit (loop));
1049 if (single_exit (loop)->count)
1050 trip_count = latch_edge->count / single_exit (loop)->count;
1051
1052 if (dump_file)
1053 {
1054 fprintf (dump_file, " %s %d (file, line)\n",
1055 insn_file (tail), insn_line (tail));
1056 fprintf (dump_file, "SMS single-bb-loop\n");
1057 if (profile_info && flag_branch_probabilities)
1058 {
1059 fprintf (dump_file, "SMS loop-count ");
1060 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1061 (HOST_WIDEST_INT) bb->count);
1062 fprintf (dump_file, "\n");
1063 fprintf (dump_file, "SMS profile-sum-max ");
1064 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1065 (HOST_WIDEST_INT) profile_info->sum_max);
1066 fprintf (dump_file, "\n");
1067 }
1068 fprintf (dump_file, "SMS doloop\n");
1069 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1070 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1071 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1072 }
1073
1074
      /* If the loop has a doloop register, it gets special handling.  */
1077 count_init = NULL_RTX;
1078 if ((count_reg = doloop_register_get (head, tail)))
1079 {
1080 basic_block pre_header;
1081
1082 pre_header = loop_preheader_edge (loop)->src;
1083 count_init = const_iteration_count (count_reg, pre_header,
1084 &loop_count);
1085 }
1086 gcc_assert (count_reg);
1087
1088 if (dump_file && count_init)
1089 {
1090 fprintf (dump_file, "SMS const-doloop ");
1091 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1092 loop_count);
1093 fprintf (dump_file, "\n");
1094 }
1095
1096 node_order = XNEWVEC (int, g->num_nodes);
1097
1098 mii = 1; /* Need to pass some estimate of mii. */
1099 rec_mii = sms_order_nodes (g, mii, node_order);
1100 mii = MAX (res_MII (g), rec_mii);
1101 maxii = MAXII_FACTOR * mii;
1102
1103 if (dump_file)
1104 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1105 rec_mii, mii, maxii);
1106
1107 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1108 ASAP. */
1109 set_node_sched_params (g);
1110
1111 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1112
1113 if (ps)
1114 stage_count = PS_STAGE_COUNT (ps);
1115
1116 /* Stage count of 1 means that there is no interleaving between
1117 iterations, let the scheduling passes do the job. */
1118 if (stage_count < 1
1119 || (count_init && (loop_count <= stage_count))
1120 || (flag_branch_probabilities && (trip_count <= stage_count)))
1121 {
1122 if (dump_file)
1123 {
1124 fprintf (dump_file, "SMS failed... \n");
1125 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1126 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1127 fprintf (dump_file, ", trip-count=");
1128 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1129 fprintf (dump_file, ")\n");
1130 }
1131 continue;
1132 }
1133 else
1134 {
1135 struct undo_replace_buff_elem *reg_move_replaces;
1136
1137 if (dump_file)
1138 {
1139 fprintf (dump_file,
1140 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1141 stage_count);
1142 print_partial_schedule (ps, dump_file);
1143 fprintf (dump_file,
1144 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1145 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1146 }
1147
1148 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1149 the closing_branch was scheduled and should appear in the last (ii-1)
1150 row. Otherwise, we are free to schedule the branch, and we let nodes
1151 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1152 row; this should reduce stage_count to minimum.
1153 TODO: Revisit the issue of scheduling the insns of the
1154 control part relative to the branch when the control part
1155 has more than one insn. */
1156 normalize_sched_times (ps);
1157 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1158 set_columns_for_ps (ps);
1159
1160 canon_loop (loop);
1161
	  /* In case the BCT (loop count) is not known at compile time, do loop versioning.  */
1163 if (count_reg && ! count_init)
1164 {
1165 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1166 GEN_INT(stage_count));
1167 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1168 * REG_BR_PROB_BASE) / 100;
1169
1170 loop_version (loop, comp_rtx, &condition_bb,
1171 prob, prob, REG_BR_PROB_BASE - prob,
1172 true);
1173 }
1174
1175 /* Set new iteration count of loop kernel. */
1176 if (count_reg && count_init)
1177 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1178 - stage_count + 1);
1179
1180 /* Now apply the scheduled kernel to the RTL of the loop. */
1181 permute_partial_schedule (ps, g->closing_branch->first_note);
1182
1183 /* Mark this loop as software pipelined so the later
1184 scheduling passes doesn't touch it. */
1185 if (! flag_resched_modulo_sched)
1186 g->bb->flags |= BB_DISABLE_SCHEDULE;
1187 /* The life-info is not valid any more. */
1188 df_set_bb_dirty (g->bb);
1189
1190 reg_move_replaces = generate_reg_moves (ps, true);
1191 if (dump_file)
1192 print_node_sched_params (dump_file, g->num_nodes, g);
1193 /* Generate prolog and epilog. */
1194 generate_prolog_epilog (ps, loop, count_reg, count_init);
1195
1196 free_undo_replace_buff (reg_move_replaces);
1197 }
1198
1199 free_partial_schedule (ps);
1200 free (node_sched_params);
1201 free (node_order);
1202 free_ddg (g);
1203 }
1204
1205 regstat_free_calls_crossed ();
1206 free (g_arr);
1207
1208 /* Release scheduler data, needed until now because of DFA. */
1209 sched_finish ();
1210 loop_optimizer_finalize ();
1211 }
1212
1213 /* The SMS scheduling algorithm itself
1214 -----------------------------------
1215 Input: 'O' an ordered list of insns of a loop.
1216 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1217
1218 'Q' is the empty Set
1219 'PS' is the partial schedule; it holds the currently scheduled nodes with
1220 their cycle/slot.
1221 'PSP' previously scheduled predecessors.
1222 'PSS' previously scheduled successors.
1223 't(u)' the cycle where u is scheduled.
1224 'l(u)' is the latency of u.
1225 'd(v,u)' is the dependence distance from v to u.
1226 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1227 the node ordering phase.
1228 'check_hardware_resources_conflicts(u, PS, c)'
1229 run a trace around cycle/slot through DFA model
1230 to check resource conflicts involving instruction u
1231 at cycle c given the partial schedule PS.
1232 'add_to_partial_schedule_at_time(u, PS, c)'
1233 Add the node/instruction u to the partial schedule
1234 PS at time c.
1235 'calculate_register_pressure(PS)'
1236 Given a schedule of instructions, calculate the register
1237 pressure it implies. One implementation could be the
1238 maximum number of overlapping live ranges.
1239 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1240 registers available in the hardware.
1241
1242 1. II = MII.
1243 2. PS = empty list
1244 3. for each node u in O in pre-computed order
1245 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1254 17. start = Early_start;
1255 18. end = min(Early_start + II - 1 , Late_start);
1256 19. step = 1
1257 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1258 21. start = ASAP(u); end = start + II - 1; step = 1
1259 22. endif
1260
1261 23. success = false
1262 24. for (c = start ; c != end ; c += step)
1263 25. if check_hardware_resources_conflicts(u, PS, c) then
1264 26. add_to_partial_schedule_at_time(u, PS, c)
1265 27. success = true
1266 28. break
1267 29. endif
1268 30. endfor
1269 31. if (success == false) then
1270 32. II = II + 1
1271 33. if (II > maxII) then
1272 34. finish - failed to schedule
1273 35. endif
1274 36. goto 2.
1275 37. endif
1276 38. endfor
1277 39. if (calculate_register_pressure(PS) > maxRP) then
1278 40. goto 32.
1279 41. endif
1280 42. compute epilogue & prologue
1281 43. finish - succeeded to schedule
1282 */
1283
1284 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1285 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1286 set to 0 to save compile time. */
1287 #define DFA_HISTORY SMS_DFA_HISTORY
1288
1289 /* A threshold for the number of repeated unsuccessful attempts to insert
1290 an empty row, before we flush the partial schedule and start over. */
1291 #define MAX_SPLIT_NUM 10
1292 /* Given the partial schedule PS, this function calculates and returns the
1293 cycles in which we can schedule the node with the given index I.
1294 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1295 noticed that there are several cases in which we fail to SMS the loop
1296 because the sched window of a node is empty due to tight data-deps. In
1297 such cases we want to unschedule some of the predecessors/successors
1298 until we get non-empty scheduling window. It returns -1 if the
1299 scheduling window is empty and zero otherwise. */
1300
static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  /* PSP = predecessors of U that are already scheduled;
     PSS = successors of U that are already scheduled.  */
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  /* Case 1: only predecessors are scheduled.  Scan upwards (step 1)
     from the earliest cycle all scheduled predecessors allow.  */
  if (psp_not_empty && !pss_not_empty)
    {
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge: ");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking node %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      /* Earliest cycle this dependence allows for U: V's cycle
		 plus its latency, minus II per iteration of dependence
		 distance.  */
	      int node_st = SCHED_TIME (v_node)
	                    + e->latency - (e->distance * ii);

	      early_start = MAX (early_start, node_st);

	      /* A memory dependence also bounds the window from above.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      start = early_start;
      /* Search at most II consecutive cycles; a modulo schedule
	 repeats every II.  */
      end = MIN (end, early_start + ii);
      step = 1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  /* Case 2: only successors are scheduled.  Scan downwards (step -1)
     from the latest cycle all scheduled successors allow.  */
  else if (!psp_not_empty && pss_not_empty)
    {
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in pss_not_empty,"
		       " checking node %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      /* Latest cycle this dependence allows for U.  */
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (dump_file)
		fprintf (dump_file, "late_start = %d;", late_start);

	      /* Since this window is scanned downwards, a memory
		 dependence bounds it from below.  */
	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	      if (dump_file)
		fprintf (dump_file, "end = %d\n", end);

	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      step = -1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  /* Case 3: both predecessors and successors are scheduled.  Scan
     upwards within the bounds imposed by both sides.  */
  else if (psp_not_empty && pss_not_empty)
    {
      int early_start = INT_MIN;
      int late_start = INT_MAX;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      early_start = MAX (early_start,
				 SCHED_TIME (v_node) + e->latency
				 - (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      late_start = MIN (late_start,
				SCHED_TIME (v_node) - e->latency
				+ (e->distance * ii));
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
    }
  /* Case 4: no neighbor of U is scheduled yet; fall back on the ASAP
     time computed during node ordering.  */
  else /* psp is empty && pss is empty.  */
    {
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An empty window (no candidate cycle at all) is reported as -1.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1494
1495 /* This function implements the scheduling algorithm for SMS according to the
1496 above algorithm. */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  ps_insn_ptr psi;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Keep scheduling until every node is placed, flushing the partial
     schedule and restarting (with the already increased II) whenever
     too many row splits were needed in a row.  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
  	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Non-insns (notes etc.) are never scheduled.  */
	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
		(g->nodes[u].insn)), start, end, step);

	      /* Use must_follow & must_precede bitmaps to determine order
		 of nodes within the cycle.  */
	      sbitmap_zero (must_precede);
	      sbitmap_zero (must_follow);
	      /* TODO: We can add an insn to the must_precede or must_follow
		 bitmaps only if it has tight dependence to U and they
		 both scheduled in the same row.  The current check is less
		 conservative and content with the fact that both U and the
		 insn are scheduled in the same row.  */
	      /* Scheduled predecessors whose row equals START's row must
		 come before U within that row.  */
	      for (e = u_node->in; e != 0; e = e->next_in)
		if (TEST_BIT (sched_nodes, e->src->cuid)
		    && (SMODULO (SCHED_TIME (e->src), ii) ==
			SMODULO (start, ii)))
		  SET_BIT (must_precede, e->src->cuid);

	      /* Scheduled successors whose row equals the last candidate
		 cycle's row (END - STEP) must come after U there.  */
	      for (e = u_node->out; e != 0; e = e->next_out)
		if (TEST_BIT (sched_nodes, e->dest->cuid)
		    && (SMODULO (SCHED_TIME (e->dest), ii) ==
			SMODULO ((end - step), ii)))
		  SET_BIT (must_follow, e->dest->cuid);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      /* Try each cycle of the window until U fits without
		 resource or intra-row ordering conflicts.  */
	      for (c = start; c != end; c += step)
		{
		  verify_partial_schedule (ps, sched_nodes);

		  psi = ps_add_node_check_conflicts (ps, u_node, c,
						     must_precede,
						     must_follow);

		  if (psi)
		    {
		      SCHED_TIME (u_node) = c;
		      SET_BIT (sched_nodes, u);
		      success = 1;
		      num_splits = 0;
		      if (dump_file)
			fprintf (dump_file, "Scheduled w/o split in %d\n", c);

		      break;
		    }
		}
	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Give up once the increased II reaches the bound.  */
	      if (ii++ == maxii)
		break;

	      /* Too many consecutive unsuccessful splits: discard the
		 partial schedule and restart with the increased II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      /* Backtrack: insert an empty row to widen U's window,
		 then retry the same node.  The row is chosen so as to
		 separate a critical predecessor from a critical
		 successor of U.  */
	      num_splits++;
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	     /* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	} /* Continue with next node.  */
    } /* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
1654
1655 /* This function inserts a new empty row into PS at the position
1656 according to SPLITROW, keeping all already scheduled instructions
1657 intact and updating their SCHED_TIME and cycle accordingly. */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
		     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  normalize_sched_times (ps);
  rotate_partial_schedule (ps, ps->min_cycle);

  /* Build a new rows array of NEW_II entries; rows before SPLIT_ROW
     keep their index, rows at or after it shift down by one.  */
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Every full II period preceding this cycle now contains one
	     extra row, so the cycle grows by SCHED_TIME / II.  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The inserted row starts out empty.  */
  rows_new[split_row] = NULL;

  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Cycles at or past the split point additionally skip over
	     the newly inserted row, hence the extra + 1.  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
1734
/* Given U_NODE which is the node that failed to be scheduled; LOW and
   UP which are the boundaries of its scheduling window; compute using
   SCHED_NODES and II a row in the partial schedule that can be split
   which will separate a critical predecessor from a critical successor
   thereby expanding the window, and return it.  */
1740 static int
1741 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1742 ddg_node_ptr u_node)
1743 {
1744 ddg_edge_ptr e;
1745 int lower = INT_MIN, upper = INT_MAX;
1746 ddg_node_ptr crit_pred = NULL;
1747 ddg_node_ptr crit_succ = NULL;
1748 int crit_cycle;
1749
1750 for (e = u_node->in; e != 0; e = e->next_in)
1751 {
1752 ddg_node_ptr v_node = e->src;
1753
1754 if (TEST_BIT (sched_nodes, v_node->cuid)
1755 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1756 if (SCHED_TIME (v_node) > lower)
1757 {
1758 crit_pred = v_node;
1759 lower = SCHED_TIME (v_node);
1760 }
1761 }
1762
1763 if (crit_pred != NULL)
1764 {
1765 crit_cycle = SCHED_TIME (crit_pred) + 1;
1766 return SMODULO (crit_cycle, ii);
1767 }
1768
1769 for (e = u_node->out; e != 0; e = e->next_out)
1770 {
1771 ddg_node_ptr v_node = e->dest;
1772 if (TEST_BIT (sched_nodes, v_node->cuid)
1773 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1774 if (SCHED_TIME (v_node) < upper)
1775 {
1776 crit_succ = v_node;
1777 upper = SCHED_TIME (v_node);
1778 }
1779 }
1780
1781 if (crit_succ != NULL)
1782 {
1783 crit_cycle = SCHED_TIME (crit_succ);
1784 return SMODULO (crit_cycle, ii);
1785 }
1786
1787 if (dump_file)
1788 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1789
1790 return SMODULO ((low + up + 1) / 2, ii);
1791 }
1792
1793 static void
1794 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
1795 {
1796 int row;
1797 ps_insn_ptr crr_insn;
1798
1799 for (row = 0; row < ps->ii; row++)
1800 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1801 {
1802 ddg_node_ptr u = crr_insn->node;
1803
1804 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1805 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1806 popcount (sched_nodes) == number of insns in ps. */
1807 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1808 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
1809 }
1810 }
1811
1812 \f
1813 /* This page implements the algorithm for ordering the nodes of a DDG
1814 for modulo scheduling, activated through the
1815 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1816
/* Accessors for the per-node ordering parameters, which are stored in
   each DDG node's aux.info field by calculate_order_params.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
/* Earliest cycle the node could occupy (longest latency path from a
   source of the DAG, ignoring backarcs).  */
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
/* Latest cycle the node could occupy without stretching the critical
   path.  */
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
/* Longest latency path from the node to a sink of the DAG.  */
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: the slack between the node's latest and earliest cycles.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
/* Depth is identified with ASAP.  */
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa  calculate_order_params (ddg_ptr, int mii);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which nodes are taken while ordering an (extended) SCC;
   used by order_nodes_in_scc.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node ordering parameters; accessed through the macros above.  */
struct node_order_params
{
  int asap;
  int alap;
  int height;
};
1841
1842 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1843 static void
1844 check_nodes_order (int *node_order, int num_nodes)
1845 {
1846 int i;
1847 sbitmap tmp = sbitmap_alloc (num_nodes);
1848
1849 sbitmap_zero (tmp);
1850
1851 for (i = 0; i < num_nodes; i++)
1852 {
1853 int u = node_order[i];
1854
1855 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1856
1857 SET_BIT (tmp, u);
1858 }
1859
1860 sbitmap_free (tmp);
1861 }
1862
1863 /* Order the nodes of G for scheduling and pass the result in
1864 NODE_ORDER. Also set aux.count of each node to ASAP.
1865 Return the recMII for the given DDG. */
1866 static int
1867 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1868 {
1869 int i;
1870 int rec_mii = 0;
1871 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1872
1873 nopa nops = calculate_order_params (g, mii);
1874
1875 if (dump_file)
1876 print_sccs (dump_file, sccs, g);
1877
1878 order_nodes_of_sccs (sccs, node_order);
1879
1880 if (sccs->num_sccs > 0)
1881 /* First SCC has the largest recurrence_length. */
1882 rec_mii = sccs->sccs[0]->recurrence_length;
1883
1884 /* Save ASAP before destroying node_order_params. */
1885 for (i = 0; i < g->num_nodes; i++)
1886 {
1887 ddg_node_ptr v = &g->nodes[i];
1888 v->aux.count = ASAP (v);
1889 }
1890
1891 free (nops);
1892 free_ddg_all_sccs (sccs);
1893 check_nodes_order (node_order, g->num_nodes);
1894
1895 return rec_mii;
1896 }
1897
1898 static void
1899 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1900 {
1901 int i, pos = 0;
1902 ddg_ptr g = all_sccs->ddg;
1903 int num_nodes = g->num_nodes;
1904 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1905 sbitmap on_path = sbitmap_alloc (num_nodes);
1906 sbitmap tmp = sbitmap_alloc (num_nodes);
1907 sbitmap ones = sbitmap_alloc (num_nodes);
1908
1909 sbitmap_zero (prev_sccs);
1910 sbitmap_ones (ones);
1911
1912 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1913 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1914 for (i = 0; i < all_sccs->num_sccs; i++)
1915 {
1916 ddg_scc_ptr scc = all_sccs->sccs[i];
1917
1918 /* Add nodes on paths from previous SCCs to the current SCC. */
1919 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1920 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1921
1922 /* Add nodes on paths from the current SCC to previous SCCs. */
1923 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1924 sbitmap_a_or_b (tmp, tmp, on_path);
1925
1926 /* Remove nodes of previous SCCs from current extended SCC. */
1927 sbitmap_difference (tmp, tmp, prev_sccs);
1928
1929 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1930 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1931 }
1932
1933 /* Handle the remaining nodes that do not belong to any scc. Each call
1934 to order_nodes_in_scc handles a single connected component. */
1935 while (pos < g->num_nodes)
1936 {
1937 sbitmap_difference (tmp, ones, prev_sccs);
1938 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1939 }
1940 sbitmap_free (prev_sccs);
1941 sbitmap_free (on_path);
1942 sbitmap_free (tmp);
1943 sbitmap_free (ones);
1944 }
1945
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero (xcalloc zeroes the array).  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
                                          sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure,
     so the ASAP/ALAP/HEIGHT accessor macros can reach it through the node.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass: ASAP of a node is the longest-latency path reaching it,
     counting only intra-iteration edges (distance == 0); backarcs carry
     distance > 0 and are thereby ignored.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
        if (e->distance == 0)
          ASAP (u_node) = MAX (ASAP (u_node),
                               ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass (reverse topological order): ALAP is the latest start
     cycle that does not stretch the schedule past MAX_ASAP; HEIGHT is the
     longest-latency path from the node to any sink.  Again only edges with
     distance == 0 are considered.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
        if (e->distance == 0)
          {
            ALAP (u_node) = MIN (ALAP (u_node),
                                 ALAP (e->dest) - e->latency);
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
                                   HEIGHT (e->dest) + e->latency);
          }
    }

  return node_order_params_arr;
}
2002
2003 static int
2004 find_max_asap (ddg_ptr g, sbitmap nodes)
2005 {
2006 unsigned int u = 0;
2007 int max_asap = -1;
2008 int result = -1;
2009 sbitmap_iterator sbi;
2010
2011 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2012 {
2013 ddg_node_ptr u_node = &g->nodes[u];
2014
2015 if (max_asap < ASAP (u_node))
2016 {
2017 max_asap = ASAP (u_node);
2018 result = u;
2019 }
2020 }
2021 return result;
2022 }
2023
2024 static int
2025 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2026 {
2027 unsigned int u = 0;
2028 int max_hv = -1;
2029 int min_mob = INT_MAX;
2030 int result = -1;
2031 sbitmap_iterator sbi;
2032
2033 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2034 {
2035 ddg_node_ptr u_node = &g->nodes[u];
2036
2037 if (max_hv < HEIGHT (u_node))
2038 {
2039 max_hv = HEIGHT (u_node);
2040 min_mob = MOB (u_node);
2041 result = u;
2042 }
2043 else if ((max_hv == HEIGHT (u_node))
2044 && (min_mob > MOB (u_node)))
2045 {
2046 min_mob = MOB (u_node);
2047 result = u;
2048 }
2049 }
2050 return result;
2051 }
2052
2053 static int
2054 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2055 {
2056 unsigned int u = 0;
2057 int max_dv = -1;
2058 int min_mob = INT_MAX;
2059 int result = -1;
2060 sbitmap_iterator sbi;
2061
2062 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2063 {
2064 ddg_node_ptr u_node = &g->nodes[u];
2065
2066 if (max_dv < DEPTH (u_node))
2067 {
2068 max_dv = DEPTH (u_node);
2069 min_mob = MOB (u_node);
2070 result = u;
2071 }
2072 else if ((max_dv == DEPTH (u_node))
2073 && (min_mob > MOB (u_node)))
2074 {
2075 min_mob = MOB (u_node);
2076 result = u;
2077 }
2078 }
2079 return result;
2080 }
2081
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.
   Returns the updated position.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
                    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the work set with the SCC nodes adjacent to the already-ordered
     nodes, preferring predecessors (bottom-up) over successors (top-down);
     if the SCC is not connected to them, seed with the max-ASAP node.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
        SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Alternate between top-down and bottom-up sweeps, draining the work
     set each time, until the whole SCC has been placed.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
        {
          while (!sbitmap_equal (workset, zero_bitmap))
            {
              /* Pick the max-height node, breaking ties by minimal MOB.  */
              v = find_max_hv_min_mob (g, workset);
              v_node = &g->nodes[v];
              node_order[pos++] = v;
              v_node_succs = NODE_SUCCESSORS (v_node);
              sbitmap_a_and_b (tmp, v_node_succs, scc);

              /* Don't consider the already ordered successors again.  */
              sbitmap_difference (tmp, tmp, nodes_ordered);
              sbitmap_a_or_b (workset, workset, tmp);
              RESET_BIT (workset, v);
              SET_BIT (nodes_ordered, v);
            }
          /* Switch direction and restart from the predecessors of
             everything ordered so far, restricted to this SCC.  */
          dir = BOTTOMUP;
          sbitmap_zero (predecessors);
          find_predecessors (predecessors, g, nodes_ordered);
          sbitmap_a_and_b (workset, predecessors, scc);
        }
      else
        {
          while (!sbitmap_equal (workset, zero_bitmap))
            {
              /* Pick the max-depth node, breaking ties by minimal MOB.  */
              v = find_max_dv_min_mob (g, workset);
              v_node = &g->nodes[v];
              node_order[pos++] = v;
              v_node_preds = NODE_PREDECESSORS (v_node);
              sbitmap_a_and_b (tmp, v_node_preds, scc);

              /* Don't consider the already ordered predecessors again.  */
              sbitmap_difference (tmp, tmp, nodes_ordered);
              sbitmap_a_or_b (workset, workset, tmp);
              RESET_BIT (workset, v);
              SET_BIT (nodes_ordered, v);
            }
          /* Switch direction and restart from the successors.  */
          dir = TOPDOWN;
          sbitmap_zero (successors);
          find_successors (successors, g, nodes_ordered);
          sbitmap_a_and_b (workset, successors, scc);
        }
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2183
2184 \f
2185 /* This page contains functions for manipulating partial-schedules during
2186 modulo scheduling. */
2187
2188 /* Create a partial schedule and allocate a memory to hold II rows. */
2189
2190 static partial_schedule_ptr
2191 create_partial_schedule (int ii, ddg_ptr g, int history)
2192 {
2193 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2194 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2195 ps->ii = ii;
2196 ps->history = history;
2197 ps->min_cycle = INT_MAX;
2198 ps->max_cycle = INT_MIN;
2199 ps->g = g;
2200
2201 return ps;
2202 }
2203
2204 /* Free the PS_INSNs in rows array of the given partial schedule.
2205 ??? Consider caching the PS_INSN's. */
2206 static void
2207 free_ps_insns (partial_schedule_ptr ps)
2208 {
2209 int i;
2210
2211 for (i = 0; i < ps->ii; i++)
2212 {
2213 while (ps->rows[i])
2214 {
2215 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2216
2217 free (ps->rows[i]);
2218 ps->rows[i] = ps_insn;
2219 }
2220 ps->rows[i] = NULL;
2221 }
2222 }
2223
2224 /* Free all the memory allocated to the partial schedule. */
2225
2226 static void
2227 free_partial_schedule (partial_schedule_ptr ps)
2228 {
2229 if (!ps)
2230 return;
2231 free_ps_insns (ps);
2232 free (ps->rows);
2233 free (ps);
2234 }
2235
2236 /* Clear the rows array with its PS_INSNs, and create a new one with
2237 NEW_II rows. */
2238
2239 static void
2240 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2241 {
2242 if (!ps)
2243 return;
2244 free_ps_insns (ps);
2245 if (new_ii == ps->ii)
2246 return;
2247 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2248 * sizeof (ps_insn_ptr));
2249 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2250 ps->ii = new_ii;
2251 ps->min_cycle = INT_MAX;
2252 ps->max_cycle = INT_MIN;
2253 }
2254
2255 /* Prints the partial schedule as an ii rows array, for each rows
2256 print the ids of the insns in it. */
2257 void
2258 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2259 {
2260 int i;
2261
2262 for (i = 0; i < ps->ii; i++)
2263 {
2264 ps_insn_ptr ps_i = ps->rows[i];
2265
2266 fprintf (dump, "\n[CYCLE %d ]: ", i);
2267 while (ps_i)
2268 {
2269 fprintf (dump, "%d, ",
2270 INSN_UID (ps_i->node->insn));
2271 ps_i = ps_i->next_in_row;
2272 }
2273 }
2274 }
2275
2276 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2277 static ps_insn_ptr
2278 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2279 {
2280 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2281
2282 ps_i->node = node;
2283 ps_i->next_in_row = NULL;
2284 ps_i->prev_in_row = NULL;
2285 ps_i->row_rest_count = rest_count;
2286 ps_i->cycle = cycle;
2287
2288 return ps_i;
2289 }
2290
2291
2292 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2293 node is not found in the partial schedule, else returns true. */
2294 static bool
2295 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2296 {
2297 int row;
2298
2299 if (!ps || !ps_i)
2300 return false;
2301
2302 row = SMODULO (ps_i->cycle, ps->ii);
2303 if (! ps_i->prev_in_row)
2304 {
2305 if (ps_i != ps->rows[row])
2306 return false;
2307
2308 ps->rows[row] = ps_i->next_in_row;
2309 if (ps->rows[row])
2310 ps->rows[row]->prev_in_row = NULL;
2311 }
2312 else
2313 {
2314 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2315 if (ps_i->next_in_row)
2316 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2317 }
2318 free (ps_i);
2319 return true;
2320 }
2321
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Returns true iff a valid column was found and PS_I linked in.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
                     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must-follow and the last must-precede instruction,
     and insert the node immediately after the last must-precede one —
     but make sure that no must-follow instruction comes before it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
          && ! first_must_follow)
        first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
        {
          /* If we have already met a node that must follow, then
             there is no possible column.  */
          if (first_must_follow)
            return false;
          else
            last_must_precede = next_ps_i;
        }
    }

  /* Now splice PS_I into the doubly linked row list, either at the head
     (no must-precede insn found) or right after LAST_MUST_PRECEDE.  */

  if (! last_must_precede)
    {
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2384
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
                        sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already the last instruction in its row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list:
     swap PS_I with NEXT so the row order becomes ... PREV, NEXT, PS_I ...  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I was the row head, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2433
2434 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2435 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2436 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2437 before/after (respectively) the node pointed to by PS_I when scheduled
2438 in the same cycle. */
2439 static ps_insn_ptr
2440 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2441 sbitmap must_precede, sbitmap must_follow)
2442 {
2443 ps_insn_ptr ps_i;
2444 int rest_count = 1;
2445 int row = SMODULO (cycle, ps->ii);
2446
2447 if (ps->rows[row]
2448 && ps->rows[row]->row_rest_count >= issue_rate)
2449 return NULL;
2450
2451 if (ps->rows[row])
2452 rest_count += ps->rows[row]->row_rest_count;
2453
2454 ps_i = create_ps_insn (node, rest_count, cycle);
2455
2456 /* Finds and inserts PS_I according to MUST_FOLLOW and
2457 MUST_PRECEDE. */
2458 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2459 {
2460 free (ps_i);
2461 return NULL;
2462 }
2463
2464 return ps_i;
2465 }
2466
2467 /* Advance time one cycle. Assumes DFA is being used. */
2468 static void
2469 advance_one_cycle (void)
2470 {
2471 if (targetm.sched.dfa_pre_cycle_insn)
2472 state_transition (curr_state,
2473 targetm.sched.dfa_pre_cycle_insn ());
2474
2475 state_transition (curr_state, NULL);
2476
2477 if (targetm.sched.dfa_post_cycle_insn)
2478 state_transition (curr_state,
2479 targetm.sched.dfa_post_cycle_insn ());
2480 }
2481
2482
2483
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
           crr_insn;
           crr_insn = crr_insn->next_in_row)
        {
          rtx insn = crr_insn->node->insn;

          if (!INSN_P (insn))
            continue;

          /* Check if there is room for the current insn.  */
          if (!can_issue_more || state_dead_lock_p (curr_state))
            return true;

          /* Update the DFA state and return with failure if the DFA found
             resource conflicts.  */
          if (state_transition (curr_state, insn) >= 0)
            return true;

          if (targetm.sched.variable_issue)
            can_issue_more =
              targetm.sched.variable_issue (sched_dump, sched_verbose,
                                            insn, can_issue_more);
          /* A naked CLOBBER or USE generates no instruction, so don't
             let them consume issue slots.  */
          else if (GET_CODE (PATTERN (insn)) != USE
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
            can_issue_more--;
        }

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2535
2536 /* Checks if the given node causes resource conflicts when added to PS at
2537 cycle C. If not the node is added to PS and returned; otherwise zero
2538 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2539 cuid N must be come before/after (respectively) the node pointed to by
2540 PS_I when scheduled in the same cycle. */
2541 ps_insn_ptr
2542 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2543 int c, sbitmap must_precede,
2544 sbitmap must_follow)
2545 {
2546 int has_conflicts = 0;
2547 ps_insn_ptr ps_i;
2548
2549 /* First add the node to the PS, if this succeeds check for
2550 conflicts, trying different issue slots in the same row. */
2551 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2552 return NULL; /* Failed to insert the node at the given cycle. */
2553
2554 has_conflicts = ps_has_conflicts (ps, c, c)
2555 || (ps->history > 0
2556 && ps_has_conflicts (ps,
2557 c - ps->history,
2558 c + ps->history));
2559
2560 /* Try different issue slots to find one that the given node can be
2561 scheduled in without conflicts. */
2562 while (has_conflicts)
2563 {
2564 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2565 break;
2566 has_conflicts = ps_has_conflicts (ps, c, c)
2567 || (ps->history > 0
2568 && ps_has_conflicts (ps,
2569 c - ps->history,
2570 c + ps->history));
2571 }
2572
2573 if (has_conflicts)
2574 {
2575 remove_node_from_ps (ps, ps_i);
2576 return NULL;
2577 }
2578
2579 ps->min_cycle = MIN (ps->min_cycle, c);
2580 ps->max_cycle = MAX (ps->max_cycle, c);
2581 return ps_i;
2582 }
2583
2584 /* Rotate the rows of PS such that insns scheduled at time
2585 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2586 void
2587 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2588 {
2589 int i, row, backward_rotates;
2590 int last_row = ps->ii - 1;
2591
2592 if (start_cycle == 0)
2593 return;
2594
2595 backward_rotates = SMODULO (start_cycle, ps->ii);
2596
2597 /* Revisit later and optimize this into a single loop. */
2598 for (i = 0; i < backward_rotates; i++)
2599 {
2600 ps_insn_ptr first_row = ps->rows[0];
2601
2602 for (row = 0; row < last_row; row++)
2603 ps->rows[row] = ps->rows[row+1];
2604
2605 ps->rows[last_row] = first_row;
2606 }
2607
2608 ps->max_cycle -= start_cycle;
2609 ps->min_cycle -= start_cycle;
2610 }
2611
2612 #endif /* INSN_SCHEDULING */
2613 \f
2614 static bool
2615 gate_handle_sms (void)
2616 {
2617 return (optimize > 0 && flag_modulo_sched);
2618 }
2619
2620
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes: chain each block to its successor through
     bb->aux (presumably consumed by cfg_layout_finalize — confirm).  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2645
/* Pass descriptor registering swing modulo scheduling with the pass
   manager; gated on -fmodulo-sched via gate_handle_sms.  */
struct tree_opt_pass pass_sms =
{
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'm'                                   /* letter */
};
2664