modulo-sched.c (sms_order_nodes, [...]): Fix prototypes.
[gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
51
52 #ifdef INSN_SCHEDULING
53
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
78 up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
86
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises of all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
101 \f
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
104
/* Shorthand pointer typedefs for the partial-schedule structures
   defined below.  */
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  Computed as
   ceil ((max_cycle - min_cycle + 1) / ii).  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
/* A single instruction in the partial schedule.  Each row of the
   schedule is a doubly-linked list of these.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};
139
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
159
/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.  */
struct undo_replace_buff_elem
{
  rtx insn;	 /* The insn in which a replacement was made.  */
  rtx orig_reg;  /* The register that was replaced.  */
  rtx new_reg;	 /* The register that replaced ORIG_REG in INSN.  */
  struct undo_replace_buff_elem *next;	/* Next record in the list.  */
};



/* Forward declarations of the partial-schedule primitives defined
   later in this file.  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
185
186 \f
187 /* This page defines constants and structures for the modulo scheduling
188 driver. */
189
/* As in haifa-sched.c:  */
/* issue_rate is the number of insns that can be scheduled in the same
   machine cycle.  It can be defined in the config/mach/mach.h file,
   otherwise we set it to 1.  */

static int issue_rate;

/* Forward declarations of the modulo-scheduling driver routines.  */
static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx);
static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
                                    rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
				       int, int, int, rtx);

/* Accessors for the scheduling parameters of a DDG node X, stored in a
   node_sched_params record hung off the node's aux.info pointer (see
   set_node_sched_params).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
215
/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
  int time;	/* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */
  rtx first_reg_move;

  /* The number of register-move instructions added, immediately preceding
     first_reg_move.  */
  int nreg_moves;

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
239
240 \f
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
244 static const char *
245 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
246 {
247 static char tmp[80];
248
249 sprintf (tmp, "i%4d", INSN_UID (insn));
250 return tmp;
251 }
252
/* sched_info callback for recording registers used/set by conditional
   jumps.  SMS provides a deliberately empty implementation: the body
   does nothing.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
260
/* Scheduler hooks used while sched_analyze computes dependences for
   SMS.  Only the insn-printing and jump-dependency callbacks are
   provided; every other hook is left NULL/0 (positional initializer —
   the field layout is defined in sched-int.h).  */
static struct sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  compute_jump_reg_dependencies,
  NULL, NULL,
  NULL, NULL,
  0, 0, 0,

  NULL, NULL, NULL, NULL, NULL,
  0
};
278
279
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The condition compares either the count register itself or
     (count_reg + adjustment); anything else is unexpected here.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
                             : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
        if (dump_file)
          {
            fprintf (dump_file, "SMS count_reg found ");
            print_rtl_single (dump_file, reg);
            fprintf (dump_file, " outside control in insn:\n");
            print_rtl_single (dump_file, insn);
          }

        return NULL_RTX;
      }

  return reg;
#else
  return NULL_RTX;
#endif
}
332
333 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
334 that the number of iterations is a compile-time constant. If so,
335 return the rtx that sets COUNT_REG to a constant, and set COUNT to
336 this constant. Otherwise return 0. */
337 static rtx
338 const_iteration_count (rtx count_reg, basic_block pre_header,
339 HOST_WIDEST_INT * count)
340 {
341 rtx insn;
342 rtx head, tail;
343
344 if (! pre_header)
345 return NULL_RTX;
346
347 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
348
349 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
350 if (INSN_P (insn) && single_set (insn) &&
351 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
352 {
353 rtx pat = single_set (insn);
354
355 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
356 {
357 *count = INTVAL (SET_SRC (pat));
358 return insn;
359 }
360
361 return NULL_RTX;
362 }
363
364 return NULL_RTX;
365 }
366
367 /* A very simple resource-based lower bound on the initiation interval.
368 ??? Improve the accuracy of this bound by considering the
369 utilization of various units. */
370 static int
371 res_MII (ddg_ptr g)
372 {
373 if (targetm.sched.sms_res_mii)
374 return targetm.sched.sms_res_mii (g);
375
376 return (g->num_nodes / issue_rate);
377 }
378
379
/* Points to the array that contains the sched data for each node;
   allocated in set_node_sched_params, one entry per DDG node.  */
static node_sched_params_ptr node_sched_params;
382
383 /* Allocate sched_params for each node and initialize it. Assumes that
384 the aux field of each node contain the asap bound (computed earlier),
385 and copies it into the sched_params field. */
386 static void
387 set_node_sched_params (ddg_ptr g)
388 {
389 int i;
390
391 /* Allocate for each node in the DDG a place to hold the "sched_data". */
392 /* Initialize ASAP/ALAP/HIGHT to zero. */
393 node_sched_params = (node_sched_params_ptr)
394 xcalloc (g->num_nodes,
395 sizeof (struct node_sched_params));
396
397 /* Set the pointer of the general data of the node to point to the
398 appropriate sched_params structure. */
399 for (i = 0; i < g->num_nodes; i++)
400 {
401 /* Watch out for aliasing problems? */
402 node_sched_params[i].asap = g->nodes[i].aux.count;
403 g->nodes[i].aux.info = &node_sched_params[i];
404 }
405 }
406
407 static void
408 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
409 {
410 int i;
411
412 if (! file)
413 return;
414 for (i = 0; i < num_nodes; i++)
415 {
416 node_sched_params_ptr nsp = &node_sched_params[i];
417 rtx reg_move = nsp->first_reg_move;
418 int j;
419
420 fprintf (file, "Node = %d; INSN = %d\n", i,
421 (INSN_UID (g->nodes[i].insn)));
422 fprintf (file, " asap = %d:\n", nsp->asap);
423 fprintf (file, " time = %d:\n", nsp->time);
424 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
425 for (j = 0; j < nsp->nreg_moves; j++)
426 {
427 fprintf (file, " reg_move = ");
428 print_rtl_single (file, reg_move);
429 reg_move = PREV_INSN (reg_move);
430 }
431 }
432 }
433
434 /*
435 Breaking intra-loop register anti-dependences:
436 Each intra-loop register anti-dependence implies a cross-iteration true
437 dependence of distance 1. Therefore, we can remove such false dependencies
438 and figure out if the partial schedule broke them by checking if (for a
439 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
440 if so generate a register move. The number of such moves is equal to:
441 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
442 nreg_moves = ----------------------------------- + 1 - { dependence.
443 ii { 1 if not.
444 */
/* Generate the reg_moves needed by the partial schedule PS (see the
   nreg_moves discussion above) and insert them into the insn stream.
   Record every register replacement performed so the caller can undo
   SMS if it turns out not to be profitable; return the list of these
   records.  If RESCAN is true, tell the dataflow framework about each
   modified insn.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A distance-1 dependence reads the value of the previous
	       iteration, extending the life range by one ii.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Same computation as nreg_moves4e above, now identifying
	       which copy (1-based) this particular use reads.  */
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    /* dest_copy == 0 means the use reads the original register.  */
	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      last_reg_move = u->insn;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  /* Each move copies the previous move's destination, forming a
	     chain immediately before the defining insn.  */
	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Rewrite every use attached to this copy, prepending an undo
	     record for each replacement made.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
553
554 /* Free memory allocated for the undo buffer. */
555 static void
556 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
557 {
558
559 while (reg_move_replaces)
560 {
561 struct undo_replace_buff_elem *rep = reg_move_replaces;
562
563 reg_move_replaces = reg_move_replaces->next;
564 free (rep);
565 }
566 }
567
568 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
569 of SCHED_ROW and SCHED_STAGE. */
570 static void
571 normalize_sched_times (partial_schedule_ptr ps)
572 {
573 int row;
574 int amount = PS_MIN_CYCLE (ps);
575 int ii = ps->ii;
576 ps_insn_ptr crr_insn;
577
578 for (row = 0; row < ii; row++)
579 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
580 {
581 ddg_node_ptr u = crr_insn->node;
582 int normalized_time = SCHED_TIME (u) - amount;
583
584 if (dump_file)
585 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
586 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
587 (u), ps->min_cycle);
588 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
589 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
590 SCHED_TIME (u) = normalized_time;
591 SCHED_ROW (u) = normalized_time % ii;
592 SCHED_STAGE (u) = normalized_time / ii;
593 }
594 }
595
596 /* Set SCHED_COLUMN of each node according to its position in PS. */
597 static void
598 set_columns_for_ps (partial_schedule_ptr ps)
599 {
600 int row;
601
602 for (row = 0; row < ps->ii; row++)
603 {
604 ps_insn_ptr cur_insn = ps->rows[row];
605 int column = 0;
606
607 for (; cur_insn; cur_insn = cur_insn->next_in_row)
608 SCHED_COLUMN (cur_insn->node) = column++;
609 }
610 }
611
/* Permute the insns according to their order in PS, from row 0 to
   row ii-1, and position them right before LAST.  This schedules
   the insns of the loop kernel.  */
static void
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
{
  int ii = ps->ii;
  int row;
  ps_insn_ptr ps_ij;

  /* Move each insn (together with its preceding notes) just before
     LAST, unless it is already there.  Visiting rows in order and each
     row left to right reproduces the schedule's ordering.  */
  for (row = 0; row < ii ; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      if (PREV_INSN (last) != ps_ij->node->insn)
        reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
                            PREV_INSN (last));
}
628
/* Emit (into the current sequence) copies of the kernel insns, and of
   the required number of their reg_moves, for stages FROM_STAGE up to
   TO_STAGE of partial schedule PS.  FOR_PROLOG selects prolog
   vs. epilog semantics for the reg_move counts.  Insns mentioning
   COUNT_REG belong to the loop-control part and are skipped.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit the selected reg_moves, then the insn itself when its
	   stage falls inside [FROM_STAGE, TO_STAGE].  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
694
/* Generate the instructions (including reg_moves) for prolog & epilog.
   The prolog is inserted on LOOP's entry edge and the epilog on its
   single exit edge.  COUNT_REG is the register controlling the loop;
   COUNT_INIT, if non-NULL, is the insn setting it to a compile-time
   constant (in which case the caller adjusts the constant and no
   runtime adjustment is emitted here).  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
         adjust the loop count by STAGE_COUNT.  If loop count is constant
         (count_init), this constant is adjusted by STAGE_COUNT in
         generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
                                     count_reg, GEN_INT (last_stage),
                                     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
        emit_move_insn (count_reg, sub_reg);
    }

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
745
746 /* Return true if all the BBs of the loop are empty except the
747 loop header. */
748 static bool
749 loop_single_full_bb_p (struct loop *loop)
750 {
751 unsigned i;
752 basic_block *bbs = get_loop_body (loop);
753
754 for (i = 0; i < loop->num_nodes ; i++)
755 {
756 rtx head, tail;
757 bool empty_bb = true;
758
759 if (bbs[i] == loop->header)
760 continue;
761
762 /* Make sure that basic blocks other than the header
763 have only notes labels or jumps. */
764 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
765 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
766 {
767 if (NOTE_P (head) || LABEL_P (head)
768 || (INSN_P (head) && JUMP_P (head)))
769 continue;
770 empty_bb = false;
771 break;
772 }
773
774 if (! empty_bb)
775 {
776 free (bbs);
777 return false;
778 }
779 }
780 free (bbs);
781 return true;
782 }
783
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch, where
   the latch has exactly one predecessor and one successor.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
				  && (EDGE_COUNT (loop->latch->preds) == 1) \
                                  && (EDGE_COUNT (loop->latch->succs) == 1))
789
/* Return true if the loop is in its canonical form and false if not.
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
static bool
loop_canon_p (struct loop *loop)
{

  /* Reject nested loops and the (pseudo-)root of the loop tree.  */
  if (loop->inner || !loop_outer (loop))
    return false;

  if (!single_exit (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many exits ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many BBs. ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  return true;
}
827
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* Ensure the loop has a distinct latch block with a single successor
     by splitting the latch->header back-edge if needed.  Note the edge
     is split only after the iteration stops (break), so the edge list
     is not mutated while being walked.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      FOR_EACH_EDGE (e, i, loop->header->preds)
        if (e->src == loop->latch)
          break;
      split_edge (e);
    }
}
852
/* Probability in % that the sms-ed loop rolls enough so that optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii: maxii = MAXII_FACTOR * mii.  */
#define MAXII_FACTOR 2
859
860 /* Main entry point, perform SMS scheduling on the loops of the function
861 that consist of single basic blocks. */
862 static void
863 sms_schedule (void)
864 {
865 rtx insn;
866 ddg_ptr *g_arr, g;
867 int * node_order;
868 int maxii, max_asap;
869 loop_iterator li;
870 partial_schedule_ptr ps;
871 basic_block bb = NULL;
872 struct loop *loop;
873 basic_block condition_bb = NULL;
874 edge latch_edge;
875 gcov_type trip_count = 0;
876
877 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
878 | LOOPS_HAVE_RECORDED_EXITS);
879 if (number_of_loops () <= 1)
880 {
881 loop_optimizer_finalize ();
882 return; /* There are no loops to schedule. */
883 }
884
885 /* Initialize issue_rate. */
886 if (targetm.sched.issue_rate)
887 {
888 int temp = reload_completed;
889
890 reload_completed = 1;
891 issue_rate = targetm.sched.issue_rate ();
892 reload_completed = temp;
893 }
894 else
895 issue_rate = 1;
896
897 /* Initialize the scheduler. */
898 current_sched_info = &sms_sched_info;
899
900 /* Init Data Flow analysis, to be used in interloop dep calculation. */
901 df_set_flags (DF_LR_RUN_DCE);
902 df_rd_add_problem ();
903 df_note_add_problem ();
904 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
905 df_analyze ();
906 regstat_compute_calls_crossed ();
907 sched_init ();
908
909 /* Allocate memory to hold the DDG array one entry for each loop.
910 We use loop->num as index into this array. */
911 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
912
913 /* Build DDGs for all the relevant loops and hold them in G_ARR
914 indexed by the loop index. */
915 FOR_EACH_LOOP (li, loop, 0)
916 {
917 rtx head, tail;
918 rtx count_reg;
919
920 /* For debugging. */
921 if (dbg_cnt (sms_sched_loop) == false)
922 {
923 if (dump_file)
924 fprintf (dump_file, "SMS reached max limit... \n");
925
926 break;
927 }
928
929 if (! loop_canon_p (loop))
930 continue;
931
932 if (! loop_single_full_bb_p (loop))
933 continue;
934
935 bb = loop->header;
936
937 get_ebb_head_tail (bb, bb, &head, &tail);
938 latch_edge = loop_latch_edge (loop);
939 gcc_assert (single_exit (loop));
940 if (single_exit (loop)->count)
941 trip_count = latch_edge->count / single_exit (loop)->count;
942
943 /* Perfrom SMS only on loops that their average count is above threshold. */
944
945 if ( latch_edge->count
946 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
947 {
948 if (dump_file)
949 {
950 fprintf (dump_file, " %s %d (file, line)\n",
951 insn_file (tail), insn_line (tail));
952 fprintf (dump_file, "SMS single-bb-loop\n");
953 if (profile_info && flag_branch_probabilities)
954 {
955 fprintf (dump_file, "SMS loop-count ");
956 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
957 (HOST_WIDEST_INT) bb->count);
958 fprintf (dump_file, "\n");
959 fprintf (dump_file, "SMS trip-count ");
960 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
961 (HOST_WIDEST_INT) trip_count);
962 fprintf (dump_file, "\n");
963 fprintf (dump_file, "SMS profile-sum-max ");
964 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
965 (HOST_WIDEST_INT) profile_info->sum_max);
966 fprintf (dump_file, "\n");
967 }
968 }
969 continue;
970 }
971
972 /* Make sure this is a doloop. */
973 if ( !(count_reg = doloop_register_get (head, tail)))
974 continue;
975
976 /* Don't handle BBs with calls or barriers, or !single_set insns,
977 or auto-increment insns (to avoid creating invalid reg-moves
978 for the auto-increment insns).
979 ??? Should handle auto-increment insns.
980 ??? Should handle insns defining subregs. */
981 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
982 {
983 rtx set;
984
985 if (CALL_P (insn)
986 || BARRIER_P (insn)
987 || (INSN_P (insn) && !JUMP_P (insn)
988 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
989 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
990 || (INSN_P (insn) && (set = single_set (insn))
991 && GET_CODE (SET_DEST (set)) == SUBREG))
992 break;
993 }
994
995 if (insn != NEXT_INSN (tail))
996 {
997 if (dump_file)
998 {
999 if (CALL_P (insn))
1000 fprintf (dump_file, "SMS loop-with-call\n");
1001 else if (BARRIER_P (insn))
1002 fprintf (dump_file, "SMS loop-with-barrier\n");
1003 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1004 fprintf (dump_file, "SMS reg inc\n");
1005 else if ((INSN_P (insn) && !JUMP_P (insn)
1006 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1007 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1008 else
1009 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1010 print_rtl_single (dump_file, insn);
1011 }
1012
1013 continue;
1014 }
1015
1016 if (! (g = create_ddg (bb, 0)))
1017 {
1018 if (dump_file)
1019 fprintf (dump_file, "SMS doloop\n");
1020 continue;
1021 }
1022
1023 g_arr[loop->num] = g;
1024 }
1025
1026 /* We don't want to perform SMS on new loops - created by versioning. */
1027 FOR_EACH_LOOP (li, loop, 0)
1028 {
1029 rtx head, tail;
1030 rtx count_reg, count_init;
1031 int mii, rec_mii;
1032 unsigned stage_count = 0;
1033 HOST_WIDEST_INT loop_count = 0;
1034
1035 if (! (g = g_arr[loop->num]))
1036 continue;
1037
1038 if (dump_file)
1039 print_ddg (dump_file, g);
1040
1041 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1042
1043 latch_edge = loop_latch_edge (loop);
1044 gcc_assert (single_exit (loop));
1045 if (single_exit (loop)->count)
1046 trip_count = latch_edge->count / single_exit (loop)->count;
1047
1048 if (dump_file)
1049 {
1050 fprintf (dump_file, " %s %d (file, line)\n",
1051 insn_file (tail), insn_line (tail));
1052 fprintf (dump_file, "SMS single-bb-loop\n");
1053 if (profile_info && flag_branch_probabilities)
1054 {
1055 fprintf (dump_file, "SMS loop-count ");
1056 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1057 (HOST_WIDEST_INT) bb->count);
1058 fprintf (dump_file, "\n");
1059 fprintf (dump_file, "SMS profile-sum-max ");
1060 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1061 (HOST_WIDEST_INT) profile_info->sum_max);
1062 fprintf (dump_file, "\n");
1063 }
1064 fprintf (dump_file, "SMS doloop\n");
1065 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1066 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1067 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1068 }
1069
1070
1071 /* In case of th loop have doloop register it gets special
1072 handling. */
1073 count_init = NULL_RTX;
1074 if ((count_reg = doloop_register_get (head, tail)))
1075 {
1076 basic_block pre_header;
1077
1078 pre_header = loop_preheader_edge (loop)->src;
1079 count_init = const_iteration_count (count_reg, pre_header,
1080 &loop_count);
1081 }
1082 gcc_assert (count_reg);
1083
1084 if (dump_file && count_init)
1085 {
1086 fprintf (dump_file, "SMS const-doloop ");
1087 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1088 loop_count);
1089 fprintf (dump_file, "\n");
1090 }
1091
1092 node_order = XNEWVEC (int, g->num_nodes);
1093
1094 mii = 1; /* Need to pass some estimate of mii. */
1095 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1096 mii = MAX (res_MII (g), rec_mii);
1097 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1098
1099 if (dump_file)
1100 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1101 rec_mii, mii, maxii);
1102
1103 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1104 ASAP. */
1105 set_node_sched_params (g);
1106
1107 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1108
1109 if (ps)
1110 stage_count = PS_STAGE_COUNT (ps);
1111
1112 /* Stage count of 1 means that there is no interleaving between
1113 iterations, let the scheduling passes do the job. */
1114 if (stage_count < 1
1115 || (count_init && (loop_count <= stage_count))
1116 || (flag_branch_probabilities && (trip_count <= stage_count)))
1117 {
1118 if (dump_file)
1119 {
1120 fprintf (dump_file, "SMS failed... \n");
1121 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1122 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1123 fprintf (dump_file, ", trip-count=");
1124 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1125 fprintf (dump_file, ")\n");
1126 }
1127 continue;
1128 }
1129 else
1130 {
1131 struct undo_replace_buff_elem *reg_move_replaces;
1132
1133 if (dump_file)
1134 {
1135 fprintf (dump_file,
1136 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1137 stage_count);
1138 print_partial_schedule (ps, dump_file);
1139 fprintf (dump_file,
1140 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1141 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1142 }
1143
1144 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1145 the closing_branch was scheduled and should appear in the last (ii-1)
1146 row. Otherwise, we are free to schedule the branch, and we let nodes
1147 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1148 row; this should reduce stage_count to minimum.
1149 TODO: Revisit the issue of scheduling the insns of the
1150 control part relative to the branch when the control part
1151 has more than one insn. */
1152 normalize_sched_times (ps);
1153 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1154 set_columns_for_ps (ps);
1155
1156 canon_loop (loop);
1157
1158 /* case the BCT count is not known , Do loop-versioning */
1159 if (count_reg && ! count_init)
1160 {
1161 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1162 GEN_INT(stage_count));
1163 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1164 * REG_BR_PROB_BASE) / 100;
1165
1166 loop_version (loop, comp_rtx, &condition_bb,
1167 prob, prob, REG_BR_PROB_BASE - prob,
1168 true);
1169 }
1170
1171 /* Set new iteration count of loop kernel. */
1172 if (count_reg && count_init)
1173 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1174 - stage_count + 1);
1175
1176 /* Now apply the scheduled kernel to the RTL of the loop. */
1177 permute_partial_schedule (ps, g->closing_branch->first_note);
1178
1179 /* Mark this loop as software pipelined so the later
1180 scheduling passes doesn't touch it. */
1181 if (! flag_resched_modulo_sched)
1182 g->bb->flags |= BB_DISABLE_SCHEDULE;
1183 /* The life-info is not valid any more. */
1184 df_set_bb_dirty (g->bb);
1185
1186 reg_move_replaces = generate_reg_moves (ps, true);
1187 if (dump_file)
1188 print_node_sched_params (dump_file, g->num_nodes, g);
1189 /* Generate prolog and epilog. */
1190 generate_prolog_epilog (ps, loop, count_reg, count_init);
1191
1192 free_undo_replace_buff (reg_move_replaces);
1193 }
1194
1195 free_partial_schedule (ps);
1196 free (node_sched_params);
1197 free (node_order);
1198 free_ddg (g);
1199 }
1200
1201 regstat_free_calls_crossed ();
1202 free (g_arr);
1203
1204 /* Release scheduler data, needed until now because of DFA. */
1205 sched_finish ();
1206 loop_optimizer_finalize ();
1207 }
1208
1209 /* The SMS scheduling algorithm itself
1210 -----------------------------------
1211 Input: 'O' an ordered list of insns of a loop.
1212 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1213
1214 'Q' is the empty Set
1215 'PS' is the partial schedule; it holds the currently scheduled nodes with
1216 their cycle/slot.
1217 'PSP' previously scheduled predecessors.
1218 'PSS' previously scheduled successors.
1219 't(u)' the cycle where u is scheduled.
1220 'l(u)' is the latency of u.
1221 'd(v,u)' is the dependence distance from v to u.
1222 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1223 the node ordering phase.
1224 'check_hardware_resources_conflicts(u, PS, c)'
1225 run a trace around cycle/slot through DFA model
1226 to check resource conflicts involving instruction u
1227 at cycle c given the partial schedule PS.
1228 'add_to_partial_schedule_at_time(u, PS, c)'
1229 Add the node/instruction u to the partial schedule
1230 PS at time c.
1231 'calculate_register_pressure(PS)'
1232 Given a schedule of instructions, calculate the register
1233 pressure it implies. One implementation could be the
1234 maximum number of overlapping live ranges.
   'maxRP' The maximum allowed register pressure, it is usually derived from the
           number of registers available in the hardware.
1237
1238 1. II = MII.
1239 2. PS = empty list
1240 3. for each node u in O in pre-computed order
1241 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1250 17. start = Early_start;
1251 18. end = min(Early_start + II - 1 , Late_start);
1252 19. step = 1
1253 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1254 21. start = ASAP(u); end = start + II - 1; step = 1
1255 22. endif
1256
1257 23. success = false
1258 24. for (c = start ; c != end ; c += step)
1259 25. if check_hardware_resources_conflicts(u, PS, c) then
1260 26. add_to_partial_schedule_at_time(u, PS, c)
1261 27. success = true
1262 28. break
1263 29. endif
1264 30. endfor
1265 31. if (success == false) then
1266 32. II = II + 1
1267 33. if (II > maxII) then
1268 34. finish - failed to schedule
1269 35. endif
1270 36. goto 2.
1271 37. endif
1272 38. endfor
1273 39. if (calculate_register_pressure(PS) > maxRP) then
1274 40. goto 32.
1275 41. endif
1276 42. compute epilogue & prologue
1277 43. finish - succeeded to schedule
1278 */
1279
1280 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1281 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1282 set to 0 to save compile time. */
1283 #define DFA_HISTORY SMS_DFA_HISTORY
1284
1285 /* A threshold for the number of repeated unsuccessful attempts to insert
1286 an empty row, before we flush the partial schedule and start over. */
1287 #define MAX_SPLIT_NUM 10
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps. In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  /* PSP/PSS hold the predecessors/successors of U that are already
     scheduled; the _cg variants also report whether the intersection
     turned out non-empty.  */
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  if (psp_not_empty && !pss_not_empty)
    {
      /* Only predecessors are scheduled: scan upwards starting from the
	 earliest cycle permitted by every scheduled predecessor.  */
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge: ");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      /* U cannot start before V's result is ready, adjusted by
		 the iteration distance of the dependence.  */
	      early_start =
		MAX (early_start, p_st + e->latency - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file, "pred st = %d; early_start = %d; ", p_st,
			 early_start);

	      /* A memory dependence caps how far past V we may schedule.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      step = 1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  else if (!psp_not_empty && pss_not_empty)
    {
      /* Only successors are scheduled: scan downwards starting from the
	 latest cycle permitted by every scheduled successor.  */
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      /* U must finish early enough to feed V, adjusted by the
		 iteration distance of the dependence.  */
	      late_start = MIN (late_start,
				s_st - e->latency + (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
			 late_start);

	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	      if (dump_file)
		fprintf (dump_file, "end = %d\n", end);

	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Negative step: the window is walked from late_start downwards.  */
      step = -1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  else if (psp_not_empty && pss_not_empty)
    {
      /* Both sides scheduled: bounded below by the predecessors and
	 above by the successors; scan upwards.  */
      int early_start = INT_MIN;
      int late_start = INT_MAX;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file, "pred st = %d; early_start = %d;", p_st,
			 early_start);

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
			 late_start);

	      /* A memory dependence on a successor raises the lower
		 bound rather than the upper one.  */
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
    }
  else /* psp is empty && pss is empty.  */
    {
      /* No scheduled neighbors: fall back to the ASAP value saved
	 during node ordering.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* The window is empty when START has already met or passed END in
     the direction of STEP.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1519
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm.  Returns the partial schedule on success, or NULL if no
   schedule was found for any II below MAXII.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  ps_insn_ptr psi;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Each iteration either places every schedulable node at the current
     II, or flushes the partial schedule and retries (possibly with a
     larger II) after too many row-split attempts.  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Notes and other non-insns are never scheduled.  */
	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
		INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
		(g->nodes[u].insn)), start, end, step);

	      /* Use must_follow & must_precede bitmaps to determine order
		 of nodes within the cycle.  */
	      sbitmap_zero (must_precede);
	      sbitmap_zero (must_follow);
	      /* TODO: We can add an insn to the must_precede or must_follow
		 bitmaps only if it has tight dependence to U and they
		 both scheduled in the same row.  The current check is less
		 conservative and content with the fact that both U and the
		 insn are scheduled in the same row.  */
	      for (e = u_node->in; e != 0; e = e->next_in)
		if (TEST_BIT (sched_nodes, e->src->cuid)
		    && (SMODULO (SCHED_TIME (e->src), ii) ==
			SMODULO (start, ii)))
		  SET_BIT (must_precede, e->src->cuid);

	      for (e = u_node->out; e != 0; e = e->next_out)
		if (TEST_BIT (sched_nodes, e->dest->cuid)
		    && (SMODULO (SCHED_TIME (e->dest), ii) ==
			SMODULO ((end - step), ii)))
		  SET_BIT (must_follow, e->dest->cuid);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      /* Probe each cycle of the window until insertion succeeds
		 without resource or ordering conflicts.  */
	      for (c = start; c != end; c += step)
		{
		  verify_partial_schedule (ps, sched_nodes);

		  psi = ps_add_node_check_conflicts (ps, u_node, c,
						     must_precede,
						     must_follow);

		  if (psi)
		    {
		      SCHED_TIME (u_node) = c;
		      SET_BIT (sched_nodes, u);
		      success = 1;
		      num_splits = 0;
		      if (dump_file)
			fprintf (dump_file, "Scheduled w/o split in %d\n", c);

		      break;
		    }
		}
	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      if (ii++ == maxii)
		break;

	      /* Too many consecutive split attempts: flush the partial
		 schedule and start over at the (already bumped) II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      num_splits++;
	      /* Insert an empty row to widen the failed window; LOW/UP
		 arguments follow the scan direction.  */
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	      /* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}                       /* Continue with next node.  */
    }                           /* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
1679
/* This function inserts a new empty row into PS at the position
   according to SPLITROW, keeping all already scheduled instructions
   intact and updating their SCHED_TIME and cycle accordingly.  */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
		     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  normalize_sched_times (ps);
  rotate_partial_schedule (ps, ps->min_cycle);

  /* Copy the rows into a fresh array of NEW_II rows, leaving the row at
     SPLIT_ROW empty; renumber every moved insn on the way.  */
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Rows before the split gain one cycle for each full II the
	     old schedule time spans (the new row added per stage).  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The newly inserted empty row.  */
  rows_new[split_row] = NULL;

  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Rows at or after the split additionally shift past the new
	     row itself, hence the extra +1.  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
1759
1760 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1761 UP which are the boundaries of it's scheduling window; compute using
1762 SCHED_NODES and II a row in the partial schedule that can be split
1763 which will separate a critical predecessor from a critical successor
1764 thereby expanding the window, and return it. */
1765 static int
1766 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1767 ddg_node_ptr u_node)
1768 {
1769 ddg_edge_ptr e;
1770 int lower = INT_MIN, upper = INT_MAX;
1771 ddg_node_ptr crit_pred = NULL;
1772 ddg_node_ptr crit_succ = NULL;
1773 int crit_cycle;
1774
1775 for (e = u_node->in; e != 0; e = e->next_in)
1776 {
1777 ddg_node_ptr v_node = e->src;
1778
1779 if (TEST_BIT (sched_nodes, v_node->cuid)
1780 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1781 if (SCHED_TIME (v_node) > lower)
1782 {
1783 crit_pred = v_node;
1784 lower = SCHED_TIME (v_node);
1785 }
1786 }
1787
1788 if (crit_pred != NULL)
1789 {
1790 crit_cycle = SCHED_TIME (crit_pred) + 1;
1791 return SMODULO (crit_cycle, ii);
1792 }
1793
1794 for (e = u_node->out; e != 0; e = e->next_out)
1795 {
1796 ddg_node_ptr v_node = e->dest;
1797 if (TEST_BIT (sched_nodes, v_node->cuid)
1798 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1799 if (SCHED_TIME (v_node) < upper)
1800 {
1801 crit_succ = v_node;
1802 upper = SCHED_TIME (v_node);
1803 }
1804 }
1805
1806 if (crit_succ != NULL)
1807 {
1808 crit_cycle = SCHED_TIME (crit_succ);
1809 return SMODULO (crit_cycle, ii);
1810 }
1811
1812 if (dump_file)
1813 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
1814
1815 return SMODULO ((low + up + 1) / 2, ii);
1816 }
1817
1818 static void
1819 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
1820 {
1821 int row;
1822 ps_insn_ptr crr_insn;
1823
1824 for (row = 0; row < ps->ii; row++)
1825 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
1826 {
1827 ddg_node_ptr u = crr_insn->node;
1828
1829 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
1830 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
1831 popcount (sched_nodes) == number of insns in ps. */
1832 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
1833 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
1834 }
1835 }
1836
1837 \f
/* This page implements the algorithm for ordering the nodes of a DDG
   for modulo scheduling, activated through the
   "int sms_order_nodes (ddg_ptr, int mii, int * result, int *pmax_asap)"
   API.  */
1841
/* Accessors for the per-node ordering parameters that
   calculate_order_params stashes in each DDG node's aux.info field.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: slack between the earliest and latest feasible cycle.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which the nodes of an SCC are traversed while ordering.  */
enum sms_direction {BOTTOMUP, TOPDOWN};
1859
/* Per-node ordering data computed by calculate_order_params; accessed
   through the ORDER_PARAMS/ASAP/ALAP/HEIGHT macros.  */
struct node_order_params
{
  int asap;	/* Longest zero-distance latency path from any source.  */
  int alap;	/* Latest cycle meeting all zero-distance successors' ALAPs.  */
  int height;	/* Longest zero-distance latency path to any sink.  */
};
1866
1867 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1868 static void
1869 check_nodes_order (int *node_order, int num_nodes)
1870 {
1871 int i;
1872 sbitmap tmp = sbitmap_alloc (num_nodes);
1873
1874 sbitmap_zero (tmp);
1875
1876 if (dump_file)
1877 fprintf (dump_file, "SMS final nodes order: \n");
1878
1879 for (i = 0; i < num_nodes; i++)
1880 {
1881 int u = node_order[i];
1882
1883 if (dump_file)
1884 fprintf (dump_file, "%d ", u);
1885 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1886
1887 SET_BIT (tmp, u);
1888 }
1889
1890 if (dump_file)
1891 fprintf (dump_file, "\n");
1892
1893 sbitmap_free (tmp);
1894 }
1895
1896 /* Order the nodes of G for scheduling and pass the result in
1897 NODE_ORDER. Also set aux.count of each node to ASAP.
1898 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
1899 static int
1900 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
1901 {
1902 int i;
1903 int rec_mii = 0;
1904 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1905
1906 nopa nops = calculate_order_params (g, mii, pmax_asap);
1907
1908 if (dump_file)
1909 print_sccs (dump_file, sccs, g);
1910
1911 order_nodes_of_sccs (sccs, node_order);
1912
1913 if (sccs->num_sccs > 0)
1914 /* First SCC has the largest recurrence_length. */
1915 rec_mii = sccs->sccs[0]->recurrence_length;
1916
1917 /* Save ASAP before destroying node_order_params. */
1918 for (i = 0; i < g->num_nodes; i++)
1919 {
1920 ddg_node_ptr v = &g->nodes[i];
1921 v->aux.count = ASAP (v);
1922 }
1923
1924 free (nops);
1925 free_ddg_all_sccs (sccs);
1926 check_nodes_order (node_order, g->num_nodes);
1927
1928 return rec_mii;
1929 }
1930
1931 static void
1932 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1933 {
1934 int i, pos = 0;
1935 ddg_ptr g = all_sccs->ddg;
1936 int num_nodes = g->num_nodes;
1937 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1938 sbitmap on_path = sbitmap_alloc (num_nodes);
1939 sbitmap tmp = sbitmap_alloc (num_nodes);
1940 sbitmap ones = sbitmap_alloc (num_nodes);
1941
1942 sbitmap_zero (prev_sccs);
1943 sbitmap_ones (ones);
1944
1945 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1946 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1947 for (i = 0; i < all_sccs->num_sccs; i++)
1948 {
1949 ddg_scc_ptr scc = all_sccs->sccs[i];
1950
1951 /* Add nodes on paths from previous SCCs to the current SCC. */
1952 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1953 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1954
1955 /* Add nodes on paths from the current SCC to previous SCCs. */
1956 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1957 sbitmap_a_or_b (tmp, tmp, on_path);
1958
1959 /* Remove nodes of previous SCCs from current extended SCC. */
1960 sbitmap_difference (tmp, tmp, prev_sccs);
1961
1962 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1963 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1964 }
1965
1966 /* Handle the remaining nodes that do not belong to any scc. Each call
1967 to order_nodes_in_scc handles a single connected component. */
1968 while (pos < g->num_nodes)
1969 {
1970 sbitmap_difference (tmp, ones, prev_sccs);
1971 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1972 }
1973 sbitmap_free (prev_sccs);
1974 sbitmap_free (on_path);
1975 sbitmap_free (tmp);
1976 sbitmap_free (ones);
1977 }
1978
/* MII is needed if we consider backarcs (that do not close recursive cycles).
   Allocate and return the ordering parameters (ASAP/ALAP/HEIGHT) for every
   node of G, and store the maximal ASAP in *PMAX_ASAP.  The caller owns
   the returned array.  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize ASAP/ALAP/HEIGHT to zero (xcalloc zeroes the array).  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass: ASAP of a node is the longest latency path over
     zero-distance incoming edges.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP is constrained by zero-distance successors'
     ALAPs; HEIGHT is the longest latency path over zero-distance
     outgoing edges.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
	{
	  ddg_node_ptr u_node = &g->nodes[u];

	  fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
		   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
	}
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
2047
2048 static int
2049 find_max_asap (ddg_ptr g, sbitmap nodes)
2050 {
2051 unsigned int u = 0;
2052 int max_asap = -1;
2053 int result = -1;
2054 sbitmap_iterator sbi;
2055
2056 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2057 {
2058 ddg_node_ptr u_node = &g->nodes[u];
2059
2060 if (max_asap < ASAP (u_node))
2061 {
2062 max_asap = ASAP (u_node);
2063 result = u;
2064 }
2065 }
2066 return result;
2067 }
2068
2069 static int
2070 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2071 {
2072 unsigned int u = 0;
2073 int max_hv = -1;
2074 int min_mob = INT_MAX;
2075 int result = -1;
2076 sbitmap_iterator sbi;
2077
2078 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2079 {
2080 ddg_node_ptr u_node = &g->nodes[u];
2081
2082 if (max_hv < HEIGHT (u_node))
2083 {
2084 max_hv = HEIGHT (u_node);
2085 min_mob = MOB (u_node);
2086 result = u;
2087 }
2088 else if ((max_hv == HEIGHT (u_node))
2089 && (min_mob > MOB (u_node)))
2090 {
2091 min_mob = MOB (u_node);
2092 result = u;
2093 }
2094 }
2095 return result;
2096 }
2097
2098 static int
2099 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2100 {
2101 unsigned int u = 0;
2102 int max_dv = -1;
2103 int min_mob = INT_MAX;
2104 int result = -1;
2105 sbitmap_iterator sbi;
2106
2107 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2108 {
2109 ddg_node_ptr u_node = &g->nodes[u];
2110
2111 if (max_dv < DEPTH (u_node))
2112 {
2113 max_dv = DEPTH (u_node);
2114 min_mob = MOB (u_node);
2115 result = u;
2116 }
2117 else if ((max_dv == DEPTH (u_node))
2118 && (min_mob > MOB (u_node)))
2119 {
2120 min_mob = MOB (u_node);
2121 result = u;
2122 }
2123 }
2124 return result;
2125 }
2126
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.
   Alternates between top-down and bottom-up sweeps over the SCC,
   each sweep draining a workset of candidate nodes; returns the
   updated POS.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Choose the initial workset and sweep direction: prefer the SCC
     nodes that are predecessors (then successors) of the already
     ordered nodes; otherwise seed with the SCC node of maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Drain worksets until every reachable SCC node has been placed.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Top-down: repeatedly pick the workset node of maximal
	     height (min mobility on ties) and feed its in-SCC
	     successors back into the workset.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Flip direction: restart from the in-SCC predecessors of
	     everything ordered so far.  */
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Bottom-up: mirror image using maximal depth and in-SCC
	     predecessors.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2228
2229 \f
2230 /* This page contains functions for manipulating partial-schedules during
2231 modulo scheduling. */
2232
2233 /* Create a partial schedule and allocate a memory to hold II rows. */
2234
2235 static partial_schedule_ptr
2236 create_partial_schedule (int ii, ddg_ptr g, int history)
2237 {
2238 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2239 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2240 ps->ii = ii;
2241 ps->history = history;
2242 ps->min_cycle = INT_MAX;
2243 ps->max_cycle = INT_MIN;
2244 ps->g = g;
2245
2246 return ps;
2247 }
2248
2249 /* Free the PS_INSNs in rows array of the given partial schedule.
2250 ??? Consider caching the PS_INSN's. */
2251 static void
2252 free_ps_insns (partial_schedule_ptr ps)
2253 {
2254 int i;
2255
2256 for (i = 0; i < ps->ii; i++)
2257 {
2258 while (ps->rows[i])
2259 {
2260 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2261
2262 free (ps->rows[i]);
2263 ps->rows[i] = ps_insn;
2264 }
2265 ps->rows[i] = NULL;
2266 }
2267 }
2268
2269 /* Free all the memory allocated to the partial schedule. */
2270
2271 static void
2272 free_partial_schedule (partial_schedule_ptr ps)
2273 {
2274 if (!ps)
2275 return;
2276 free_ps_insns (ps);
2277 free (ps->rows);
2278 free (ps);
2279 }
2280
2281 /* Clear the rows array with its PS_INSNs, and create a new one with
2282 NEW_II rows. */
2283
2284 static void
2285 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2286 {
2287 if (!ps)
2288 return;
2289 free_ps_insns (ps);
2290 if (new_ii == ps->ii)
2291 return;
2292 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2293 * sizeof (ps_insn_ptr));
2294 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2295 ps->ii = new_ii;
2296 ps->min_cycle = INT_MAX;
2297 ps->max_cycle = INT_MIN;
2298 }
2299
2300 /* Prints the partial schedule as an ii rows array, for each rows
2301 print the ids of the insns in it. */
2302 void
2303 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2304 {
2305 int i;
2306
2307 for (i = 0; i < ps->ii; i++)
2308 {
2309 ps_insn_ptr ps_i = ps->rows[i];
2310
2311 fprintf (dump, "\n[CYCLE %d ]: ", i);
2312 while (ps_i)
2313 {
2314 fprintf (dump, "%d, ",
2315 INSN_UID (ps_i->node->insn));
2316 ps_i = ps_i->next_in_row;
2317 }
2318 }
2319 }
2320
2321 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2322 static ps_insn_ptr
2323 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2324 {
2325 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2326
2327 ps_i->node = node;
2328 ps_i->next_in_row = NULL;
2329 ps_i->prev_in_row = NULL;
2330 ps_i->row_rest_count = rest_count;
2331 ps_i->cycle = cycle;
2332
2333 return ps_i;
2334 }
2335
2336
2337 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2338 node is not found in the partial schedule, else returns true. */
2339 static bool
2340 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2341 {
2342 int row;
2343
2344 if (!ps || !ps_i)
2345 return false;
2346
2347 row = SMODULO (ps_i->cycle, ps->ii);
2348 if (! ps_i->prev_in_row)
2349 {
2350 if (ps_i != ps->rows[row])
2351 return false;
2352
2353 ps->rows[row] = ps_i->next_in_row;
2354 if (ps->rows[row])
2355 ps->rows[row]->prev_in_row = NULL;
2356 }
2357 else
2358 {
2359 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2360 if (ps_i->next_in_row)
2361 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2362 }
2363 free (ps_i);
2364 return true;
2365 }
2366
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Returns false when no legal column exists, true after linking PS_I
   into its row.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede constraint: insert at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I directly after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2429
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already last in its row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     The four link fields of PS_I and NEXT are rewired in a fixed
     order; PREV may be NULL when PS_I was the row head.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* NEXT becomes the new head of the row if PS_I was the head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2478
2479 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2480 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2481 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2482 before/after (respectively) the node pointed to by PS_I when scheduled
2483 in the same cycle. */
2484 static ps_insn_ptr
2485 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2486 sbitmap must_precede, sbitmap must_follow)
2487 {
2488 ps_insn_ptr ps_i;
2489 int rest_count = 1;
2490 int row = SMODULO (cycle, ps->ii);
2491
2492 if (ps->rows[row]
2493 && ps->rows[row]->row_rest_count >= issue_rate)
2494 return NULL;
2495
2496 if (ps->rows[row])
2497 rest_count += ps->rows[row]->row_rest_count;
2498
2499 ps_i = create_ps_insn (node, rest_count, cycle);
2500
2501 /* Finds and inserts PS_I according to MUST_FOLLOW and
2502 MUST_PRECEDE. */
2503 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2504 {
2505 free (ps_i);
2506 return NULL;
2507 }
2508
2509 return ps_i;
2510 }
2511
2512 /* Advance time one cycle. Assumes DFA is being used. */
2513 static void
2514 advance_one_cycle (void)
2515 {
2516 if (targetm.sched.dfa_pre_cycle_insn)
2517 state_transition (curr_state,
2518 targetm.sched.dfa_pre_cycle_insn ());
2519
2520 state_transition (curr_state, NULL);
2521
2522 if (targetm.sched.dfa_post_cycle_insn)
2523 state_transition (curr_state,
2524 targetm.sched.dfa_post_cycle_insn ());
2525 }
2526
2527
2528
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  /* Replay the whole window through a freshly reset DFA.  */
  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  Cycles map onto
	 rows modulo II.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2580
2581 /* Checks if the given node causes resource conflicts when added to PS at
2582 cycle C. If not the node is added to PS and returned; otherwise zero
2583 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2584 cuid N must be come before/after (respectively) the node pointed to by
2585 PS_I when scheduled in the same cycle. */
2586 ps_insn_ptr
2587 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2588 int c, sbitmap must_precede,
2589 sbitmap must_follow)
2590 {
2591 int has_conflicts = 0;
2592 ps_insn_ptr ps_i;
2593
2594 /* First add the node to the PS, if this succeeds check for
2595 conflicts, trying different issue slots in the same row. */
2596 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2597 return NULL; /* Failed to insert the node at the given cycle. */
2598
2599 has_conflicts = ps_has_conflicts (ps, c, c)
2600 || (ps->history > 0
2601 && ps_has_conflicts (ps,
2602 c - ps->history,
2603 c + ps->history));
2604
2605 /* Try different issue slots to find one that the given node can be
2606 scheduled in without conflicts. */
2607 while (has_conflicts)
2608 {
2609 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2610 break;
2611 has_conflicts = ps_has_conflicts (ps, c, c)
2612 || (ps->history > 0
2613 && ps_has_conflicts (ps,
2614 c - ps->history,
2615 c + ps->history));
2616 }
2617
2618 if (has_conflicts)
2619 {
2620 remove_node_from_ps (ps, ps_i);
2621 return NULL;
2622 }
2623
2624 ps->min_cycle = MIN (ps->min_cycle, c);
2625 ps->max_cycle = MAX (ps->max_cycle, c);
2626 return ps_i;
2627 }
2628
2629 /* Rotate the rows of PS such that insns scheduled at time
2630 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2631 void
2632 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2633 {
2634 int i, row, backward_rotates;
2635 int last_row = ps->ii - 1;
2636
2637 if (start_cycle == 0)
2638 return;
2639
2640 backward_rotates = SMODULO (start_cycle, ps->ii);
2641
2642 /* Revisit later and optimize this into a single loop. */
2643 for (i = 0; i < backward_rotates; i++)
2644 {
2645 ps_insn_ptr first_row = ps->rows[0];
2646
2647 for (row = 0; row < last_row; row++)
2648 ps->rows[row] = ps->rows[row+1];
2649
2650 ps->rows[last_row] = first_row;
2651 }
2652
2653 ps->max_cycle -= start_cycle;
2654 ps->min_cycle -= start_cycle;
2655 }
2656
2657 #endif /* INSN_SCHEDULING */
2658 \f
2659 static bool
2660 gate_handle_sms (void)
2661 {
2662 return (optimize > 0 && flag_modulo_sched);
2663 }
2664
2665
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Pass entry point: sets up cfglayout
   mode, runs sms_schedule, then restores the layout.  Always returns 0
   (no extra TODO flags).  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  Chain the blocks through their aux
     pointers in current order so cfg_layout_finalize keeps it.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2690
/* Pass descriptor for the swing modulo scheduling RTL pass.  */
struct tree_opt_pass pass_sms =
{
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'm'                                   /* letter */
};
2709