intrinsic.h (gfc_check_selected_real_kind, [...]): Update prototypes.
[gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "toplev.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "hard-reg-set.h"
31 #include "regs.h"
32 #include "function.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
36 #include "except.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "sched-int.h"
40 #include "target.h"
41 #include "cfglayout.h"
42 #include "cfgloop.h"
43 #include "cfghooks.h"
44 #include "expr.h"
45 #include "params.h"
46 #include "gcov-io.h"
47 #include "ddg.h"
48 #include "timevar.h"
49 #include "tree-pass.h"
50 #include "dbgcnt.h"
51 #include "df.h"
52
53 #ifdef INSN_SCHEDULING
54
55 /* This file contains the implementation of the Swing Modulo Scheduler,
56 described in the following references:
57 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
58 Lifetime--sensitive modulo scheduling in a production environment.
59 IEEE Trans. on Comps., 50(3), March 2001
60 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63
64 The basic structure is:
65 1. Build a data-dependence graph (DDG) for each loop.
66 2. Use the DDG to order the insns of a loop (not in topological order
67 necessarily, but rather) trying to place each insn after all its
68 predecessors _or_ after all its successors.
69 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70 4. Use the ordering to perform list-scheduling of the loop:
71 1. Set II = MII. We will try to schedule the loop within II cycles.
72 2. Try to schedule the insns one by one according to the ordering.
73 For each insn compute an interval of cycles by considering already-
74 scheduled preds and succs (and associated latencies); try to place
75 the insn in the cycles of this window checking for potential
76 resource conflicts (using the DFA interface).
77 Note: this is different from the cycle-scheduling of schedule_insns;
78 here the insns are not scheduled monotonically top-down (nor bottom-
79 up).
80 3. If failed in scheduling all insns - bump II++ and try again, unless
81 II reaches an upper bound MaxII, in which case report failure.
82 5. If we succeeded in scheduling the loop within II cycles, we now
83 generate prolog and epilog, decrease the counter of the loop, and
84 perform modulo variable expansion for live ranges that span more than
85 II cycles (i.e. use register copies to prevent a def from overwriting
86 itself before reaching the use).
87
88 SMS works with countable loops (1) whose control part can be easily
89 decoupled from the rest of the loop and (2) whose loop count can
90 be easily adjusted. This is because we peel a constant number of
91 iterations into a prologue and epilogue for which we want to avoid
92 emitting the control part, and a kernel which is to iterate that
93 constant number of iterations less than the original loop. So the
94 control part should be a set of insns clearly identified and having
95 its own iv, not otherwise used in the loop (at-least for now), which
96 initializes a register before the loop to the number of iterations.
97 Currently SMS relies on the do-loop pattern to recognize such loops,
98 where (1) the control part comprises of all insns defining and/or
99 using a certain 'count' register and (2) the loop count can be
100 adjusted by modifying this register prior to the loop.
101 TODO: Rely on cfgloop analysis instead. */
102 \f
103 /* This page defines partial-schedule structures and functions for
104 modulo scheduling. */
105
106 typedef struct partial_schedule *partial_schedule_ptr;
107 typedef struct ps_insn *ps_insn_ptr;
108
109 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
110 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111
112 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
113 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114
115 /* Perform signed modulo, always returning a non-negative value. */
116 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117
118 /* The number of different iterations the nodes in ps span, assuming
119 the stage boundaries are placed efficiently. */
120 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
121 + 1 + (ps)->ii - 1) / (ps)->ii)
122
/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  Rows are doubly-linked
     lists; the list order within a row defines the column order
     (see set_columns_for_ps).  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.
     NOTE(review): presumably used to bound per-row issue width —
     confirm against the row-insertion code (outside this chunk).  */
  int row_rest_count;
};
140
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).
     An insn scheduled in absolute cycle C lives in row SMODULO (C, ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.
     Together with min_cycle this determines the stage count
     (see PS_STAGE_COUNT).  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
160
/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.
   Built by generate_reg_moves, freed by free_undo_replace_buff.  */
struct undo_replace_buff_elem
{
  /* The insn in which a register was replaced.  */
  rtx insn;
  /* The register that was replaced (the original def).  */
  rtx orig_reg;
  /* The fresh pseudo that replaced ORIG_REG in INSN.  */
  rtx new_reg;
  /* Singly-linked list; newest replacement first.  */
  struct undo_replace_buff_elem *next;
};
170
171
172
173 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
174 static void free_partial_schedule (partial_schedule_ptr);
175 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
176 void print_partial_schedule (partial_schedule_ptr, FILE *);
177 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
178 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
179 ddg_node_ptr node, int cycle,
180 sbitmap must_precede,
181 sbitmap must_follow);
182 static void rotate_partial_schedule (partial_schedule_ptr, int);
183 void set_row_column_for_ps (partial_schedule_ptr);
184 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
185 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
186
187 \f
188 /* This page defines constants and structures for the modulo scheduling
189 driver. */
190
191 static int sms_order_nodes (ddg_ptr, int, int *, int *);
192 static void set_node_sched_params (ddg_ptr);
193 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
194 static void permute_partial_schedule (partial_schedule_ptr, rtx);
195 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
196 rtx, rtx);
197 static void duplicate_insns_of_cycles (partial_schedule_ptr,
198 int, int, int, rtx);
199
200 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
201 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
202 #define SCHED_FIRST_REG_MOVE(x) \
203 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
204 #define SCHED_NREG_MOVES(x) \
205 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
206 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
207 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
208 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
209
210 /* The scheduling parameters held for each node. */
211 typedef struct node_sched_params
212 {
213 int asap; /* A lower-bound on the absolute scheduling cycle. */
214 int time; /* The absolute scheduling cycle (time >= asap). */
215
216 /* The following field (first_reg_move) is a pointer to the first
217 register-move instruction added to handle the modulo-variable-expansion
218 of the register defined by this node. This register-move copies the
219 original register defined by the node. */
220 rtx first_reg_move;
221
222 /* The number of register-move instructions added, immediately preceding
223 first_reg_move. */
224 int nreg_moves;
225
226 int row; /* Holds time % ii. */
227 int stage; /* Holds time / ii. */
228
229 /* The column of a node inside the ps. If nodes u, v are on the same row,
230 u will precede v if column (u) < column (v). */
231 int column;
232 } *node_sched_params_ptr;
233
234 \f
235 /* The following three functions are copied from the current scheduler
236 code in order to use sched_analyze() for computing the dependencies.
237 They are used when initializing the sched_info structure. */
238 static const char *
239 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
240 {
241 static char tmp[80];
242
243 sprintf (tmp, "i%4d", INSN_UID (insn));
244 return tmp;
245 }
246
/* Callback for the dependence analyzer: record registers used/set by a
   conditional jump.  Intentionally empty — SMS does not add any extra
   register dependencies for jumps (presumably because it only handles
   single-basic-block loops whose sole jump is the control part —
   NOTE(review): confirm against sms_schedule's loop filtering).  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
254
/* Common scheduler info; initialized from haifa_common_sched_info in
   setup_sched_infos, with only the pass id overridden.  */
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis hooks: only the jump-dependency callback is
   provided; all other hooks and flags are left null/zero.  */
static struct sched_deps_info_def sms_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
    0, 0, 0
  };

/* Haifa scheduler hooks: SMS only needs insn printing for dumps; the
   list-scheduling callbacks are unused and therefore null.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, /* insn_finishes_block_p */
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL,
  0
};
282
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  /* The loop must end in a jump for doloop_condition_get to apply.  */
  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The condition is either (ne (reg) ...) or (ne (plus (reg) ...) ...),
     depending on whether the decrement is folded into the branch.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    /* doloop_condition_get returned a shape we do not recognize;
       anything else indicates an inconsistency with its contract.  */
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
                             : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
        if (dump_file)
          {
            fprintf (dump_file, "SMS count_reg found ");
            print_rtl_single (dump_file, reg);
            fprintf (dump_file, " outside control in insn:\n");
            print_rtl_single (dump_file, insn);
          }

        return NULL_RTX;
      }

  return reg;
#else
  /* Target has no doloop_end pattern: SMS cannot identify the loop
     control register.  */
  return NULL_RTX;
#endif
}
335
336 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
337 that the number of iterations is a compile-time constant. If so,
338 return the rtx that sets COUNT_REG to a constant, and set COUNT to
339 this constant. Otherwise return 0. */
340 static rtx
341 const_iteration_count (rtx count_reg, basic_block pre_header,
342 HOST_WIDEST_INT * count)
343 {
344 rtx insn;
345 rtx head, tail;
346
347 if (! pre_header)
348 return NULL_RTX;
349
350 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
351
352 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
353 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
354 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
355 {
356 rtx pat = single_set (insn);
357
358 if (CONST_INT_P (SET_SRC (pat)))
359 {
360 *count = INTVAL (SET_SRC (pat));
361 return insn;
362 }
363
364 return NULL_RTX;
365 }
366
367 return NULL_RTX;
368 }
369
370 /* A very simple resource-based lower bound on the initiation interval.
371 ??? Improve the accuracy of this bound by considering the
372 utilization of various units. */
373 static int
374 res_MII (ddg_ptr g)
375 {
376 if (targetm.sched.sms_res_mii)
377 return targetm.sched.sms_res_mii (g);
378
379 return ((g->num_nodes - g->num_debug) / issue_rate);
380 }
381
382
/* Points to the array that contains the sched data for each node.
   Allocated by set_node_sched_params; indexed by node cuid.  */
static node_sched_params_ptr node_sched_params;

/* Allocate sched_params for each node and initialize it.  Assumes that
   the aux field of each node contain the asap bound (computed earlier),
   and copies it into the sched_params field.  */
static void
set_node_sched_params (ddg_ptr g)
{
  int i;

  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
  /* Initialize ASAP/ALAP/HIGHT to zero.  */
  node_sched_params = (node_sched_params_ptr)
		       xcalloc (g->num_nodes,
				sizeof (struct node_sched_params));

  /* Set the pointer of the general data of the node to point to the
     appropriate sched_params structure.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      /* Watch out for aliasing problems?  */
      /* aux is a union: count (the asap bound) is read before info
	 (the sched-params pointer) overwrites it.  */
      node_sched_params[i].asap = g->nodes[i].aux.count;
      g->nodes[i].aux.info = &node_sched_params[i];
    }
}
409
410 static void
411 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
412 {
413 int i;
414
415 if (! file)
416 return;
417 for (i = 0; i < num_nodes; i++)
418 {
419 node_sched_params_ptr nsp = &node_sched_params[i];
420 rtx reg_move = nsp->first_reg_move;
421 int j;
422
423 fprintf (file, "Node = %d; INSN = %d\n", i,
424 (INSN_UID (g->nodes[i].insn)));
425 fprintf (file, " asap = %d:\n", nsp->asap);
426 fprintf (file, " time = %d:\n", nsp->time);
427 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
428 for (j = 0; j < nsp->nreg_moves; j++)
429 {
430 fprintf (file, " reg_move = ");
431 print_rtl_single (file, reg_move);
432 reg_move = PREV_INSN (reg_move);
433 }
434 }
435 }
436
/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1. Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.   The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                          { 1 if not.
*/
/* Generate the register moves required by the partial schedule PS, as
   described above.  When RESCAN is true, each insn whose register was
   replaced is rescanned by DF.  Returns a list of the replacements
   performed so they can be undone (see free_undo_replace_buff) if SMS
   turns out not to be profitable.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Number of ii-sized windows the def-to-use lifetime spans.  */
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A distance-1 dependence reaches the use one iteration
	       later, adding ii cycles to the lifetime.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  uses_of_defs[k] holds the cuids of the uses that must
	 read the (k+1)-th copy.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Same computation as for nreg_moves4e above, now used to
	       assign each use to a specific copy.  */
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    /* dest_copy == 0 means the use reads the original register.  */
	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      /* Insert the reg-moves right before the notes which precede
	 the insn they relates to.  */
      last_reg_move = u->first_note;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  /* Each move copies the previous copy, forming a chain
	     old_reg -> copy1 -> copy2 -> ...  */
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Redirect the uses assigned to this copy, recording each
	     replacement (newest first) so it can be undone.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
558
559 /* Free memory allocated for the undo buffer. */
560 static void
561 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
562 {
563
564 while (reg_move_replaces)
565 {
566 struct undo_replace_buff_elem *rep = reg_move_replaces;
567
568 reg_move_replaces = reg_move_replaces->next;
569 free (rep);
570 }
571 }
572
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  /* Shifting all times by min_cycle makes the earliest insn start at 0.  */
  int amount = PS_MIN_CYCLE (ps);
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	ddg_node_ptr u = crr_insn->node;
	int normalized_time = SCHED_TIME (u) - amount;

	if (dump_file)
	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
		   min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
		   (u), ps->min_cycle);
	/* Every scheduled time must lie within [min_cycle, max_cycle].  */
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
	SCHED_TIME (u) = normalized_time;
	/* With times starting at 0, row/stage are a plain div/mod.  */
	SCHED_ROW (u) = normalized_time % ii;
	SCHED_STAGE (u) = normalized_time / ii;
      }
}
600
601 /* Set SCHED_COLUMN of each node according to its position in PS. */
602 static void
603 set_columns_for_ps (partial_schedule_ptr ps)
604 {
605 int row;
606
607 for (row = 0; row < ps->ii; row++)
608 {
609 ps_insn_ptr cur_insn = ps->rows[row];
610 int column = 0;
611
612 for (; cur_insn; cur_insn = cur_insn->next_in_row)
613 SCHED_COLUMN (cur_insn->node) = column++;
614 }
615 }
616
617 /* Permute the insns according to their order in PS, from row 0 to
618 row ii-1, and position them right before LAST. This schedules
619 the insns of the loop kernel. */
620 static void
621 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
622 {
623 int ii = ps->ii;
624 int row;
625 ps_insn_ptr ps_ij;
626
627 for (row = 0; row < ii ; row++)
628 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
629 if (PREV_INSN (last) != ps_ij->node->insn)
630 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
631 PREV_INSN (last));
632 }
633
/* Emit (into the current sequence) copies of the insns of PS whose
   stage lies in [FROM_STAGE, TO_STAGE], together with the reg-moves
   they need, to build one iteration of the prolog (FOR_PROLOG true)
   or epilog (FOR_PROLOG false).  Insns mentioning COUNT_REG belong to
   the loop control part and are skipped.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.
	       (The moves are chained backwards via PREV_INSN from
	       SCHED_FIRST_REG_MOVE.)  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards:
	       walk back over the whole chain to reach the last move.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit the selected reg-moves (patterns copied, so the kernel's
	   own insns are untouched), then the insn itself if its stage
	   falls in the requested range.  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
699
700
/* Generate the instructions (including reg_moves) for prolog & epilog.
   PS is the scheduled kernel, LOOP the loop being modulo-scheduled,
   COUNT_REG the loop-control register, and COUNT_INIT the insn setting
   it to a compile-time constant (null if the trip count is unknown).  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
         adjust the loop count by STAGE_COUNT.  If loop count is constant
         (count_init), this constant is adjusted by STAGE_COUNT in
         generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      /* count_reg -= last_stage, since the kernel runs that many
	 fewer iterations than the original loop.  */
      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
				     count_reg, GEN_INT (last_stage),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  /* Stage i of the prolog emits stages 0..i, filling the pipeline.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  /* Stage i of the epilog emits stages i+1..last_stage, draining the
     pipeline.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
750
751 /* Return true if all the BBs of the loop are empty except the
752 loop header. */
753 static bool
754 loop_single_full_bb_p (struct loop *loop)
755 {
756 unsigned i;
757 basic_block *bbs = get_loop_body (loop);
758
759 for (i = 0; i < loop->num_nodes ; i++)
760 {
761 rtx head, tail;
762 bool empty_bb = true;
763
764 if (bbs[i] == loop->header)
765 continue;
766
767 /* Make sure that basic blocks other than the header
768 have only notes labels or jumps. */
769 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
770 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
771 {
772 if (NOTE_P (head) || LABEL_P (head)
773 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
774 continue;
775 empty_bb = false;
776 break;
777 }
778
779 if (! empty_bb)
780 {
781 free (bbs);
782 return false;
783 }
784 }
785 free (bbs);
786 return true;
787 }
788
789 /* A simple loop from SMS point of view; it is a loop that is composed of
790 either a single basic block or two BBs - a header and a latch. */
791 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
792 && (EDGE_COUNT (loop->latch->preds) == 1) \
793 && (EDGE_COUNT (loop->latch->succs) == 1))
794
795 /* Return true if the loop is in its canonical form and false if not.
796 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
797 static bool
798 loop_canon_p (struct loop *loop)
799 {
800
801 if (loop->inner || !loop_outer (loop))
802 {
803 if (dump_file)
804 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
805 return false;
806 }
807
808 if (!single_exit (loop))
809 {
810 if (dump_file)
811 {
812 rtx insn = BB_END (loop->header);
813
814 fprintf (dump_file, "SMS loop many exits ");
815 fprintf (dump_file, " %s %d (file, line)\n",
816 insn_file (insn), insn_line (insn));
817 }
818 return false;
819 }
820
821 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
822 {
823 if (dump_file)
824 {
825 rtx insn = BB_END (loop->header);
826
827 fprintf (dump_file, "SMS loop many BBs. ");
828 fprintf (dump_file, " %s %d (file, line)\n",
829 insn_file (insn), insn_line (insn));
830 }
831 return false;
832 }
833
834 return true;
835 }
836
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* Give the loop a dedicated latch: split the latch->header edge when
     the latch is shared with the header or has multiple successors.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      /* NOTE(review): relies on the latch being among the header's
	 predecessors, so the search always finds E before split_edge
	 is reached — a CFG invariant for natural loops.  */
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
861
/* Setup infos.  Installs the SMS-specific scheduler hook tables:
   clones the Haifa common info (overriding only the pass id), then
   points the global hook pointers at the SMS structures.  */
static void
setup_sched_infos (void)
{
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
	  sizeof (sms_common_sched_info));
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
  common_sched_info = &sms_common_sched_info;

  sched_deps_info = &sms_sched_deps_info;
  current_sched_info = &sms_sched_info;
}
874
875 /* Probability in % that the sms-ed loop rolls enough so that optimized
876 version may be entered. Just a guess. */
877 #define PROB_SMS_ENOUGH_ITERATIONS 80
878
879 /* Used to calculate the upper bound of ii. */
880 #define MAXII_FACTOR 2
881
882 /* Main entry point, perform SMS scheduling on the loops of the function
883 that consist of single basic blocks. */
884 static void
885 sms_schedule (void)
886 {
887 rtx insn;
888 ddg_ptr *g_arr, g;
889 int * node_order;
890 int maxii, max_asap;
891 loop_iterator li;
892 partial_schedule_ptr ps;
893 basic_block bb = NULL;
894 struct loop *loop;
895 basic_block condition_bb = NULL;
896 edge latch_edge;
897 gcov_type trip_count = 0;
898
899 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
900 | LOOPS_HAVE_RECORDED_EXITS);
901 if (number_of_loops () <= 1)
902 {
903 loop_optimizer_finalize ();
904 return; /* There are no loops to schedule. */
905 }
906
907 /* Initialize issue_rate. */
908 if (targetm.sched.issue_rate)
909 {
910 int temp = reload_completed;
911
912 reload_completed = 1;
913 issue_rate = targetm.sched.issue_rate ();
914 reload_completed = temp;
915 }
916 else
917 issue_rate = 1;
918
919 /* Initialize the scheduler. */
920 setup_sched_infos ();
921 haifa_sched_init ();
922
923 /* Allocate memory to hold the DDG array one entry for each loop.
924 We use loop->num as index into this array. */
925 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
926
927 if (dump_file)
928 {
929 fprintf (dump_file, "\n\nSMS analysis phase\n");
930 fprintf (dump_file, "===================\n\n");
931 }
932
933 /* Build DDGs for all the relevant loops and hold them in G_ARR
934 indexed by the loop index. */
935 FOR_EACH_LOOP (li, loop, 0)
936 {
937 rtx head, tail;
938 rtx count_reg;
939
940 /* For debugging. */
941 if (dbg_cnt (sms_sched_loop) == false)
942 {
943 if (dump_file)
944 fprintf (dump_file, "SMS reached max limit... \n");
945
946 break;
947 }
948
949 if (dump_file)
950 {
951 rtx insn = BB_END (loop->header);
952
953 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
954 loop->num, insn_file (insn), insn_line (insn));
955
956 }
957
958 if (! loop_canon_p (loop))
959 continue;
960
961 if (! loop_single_full_bb_p (loop))
962 {
963 if (dump_file)
964 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
965 continue;
966 }
967
968 bb = loop->header;
969
970 get_ebb_head_tail (bb, bb, &head, &tail);
971 latch_edge = loop_latch_edge (loop);
972 gcc_assert (single_exit (loop));
973 if (single_exit (loop)->count)
974 trip_count = latch_edge->count / single_exit (loop)->count;
975
976 /* Perform SMS only on loops that their average count is above threshold. */
977
978 if ( latch_edge->count
979 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
980 {
981 if (dump_file)
982 {
983 fprintf (dump_file, " %s %d (file, line)\n",
984 insn_file (tail), insn_line (tail));
985 fprintf (dump_file, "SMS single-bb-loop\n");
986 if (profile_info && flag_branch_probabilities)
987 {
988 fprintf (dump_file, "SMS loop-count ");
989 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
990 (HOST_WIDEST_INT) bb->count);
991 fprintf (dump_file, "\n");
992 fprintf (dump_file, "SMS trip-count ");
993 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
994 (HOST_WIDEST_INT) trip_count);
995 fprintf (dump_file, "\n");
996 fprintf (dump_file, "SMS profile-sum-max ");
997 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
998 (HOST_WIDEST_INT) profile_info->sum_max);
999 fprintf (dump_file, "\n");
1000 }
1001 }
1002 continue;
1003 }
1004
1005 /* Make sure this is a doloop. */
1006 if ( !(count_reg = doloop_register_get (head, tail)))
1007 {
1008 if (dump_file)
1009 fprintf (dump_file, "SMS doloop_register_get failed\n");
1010 continue;
1011 }
1012
1013 /* Don't handle BBs with calls or barriers, or !single_set insns,
1014 or auto-increment insns (to avoid creating invalid reg-moves
1015 for the auto-increment insns).
1016 ??? Should handle auto-increment insns.
1017 ??? Should handle insns defining subregs. */
1018 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1019 {
1020 rtx set;
1021
1022 if (CALL_P (insn)
1023 || BARRIER_P (insn)
1024 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1025 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
1026 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1027 || (INSN_P (insn) && (set = single_set (insn))
1028 && GET_CODE (SET_DEST (set)) == SUBREG))
1029 break;
1030 }
1031
1032 if (insn != NEXT_INSN (tail))
1033 {
1034 if (dump_file)
1035 {
1036 if (CALL_P (insn))
1037 fprintf (dump_file, "SMS loop-with-call\n");
1038 else if (BARRIER_P (insn))
1039 fprintf (dump_file, "SMS loop-with-barrier\n");
1040 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1041 fprintf (dump_file, "SMS reg inc\n");
1042 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1043 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1044 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1045 else
1046 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1047 print_rtl_single (dump_file, insn);
1048 }
1049
1050 continue;
1051 }
1052
1053 if (! (g = create_ddg (bb, 0)))
1054 {
1055 if (dump_file)
1056 fprintf (dump_file, "SMS create_ddg failed\n");
1057 continue;
1058 }
1059
1060 g_arr[loop->num] = g;
1061 if (dump_file)
1062 fprintf (dump_file, "...OK\n");
1063
1064 }
1065 if (dump_file)
1066 {
1067 fprintf (dump_file, "\nSMS transformation phase\n");
1068 fprintf (dump_file, "=========================\n\n");
1069 }
1070
1071 /* We don't want to perform SMS on new loops - created by versioning. */
1072 FOR_EACH_LOOP (li, loop, 0)
1073 {
1074 rtx head, tail;
1075 rtx count_reg, count_init;
1076 int mii, rec_mii;
1077 unsigned stage_count = 0;
1078 HOST_WIDEST_INT loop_count = 0;
1079
1080 if (! (g = g_arr[loop->num]))
1081 continue;
1082
1083 if (dump_file)
1084 {
1085 rtx insn = BB_END (loop->header);
1086
1087 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1088 loop->num, insn_file (insn), insn_line (insn));
1089
1090 print_ddg (dump_file, g);
1091 }
1092
1093 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1094
1095 latch_edge = loop_latch_edge (loop);
1096 gcc_assert (single_exit (loop));
1097 if (single_exit (loop)->count)
1098 trip_count = latch_edge->count / single_exit (loop)->count;
1099
1100 if (dump_file)
1101 {
1102 fprintf (dump_file, " %s %d (file, line)\n",
1103 insn_file (tail), insn_line (tail));
1104 fprintf (dump_file, "SMS single-bb-loop\n");
1105 if (profile_info && flag_branch_probabilities)
1106 {
1107 fprintf (dump_file, "SMS loop-count ");
1108 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1109 (HOST_WIDEST_INT) bb->count);
1110 fprintf (dump_file, "\n");
1111 fprintf (dump_file, "SMS profile-sum-max ");
1112 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1113 (HOST_WIDEST_INT) profile_info->sum_max);
1114 fprintf (dump_file, "\n");
1115 }
1116 fprintf (dump_file, "SMS doloop\n");
1117 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1118 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1119 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1120 }
1121
1122
1123 /* In case of th loop have doloop register it gets special
1124 handling. */
1125 count_init = NULL_RTX;
1126 if ((count_reg = doloop_register_get (head, tail)))
1127 {
1128 basic_block pre_header;
1129
1130 pre_header = loop_preheader_edge (loop)->src;
1131 count_init = const_iteration_count (count_reg, pre_header,
1132 &loop_count);
1133 }
1134 gcc_assert (count_reg);
1135
1136 if (dump_file && count_init)
1137 {
1138 fprintf (dump_file, "SMS const-doloop ");
1139 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1140 loop_count);
1141 fprintf (dump_file, "\n");
1142 }
1143
1144 node_order = XNEWVEC (int, g->num_nodes);
1145
1146 mii = 1; /* Need to pass some estimate of mii. */
1147 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1148 mii = MAX (res_MII (g), rec_mii);
1149 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1150
1151 if (dump_file)
1152 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1153 rec_mii, mii, maxii);
1154
1155 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1156 ASAP. */
1157 set_node_sched_params (g);
1158
1159 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1160
1161 if (ps){
1162 stage_count = PS_STAGE_COUNT (ps);
1163 gcc_assert(stage_count >= 1);
1164 }
1165
1166 /* Stage count of 1 means that there is no interleaving between
1167 iterations, let the scheduling passes do the job. */
1168 if (stage_count <= 1
1169 || (count_init && (loop_count <= stage_count))
1170 || (flag_branch_probabilities && (trip_count <= stage_count)))
1171 {
1172 if (dump_file)
1173 {
1174 fprintf (dump_file, "SMS failed... \n");
1175 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1176 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1177 fprintf (dump_file, ", trip-count=");
1178 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1179 fprintf (dump_file, ")\n");
1180 }
1181 continue;
1182 }
1183 else
1184 {
1185 struct undo_replace_buff_elem *reg_move_replaces;
1186
1187 if (dump_file)
1188 {
1189 fprintf (dump_file,
1190 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1191 stage_count);
1192 print_partial_schedule (ps, dump_file);
1193 fprintf (dump_file,
1194 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1195 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1196 }
1197
1198 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1199 the closing_branch was scheduled and should appear in the last (ii-1)
1200 row. Otherwise, we are free to schedule the branch, and we let nodes
1201 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1202 row; this should reduce stage_count to minimum.
1203 TODO: Revisit the issue of scheduling the insns of the
1204 control part relative to the branch when the control part
1205 has more than one insn. */
1206 normalize_sched_times (ps);
1207 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1208 set_columns_for_ps (ps);
1209
1210 canon_loop (loop);
1211
1212 /* case the BCT count is not known , Do loop-versioning */
1213 if (count_reg && ! count_init)
1214 {
1215 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1216 GEN_INT(stage_count));
1217 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1218 * REG_BR_PROB_BASE) / 100;
1219
1220 loop_version (loop, comp_rtx, &condition_bb,
1221 prob, prob, REG_BR_PROB_BASE - prob,
1222 true);
1223 }
1224
1225 /* Set new iteration count of loop kernel. */
1226 if (count_reg && count_init)
1227 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1228 - stage_count + 1);
1229
1230 /* Now apply the scheduled kernel to the RTL of the loop. */
1231 permute_partial_schedule (ps, g->closing_branch->first_note);
1232
1233 /* Mark this loop as software pipelined so the later
1234 scheduling passes doesn't touch it. */
1235 if (! flag_resched_modulo_sched)
1236 g->bb->flags |= BB_DISABLE_SCHEDULE;
1237 /* The life-info is not valid any more. */
1238 df_set_bb_dirty (g->bb);
1239
1240 reg_move_replaces = generate_reg_moves (ps, true);
1241 if (dump_file)
1242 print_node_sched_params (dump_file, g->num_nodes, g);
1243 /* Generate prolog and epilog. */
1244 generate_prolog_epilog (ps, loop, count_reg, count_init);
1245
1246 free_undo_replace_buff (reg_move_replaces);
1247 }
1248
1249 free_partial_schedule (ps);
1250 free (node_sched_params);
1251 free (node_order);
1252 free_ddg (g);
1253 }
1254
1255 free (g_arr);
1256
1257 /* Release scheduler data, needed until now because of DFA. */
1258 haifa_sched_finish ();
1259 loop_optimizer_finalize ();
1260 }
1261
1262 /* The SMS scheduling algorithm itself
1263 -----------------------------------
1264 Input: 'O' an ordered list of insns of a loop.
1265 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1266
1267 'Q' is the empty Set
1268 'PS' is the partial schedule; it holds the currently scheduled nodes with
1269 their cycle/slot.
1270 'PSP' previously scheduled predecessors.
1271 'PSS' previously scheduled successors.
1272 't(u)' the cycle where u is scheduled.
1273 'l(u)' is the latency of u.
1274 'd(v,u)' is the dependence distance from v to u.
1275 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1276 the node ordering phase.
1277 'check_hardware_resources_conflicts(u, PS, c)'
1278 run a trace around cycle/slot through DFA model
1279 to check resource conflicts involving instruction u
1280 at cycle c given the partial schedule PS.
1281 'add_to_partial_schedule_at_time(u, PS, c)'
1282 Add the node/instruction u to the partial schedule
1283 PS at time c.
1284 'calculate_register_pressure(PS)'
1285 Given a schedule of instructions, calculate the register
1286 pressure it implies. One implementation could be the
1287 maximum number of overlapping live ranges.
1288 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1289 registers available in the hardware.
1290
1291 1. II = MII.
1292 2. PS = empty list
1293 3. for each node u in O in pre-computed order
1294 4. if (PSP(u) != Q && PSS(u) == Q) then
1295 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1296 6. start = Early_start; end = Early_start + II - 1; step = 1
1297 11. else if (PSP(u) == Q && PSS(u) != Q) then
1298 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1299 13. start = Late_start; end = Late_start - II + 1; step = -1
1300 14. else if (PSP(u) != Q && PSS(u) != Q) then
1301 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1302 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1303 17. start = Early_start;
1304 18. end = min(Early_start + II - 1 , Late_start);
1305 19. step = 1
1306 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1307 21. start = ASAP(u); end = start + II - 1; step = 1
1308 22. endif
1309
1310 23. success = false
1311 24. for (c = start ; c != end ; c += step)
1312 25. if check_hardware_resources_conflicts(u, PS, c) then
1313 26. add_to_partial_schedule_at_time(u, PS, c)
1314 27. success = true
1315 28. break
1316 29. endif
1317 30. endfor
1318 31. if (success == false) then
1319 32. II = II + 1
1320 33. if (II > maxII) then
1321 34. finish - failed to schedule
1322 35. endif
1323 36. goto 2.
1324 37. endif
1325 38. endfor
1326 39. if (calculate_register_pressure(PS) > maxRP) then
1327 40. goto 32.
1328 41. endif
1329 42. compute epilogue & prologue
1330 43. finish - succeeded to schedule
1331 */
1332
1333 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1334 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1335 set to 0 to save compile time. */
1336 #define DFA_HISTORY SMS_DFA_HISTORY
1337
1338 /* A threshold for the number of repeated unsuccessful attempts to insert
1339 an empty row, before we flush the partial schedule and start over. */
1340 #define MAX_SPLIT_NUM 10
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases.  We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps.  In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   The window is returned through START_P, STEP_P and END_P; the cycles to
   try are {START, START+STEP, ..., END-STEP} (END itself is exclusive).
   STEP is +1 when scheduling forward from the predecessors and -1 when
   scheduling backward from the successors.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  /* PSP/PSS = the already-scheduled predecessors/successors of U.  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  /* Case 1: only predecessors are scheduled.  Start at the earliest cycle
     they permit and search forward.  */
  if (psp_not_empty && !pss_not_empty)
    {
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge: ");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      /* The dependence v -> u with latency l and distance d
		 requires t(u) >= t(v) + l - d*II.  */
	      early_start =
		MAX (early_start, p_st + e->latency - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency: %d",
			 p_st, early_start, e->latency);

	      /* Memory dependences additionally cap the window from
		 above, to keep overlapping iterations apart.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      /* Schedule the node close to its predecessors.  */
      step = 1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  /* Case 2: only successors are scheduled.  Start at the latest cycle
     they permit and search backward (step = -1).  */
  else if (!psp_not_empty && pss_not_empty)
    {
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      /* The dependence u -> v requires t(u) <= t(v) - l + d*II.  */
	      late_start = MIN (late_start,
				s_st - e->latency + (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	      if (dump_file)
		fprintf (dump_file, "end = %d\n", end);

	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Schedule the node close to its successors.  */
      step = -1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  /* Case 3: both predecessors and successors are scheduled.  The window
     is bounded on both sides; its traversal direction is decided below
     by comparing the number of critical preds vs. succs.  */
  else if (psp_not_empty && pss_not_empty)
    {
      int early_start = INT_MIN;
      int late_start = INT_MAX;
      int count_preds = 0;
      int count_succs = 0;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency = %d",
			 p_st, early_start, e->latency);

	      /* Count true register dependences to decide which side
		 the node should be scheduled closer to.  */
	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_preds++;

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_succs++;

	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
      /* If there are more successors than predecessors schedule the
	 node close to its successors.  */
      if (count_succs >= count_preds)
	{
	  int old_start = start;

	  start = end - 1;
	  end = old_start - 1;
	  step = -1;
	}
    }
  /* Case 4: no scheduled neighbors at all; fall back to the ASAP time
     computed in the node-ordering phase.  */
  else /* psp is empty && pss is empty.  */
    {
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An inverted (or zero-length) range means the data-deps left no
     feasible cycle: report an empty window.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1596
1597 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1598 node currently been scheduled. At the end of the calculation
1599 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1600 U_NODE which are (1) already scheduled in the first/last row of
1601 U_NODE's scheduling window, (2) whose dependence inequality with U
1602 becomes an equality when U is scheduled in this same row, and (3)
1603 whose dependence latency is zero.
1604
1605 The first and last rows are calculated using the following parameters:
1606 START/END rows - The cycles that begins/ends the traversal on the window;
1607 searching for an empty cycle to schedule U_NODE.
1608 STEP - The direction in which we traverse the window.
1609 II - The initiation interval. */
1610
1611 static void
1612 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1613 int step, int ii, sbitmap sched_nodes,
1614 sbitmap must_precede, sbitmap must_follow)
1615 {
1616 ddg_edge_ptr e;
1617 int first_cycle_in_window, last_cycle_in_window;
1618
1619 gcc_assert (must_precede && must_follow);
1620
1621 /* Consider the following scheduling window:
1622 {first_cycle_in_window, first_cycle_in_window+1, ...,
1623 last_cycle_in_window}. If step is 1 then the following will be
1624 the order we traverse the window: {start=first_cycle_in_window,
1625 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1626 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1627 end=first_cycle_in_window-1} if step is -1. */
1628 first_cycle_in_window = (step == 1) ? start : end - step;
1629 last_cycle_in_window = (step == 1) ? end - step : start;
1630
1631 sbitmap_zero (must_precede);
1632 sbitmap_zero (must_follow);
1633
1634 if (dump_file)
1635 fprintf (dump_file, "\nmust_precede: ");
1636
1637 /* Instead of checking if:
1638 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1639 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1640 first_cycle_in_window)
1641 && e->latency == 0
1642 we use the fact that latency is non-negative:
1643 SCHED_TIME (e->src) - (e->distance * ii) <=
1644 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1645 first_cycle_in_window
1646 and check only if
1647 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1648 for (e = u_node->in; e != 0; e = e->next_in)
1649 if (TEST_BIT (sched_nodes, e->src->cuid)
1650 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1651 first_cycle_in_window))
1652 {
1653 if (dump_file)
1654 fprintf (dump_file, "%d ", e->src->cuid);
1655
1656 SET_BIT (must_precede, e->src->cuid);
1657 }
1658
1659 if (dump_file)
1660 fprintf (dump_file, "\nmust_follow: ");
1661
1662 /* Instead of checking if:
1663 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1664 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1665 last_cycle_in_window)
1666 && e->latency == 0
1667 we use the fact that latency is non-negative:
1668 SCHED_TIME (e->dest) + (e->distance * ii) >=
1669 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1670 last_cycle_in_window
1671 and check only if
1672 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1673 for (e = u_node->out; e != 0; e = e->next_out)
1674 if (TEST_BIT (sched_nodes, e->dest->cuid)
1675 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1676 last_cycle_in_window))
1677 {
1678 if (dump_file)
1679 fprintf (dump_file, "%d ", e->dest->cuid);
1680
1681 SET_BIT (must_follow, e->dest->cuid);
1682 }
1683
1684 if (dump_file)
1685 fprintf (dump_file, "\n");
1686 }
1687
1688 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1689 parameters to decide if that's possible:
1690 PS - The partial schedule.
1691 U - The serial number of U_NODE.
1692 NUM_SPLITS - The number of row splits made so far.
1693 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1694 the first row of the scheduling window)
1695 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1696 last row of the scheduling window) */
1697
1698 static bool
1699 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1700 int u, int cycle, sbitmap sched_nodes,
1701 int *num_splits, sbitmap must_precede,
1702 sbitmap must_follow)
1703 {
1704 ps_insn_ptr psi;
1705 bool success = 0;
1706
1707 verify_partial_schedule (ps, sched_nodes);
1708 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1709 must_precede, must_follow);
1710 if (psi)
1711 {
1712 SCHED_TIME (u_node) = cycle;
1713 SET_BIT (sched_nodes, u);
1714 success = 1;
1715 *num_splits = 0;
1716 if (dump_file)
1717 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1718
1719 }
1720
1721 return success;
1722 }
1723
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm.  Nodes are tried in NODES_ORDER; when a node's window
   is empty or fully conflicting, a row is split (backtracking); after
   MAX_SPLIT_NUM consecutive splits the partial schedule is flushed and
   scheduling restarts with a larger II.  Returns the partial schedule,
   or NULL if no schedule was found with II < MAXII.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Debug insns are not scheduled by SMS.  */
	  if (!NONDEBUG_INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  /* Already placed in a previous attempt on this node order.  */
	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
			 (g->nodes[u].insn)), start, end, step);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      calculate_must_precede_follow (u_node, start, end, step, ii,
					     sched_nodes, must_precede,
					     must_follow);

	      for (c = start; c != end; c += step)
		{
		  sbitmap tmp_precede = NULL;
		  sbitmap tmp_follow = NULL;

		  /* MUST_PRECEDE constrains only the first row of the
		     window, MUST_FOLLOW only the last; which of START
		     and END-STEP is which depends on STEP's sign.  */
		  if (c == start)
		    {
		      if (step == 1)
			tmp_precede = must_precede;
		      else /* step == -1.  */
			tmp_follow = must_follow;
		    }
		  if (c == end - step)
		    {
		      if (step == 1)
			tmp_follow = must_follow;
		      else /* step == -1.  */
			tmp_precede = must_precede;
		    }

		  success =
		    try_scheduling_node_in_cycle (ps, u_node, u, c,
						  sched_nodes,
						  &num_splits, tmp_precede,
						  tmp_follow);
		  if (success)
		    break;
		}

	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Every failed attempt enlarges II; give up once it
		 reaches MAXII.  */
	      if (ii++ == maxii)
		break;

	      /* Too many consecutive splits: flush the partial schedule
		 and restart the whole ordering with the enlarged II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      num_splits++;
	      /* The scheduling window is exclusive of 'end'
		 whereas compute_split_window() expects an inclusive,
		 ordered range.  */
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end - 1,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end + 1, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	/* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}	/* Continue with next node.  */
    }	/* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
1874
/* This function inserts a new empty row into PS at the position
   according to SPLITROW, keeping all already scheduled instructions
   intact and updating their SCHED_TIME and cycle accordingly.  The
   partial schedule's II grows by one.  */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
		     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  normalize_sched_times (ps);
  rotate_partial_schedule (ps, ps->min_cycle);

  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  /* Rows before the split keep their index.  Each instruction gains one
     extra cycle per full II it had already advanced (SCHED_TIME / ii),
     because every earlier wrap of the schedule now contains one more
     row.  */
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The newly inserted row starts out empty.  */
  rows_new[split_row] = NULL;

  /* Rows at or after the split shift down by one and additionally skip
     over the inserted row in the current wrap (the trailing "+ 1").  */
  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
1954
/* Given U_NODE which is the node that failed to be scheduled; LOW and
   UP which are the boundaries of its scheduling window; compute using
   SCHED_NODES and II a row in the partial schedule that can be split
   which will separate a critical predecessor from a critical successor
   thereby expanding the window, and return it.  */
static int
compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
                   ddg_node_ptr u_node)
{
  ddg_edge_ptr e;
  int lower = INT_MIN, upper = INT_MAX;
  ddg_node_ptr crit_pred = NULL;
  ddg_node_ptr crit_succ = NULL;
  int crit_cycle;

  /* Find the latest-scheduled predecessor whose edge determines the
     window's lower bound LOW.  */
  for (e = u_node->in; e != 0; e = e->next_in)
    {
      ddg_node_ptr v_node = e->src;

      if (TEST_BIT (sched_nodes, v_node->cuid)
          && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
        if (SCHED_TIME (v_node) > lower)
          {
            crit_pred = v_node;
            lower = SCHED_TIME (v_node);
          }
    }

  if (crit_pred != NULL)
    {
      /* Split in the row just after the critical predecessor.  */
      crit_cycle = SCHED_TIME (crit_pred) + 1;
      return SMODULO (crit_cycle, ii);
    }

  /* No critical predecessor; look for the earliest-scheduled successor
     whose edge determines the window's upper bound UP.  */
  for (e = u_node->out; e != 0; e = e->next_out)
    {
      ddg_node_ptr v_node = e->dest;
      if (TEST_BIT (sched_nodes, v_node->cuid)
          && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
        if (SCHED_TIME (v_node) < upper)
          {
            crit_succ = v_node;
            upper = SCHED_TIME (v_node);
          }
    }

  if (crit_succ != NULL)
    {
      /* Split in the critical successor's row.  */
      crit_cycle = SCHED_TIME (crit_succ);
      return SMODULO (crit_cycle, ii);
    }

  if (dump_file)
    fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");

  /* Neither bound is explained by an edge; split in the middle of the
     window as a fallback.  */
  return SMODULO ((low + up + 1) / 2, ii);
}
2012
2013 static void
2014 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2015 {
2016 int row;
2017 ps_insn_ptr crr_insn;
2018
2019 for (row = 0; row < ps->ii; row++)
2020 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2021 {
2022 ddg_node_ptr u = crr_insn->node;
2023
2024 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2025 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2026 popcount (sched_nodes) == number of insns in ps. */
2027 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2028 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2029 }
2030 }
2031
2032 \f
/* This page implements the algorithm for ordering the nodes of a DDG
   for modulo scheduling, activated through the
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */

/* Accessors for the per-node ordering parameters that
   calculate_order_params stashes in each node's aux.info pointer.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: the slack between a node's latest and earliest start.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
/* A node's depth in the acyclic dependence graph equals its ASAP.  */
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which order_nodes_in_scc currently extends the order.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node scheduling bounds: as-soon-as-possible cycle,
   as-late-as-possible cycle, and height in the dependence DAG.  */
struct node_order_params
{
  int asap;
  int alap;
  int height;
};
2061
2062 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2063 static void
2064 check_nodes_order (int *node_order, int num_nodes)
2065 {
2066 int i;
2067 sbitmap tmp = sbitmap_alloc (num_nodes);
2068
2069 sbitmap_zero (tmp);
2070
2071 if (dump_file)
2072 fprintf (dump_file, "SMS final nodes order: \n");
2073
2074 for (i = 0; i < num_nodes; i++)
2075 {
2076 int u = node_order[i];
2077
2078 if (dump_file)
2079 fprintf (dump_file, "%d ", u);
2080 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2081
2082 SET_BIT (tmp, u);
2083 }
2084
2085 if (dump_file)
2086 fprintf (dump_file, "\n");
2087
2088 sbitmap_free (tmp);
2089 }
2090
/* Order the nodes of G for scheduling and pass the result in
   NODE_ORDER.  Also set aux.count of each node to ASAP.
   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
static int
sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
{
  int i;
  int rec_mii = 0;
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);

  /* Compute ASAP/ALAP/HEIGHT per node; these drive the ordering and
     are also exported through aux.count below.  */
  nopa nops = calculate_order_params (g, mii, pmax_asap);

  if (dump_file)
    print_sccs (dump_file, sccs, g);

  order_nodes_of_sccs (sccs, node_order);

  if (sccs->num_sccs > 0)
    /* First SCC has the largest recurrence_length.  */
    rec_mii = sccs->sccs[0]->recurrence_length;

  /* Save ASAP before destroying node_order_params.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr v = &g->nodes[i];
      v->aux.count = ASAP (v);
    }

  free (nops);
  free_ddg_all_sccs (sccs);
  /* Sanity check: the produced order must be a permutation.  */
  check_nodes_order (node_order, g->num_nodes);

  return rec_mii;
}
2125
/* Fill NODE_ORDER with an ordering of all nodes of the DDG underlying
   ALL_SCCS: SCCs are visited from highest recMII down, each extended
   with the nodes on paths to/from previously ordered SCCs, and the
   remaining SCC-free nodes are handled last.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
2173
/* Allocate and compute the node_order_params (ASAP, ALAP, HEIGHT) for
   every node of G, wire each node's aux.info to its entry, and store
   the maximal ASAP in *PMAX_ASAP.  The caller frees the returned array.
   MII is needed if we consider backarcs (that do not close recursive
   cycles).  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
                                          sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  max_asap = 0;
  /* Forward pass: a node's ASAP is the max over its zero-distance
     incoming edges of the source's ASAP plus the edge latency.  */
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
        if (e->distance == 0)
          ASAP (u_node) = MAX (ASAP (u_node),
                               ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP and HEIGHT derive from each node's
     zero-distance outgoing edges.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
        if (e->distance == 0)
          {
            ALAP (u_node) = MIN (ALAP (u_node),
                                 ALAP (e->dest) - e->latency);
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
                                   HEIGHT (e->dest) + e->latency);
          }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
        {
          ddg_node_ptr u_node = &g->nodes[u];

          fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
                   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
        }
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
2242
2243 static int
2244 find_max_asap (ddg_ptr g, sbitmap nodes)
2245 {
2246 unsigned int u = 0;
2247 int max_asap = -1;
2248 int result = -1;
2249 sbitmap_iterator sbi;
2250
2251 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2252 {
2253 ddg_node_ptr u_node = &g->nodes[u];
2254
2255 if (max_asap < ASAP (u_node))
2256 {
2257 max_asap = ASAP (u_node);
2258 result = u;
2259 }
2260 }
2261 return result;
2262 }
2263
2264 static int
2265 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2266 {
2267 unsigned int u = 0;
2268 int max_hv = -1;
2269 int min_mob = INT_MAX;
2270 int result = -1;
2271 sbitmap_iterator sbi;
2272
2273 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2274 {
2275 ddg_node_ptr u_node = &g->nodes[u];
2276
2277 if (max_hv < HEIGHT (u_node))
2278 {
2279 max_hv = HEIGHT (u_node);
2280 min_mob = MOB (u_node);
2281 result = u;
2282 }
2283 else if ((max_hv == HEIGHT (u_node))
2284 && (min_mob > MOB (u_node)))
2285 {
2286 min_mob = MOB (u_node);
2287 result = u;
2288 }
2289 }
2290 return result;
2291 }
2292
2293 static int
2294 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2295 {
2296 unsigned int u = 0;
2297 int max_dv = -1;
2298 int min_mob = INT_MAX;
2299 int result = -1;
2300 sbitmap_iterator sbi;
2301
2302 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2303 {
2304 ddg_node_ptr u_node = &g->nodes[u];
2305
2306 if (max_dv < DEPTH (u_node))
2307 {
2308 max_dv = DEPTH (u_node);
2309 min_mob = MOB (u_node);
2310 result = u;
2311 }
2312 else if ((max_dv == DEPTH (u_node))
2313 && (min_mob > MOB (u_node)))
2314 {
2315 min_mob = MOB (u_node);
2316 result = u;
2317 }
2318 }
2319 return result;
2320 }
2321
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
                    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  sbitmap_zero (tmp);
  /* Choose the initial workset and direction: predecessors of the
     already-ordered nodes inside SCC first (bottom-up), then
     successors (top-down); otherwise seed with the max-ASAP node.  */
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
        SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  sbitmap_zero (zero_bitmap);
  /* Drain the workset, flipping direction each time it empties, until
     every node of SCC has been placed into NODE_ORDER.  */
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
        {
          while (!sbitmap_equal (workset, zero_bitmap))
            {
              /* Top-down: pick maximal height, ties by minimal mobility.  */
              v = find_max_hv_min_mob (g, workset);
              v_node = &g->nodes[v];
              node_order[pos++] = v;
              v_node_succs = NODE_SUCCESSORS (v_node);
              sbitmap_a_and_b (tmp, v_node_succs, scc);

              /* Don't consider the already ordered successors again.  */
              sbitmap_difference (tmp, tmp, nodes_ordered);
              sbitmap_a_or_b (workset, workset, tmp);
              RESET_BIT (workset, v);
              SET_BIT (nodes_ordered, v);
            }
          dir = BOTTOMUP;
          sbitmap_zero (predecessors);
          find_predecessors (predecessors, g, nodes_ordered);
          sbitmap_a_and_b (workset, predecessors, scc);
        }
      else
        {
          while (!sbitmap_equal (workset, zero_bitmap))
            {
              /* Bottom-up: pick maximal depth, ties by minimal mobility.  */
              v = find_max_dv_min_mob (g, workset);
              v_node = &g->nodes[v];
              node_order[pos++] = v;
              v_node_preds = NODE_PREDECESSORS (v_node);
              sbitmap_a_and_b (tmp, v_node_preds, scc);

              /* Don't consider the already ordered predecessors again.  */
              sbitmap_difference (tmp, tmp, nodes_ordered);
              sbitmap_a_or_b (workset, workset, tmp);
              RESET_BIT (workset, v);
              SET_BIT (nodes_ordered, v);
            }
          dir = TOPDOWN;
          sbitmap_zero (successors);
          find_successors (successors, g, nodes_ordered);
          sbitmap_a_and_b (workset, successors, scc);
        }
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2423
2424 \f
2425 /* This page contains functions for manipulating partial-schedules during
2426 modulo scheduling. */
2427
2428 /* Create a partial schedule and allocate a memory to hold II rows. */
2429
2430 static partial_schedule_ptr
2431 create_partial_schedule (int ii, ddg_ptr g, int history)
2432 {
2433 partial_schedule_ptr ps = XNEW (struct partial_schedule);
2434 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2435 ps->ii = ii;
2436 ps->history = history;
2437 ps->min_cycle = INT_MAX;
2438 ps->max_cycle = INT_MIN;
2439 ps->g = g;
2440
2441 return ps;
2442 }
2443
2444 /* Free the PS_INSNs in rows array of the given partial schedule.
2445 ??? Consider caching the PS_INSN's. */
2446 static void
2447 free_ps_insns (partial_schedule_ptr ps)
2448 {
2449 int i;
2450
2451 for (i = 0; i < ps->ii; i++)
2452 {
2453 while (ps->rows[i])
2454 {
2455 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2456
2457 free (ps->rows[i]);
2458 ps->rows[i] = ps_insn;
2459 }
2460 ps->rows[i] = NULL;
2461 }
2462 }
2463
2464 /* Free all the memory allocated to the partial schedule. */
2465
2466 static void
2467 free_partial_schedule (partial_schedule_ptr ps)
2468 {
2469 if (!ps)
2470 return;
2471 free_ps_insns (ps);
2472 free (ps->rows);
2473 free (ps);
2474 }
2475
2476 /* Clear the rows array with its PS_INSNs, and create a new one with
2477 NEW_II rows. */
2478
2479 static void
2480 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2481 {
2482 if (!ps)
2483 return;
2484 free_ps_insns (ps);
2485 if (new_ii == ps->ii)
2486 return;
2487 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2488 * sizeof (ps_insn_ptr));
2489 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2490 ps->ii = new_ii;
2491 ps->min_cycle = INT_MAX;
2492 ps->max_cycle = INT_MIN;
2493 }
2494
2495 /* Prints the partial schedule as an ii rows array, for each rows
2496 print the ids of the insns in it. */
2497 void
2498 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2499 {
2500 int i;
2501
2502 for (i = 0; i < ps->ii; i++)
2503 {
2504 ps_insn_ptr ps_i = ps->rows[i];
2505
2506 fprintf (dump, "\n[ROW %d ]: ", i);
2507 while (ps_i)
2508 {
2509 fprintf (dump, "%d, ",
2510 INSN_UID (ps_i->node->insn));
2511 ps_i = ps_i->next_in_row;
2512 }
2513 }
2514 }
2515
2516 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2517 static ps_insn_ptr
2518 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2519 {
2520 ps_insn_ptr ps_i = XNEW (struct ps_insn);
2521
2522 ps_i->node = node;
2523 ps_i->next_in_row = NULL;
2524 ps_i->prev_in_row = NULL;
2525 ps_i->row_rest_count = rest_count;
2526 ps_i->cycle = cycle;
2527
2528 return ps_i;
2529 }
2530
2531
/* Removes the given PS_INSN from the partial schedule.  Returns false if the
   node is not found in the partial schedule, else returns true.  */
static bool
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
{
  int row;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);
  if (! ps_i->prev_in_row)
    {
      /* PS_I has no predecessor, so it must be the head of its row.  */
      if (ps_i != ps->rows[row])
        return false;

      ps->rows[row] = ps_i->next_in_row;
      if (ps->rows[row])
        ps->rows[row]->prev_in_row = NULL;
    }
  else
    {
      /* Unlink PS_I from the middle or end of the doubly linked row.  */
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
    }
  free (ps_i);
  return true;
}
2561
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Returns false when no valid column exists.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
                     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
          && ! first_must_follow)
        first_must_follow = next_ps_i;
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
        {
          /* If we have already met a node that must follow, then
             there is no possible column.  */
          if (first_must_follow)
            return false;
          else
            last_must_precede = next_ps_i;
        }
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede constraint: place PS_I at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I in right after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2624
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
                        sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already in the last column of its row.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* NEXT becomes the head of the row if PS_I was heading it.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2673
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
   before/after (respectively) the node pointed to by PS_I when scheduled
   in the same cycle.  */
static ps_insn_ptr
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
                sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr ps_i;
  int rest_count = 1;
  int row = SMODULO (cycle, ps->ii);

  /* The row head's row_rest_count records how many insns are already
     in the row; refuse rows filled up to the issue rate.  */
  if (ps->rows[row]
      && ps->rows[row]->row_rest_count >= issue_rate)
    return NULL;

  if (ps->rows[row])
    rest_count += ps->rows[row]->row_rest_count;

  ps_i = create_ps_insn (node, rest_count, cycle);

  /* Finds and inserts PS_I according to MUST_FOLLOW and
     MUST_PRECEDE.  */
  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
    {
      free (ps_i);
      return NULL;
    }

  return ps_i;
}
2706
2707 /* Advance time one cycle. Assumes DFA is being used. */
2708 static void
2709 advance_one_cycle (void)
2710 {
2711 if (targetm.sched.dfa_pre_cycle_insn)
2712 state_transition (curr_state,
2713 targetm.sched.dfa_pre_cycle_insn ());
2714
2715 state_transition (curr_state, NULL);
2716
2717 if (targetm.sched.dfa_post_cycle_insn)
2718 state_transition (curr_state,
2719 targetm.sched.dfa_post_cycle_insn ());
2720 }
2721
2722
2723
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
           crr_insn;
           crr_insn = crr_insn->next_in_row)
        {
          rtx insn = crr_insn->node->insn;

          /* Debug insns consume no machine resources; skip them.  */
          if (!NONDEBUG_INSN_P (insn))
            continue;

          /* Check if there is room for the current insn.  */
          if (!can_issue_more || state_dead_lock_p (curr_state))
            return true;

          /* Update the DFA state and return with failure if the DFA found
             resource conflicts.  */
          if (state_transition (curr_state, insn) >= 0)
            return true;

          if (targetm.sched.variable_issue)
            can_issue_more =
              targetm.sched.variable_issue (sched_dump, sched_verbose,
                                            insn, can_issue_more);
          /* A naked CLOBBER or USE generates no instruction, so don't
             let them consume issue slots.  */
          else if (GET_CODE (PATTERN (insn)) != USE
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
            can_issue_more--;
        }

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2775
2776 /* Checks if the given node causes resource conflicts when added to PS at
2777 cycle C. If not the node is added to PS and returned; otherwise zero
2778 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2779 cuid N must be come before/after (respectively) the node pointed to by
2780 PS_I when scheduled in the same cycle. */
2781 ps_insn_ptr
2782 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2783 int c, sbitmap must_precede,
2784 sbitmap must_follow)
2785 {
2786 int has_conflicts = 0;
2787 ps_insn_ptr ps_i;
2788
2789 /* First add the node to the PS, if this succeeds check for
2790 conflicts, trying different issue slots in the same row. */
2791 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2792 return NULL; /* Failed to insert the node at the given cycle. */
2793
2794 has_conflicts = ps_has_conflicts (ps, c, c)
2795 || (ps->history > 0
2796 && ps_has_conflicts (ps,
2797 c - ps->history,
2798 c + ps->history));
2799
2800 /* Try different issue slots to find one that the given node can be
2801 scheduled in without conflicts. */
2802 while (has_conflicts)
2803 {
2804 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2805 break;
2806 has_conflicts = ps_has_conflicts (ps, c, c)
2807 || (ps->history > 0
2808 && ps_has_conflicts (ps,
2809 c - ps->history,
2810 c + ps->history));
2811 }
2812
2813 if (has_conflicts)
2814 {
2815 remove_node_from_ps (ps, ps_i);
2816 return NULL;
2817 }
2818
2819 ps->min_cycle = MIN (ps->min_cycle, c);
2820 ps->max_cycle = MAX (ps->max_cycle, c);
2821 return ps_i;
2822 }
2823
2824 /* Rotate the rows of PS such that insns scheduled at time
2825 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2826 void
2827 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2828 {
2829 int i, row, backward_rotates;
2830 int last_row = ps->ii - 1;
2831
2832 if (start_cycle == 0)
2833 return;
2834
2835 backward_rotates = SMODULO (start_cycle, ps->ii);
2836
2837 /* Revisit later and optimize this into a single loop. */
2838 for (i = 0; i < backward_rotates; i++)
2839 {
2840 ps_insn_ptr first_row = ps->rows[0];
2841
2842 for (row = 0; row < last_row; row++)
2843 ps->rows[row] = ps->rows[row+1];
2844
2845 ps->rows[last_row] = first_row;
2846 }
2847
2848 ps->max_cycle -= start_cycle;
2849 ps->min_cycle -= start_cycle;
2850 }
2851
2852 #endif /* INSN_SCHEDULING */
2853 \f
2854 static bool
2855 gate_handle_sms (void)
2856 {
2857 return (optimize > 0 && flag_modulo_sched);
2858 }
2859
2860
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  */
  /* Chain the blocks in order through their aux fields — presumably
     the layout-order chain cfg_layout_finalize consumes; confirm
     against cfglayout.c.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2885
/* Descriptor of the SMS RTL pass, registered with the pass manager;
   gated by gate_handle_sms and executed by rest_of_handle_sms.  */
struct rtl_opt_pass pass_sms =
{
 {
  RTL_PASS,
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};
2906