New files for implementing sms in gcc.
[gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "hard-reg-set.h"
32 #include "basic-block.h"
33 #include "regs.h"
34 #include "function.h"
35 #include "flags.h"
36 #include "insn-config.h"
37 #include "insn-attr.h"
38 #include "except.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "sched-int.h"
42 #include "target.h"
43 #include "cfglayout.h"
44 #include "cfgloop.h"
45 #include "cfghooks.h"
46 #include "expr.h"
47 #include "params.h"
48 #include "gcov-io.h"
49 #include "df.h"
50 #include "ddg.h"
51
52
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachussets - USA).
61
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-
77 up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
85 */
86
87 \f
88 /* This page defines partial-schedule structures and functions for
89 modulo scheduling. */
90
91 typedef struct partial_schedule *partial_schedule_ptr;
92 typedef struct ps_insn *ps_insn_ptr;
93
94 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
95 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
96
97 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
98 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
99
100 /* Perform signed modulo, always returning a non-negative value. */
101 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
102
103 /* The number of different iterations the nodes in ps span, assuming
104 the stage boundaries are placed efficiently. */
105 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
106 + 1 + (ps)->ii - 1) / (ps)->ii)
107
108 #define CFG_HOOKS cfg_layout_rtl_cfg_hooks
109
110 /* A single instruction in the partial schedule. */
111 struct ps_insn
112 {
113 /* The corresponding DDG_NODE. */
114 ddg_node_ptr node;
115
116 /* The (absolute) cycle in which the PS instruction is scheduled.
117 Same as SCHED_TIME (node). */
118 int cycle;
119
120 /* The next/prev PS_INSN in the same row. */
121 ps_insn_ptr next_in_row,
122 prev_in_row;
123
124 /* The number of nodes in the same row that come after this node. */
125 int row_rest_count;
126 };
127
128 /* Holds the partial schedule as an array of II rows. Each entry of the
129 array points to a linked list of PS_INSNs, which represents the
130 instructions that are scheduled for that row. */
131 struct partial_schedule
132 {
133 int ii; /* Number of rows in the partial schedule. */
134 int history; /* Threshold for conflict checking using DFA. */
135
136 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 ps_insn_ptr *rows;
138
139 /* The earliest absolute cycle of an insn in the partial schedule. */
140 int min_cycle;
141
142 /* The latest absolute cycle of an insn in the partial schedule. */
143 int max_cycle;
144
145 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
146 };
147
148
149 partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
150 void free_partial_schedule (partial_schedule_ptr);
151 void reset_partial_schedule (partial_schedule_ptr, int new_ii);
152 void print_partial_schedule (partial_schedule_ptr, FILE *);
153 ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
154 ddg_node_ptr node, int cycle);
155 void rotate_partial_schedule (partial_schedule_ptr, int);
156 void set_row_column_for_ps (partial_schedule_ptr);
157
158 \f
159 /* This page defines constants and structures for the modulo scheduling
160 driver. */
161
162 /* As in haifa-sched.c: */
163 /* issue_rate is the number of insns that can be scheduled in the same
164 machine cycle. It can be defined in the config/mach/mach.h file,
165 otherwise we set it to 1. */
166
167 static int issue_rate;
168
169 /* For printing statistics. */
170 static FILE *stats_file;
171
172 static int sms_order_nodes (ddg_ptr, int, int * result);
173 static void set_node_sched_params (ddg_ptr);
174 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
175 int *, FILE*);
176 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
177 static void generate_prolog_epilog (partial_schedule_ptr, rtx, rtx, int);
178 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
179 int from_stage, int to_stage,
180 int is_prolog);
181
182
183 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
184 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
185 #define SCHED_FIRST_REG_MOVE(x) \
186 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
187 #define SCHED_NREG_MOVES(x) \
188 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
189 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
190 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
191 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
192
193 /* The scheduling parameters held for each node. */
194 typedef struct node_sched_params
195 {
196 int asap; /* A lower-bound on the absolute scheduling cycle. */
197 int time; /* The absolute scheduling cycle (time >= asap). */
198
199 /* The following field (first_reg_move) is a pointer to the first
200 register-move instruction added to handle the modulo-variable-expansion
201 of the register defined by this node. This register-move copies the
202 original register defined by the node. */
203 rtx first_reg_move;
204
205 /* The number of register-move instructions added, immediately preceeding
206 first_reg_move. */
207 int nreg_moves;
208
209 int row; /* Holds time % ii. */
210 int stage; /* Holds time / ii. */
211
212 /* The column of a node inside the ps. If nodes u, v are on the same row,
213 u will preceed v if column (u) < column (v). */
214 int column;
215 } *node_sched_params_ptr;
216
217 \f
218 /* The following three functions are copied from the current scheduler
219 code in order to use sched_analyze() for computing the dependencies.
220 They are used when initializing the sched_info structure. */
221 static const char *
222 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
223 {
224 static char tmp[80];
225
226 sprintf (tmp, "i%4d", INSN_UID (insn));
227 return tmp;
228 }
229
230 static int
231 contributes_to_priority (rtx next, rtx insn)
232 {
233 return BLOCK_NUM (next) == BLOCK_NUM (insn);
234 }
235
236 static void
237 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
238 regset cond_exec ATTRIBUTE_UNUSED,
239 regset used ATTRIBUTE_UNUSED,
240 regset set ATTRIBUTE_UNUSED)
241 {
242 }
243
244 static struct sched_info sms_sched_info =
245 {
246 NULL,
247 NULL,
248 NULL,
249 NULL,
250 NULL,
251 sms_print_insn,
252 contributes_to_priority,
253 compute_jump_reg_dependencies,
254 NULL, NULL,
255 NULL, NULL,
256 0, 0, 0
257 };
258
259
260 /* Return the register decremented and tested or zero if it is not a decrement
261 and branch jump insn (similar to doloop_condition_get). */
262 static rtx
263 doloop_register_get (rtx insn, rtx *comp)
264 {
265 rtx pattern, cmp, inc, reg, condition;
266
267 if (GET_CODE (insn) != JUMP_INSN)
268 return NULL_RTX;
269 pattern = PATTERN (insn);
270
271 /* The canonical doloop pattern we expect is:
272
273 (parallel [(set (pc) (if_then_else (condition)
274 (label_ref (label))
275 (pc)))
276 (set (reg) (plus (reg) (const_int -1)))
277 (additional clobbers and uses)])
278
279 where condition is further restricted to be
280 (ne (reg) (const_int 1)). */
281
282 if (GET_CODE (pattern) != PARALLEL)
283 return NULL_RTX;
284
285 cmp = XVECEXP (pattern, 0, 0);
286 inc = XVECEXP (pattern, 0, 1);
287 /* Return the compare rtx. */
288 *comp = cmp;
289
290 /* Check for (set (reg) (something)). */
291 if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
292 return NULL_RTX;
293
294 /* Extract loop counter register. */
295 reg = SET_DEST (inc);
296
297 /* Check if something = (plus (reg) (const_int -1)). */
298 if (GET_CODE (SET_SRC (inc)) != PLUS
299 || XEXP (SET_SRC (inc), 0) != reg
300 || XEXP (SET_SRC (inc), 1) != constm1_rtx)
301 return NULL_RTX;
302
303 /* Check for (set (pc) (if_then_else (condition)
304 (label_ref (label))
305 (pc))). */
306 if (GET_CODE (cmp) != SET
307 || SET_DEST (cmp) != pc_rtx
308 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
309 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
310 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
311 return NULL_RTX;
312
313 /* Extract loop termination condition. */
314 condition = XEXP (SET_SRC (cmp), 0);
315
316 /* Check if condition = (ne (reg) (const_int 1)), which is more
317 restrictive than the check in doloop_condition_get:
318 if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
319 || GET_CODE (XEXP (condition, 1)) != CONST_INT). */
320 if (GET_CODE (condition) != NE
321 || XEXP (condition, 1) != const1_rtx)
322 return NULL_RTX;
323
324 if (XEXP (condition, 0) == reg)
325 return reg;
326
327 return NULL_RTX;
328 }
329
330 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
331 that the number of iterations is a compile-time constant. If so,
332 return the rtx that sets COUNT_REG to a constant, and set COUNT to
333 this constant. Otherwise return 0. */
334 static rtx
335 const_iteration_count (rtx count_reg, basic_block pre_header,
336 HOST_WIDEST_INT * count)
337 {
338 rtx insn;
339 rtx head, tail;
340 get_block_head_tail (pre_header->index, &head, &tail);
341
342 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
343 if (INSN_P (insn) && single_set (insn) &&
344 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
345 {
346 rtx pat = single_set (insn);
347
348 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
349 {
350 *count = INTVAL (SET_SRC (pat));
351 return insn;
352 }
353
354 return NULL_RTX;
355 }
356
357 return NULL_RTX;
358 }
359
360 /* A very simple resource-based lower bound on the initiation interval.
361 ??? Improve the accuracy of this bound by considering the
362 utilization of various units. */
363 static int
364 res_MII (ddg_ptr g)
365 {
366 return (g->num_nodes / issue_rate);
367 }
368
369
370 /* Points to the array that contains the sched data for each node. */
371 static node_sched_params_ptr node_sched_params;
372
373 /* Allocate sched_params for each node and initialize it. Assumes that
374 the aux field of each node contain the asap bound (computed earlier),
375 and copies it into the sched_params field. */
376 static void
377 set_node_sched_params (ddg_ptr g)
378 {
379 int i;
380
381 /* Allocate for each node in the DDG a place to hold the "sched_data". */
382 /* Initialize ASAP/ALAP/HIGHT to zero. */
383 node_sched_params = (node_sched_params_ptr)
384 xcalloc (g->num_nodes,
385 sizeof (struct node_sched_params));
386
387 /* Set the pointer of the general data of the node to point to the
388 appropriate sched_params strcture. */
389 for (i = 0; i < g->num_nodes; i++)
390 {
391 /* Watch out for aliasing problems? */
392 node_sched_params[i].asap = g->nodes[i].aux.count;
393 g->nodes[i].aux.info = &node_sched_params[i];
394 }
395 }
396
397 static void
398 print_node_sched_params (FILE * dump_file, int num_nodes)
399 {
400 int i;
401
402 for (i = 0; i < num_nodes; i++)
403 {
404 node_sched_params_ptr nsp = &node_sched_params[i];
405 rtx reg_move = nsp->first_reg_move;
406 int j;
407
408 fprintf (dump_file, "Node %d:\n", i);
409 fprintf (dump_file, " asap = %d:\n", nsp->asap);
410 fprintf (dump_file, " time = %d:\n", nsp->time);
411 fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
412 for (j = 0; j < nsp->nreg_moves; j++)
413 {
414 fprintf (dump_file, " reg_move = ");
415 print_rtl_single (dump_file, reg_move);
416 reg_move = PREV_INSN (reg_move);
417 }
418 }
419 }
420
421 /* Calculate an upper bound for II. SMS should not schedule the loop if it
422 requires more cycles than this bound. Currently set to the sum of the
423 longest latency edge for each node. Reset based on experiments. */
424 static int
425 calculate_maxii (ddg_ptr g)
426 {
427 int i;
428 int maxii = 0;
429
430 for (i = 0; i < g->num_nodes; i++)
431 {
432 ddg_node_ptr u = &g->nodes[i];
433 ddg_edge_ptr e;
434 int max_edge_latency = 0;
435
436 for (e = u->out; e; e = e->next_out)
437 max_edge_latency = MAX (max_edge_latency, e->latency);
438
439 maxii += max_edge_latency;
440 }
441 return maxii;
442 }
443
444
445 /* Given the partial schdule, generate register moves when the length
446 of the register live range is more than ii; the number of moves is
447 determined according to the following equation:
448 SCHED_TIME (use) - SCHED_TIME (def) { 1 broken loop-carried
449 nreg_moves = ----------------------------------- - { dependecnce.
450 ii { 0 if not.
451 This handles the modulo-variable-expansions (mve's) needed for the ps. */
452 static void
453 generate_reg_moves (partial_schedule_ptr ps)
454 {
455 ddg_ptr g = ps->g;
456 int ii = ps->ii;
457 int i;
458
459 for (i = 0; i < g->num_nodes; i++)
460 {
461 ddg_node_ptr u = &g->nodes[i];
462 ddg_edge_ptr e;
463 int nreg_moves = 0, i_reg_move;
464 sbitmap *uses_of_defs;
465 rtx last_reg_move;
466 rtx prev_reg, old_reg;
467
468 /* Compute the number of reg_moves needed for u, by looking at life
469 ranges started at u (excluding self-loops). */
470 for (e = u->out; e; e = e->next_out)
471 if (e->type == TRUE_DEP && e->dest != e->src)
472 {
473 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
474
475 /* If dest preceeds src in the schedule of the kernel, then dest
476 will read before src writes and we can save one reg_copy. */
477 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
478 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
479 nreg_moves4e--;
480
481 nreg_moves = MAX (nreg_moves, nreg_moves4e);
482 }
483
484 if (nreg_moves == 0)
485 continue;
486
487 /* Every use of the register defined by node may require a different
488 copy of this register, depending on the time the use is scheduled.
489 Set a bitmap vector, telling which nodes use each copy of this
490 register. */
491 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
492 sbitmap_vector_zero (uses_of_defs, nreg_moves);
493 for (e = u->out; e; e = e->next_out)
494 if (e->type == TRUE_DEP && e->dest != e->src)
495 {
496 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
497
498 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
499 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
500 dest_copy--;
501
502 if (dest_copy)
503 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
504 }
505
506 /* Now generate the reg_moves, attaching relevant uses to them. */
507 SCHED_NREG_MOVES (u) = nreg_moves;
508 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
509 last_reg_move = u->insn;
510
511 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
512 {
513 int i_use;
514 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
515 rtx reg_move = gen_move_insn (new_reg, prev_reg);
516
517 add_insn_before (reg_move, last_reg_move);
518 last_reg_move = reg_move;
519
520 if (!SCHED_FIRST_REG_MOVE (u))
521 SCHED_FIRST_REG_MOVE (u) = reg_move;
522
523 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
524 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg));
525
526 prev_reg = new_reg;
527 }
528 }
529 }
530
531 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
532 of SCHED_ROW and SCHED_STAGE. */
533 static void
534 normalize_sched_times (partial_schedule_ptr ps)
535 {
536 int i;
537 ddg_ptr g = ps->g;
538 int amount = PS_MIN_CYCLE (ps);
539 int ii = ps->ii;
540
541 for (i = 0; i < g->num_nodes; i++)
542 {
543 ddg_node_ptr u = &g->nodes[i];
544 int normalized_time = SCHED_TIME (u) - amount;
545
546 if (normalized_time < 0)
547 abort ();
548
549 SCHED_TIME (u) = normalized_time;
550 SCHED_ROW (u) = normalized_time % ii;
551 SCHED_STAGE (u) = normalized_time / ii;
552 }
553 }
554
555 /* Set SCHED_COLUMN of each node according to its position in PS. */
556 static void
557 set_columns_for_ps (partial_schedule_ptr ps)
558 {
559 int row;
560
561 for (row = 0; row < ps->ii; row++)
562 {
563 ps_insn_ptr cur_insn = ps->rows[row];
564 int column = 0;
565
566 for (; cur_insn; cur_insn = cur_insn->next_in_row)
567 SCHED_COLUMN (cur_insn->node) = column++;
568 }
569 }
570
571 /* Permute the insns according to their order in PS, from row 0 to
572 row ii-1, and position them right before LAST. This schedules
573 the insns of the loop kernel. */
574 static void
575 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
576 {
577 int ii = ps->ii;
578 int row;
579 ps_insn_ptr ps_ij;
580
581 for (row = 0; row < ii ; row++)
582 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
583 if (PREV_INSN (last) != ps_ij->node->insn)
584 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
585 PREV_INSN (last));
586 }
587
588 /* Used to generate the prologue & epilogue. Duplicate the subset of
589 nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
590 of both), together with a prefix/suffix of their reg_moves. */
591 static void
592 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
593 int to_stage, int for_prolog)
594 {
595 int row;
596 ps_insn_ptr ps_ij;
597
598 for (row = 0; row < ps->ii; row++)
599 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
600 {
601 ddg_node_ptr u_node = ps_ij->node;
602 int j, i_reg_moves;
603 rtx reg_move = NULL_RTX;
604
605 if (for_prolog)
606 {
607 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
608 number of reg_moves starting with the second occurance of
609 u_node, which is generated if its SCHED_STAGE <= to_stage. */
610 i_reg_moves = to_stage - SCHED_STAGE (u_node);
611 i_reg_moves = MAX (i_reg_moves, 0);
612 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
613
614 /* The reg_moves start from the *first* reg_move backwards. */
615 if (i_reg_moves)
616 {
617 reg_move = SCHED_FIRST_REG_MOVE (u_node);
618 for (j = 1; j < i_reg_moves; j++)
619 reg_move = PREV_INSN (reg_move);
620 }
621 }
622 else /* It's for the epilog. */
623 {
624 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
625 starting to decrease one stage after u_node no longer occurs;
626 that is, generate all reg_moves until
627 SCHED_STAGE (u_node) == from_stage - 1. */
628 i_reg_moves = SCHED_NREG_MOVES (u_node)
629 - (from_stage - SCHED_STAGE (u_node) - 1);
630 i_reg_moves = MAX (i_reg_moves, 0);
631 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
632
633 /* The reg_moves start from the *last* reg_move forwards. */
634 if (i_reg_moves)
635 {
636 reg_move = SCHED_FIRST_REG_MOVE (u_node);
637 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
638 reg_move = PREV_INSN (reg_move);
639 }
640 }
641
642 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
643 emit_insn (copy_rtx (PATTERN (reg_move)));
644
645 if (SCHED_STAGE (u_node) >= from_stage
646 && SCHED_STAGE (u_node) <= to_stage)
647 duplicate_insn_chain (u_node->first_note, u_node->insn);
648 }
649 }
650
651
652 /* Generate the instructions (including reg_moves) for prolog & epilog. */
653 static void
654 generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg,
655 rtx orig_loop_end, int unknown_count)
656 {
657 int i;
658 int last_stage = PS_STAGE_COUNT (ps) - 1;
659 edge e;
660 rtx c_reg = NULL_RTX;
661 rtx cmp = NULL_RTX;
662 rtx precond_jump = NULL_RTX;
663 rtx precond_exit_label = NULL_RTX;
664 rtx precond_exit_label_insn = NULL_RTX;
665 rtx last_epilog_insn = NULL_RTX;
666 rtx loop_exit_label = NULL_RTX;
667 rtx loop_exit_label_insn = NULL_RTX;
668 rtx orig_loop_bct = NULL_RTX;
669
670 /* Loop header edge. */
671 e = ps->g->bb->pred;
672 if (e->src == ps->g->bb)
673 e = e->pred_next;
674
675 /* Generate the prolog, inserting its insns on the loop-entry edge. */
676 start_sequence ();
677
678 /* This is the place where we want to insert the precondition. */
679 if (unknown_count)
680 precond_jump = emit_note (NOTE_INSN_DELETED);
681
682 for (i = 0; i < last_stage; i++)
683 duplicate_insns_of_cycles (ps, 0, i, 1);
684
685 /* No need to call insert_insn_on_edge; we prepared the sequence. */
686 e->insns.r = get_insns ();
687 end_sequence ();
688
689 /* Generate the epilog, inserting its insns on the loop-exit edge. */
690 start_sequence ();
691
692 for (i = 0; i < last_stage; i++)
693 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
694
695 last_epilog_insn = emit_note (NOTE_INSN_DELETED);
696
697 /* Emit the label where to put the original loop code. */
698 if (unknown_count)
699 {
700 rtx label, cond;
701
702 precond_exit_label = gen_label_rtx ();
703 precond_exit_label_insn = emit_label (precond_exit_label);
704
705 /* Put the original loop code. */
706 reorder_insns_nobb (orig_loop_beg, orig_loop_end, precond_exit_label_insn);
707
708 /* Change the label of the BCT to be the PRECOND_EXIT_LABEL. */
709 orig_loop_bct = get_last_insn ();
710 c_reg = doloop_register_get (orig_loop_bct, &cmp);
711 label = XEXP (SET_SRC (cmp), 1);
712 cond = XEXP (SET_SRC (cmp), 0);
713
714 if (! c_reg || GET_CODE (cond) != NE)
715 abort ();
716
717 XEXP (label, 0) = precond_exit_label;
718 JUMP_LABEL (orig_loop_bct) = precond_exit_label_insn;
719 LABEL_NUSES (precond_exit_label_insn)++;
720
721 /* Generate the loop exit label. */
722 loop_exit_label = gen_label_rtx ();
723 loop_exit_label_insn = emit_label (loop_exit_label);
724 }
725
726 e = ps->g->bb->succ;
727 if (e->dest == ps->g->bb)
728 e = e->succ_next;
729
730 e->insns.r = get_insns ();
731 end_sequence ();
732
733 commit_edge_insertions ();
734
735 if (unknown_count)
736 {
737 rtx precond_insns, epilog_jump, insert_after_insn;
738 basic_block loop_exit_bb = BLOCK_FOR_INSN (loop_exit_label_insn);
739 basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
740 basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
741 basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
742 edge epilog_exit_edge = epilog_bb->succ;
743
744 /* Do loop preconditioning to take care of cases were the loop count is
745 less than the stage count. Update the CFG properly. */
746 insert_after_insn = precond_jump;
747 start_sequence ();
748 c_reg = doloop_register_get (ps->g->closing_branch->insn, &cmp);
749 emit_cmp_and_jump_insns (c_reg, GEN_INT (PS_STAGE_COUNT (ps)), LT, NULL,
750 GET_MODE (c_reg), 1, precond_exit_label);
751 precond_insns = get_insns ();
752 precond_jump = get_last_insn ();
753 end_sequence ();
754 reorder_insns (precond_insns, precond_jump, insert_after_insn);
755
756 /* Generate a subtract instruction at the beginning of the prolog to
757 adjust the loop count by STAGE_COUNT. */
758 emit_insn_after (gen_sub2_insn (c_reg, GEN_INT (PS_STAGE_COUNT (ps) - 1)),
759 precond_jump);
760 update_bb_for_insn (precond_bb);
761 delete_insn (insert_after_insn);
762
763 /* Update label info for the precondition jump. */
764 JUMP_LABEL (precond_jump) = precond_exit_label_insn;
765 LABEL_NUSES (precond_exit_label_insn)++;
766
767 /* Update the CFG. */
768 split_block (precond_bb, precond_jump);
769 make_edge (precond_bb, orig_loop_bb, 0);
770
771 /* Add a jump at end of the epilog to the LOOP_EXIT_LABEL to jump over the
772 original loop copy and update the CFG. */
773 epilog_jump = emit_jump_insn_after (gen_jump (loop_exit_label),
774 last_epilog_insn);
775 delete_insn (last_epilog_insn);
776 JUMP_LABEL (epilog_jump) = loop_exit_label_insn;
777 LABEL_NUSES (loop_exit_label_insn)++;
778
779 redirect_edge_succ (epilog_exit_edge, loop_exit_bb);
780 epilog_exit_edge->flags &= ~EDGE_FALLTHRU;
781 emit_barrier_after (BB_END (epilog_bb));
782 }
783 }
784
785 /* Return the line note insn preceding INSN, for debugging. Taken from
786 emit-rtl.c. */
787 static rtx
788 find_line_note (rtx insn)
789 {
790 for (; insn; insn = PREV_INSN (insn))
791 if (GET_CODE (insn) == NOTE
792 && NOTE_LINE_NUMBER (insn) >= 0)
793 break;
794
795 return insn;
796 }
797
798 /* Main entry point, perform SMS scheduling on the loops of the function
799 that consist of single basic blocks. */
800 void
801 sms_schedule (FILE *dump_file)
802 {
803 static int passes = 0;
804 rtx insn;
805 ddg_ptr *g_arr, g;
806 basic_block bb, pre_header = NULL;
807 int * node_order;
808 int maxii;
809 int i;
810 partial_schedule_ptr ps;
811 int max_bb_index = last_basic_block;
812 struct df *df;
813
814 /* SMS uses the DFA interface. */
815 if (! targetm.sched.use_dfa_pipeline_interface
816 || ! (*targetm.sched.use_dfa_pipeline_interface) ())
817 return;
818
819 stats_file = dump_file;
820
821
822 /* Initialize issue_rate. */
823 if (targetm.sched.issue_rate)
824 {
825 int temp = reload_completed;
826
827 reload_completed = 1;
828 issue_rate = (*targetm.sched.issue_rate) ();
829 reload_completed = temp;
830 }
831 else
832 issue_rate = 1;
833
834 /* Initilize the scheduler. */
835 current_sched_info = &sms_sched_info;
836 sched_init (NULL);
837
838 /* Init Data Flow analysis, to be used in interloop dep calculation. */
839 df = df_init ();
840 df_analyze (df, 0, DF_ALL);
841
842 /* Allocate memory to hold the DDG array. */
843 g_arr = xcalloc (max_bb_index, sizeof (ddg_ptr));
844
845 /* Build DDGs for all the relevant loops and hold them in G_ARR
846 indexed by the loop BB index. */
847 FOR_EACH_BB (bb)
848 {
849 rtx head, tail;
850 rtx count_reg, comp;
851 edge e, pre_header_edge;
852
853 if (bb->index < 0)
854 continue;
855
856 /* Check if bb has two successors, one being itself. */
857 e = bb->succ;
858 if (!e || !e->succ_next || e->succ_next->succ_next)
859 continue;
860
861 if (e->dest != bb && e->succ_next->dest != bb)
862 continue;
863
864 if ((e->flags & EDGE_COMPLEX)
865 || (e->succ_next->flags & EDGE_COMPLEX))
866 continue;
867
868 /* Check if bb has two predecessors, one being itself. */
869 /* In view of above tests, suffices to check e->pred_next->pred_next? */
870 e = bb->pred;
871 if (!e || !e->pred_next || e->pred_next->pred_next)
872 continue;
873
874 if (e->src != bb && e->pred_next->src != bb)
875 continue;
876
877 if ((e->flags & EDGE_COMPLEX)
878 || (e->pred_next->flags & EDGE_COMPLEX))
879 continue;
880
881 /* For debugging. */
882 if (passes++ > MAX_SMS_LOOP_NUMBER && MAX_SMS_LOOP_NUMBER != -1)
883 {
884 if (dump_file)
885 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
886 break;
887 }
888
889 get_block_head_tail (bb->index, &head, &tail);
890 pre_header_edge = bb->pred;
891 if (bb->pred->src != bb)
892 pre_header_edge = bb->pred->pred_next;
893
894 /* Perfrom SMS only on loops that their average count is above threshold. */
895 if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
896 {
897 if (stats_file)
898 {
899 rtx line_note = find_line_note (tail);
900
901 if (line_note)
902 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
903 NOTE_SOURCE_FILE (line_note), NOTE_LINE_NUMBER (line_note));
904 fprintf (stats_file, "SMS single-bb-loop\n");
905 if (profile_info && flag_branch_probabilities)
906 {
907 fprintf (stats_file, "SMS loop-count ");
908 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
909 (HOST_WIDEST_INT) bb->count);
910 fprintf (stats_file, "\n");
911 fprintf (stats_file, "SMS preheader-count ");
912 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
913 (HOST_WIDEST_INT) pre_header_edge->count);
914 fprintf (stats_file, "\n");
915 fprintf (stats_file, "SMS profile-sum-max ");
916 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
917 (HOST_WIDEST_INT) profile_info->sum_max);
918 fprintf (stats_file, "\n");
919 }
920 }
921 continue;
922 }
923
924 /* Make sure this is a doloop. */
925 if ( !(count_reg = doloop_register_get (tail, &comp)))
926 continue;
927
928 e = bb->pred;
929 if (e->src == bb)
930 pre_header = e->pred_next->src;
931 else
932 pre_header = e->src;
933
934 /* Don't handle BBs with calls or barriers, or !single_set insns. */
935 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
936 if (GET_CODE (insn) == CALL_INSN
937 || GET_CODE (insn) == BARRIER
938 || (INSN_P (insn) && GET_CODE (insn) != JUMP_INSN
939 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
940 break;
941
942 if (insn != NEXT_INSN (tail))
943 {
944 if (stats_file)
945 {
946 if (GET_CODE (insn) == CALL_INSN)
947 fprintf (stats_file, "SMS loop-with-call\n");
948 else if (GET_CODE (insn) == BARRIER)
949 fprintf (stats_file, "SMS loop-with-barrier\n");
950 else
951 fprintf (stats_file, "SMS loop-with-not-single-set\n");
952 print_rtl_single (stats_file, insn);
953 }
954
955 continue;
956 }
957
958 if (! (g = create_ddg (bb, df, 0)))
959 {
960 if (stats_file)
961 fprintf (stats_file, "SMS doloop\n");
962 continue;
963 }
964
965 g_arr[bb->index] = g;
966 }
967
968 /* Release Data Flow analysis data structures. */
969 df_finish (df);
970
971 /* Go over the built DDGs and perfrom SMS for each one of them. */
972 for (i = 0; i < max_bb_index; i++)
973 {
974 rtx head, tail;
975 rtx count_reg, count_init, comp;
976 edge pre_header_edge;
977 int mii, rec_mii;
978 int stage_count = 0;
979 HOST_WIDEST_INT loop_count = 0;
980
981 if (! (g = g_arr[i]))
982 continue;
983
984 if (dump_file)
985 print_ddg (dump_file, g);
986
987 get_block_head_tail (g->bb->index, &head, &tail);
988
989 pre_header_edge = g->bb->pred;
990 if (g->bb->pred->src != g->bb)
991 pre_header_edge = g->bb->pred->pred_next;
992
993 if (stats_file)
994 {
995 rtx line_note = find_line_note (tail);
996
997 if (line_note)
998 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
999 NOTE_SOURCE_FILE (line_note), NOTE_LINE_NUMBER (line_note));
1000 fprintf (stats_file, "SMS single-bb-loop\n");
1001 if (profile_info && flag_branch_probabilities)
1002 {
1003 fprintf (stats_file, "SMS loop-count ");
1004 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1005 (HOST_WIDEST_INT) bb->count);
1006 fprintf (stats_file, "\n");
1007 fprintf (stats_file, "SMS preheader-count ");
1008 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1009 (HOST_WIDEST_INT) pre_header_edge->count);
1010 fprintf (stats_file, "\n");
1011 fprintf (stats_file, "SMS profile-sum-max ");
1012 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1013 (HOST_WIDEST_INT) profile_info->sum_max);
1014 fprintf (stats_file, "\n");
1015 }
1016 fprintf (stats_file, "SMS doloop\n");
1017 fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1018 fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1019 fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1020 }
1021
1022 /* Make sure this is a doloop. */
1023 if ( !(count_reg = doloop_register_get (tail, &comp)))
1024 abort ();
1025
1026 /* This should be NULL_RTX if the count is unknown at compile time. */
1027 count_init = const_iteration_count (count_reg, pre_header, &loop_count);
1028
1029 if (stats_file && count_init)
1030 {
1031 fprintf (stats_file, "SMS const-doloop ");
1032 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1033 fprintf (stats_file, "\n");
1034 }
1035
1036 node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1037
1038 mii = 1; /* Need to pass some estimate of mii. */
1039 rec_mii = sms_order_nodes (g, mii, node_order);
1040 mii = MAX (res_MII (g), rec_mii);
1041 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1042
1043 if (stats_file)
1044 fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1045 rec_mii, mii, maxii);
1046
1047 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1048 ASAP. */
1049 set_node_sched_params (g);
1050
1051 ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1052
1053 if (ps)
1054 stage_count = PS_STAGE_COUNT (ps);
1055
1056 if (stage_count == 0 || (count_init && (stage_count > loop_count)))
1057 {
1058 if (dump_file)
1059 fprintf (dump_file, "SMS failed... \n");
1060 if (stats_file)
1061 fprintf (stats_file, "SMS sched-failed %d\n", stage_count);
1062 }
1063 else
1064 {
1065 rtx orig_loop_beg = NULL_RTX;
1066 rtx orig_loop_end = NULL_RTX;
1067
1068 if (stats_file)
1069 {
1070 fprintf (stats_file,
1071 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1072 stage_count);
1073 print_partial_schedule (ps, dump_file);
1074 fprintf (dump_file,
1075 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1076 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1077 }
1078
1079 /* Save the original loop if we want to do loop preconditioning in
1080 case the BCT count is not known. */
1081 if (! count_init)
1082 {
1083 int i;
1084
1085 start_sequence ();
1086 /* Copy the original loop code before modifying it - so we can use
1087 it later. */
1088 for (i = 0; i < ps->g->num_nodes; i++)
1089 duplicate_insn_chain (ps->g->nodes[i].first_note,
1090 ps->g->nodes[i].insn);
1091
1092 orig_loop_beg = get_insns ();
1093 orig_loop_end = get_last_insn ();
1094 end_sequence ();
1095 }
1096 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1097 the closing_branch was scheduled and should appear in the last (ii-1)
1098 row. Otherwise, we are free to schedule the branch, and we let nodes
1099 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1100 row; this should reduce stage_count to minimum. */
1101 normalize_sched_times (ps);
1102 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1103 set_columns_for_ps (ps);
1104
1105 permute_partial_schedule (ps, g->closing_branch->first_note);
1106 generate_reg_moves (ps);
1107 if (dump_file)
1108 print_node_sched_params (dump_file, g->num_nodes);
1109
1110 /* Set new iteration count of loop kernel. */
1111 if (count_init)
1112 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1113 - stage_count + 1);
1114
1115 /* Generate prolog and epilog. */
1116 generate_prolog_epilog (ps, orig_loop_beg, orig_loop_end,
1117 count_init ? 0 : 1);
1118 }
1119 free_partial_schedule (ps);
1120 free (node_sched_params);
1121 free (node_order);
1122 free_ddg (g);
1123 }
1124
1125 /* Release scheduler data, needed until now because of DFA. */
1126 sched_finish ();
1127 }
1128
1129 /* The SMS scheduling algorithm itself
1130 -----------------------------------
1131 Input: 'O' an ordered list of insns of a loop.
1132 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1133
1134 'Q' is the empty Set
1135 'PS' is the partial schedule; it holds the currently scheduled nodes with
1136 their cycle/slot.
1137 'PSP' previously scheduled predecessors.
1138 'PSS' previously scheduled successors.
1139 't(u)' the cycle where u is scheduled.
1140 'l(u)' is the latency of u.
1141 'd(v,u)' is the dependence distance from v to u.
1142 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1143 the node ordering phase.
1144 'check_hardware_resources_conflicts(u, PS, c)'
1145 run a trace around cycle/slot through DFA model
1146 to check resource conflicts involving instruction u
1147 at cycle c given the partial schedule PS.
1148 'add_to_partial_schedule_at_time(u, PS, c)'
1149 Add the node/instruction u to the partial schedule
1150 PS at time c.
1151 'calculate_register_pressure(PS)'
1152 Given a schedule of instructions, calculate the register
1153 pressure it implies. One implementation could be the
1154 maximum number of overlapping live ranges.
1155 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1156 registers available in the hardware.
1157
1158 1. II = MII.
1159 2. PS = empty list
1160 3. for each node u in O in pre-computed order
1161 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1170 17. start = Early_start;
1171 18. end = min(Early_start + II - 1 , Late_start);
1172 19. step = 1
1173 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1174 21. start = ASAP(u); end = start + II - 1; step = 1
1175 22. endif
1176
1177 23. success = false
1178 24. for (c = start ; c != end ; c += step)
1179 25. if check_hardware_resources_conflicts(u, PS, c) then
1180 26. add_to_partial_schedule_at_time(u, PS, c)
1181 27. success = true
1182 28. break
1183 29. endif
1184 30. endfor
1185 31. if (success == false) then
1186 32. II = II + 1
1187 33. if (II > maxII) then
1188 34. finish - failed to schedule
1189 35. endif
1190 36. goto 2.
1191 37. endif
1192 38. endfor
1193 39. if (calculate_register_pressure(PS) > maxRP) then
1194 40. goto 32.
1195 41. endif
1196 42. compute epilogue & prologue
1197 43. finish - succeeded to schedule
1198 */
1199
1200 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1201 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1202 set to 0 to save compile time. */
1203 #define DFA_HISTORY SMS_DFA_HISTORY
1204
/* Schedule the nodes of G into a partial schedule, considering them in
   the precomputed NODES_ORDER.  Start with initiation interval MII;
   whenever some node cannot be placed in its window, restart the whole
   schedule from scratch with II+1, giving up once II reaches MAXII.
   Returns the partial schedule on success, or NULL on failure.
   Progress traces go to DUMP_FILE when it is non-NULL.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
{
  int ii = mii;
  int i, c, success;
  int try_again_with_larger_ii = true;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  int start, end, step; /* Place together into one struct?  */
  /* Nodes already placed in the current attempt.  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  /* Previously scheduled predecessors/successors of the current node.  */
  sbitmap psp = sbitmap_alloc (num_nodes);
  sbitmap pss = sbitmap_alloc (num_nodes);
  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  while (try_again_with_larger_ii && ii < maxii)
    {
      if (dump_file)
	fprintf(dump_file, "Starting with ii=%d\n", ii);
      try_again_with_larger_ii = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &g->nodes[u];
	  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
	  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
	  int psp_not_empty;
	  int pss_not_empty;
	  rtx insn = u_node->insn;

	  if (!INSN_P (insn))
	    continue;

	  if (GET_CODE (insn) == JUMP_INSN) /* Closing branch handled later.  */
	    continue;

	  /* 1. compute sched window for u (start, end, step).  */
	  sbitmap_zero (psp);
	  sbitmap_zero (pss);
	  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
	  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

	  /* Only predecessors scheduled: schedule top-down, as early as
	     the scheduled predecessors allow.  */
	  if (psp_not_empty && !pss_not_empty)
	    {
	      int early_start = 0;

	      end = INT_MAX;
	      for (e = u_node->in; e != 0; e = e->next_in)
		{
		  ddg_node_ptr v_node = e->src;
		  if (TEST_BIT (sched_nodes, v_node->cuid))
		    {
		      early_start = MAX (early_start,
					 SCHED_TIME (v_node)
					 + e->latency - (e->distance * ii));
		      /* Memory dependences also bound the window from
			 above.  */
		      if (e->data_type == MEM_DEP)
			end = MIN (end, SCHED_TIME (v_node) + ii - 1);
		    }
		}
	      start = early_start;
	      end = MIN (end, early_start + ii);
	      step = 1;
	    }

	  /* Only successors scheduled: schedule bottom-up, as late as
	     the scheduled successors allow (note END is exclusive and
	     the loop below walks backwards).  */
	  else if (!psp_not_empty && pss_not_empty)
	    {
	      int late_start = INT_MAX;

	      end = INT_MIN;
	      for (e = u_node->out; e != 0; e = e->next_out)
		{
		  ddg_node_ptr v_node = e->dest;
		  if (TEST_BIT (sched_nodes, v_node->cuid))
		    {
		      late_start = MIN (late_start,
					SCHED_TIME (v_node) - e->latency
					+ (e->distance * ii));
		      if (e->data_type == MEM_DEP)
			end = MAX (end, SCHED_TIME (v_node) - ii + 1);
		    }
		}
	      start = late_start;
	      end = MAX (end, late_start - ii);
	      step = -1;
	    }

	  /* Both sides scheduled: the window is bounded by predecessors
	     from below and successors from above.  */
	  else if (psp_not_empty && pss_not_empty)
	    {
	      int early_start = 0;
	      int late_start = INT_MAX;

	      start = INT_MIN;
	      end = INT_MAX;
	      for (e = u_node->in; e != 0; e = e->next_in)
		{
		  ddg_node_ptr v_node = e->src;

		  if (TEST_BIT (sched_nodes, v_node->cuid))
		    {
		      early_start = MAX (early_start,
					 SCHED_TIME (v_node) + e->latency
					 - (e->distance * ii));
		      if (e->data_type == MEM_DEP)
			end = MIN (end, SCHED_TIME (v_node) + ii - 1);
		    }
		}
	      for (e = u_node->out; e != 0; e = e->next_out)
		{
		  ddg_node_ptr v_node = e->dest;

		  if (TEST_BIT (sched_nodes, v_node->cuid))
		    {
		      late_start = MIN (late_start,
					SCHED_TIME (v_node) - e->latency
					+ (e->distance * ii));
		      if (e->data_type == MEM_DEP)
			start = MAX (start, SCHED_TIME (v_node) - ii + 1);
		    }
		}
	      start = MAX (start, early_start);
	      end = MIN (end, MIN (early_start + ii, late_start + 1));
	      step = 1;
	    }
	  else /* psp is empty && pss is empty.  */
	    {
	      /* No scheduled neighbors; fall back to the node's ASAP
		 time computed during ordering.  */
	      start = SCHED_ASAP (u_node);
	      end = start + ii;
	      step = 1;
	    }

	  /* 2. Try scheduling u in window.  END is exclusive; the window
	     never spans more than II cycles.  */
	  if (dump_file)
	    fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
		    u, start, end, step);

	  success = 0;
	  if ((step > 0 && start < end) || (step < 0 && start > end))
	    for (c = start; c != end; c += step)
	      {
		ps_insn_ptr psi = ps_add_node_check_conflicts (ps, u_node, c);

		if (psi)
		  {
		    SCHED_TIME (u_node) = c;
		    SET_BIT (sched_nodes, u);
		    success = 1;
		    if (dump_file)
		      fprintf(dump_file, "Schedule in %d\n", c);
		    break;
		  }
	      }
	  if (!success)
	    {
	      /* ??? Try backtracking instead of immediately ii++?  */
	      ii++;
	      try_again_with_larger_ii = true;
	      reset_partial_schedule (ps, ii);
	      break;
	    }
	  /* ??? If (success), check register pressure estimates.  */
	} /* Continue with next node.  */
    } /* While try_again_with_larger_ii.  */

  sbitmap_free (sched_nodes);
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* II reached MAXII without a complete schedule: fail.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  return ps;
}
1380
1381 \f
1382 /* This page implements the algorithm for ordering the nodes of a DDG
1383 for modulo scheduling, activated through the
1384 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1385
/* Accessors for the per-node ordering parameters, stashed in each DDG
   node's AUX.INFO field by calculate_order_params.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: the slack between the earliest and latest feasible times.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
/* Depth is taken to be the ASAP time itself.  */
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int mii);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which order_nodes_in_scc currently sweeps the SCC.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* ASAP/ALAP times and height of a node in the DDG, computed while
   disregarding back-arcs (i.e. on the acyclic version of the graph).  */
struct node_order_params
{
  int asap;
  int alap;
  int height;
};
1410
1411 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1412 static void
1413 check_nodes_order (int *node_order, int num_nodes)
1414 {
1415 int i;
1416 sbitmap tmp = sbitmap_alloc (num_nodes);
1417
1418 sbitmap_zero (tmp);
1419
1420 for (i = 0; i < num_nodes; i++)
1421 {
1422 int u = node_order[i];
1423
1424 if (u >= num_nodes || u < 0 || TEST_BIT (tmp, u))
1425 abort ();
1426
1427 SET_BIT (tmp, u);
1428 }
1429
1430 sbitmap_free (tmp);
1431 }
1432
1433 /* Order the nodes of G for scheduling and pass the result in
1434 NODE_ORDER. Also set aux.count of each node to ASAP.
1435 Return the recMII for the given DDG. */
1436 static int
1437 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1438 {
1439 int i;
1440 int rec_mii = 0;
1441 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1442
1443 nopa nops = calculate_order_params (g, mii);
1444
1445 order_nodes_of_sccs (sccs, node_order);
1446
1447 if (sccs->num_sccs > 0)
1448 /* First SCC has the largest recurrence_length. */
1449 rec_mii = sccs->sccs[0]->recurrence_length;
1450
1451 /* Save ASAP before destroying node_order_params. */
1452 for (i = 0; i < g->num_nodes; i++)
1453 {
1454 ddg_node_ptr v = &g->nodes[i];
1455 v->aux.count = ASAP (v);
1456 }
1457
1458 free (nops);
1459 free_ddg_all_sccs (sccs);
1460 check_nodes_order (node_order, g->num_nodes);
1461
1462 return rec_mii;
1463 }
1464
1465 static void
1466 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1467 {
1468 int i, pos = 0;
1469 ddg_ptr g = all_sccs->ddg;
1470 int num_nodes = g->num_nodes;
1471 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1472 sbitmap on_path = sbitmap_alloc (num_nodes);
1473 sbitmap tmp = sbitmap_alloc (num_nodes);
1474 sbitmap ones = sbitmap_alloc (num_nodes);
1475
1476 sbitmap_zero (prev_sccs);
1477 sbitmap_ones (ones);
1478
1479 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1480 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1481 for (i = 0; i < all_sccs->num_sccs; i++)
1482 {
1483 ddg_scc_ptr scc = all_sccs->sccs[i];
1484
1485 /* Add nodes on paths from previous SCCs to the current SCC. */
1486 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1487 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1488
1489 /* Add nodes on paths from the current SCC to previous SCCs. */
1490 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1491 sbitmap_a_or_b (tmp, tmp, on_path);
1492
1493 /* Remove nodes of previous SCCs from current extended SCC. */
1494 sbitmap_difference (tmp, tmp, prev_sccs);
1495
1496 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1497 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1498 }
1499
1500 /* Handle the remaining nodes that do not belong to any scc. Each call
1501 to order_nodes_in_scc handles a single connected component. */
1502 while (pos < g->num_nodes)
1503 {
1504 sbitmap_difference (tmp, ones, prev_sccs);
1505 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1506 }
1507 sbitmap_free (prev_sccs);
1508 sbitmap_free (on_path);
1509 sbitmap_free (tmp);
1510 sbitmap_free (ones);
1511 }
1512
1513 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1514 static struct node_order_params *
1515 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1516 {
1517 int u;
1518 int max_asap;
1519 int num_nodes = g->num_nodes;
1520 ddg_edge_ptr e;
1521 /* Allocate a place to hold ordering params for each node in the DDG. */
1522 nopa node_order_params_arr;
1523
1524 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1525 node_order_params_arr = (nopa) xcalloc (num_nodes,
1526 sizeof (struct node_order_params));
1527
1528 /* Set the aux pointer of each node to point to its order_params strcture. */
1529 for (u = 0; u < num_nodes; u++)
1530 g->nodes[u].aux.info = &node_order_params_arr[u];
1531
1532 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1533 calculate ASAP, ALAP, mobility, distance, and height for each node
1534 in the dependence (direct acyclic) graph. */
1535
1536 /* We assume that the nodes in the array are in topological order. */
1537
1538 max_asap = 0;
1539 for (u = 0; u < num_nodes; u++)
1540 {
1541 ddg_node_ptr u_node = &g->nodes[u];
1542
1543 ASAP (u_node) = 0;
1544 for (e = u_node->in; e; e = e->next_in)
1545 if (e->distance == 0)
1546 ASAP (u_node) = MAX (ASAP (u_node),
1547 ASAP (e->src) + e->latency);
1548 max_asap = MAX (max_asap, ASAP (u_node));
1549 }
1550
1551 for (u = num_nodes - 1; u > -1; u--)
1552 {
1553 ddg_node_ptr u_node = &g->nodes[u];
1554
1555 ALAP (u_node) = max_asap;
1556 HEIGHT (u_node) = 0;
1557 for (e = u_node->out; e; e = e->next_out)
1558 if (e->distance == 0)
1559 {
1560 ALAP (u_node) = MIN (ALAP (u_node),
1561 ALAP (e->dest) - e->latency);
1562 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1563 HEIGHT (e->dest) + e->latency);
1564 }
1565 }
1566
1567 return node_order_params_arr;
1568 }
1569
1570 static int
1571 find_max_asap (ddg_ptr g, sbitmap nodes)
1572 {
1573 int u;
1574 int max_asap = -1;
1575 int result = -1;
1576
1577 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1578 {
1579 ddg_node_ptr u_node = &g->nodes[u];
1580
1581 if (max_asap < ASAP (u_node))
1582 {
1583 max_asap = ASAP (u_node);
1584 result = u;
1585 }
1586 });
1587 return result;
1588 }
1589
1590 static int
1591 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1592 {
1593 int u;
1594 int max_hv = -1;
1595 int min_mob = INT_MAX;
1596 int result = -1;
1597
1598 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1599 {
1600 ddg_node_ptr u_node = &g->nodes[u];
1601
1602 if (max_hv < HEIGHT (u_node))
1603 {
1604 max_hv = HEIGHT (u_node);
1605 min_mob = MOB (u_node);
1606 result = u;
1607 }
1608 else if ((max_hv == HEIGHT (u_node))
1609 && (min_mob > MOB (u_node)))
1610 {
1611 min_mob = MOB (u_node);
1612 result = u;
1613 }
1614 });
1615 return result;
1616 }
1617
1618 static int
1619 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1620 {
1621 int u;
1622 int max_dv = -1;
1623 int min_mob = INT_MAX;
1624 int result = -1;
1625
1626 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1627 {
1628 ddg_node_ptr u_node = &g->nodes[u];
1629
1630 if (max_dv < DEPTH (u_node))
1631 {
1632 max_dv = DEPTH (u_node);
1633 min_mob = MOB (u_node);
1634 result = u;
1635 }
1636 else if ((max_dv == DEPTH (u_node))
1637 && (min_mob > MOB (u_node)))
1638 {
1639 min_mob = MOB (u_node);
1640 result = u;
1641 }
1642 });
1643 return result;
1644 }
1645
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.
   Alternates TOPDOWN sweeps (rank by height) and BOTTOMUP sweeps
   (rank by depth) until every node of SCC has been placed; returns
   the next free position in NODE_ORDER.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: prefer SCC nodes that are predecessors of the
     already-ordered set, then successors; if neither exists (nothing
     ordered yet), start from the SCC node with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  sbitmap_zero (zero_bitmap);
  /* Drain the workset, flipping direction each time it empties, until
     no SCC nodes remain unordered.  */
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick highest node, min mobility as tie-break.  */
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Sweep exhausted: refill the workset with the SCC
	     predecessors of everything ordered so far and reverse.  */
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick deepest node, min mobility as tie-break.  */
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Sweep exhausted: refill with SCC successors and reverse.  */
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
1747
1748 \f
1749 /* This page contains functions for manipulating partial-schedules during
1750 modulo scheduling. */
1751
1752 /* Create a partial schedule and allocate a memory to hold II rows. */
1753 partial_schedule_ptr
1754 create_partial_schedule (int ii, ddg_ptr g, int history)
1755 {
1756 partial_schedule_ptr ps = (partial_schedule_ptr)
1757 xmalloc (sizeof (struct partial_schedule));
1758 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1759 ps->ii = ii;
1760 ps->history = history;
1761 ps->min_cycle = INT_MAX;
1762 ps->max_cycle = INT_MIN;
1763 ps->g = g;
1764
1765 return ps;
1766 }
1767
1768 /* Free the PS_INSNs in rows array of the given partial schedule.
1769 ??? Consider caching the PS_INSN's. */
1770 static void
1771 free_ps_insns (partial_schedule_ptr ps)
1772 {
1773 int i;
1774
1775 for (i = 0; i < ps->ii; i++)
1776 {
1777 while (ps->rows[i])
1778 {
1779 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1780
1781 free (ps->rows[i]);
1782 ps->rows[i] = ps_insn;
1783 }
1784 ps->rows[i] = NULL;
1785 }
1786 }
1787
1788 /* Free all the memory allocated to the partial schedule. */
1789 void
1790 free_partial_schedule (partial_schedule_ptr ps)
1791 {
1792 if (!ps)
1793 return;
1794 free_ps_insns (ps);
1795 free (ps->rows);
1796 free (ps);
1797 }
1798
1799 /* Clear the rows array with its PS_INSNs, and create a new one with
1800 NEW_II rows. */
1801 void
1802 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1803 {
1804 if (!ps)
1805 return;
1806 free_ps_insns (ps);
1807 if (new_ii == ps->ii)
1808 return;
1809 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
1810 * sizeof (ps_insn_ptr));
1811 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
1812 ps->ii = new_ii;
1813 ps->min_cycle = INT_MAX;
1814 ps->max_cycle = INT_MIN;
1815 }
1816
1817 /* Prints the partial schedule as an ii rows array, for each rows
1818 print the ids of the insns in it. */
1819 void
1820 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
1821 {
1822 int i;
1823
1824 for (i = 0; i < ps->ii; i++)
1825 {
1826 ps_insn_ptr ps_i = ps->rows[i];
1827
1828 fprintf (dump, "\n[CYCLE %d ]: ", i);
1829 while (ps_i)
1830 {
1831 fprintf (dump, "%d, ",
1832 INSN_UID (ps_i->node->insn));
1833 ps_i = ps_i->next_in_row;
1834 }
1835 }
1836 }
1837
1838 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1839 static ps_insn_ptr
1840 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
1841 {
1842 ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
1843
1844 ps_i->node = node;
1845 ps_i->next_in_row = NULL;
1846 ps_i->prev_in_row = NULL;
1847 ps_i->row_rest_count = rest_count;
1848 ps_i->cycle = cycle;
1849
1850 return ps_i;
1851 }
1852
1853
1854 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1855 node is not found in the partial schedule, else returns true. */
1856 static int
1857 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
1858 {
1859 int row;
1860
1861 if (!ps || !ps_i)
1862 return false;
1863
1864 row = SMODULO (ps_i->cycle, ps->ii);
1865 if (! ps_i->prev_in_row)
1866 {
1867 if (ps_i != ps->rows[row])
1868 return false;
1869
1870 ps->rows[row] = ps_i->next_in_row;
1871 if (ps->rows[row])
1872 ps->rows[row]->prev_in_row = NULL;
1873 }
1874 else
1875 {
1876 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
1877 if (ps_i->next_in_row)
1878 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
1879 }
1880 free (ps_i);
1881 return true;
1882 }
1883
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  "Advancing a column" swaps PS_I
   with the insn after it in the row's doubly linked list, moving PS_I
   one issue slot later within the same cycle row.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
{
  ps_insn_ptr prev, next;
  int row;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already last in its row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (ps_i->cycle == ps_i->next_in_row->cycle)
    {
      ddg_edge_ptr e;
      ddg_node_ptr next_node = ps_i->next_in_row->node;

      for (e = ps_i->node->out; e; e = e->next_out)
	if (e->dest == next_node)
	  return false;
    }

  /* Advance PS_I over its next_in_row in the doubly linked list:
     PREV -> PS_I -> NEXT becomes PREV -> NEXT -> PS_I.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I headed the row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
1933
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
   Returns 0 if this is not possible and a PS_INSN otherwise.  Fails
   when the row at CYCLE (mod II) is already full (issue_rate insns),
   or when the new insn must be placed after a same-cycle insn it
   depends on but cannot advance past everything in between.  */
static ps_insn_ptr
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
{
  ps_insn_ptr ps_i, next_ps_i, advance_after;
  int rest_count = 1;
  int row = SMODULO (cycle, ps->ii);
  ddg_edge_ptr e;

  /* The row head carries the row's population count; refuse to
     overfill the row beyond the machine's issue rate.  */
  if (ps->rows[row]
      && ps->rows[row]->row_rest_count >= issue_rate)
    return NULL;

  if (ps->rows[row])
    rest_count += ps->rows[row]->row_rest_count;

  /* Insert the new insn at the head of the row.  */
  ps_i = create_ps_insn (node, rest_count, cycle);
  ps_i->next_in_row = ps->rows[row];
  ps_i->prev_in_row = NULL;
  if (ps_i->next_in_row)
    ps_i->next_in_row->prev_in_row = ps_i;
  ps->rows[row] = ps_i;

  /* Check if n is dependent on an insn already in row, having same cycle
     (typically ANTI_DEP).  If so, n must skip over it.  Find the last
     such insn in the row.  */
  advance_after = NULL;
  for (next_ps_i = ps_i->next_in_row;
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    if (next_ps_i->cycle == cycle)
      for (e = node->in; e; e = e->next_in)
	if (e->src == next_ps_i->node)
	  advance_after = next_ps_i;

  /* Push the new insn rightwards until it sits just after the last
     same-cycle insn it depends on; undo the insertion on failure.  */
  if (advance_after)
    while (ps_i->prev_in_row != advance_after)
      if (!ps_insn_advance_column (ps, ps_i))
	{
	  remove_node_from_ps (ps, ps_i);
	  return NULL;
	}

  return ps_i;
}
1979
1980 /* Advance time one cycle. Assumes DFA is being used. */
1981 static void
1982 advance_one_cycle (void)
1983 {
1984 if (targetm.sched.use_dfa_pipeline_interface
1985 && (*targetm.sched.use_dfa_pipeline_interface) ())
1986 {
1987 if (targetm.sched.dfa_pre_cycle_insn)
1988 state_transition (curr_state,
1989 (*targetm.sched.dfa_pre_cycle_insn) ());
1990
1991 state_transition (curr_state, NULL);
1992
1993 if (targetm.sched.dfa_post_cycle_insn)
1994 state_transition (curr_state,
1995 (*targetm.sched.dfa_post_cycle_insn) ());
1996 }
1997 }
1998
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used (returns true,
   i.e. "conflict", when the target has no DFA pipeline interface).
   Replays all insns in the schedule's rows, cycle by cycle, through a
   freshly reset DFA state.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  if (! targetm.sched.use_dfa_pipeline_interface
      || ! (*targetm.sched.use_dfa_pipeline_interface) ())
    return true;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      (*targetm.sched.variable_issue) (sched_dump, sched_verbose,
					       insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2054
2055 /* Checks if the given node causes resource conflicts when added to PS at
2056 cycle C. If not the node is added to PS and returned; otherwise zero
2057 is returned. */
2058 ps_insn_ptr
2059 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
2060 {
2061 int has_conflicts = 0;
2062 ps_insn_ptr ps_i;
2063
2064 /* First add the node to the PS, if this succeeds check for conflicts,
2065 trying different issue slots in the same row. */
2066 if (! (ps_i = add_node_to_ps (ps, n, c)))
2067 return NULL; /* Failed to insert the node at the given cycle. */
2068
2069 has_conflicts = ps_has_conflicts (ps, c, c)
2070 || (ps->history > 0
2071 && ps_has_conflicts (ps,
2072 c - ps->history,
2073 c + ps->history));
2074
2075 /* Try different issue slots to find one that the given node can be
2076 scheduled in without conflicts. */
2077 while (has_conflicts)
2078 {
2079 if (! ps_insn_advance_column (ps, ps_i))
2080 break;
2081 has_conflicts = ps_has_conflicts (ps, c, c)
2082 || (ps->history > 0
2083 && ps_has_conflicts (ps,
2084 c - ps->history,
2085 c + ps->history));
2086 }
2087
2088 if (has_conflicts)
2089 {
2090 remove_node_from_ps (ps, ps_i);
2091 return NULL;
2092 }
2093
2094 ps->min_cycle = MIN (ps->min_cycle, c);
2095 ps->max_cycle = MAX (ps->max_cycle, c);
2096 return ps_i;
2097 }
2098
2099 /* Rotate the rows of PS such that insns scheduled at time
2100 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2101 void
2102 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2103 {
2104 int i, row, backward_rotates;
2105 int last_row = ps->ii - 1;
2106
2107 if (start_cycle == 0)
2108 return;
2109
2110 backward_rotates = SMODULO (start_cycle, ps->ii);
2111
2112 /* Revisit later and optimize this into a single loop. */
2113 for (i = 0; i < backward_rotates; i++)
2114 {
2115 ps_insn_ptr first_row = ps->rows[0];
2116
2117 for (row = 0; row < last_row; row++)
2118 ps->rows[row] = ps->rows[row+1];
2119
2120 ps->rows[last_row] = first_row;
2121 }
2122
2123 ps->max_cycle -= start_cycle;
2124 ps->min_cycle -= start_cycle;
2125 }