exp_smem.adb, [...]: Remove OK_For_Stream flag, not used, not needed.
[gcc.git] / gcc / modulo-sched.c
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "hard-reg-set.h"
32 #include "regs.h"
33 #include "function.h"
34 #include "flags.h"
35 #include "insn-config.h"
36 #include "insn-attr.h"
37 #include "except.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "sched-int.h"
41 #include "target.h"
42 #include "cfglayout.h"
43 #include "cfgloop.h"
44 #include "cfghooks.h"
45 #include "expr.h"
46 #include "params.h"
47 #include "gcov-io.h"
48 #include "df.h"
49 #include "ddg.h"
50
51 #ifdef INSN_SCHEDULING
52
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
61
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-
77 up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
85 */
86
87 \f
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   NOTE: X and Y are evaluated more than once; avoid side effects.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently; i.e. the scheduled
   cycle span divided by II, rounded up.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)

/* The set of CFG hooks this pass operates under.  */
#define CFG_HOOKS cfg_layout_rtl_cfg_hooks
109
/* A single instruction in the partial schedule.  Each ps_insn lives in
   the doubly-linked list of one row (row = cycle modulo ii) of the
   partial schedule.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};
127
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row (i.e. for that cycle
   modulo II).  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
147
148
/* Partial-schedule creation, destruction and manipulation primitives,
   defined later in this file.  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
158 \f
/* This page defines constants and structures for the modulo scheduling
   driver.  */

/* As in haifa-sched.c:  */
/* issue_rate is the number of insns that can be scheduled in the same
   machine cycle.  It can be defined in the config/mach/mach.h file,
   otherwise we set it to 1.  */

static int issue_rate;

/* For printing statistics.  */
static FILE *stats_file;

/* Forward declarations for the driver routines defined below.  */
static int sms_order_nodes (ddg_ptr, int, int * result);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
						   int *, FILE*);
static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
static void generate_prolog_epilog (partial_schedule_ptr, rtx, rtx, int);
static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
				       int from_stage, int to_stage,
				       int is_prolog);


/* Accessors for the node_sched_params record that set_node_sched_params
   attaches to each DDG node through its aux.info field.  X is a
   ddg_node_ptr.  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
192
/* The scheduling parameters held for each node; accessed through the
   SCHED_* macros above.  */
typedef struct node_sched_params
{
  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
  int time;	/* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */
  rtx first_reg_move;

  /* The number of register-move instructions added, immediately preceding
     first_reg_move.  */
  int nreg_moves;

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
216
217 \f
218 /* The following three functions are copied from the current scheduler
219 code in order to use sched_analyze() for computing the dependencies.
220 They are used when initializing the sched_info structure. */
221 static const char *
222 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
223 {
224 static char tmp[80];
225
226 sprintf (tmp, "i%4d", INSN_UID (insn));
227 return tmp;
228 }
229
230 static int
231 contributes_to_priority (rtx next, rtx insn)
232 {
233 return BLOCK_NUM (next) == BLOCK_NUM (insn);
234 }
235
/* sched_info callback for jump-register dependencies.  SMS does not
   need this information, so the hook is intentionally a no-op.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
243
/* The sched_info structure handed to the Haifa dependence analyzer
   (sched_analyze) while building the DDG.  Hooks the modulo scheduler
   does not need are left NULL and the trailing flags are zero; only the
   printing, priority and jump-dependency callbacks are provided.  */
static struct sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  contributes_to_priority,
  compute_jump_reg_dependencies,
  NULL, NULL,
  NULL, NULL,
  0, 0, 0
};
258
259
/* Return the register decremented and tested by INSN, or NULL_RTX if
   INSN is not a decrement-and-branch jump insn of the expected shape
   (similar to doloop_condition_get, but more restrictive).  On any
   PARALLEL jump, *COMP is set to the first element (the compare SET) as
   soon as it is extracted -- even on paths that go on to return
   NULL_RTX.  */
static rtx
doloop_register_get (rtx insn, rtx *comp)
{
  rtx pattern, cmp, inc, reg, condition;

  if (!JUMP_P (insn))
    return NULL_RTX;
  pattern = PATTERN (insn);

  /* The canonical doloop pattern we expect is:

     (parallel [(set (pc) (if_then_else (condition)
				        (label_ref (label))
				        (pc)))
	        (set (reg) (plus (reg) (const_int -1)))
	        (additional clobbers and uses)])

     where condition is further restricted to be
     (ne (reg) (const_int 1)).  */

  if (GET_CODE (pattern) != PARALLEL)
    return NULL_RTX;

  cmp = XVECEXP (pattern, 0, 0);
  inc = XVECEXP (pattern, 0, 1);
  /* Return the compare rtx.  */
  *comp = cmp;

  /* Check for (set (reg) (something)).  */
  if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
    return NULL_RTX;

  /* Extract loop counter register.  */
  reg = SET_DEST (inc);

  /* Check if something = (plus (reg) (const_int -1)).
     Note the pointer comparisons against REG and constm1_rtx rely on
     RTL sharing of registers and standard constants.  */
  if (GET_CODE (SET_SRC (inc)) != PLUS
      || XEXP (SET_SRC (inc), 0) != reg
      || XEXP (SET_SRC (inc), 1) != constm1_rtx)
    return NULL_RTX;

  /* Check for (set (pc) (if_then_else (condition)
				       (label_ref (label))
				       (pc))).  */
  if (GET_CODE (cmp) != SET
      || SET_DEST (cmp) != pc_rtx
      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
    return NULL_RTX;

  /* Extract loop termination condition.  */
  condition = XEXP (SET_SRC (cmp), 0);

  /* Check if condition = (ne (reg) (const_int 1)), which is more
     restrictive than the check in doloop_condition_get:
     if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
	 || GET_CODE (XEXP (condition, 1)) != CONST_INT).  */
  if (GET_CODE (condition) != NE
      || XEXP (condition, 1) != const1_rtx)
    return NULL_RTX;

  if (XEXP (condition, 0) == reg)
    return reg;

  return NULL_RTX;
}
329
330 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
331 that the number of iterations is a compile-time constant. If so,
332 return the rtx that sets COUNT_REG to a constant, and set COUNT to
333 this constant. Otherwise return 0. */
334 static rtx
335 const_iteration_count (rtx count_reg, basic_block pre_header,
336 HOST_WIDEST_INT * count)
337 {
338 rtx insn;
339 rtx head, tail;
340 get_block_head_tail (pre_header->index, &head, &tail);
341
342 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
343 if (INSN_P (insn) && single_set (insn) &&
344 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
345 {
346 rtx pat = single_set (insn);
347
348 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
349 {
350 *count = INTVAL (SET_SRC (pat));
351 return insn;
352 }
353
354 return NULL_RTX;
355 }
356
357 return NULL_RTX;
358 }
359
360 /* A very simple resource-based lower bound on the initiation interval.
361 ??? Improve the accuracy of this bound by considering the
362 utilization of various units. */
363 static int
364 res_MII (ddg_ptr g)
365 {
366 return (g->num_nodes / issue_rate);
367 }
368
369
370 /* Points to the array that contains the sched data for each node. */
371 static node_sched_params_ptr node_sched_params;
372
373 /* Allocate sched_params for each node and initialize it. Assumes that
374 the aux field of each node contain the asap bound (computed earlier),
375 and copies it into the sched_params field. */
376 static void
377 set_node_sched_params (ddg_ptr g)
378 {
379 int i;
380
381 /* Allocate for each node in the DDG a place to hold the "sched_data". */
382 /* Initialize ASAP/ALAP/HIGHT to zero. */
383 node_sched_params = (node_sched_params_ptr)
384 xcalloc (g->num_nodes,
385 sizeof (struct node_sched_params));
386
387 /* Set the pointer of the general data of the node to point to the
388 appropriate sched_params structure. */
389 for (i = 0; i < g->num_nodes; i++)
390 {
391 /* Watch out for aliasing problems? */
392 node_sched_params[i].asap = g->nodes[i].aux.count;
393 g->nodes[i].aux.info = &node_sched_params[i];
394 }
395 }
396
397 static void
398 print_node_sched_params (FILE * dump_file, int num_nodes)
399 {
400 int i;
401
402 for (i = 0; i < num_nodes; i++)
403 {
404 node_sched_params_ptr nsp = &node_sched_params[i];
405 rtx reg_move = nsp->first_reg_move;
406 int j;
407
408 fprintf (dump_file, "Node %d:\n", i);
409 fprintf (dump_file, " asap = %d:\n", nsp->asap);
410 fprintf (dump_file, " time = %d:\n", nsp->time);
411 fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
412 for (j = 0; j < nsp->nreg_moves; j++)
413 {
414 fprintf (dump_file, " reg_move = ");
415 print_rtl_single (dump_file, reg_move);
416 reg_move = PREV_INSN (reg_move);
417 }
418 }
419 }
420
421 /* Calculate an upper bound for II. SMS should not schedule the loop if it
422 requires more cycles than this bound. Currently set to the sum of the
423 longest latency edge for each node. Reset based on experiments. */
424 static int
425 calculate_maxii (ddg_ptr g)
426 {
427 int i;
428 int maxii = 0;
429
430 for (i = 0; i < g->num_nodes; i++)
431 {
432 ddg_node_ptr u = &g->nodes[i];
433 ddg_edge_ptr e;
434 int max_edge_latency = 0;
435
436 for (e = u->out; e; e = e->next_out)
437 max_edge_latency = MAX (max_edge_latency, e->latency);
438
439 maxii += max_edge_latency;
440 }
441 return maxii;
442 }
443
444
/* Given the partial schedule, generate register moves when the length
   of the register live range is more than ii; the number of moves is
   determined according to the following equation:
		SCHED_TIME (use) - SCHED_TIME (def)   { 1 broken loop-carried
   nreg_moves = ----------------------------------- - {   dependence.
			      ii		      { 0 if not.
   This handles the modulo-variable-expansions (mve's) needed for the ps.
   Side effects: emits the reg-move insns immediately before each
   defining insn, rewrites the uses in dependent insns to reference the
   proper copy, and records the moves in SCHED_FIRST_REG_MOVE /
   SCHED_NREG_MOVES of the defining node.  */
static void
generate_reg_moves (partial_schedule_ptr ps)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    /* u needs enough copies to satisfy its longest live range.  */
	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    /* dest_copy == 0 means the use reads the original register.  */
	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.
	 Each move copies the previous copy, forming a chain rooted at
	 the original destination register of u's insn; moves are
	 inserted one before the other, so in the insn stream the last
	 generated move comes first and SCHED_FIRST_REG_MOVE is the one
	 nearest u's insn.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      last_reg_move = u->insn;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  int i_use;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);

	  add_insn_before (reg_move, last_reg_move);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Redirect the uses assigned to this copy to the new register.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
	    replace_rtx (g->nodes[i_use].insn, old_reg, new_reg));

	  prev_reg = new_reg;
	}
    }
}
530
531 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
532 of SCHED_ROW and SCHED_STAGE. */
533 static void
534 normalize_sched_times (partial_schedule_ptr ps)
535 {
536 int i;
537 ddg_ptr g = ps->g;
538 int amount = PS_MIN_CYCLE (ps);
539 int ii = ps->ii;
540
541 for (i = 0; i < g->num_nodes; i++)
542 {
543 ddg_node_ptr u = &g->nodes[i];
544 int normalized_time = SCHED_TIME (u) - amount;
545
546 if (normalized_time < 0)
547 abort ();
548
549 SCHED_TIME (u) = normalized_time;
550 SCHED_ROW (u) = normalized_time % ii;
551 SCHED_STAGE (u) = normalized_time / ii;
552 }
553 }
554
555 /* Set SCHED_COLUMN of each node according to its position in PS. */
556 static void
557 set_columns_for_ps (partial_schedule_ptr ps)
558 {
559 int row;
560
561 for (row = 0; row < ps->ii; row++)
562 {
563 ps_insn_ptr cur_insn = ps->rows[row];
564 int column = 0;
565
566 for (; cur_insn; cur_insn = cur_insn->next_in_row)
567 SCHED_COLUMN (cur_insn->node) = column++;
568 }
569 }
570
571 /* Permute the insns according to their order in PS, from row 0 to
572 row ii-1, and position them right before LAST. This schedules
573 the insns of the loop kernel. */
574 static void
575 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
576 {
577 int ii = ps->ii;
578 int row;
579 ps_insn_ptr ps_ij;
580
581 for (row = 0; row < ii ; row++)
582 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
583 if (PREV_INSN (last) != ps_ij->node->insn)
584 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
585 PREV_INSN (last));
586 }
587
/* Used to generate the prologue & epilogue.  Duplicate the subset of
   nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
   of both), together with a prefix/suffix of their reg_moves.
   FOR_PROLOG selects how many reg_moves to copy and from which end of
   the reg_move chain to start.  The copies are emitted into the
   sequence currently open in the caller (between start_sequence and
   end_sequence).  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.
	       (SCHED_FIRST_REG_MOVE is the move nearest the defining
	       insn; PREV_INSN walks towards earlier copies.)  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards;
	       walk all the way back to the earliest move in the chain.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit copies of the selected reg_moves in stream order.  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));

	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
650
651
/* Generate the instructions (including reg_moves) for prolog & epilog.
   PS is the scheduled kernel.  ORIG_LOOP_BEG/ORIG_LOOP_END delimit a
   copy of the original (unscheduled) loop, used as a fallback when the
   iteration count is unknown.  UNKNOWN_COUNT is nonzero when the loop
   count is not a compile-time constant; in that case a precondition
   test is emitted that branches to the original loop copy whenever the
   count is smaller than the stage count, and the counter is adjusted by
   STAGE_COUNT - 1 for the transformed loop.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg,
			rtx orig_loop_end, int unknown_count)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;
  rtx c_reg = NULL_RTX;
  rtx cmp = NULL_RTX;
  rtx precond_jump = NULL_RTX;
  rtx precond_exit_label = NULL_RTX;
  rtx precond_exit_label_insn = NULL_RTX;
  rtx last_epilog_insn = NULL_RTX;
  rtx loop_exit_label = NULL_RTX;
  rtx loop_exit_label_insn = NULL_RTX;
  rtx orig_loop_bct = NULL_RTX;

  /* Loop header edge.  */
  e = EDGE_PRED (ps->g->bb, 0);
  if (e->src == ps->g->bb)
    e = EDGE_PRED (ps->g->bb, 1);

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  /* This is the place where we want to insert the precondition.  The
     note is a placeholder that is located (via BLOCK_FOR_INSN) and
     replaced after commit_edge_insertions below.  */
  if (unknown_count)
    precond_jump = emit_note (NOTE_INSN_DELETED);

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1);

  /* No need to call insert_insn_on_edge; we prepared the sequence.  */
  e->insns.r = get_insns ();
  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);

  /* Placeholder marking the end of the epilog; replaced by a jump over
     the original loop copy below when the count is unknown.  */
  last_epilog_insn = emit_note (NOTE_INSN_DELETED);

  /* Emit the label where to put the original loop code.  */
  if (unknown_count)
    {
      rtx label, cond;

      precond_exit_label = gen_label_rtx ();
      precond_exit_label_insn = emit_label (precond_exit_label);

      /* Put the original loop code.  */
      reorder_insns_nobb (orig_loop_beg, orig_loop_end, precond_exit_label_insn);

      /* Change the label of the BCT to be the PRECOND_EXIT_LABEL.  */
      orig_loop_bct = get_last_insn ();
      c_reg = doloop_register_get (orig_loop_bct, &cmp);
      label = XEXP (SET_SRC (cmp), 1);
      cond = XEXP (SET_SRC (cmp), 0);

      if (! c_reg || GET_CODE (cond) != NE)
	abort ();

      XEXP (label, 0) = precond_exit_label;
      JUMP_LABEL (orig_loop_bct) = precond_exit_label_insn;
      LABEL_NUSES (precond_exit_label_insn)++;

      /* Generate the loop exit label.  */
      loop_exit_label = gen_label_rtx ();
      loop_exit_label_insn = emit_label (loop_exit_label);
    }

  /* Find the loop-exit edge (the successor that leaves the loop).  */
  e = EDGE_SUCC (ps->g->bb, 0);
  if (e->dest == ps->g->bb)
    e = EDGE_SUCC (ps->g->bb, 1);

  e->insns.r = get_insns ();
  end_sequence ();

  commit_edge_insertions ();

  if (unknown_count)
    {
      rtx precond_insns, epilog_jump, insert_after_insn;
      basic_block loop_exit_bb = BLOCK_FOR_INSN (loop_exit_label_insn);
      basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
      basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
      basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
      edge epilog_exit_edge = single_succ_edge (epilog_bb);

      /* Do loop preconditioning to take care of cases where the loop count is
	 less than the stage count.  Update the CFG properly.  */
      insert_after_insn = precond_jump;
      start_sequence ();
      c_reg = doloop_register_get (ps->g->closing_branch->insn, &cmp);
      emit_cmp_and_jump_insns (c_reg, GEN_INT (PS_STAGE_COUNT (ps)), LT, NULL,
			       GET_MODE (c_reg), 1, precond_exit_label);
      precond_insns = get_insns ();
      precond_jump = get_last_insn ();
      end_sequence ();
      reorder_insns (precond_insns, precond_jump, insert_after_insn);

      /* Generate a subtract instruction at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  */
      emit_insn_after (gen_sub2_insn (c_reg, GEN_INT (PS_STAGE_COUNT (ps) - 1)),
		       precond_jump);
      update_bb_for_insn (precond_bb);
      delete_insn (insert_after_insn);

      /* Update label info for the precondition jump.  */
      JUMP_LABEL (precond_jump) = precond_exit_label_insn;
      LABEL_NUSES (precond_exit_label_insn)++;

      /* Update the CFG.  */
      split_block (precond_bb, precond_jump);
      make_edge (precond_bb, orig_loop_bb, 0);

      /* Add a jump at end of the epilog to the LOOP_EXIT_LABEL to jump over the
	 original loop copy and update the CFG.  */
      epilog_jump = emit_jump_insn_after (gen_jump (loop_exit_label),
					  last_epilog_insn);
      delete_insn (last_epilog_insn);
      JUMP_LABEL (epilog_jump) = loop_exit_label_insn;
      LABEL_NUSES (loop_exit_label_insn)++;

      redirect_edge_succ (epilog_exit_edge, loop_exit_bb);
      epilog_exit_edge->flags &= ~EDGE_FALLTHRU;
      emit_barrier_after (BB_END (epilog_bb));
    }
}
784
785 /* Return the line note insn preceding INSN, for debugging. Taken from
786 emit-rtl.c. */
787 static rtx
788 find_line_note (rtx insn)
789 {
790 for (; insn; insn = PREV_INSN (insn))
791 if (NOTE_P (insn)
792 && NOTE_LINE_NUMBER (insn) >= 0)
793 break;
794
795 return insn;
796 }
797
798 /* Main entry point, perform SMS scheduling on the loops of the function
799 that consist of single basic blocks. */
800 void
801 sms_schedule (FILE *dump_file)
802 {
803 static int passes = 0;
804 rtx insn;
805 ddg_ptr *g_arr, g;
806 basic_block bb, pre_header = NULL;
807 int * node_order;
808 int maxii;
809 int i;
810 partial_schedule_ptr ps;
811 int max_bb_index = last_basic_block;
812 struct df *df;
813
814 stats_file = dump_file;
815
816 /* Initialize issue_rate. */
817 if (targetm.sched.issue_rate)
818 {
819 int temp = reload_completed;
820
821 reload_completed = 1;
822 issue_rate = (*targetm.sched.issue_rate) ();
823 reload_completed = temp;
824 }
825 else
826 issue_rate = 1;
827
828 /* Initialize the scheduler. */
829 current_sched_info = &sms_sched_info;
830 sched_init (NULL);
831
832 /* Init Data Flow analysis, to be used in interloop dep calculation. */
833 df = df_init ();
834 df_analyze (df, 0, DF_ALL);
835
836 /* Allocate memory to hold the DDG array. */
837 g_arr = xcalloc (max_bb_index, sizeof (ddg_ptr));
838
839 /* Build DDGs for all the relevant loops and hold them in G_ARR
840 indexed by the loop BB index. */
841 FOR_EACH_BB (bb)
842 {
843 rtx head, tail;
844 rtx count_reg, comp;
845 edge e, pre_header_edge;
846
847 if (bb->index < 0)
848 continue;
849
850 /* Check if bb has two successors, one being itself. */
851 if (EDGE_COUNT (bb->succs) != 2)
852 continue;
853
854 if (EDGE_SUCC (bb, 0)->dest != bb && EDGE_SUCC (bb, 1)->dest != bb)
855 continue;
856
857 if ((EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
858 || (EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
859 continue;
860
861 /* Check if bb has two predecessors, one being itself. */
862 if (EDGE_COUNT (bb->preds) != 2)
863 continue;
864
865 if (EDGE_PRED (bb, 0)->src != bb && EDGE_PRED (bb, 1)->src != bb)
866 continue;
867
868 if ((EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
869 || (EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX))
870 continue;
871
872 /* For debugging. */
873 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
874 {
875 if (dump_file)
876 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
877 break;
878 }
879
880 get_block_head_tail (bb->index, &head, &tail);
881 pre_header_edge = EDGE_PRED (bb, 0);
882 if (EDGE_PRED (bb, 0)->src != bb)
883 pre_header_edge = EDGE_PRED (bb, 1);
884
      /* Perform SMS only on loops whose average iteration count is above
	 the threshold.  */
886 if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
887 {
888 if (stats_file)
889 {
890 rtx line_note = find_line_note (tail);
891
892 if (line_note)
893 {
894 expanded_location xloc;
895 NOTE_EXPANDED_LOCATION (xloc, line_note);
896 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
897 xloc.file, xloc.line);
898 }
899 fprintf (stats_file, "SMS single-bb-loop\n");
900 if (profile_info && flag_branch_probabilities)
901 {
902 fprintf (stats_file, "SMS loop-count ");
903 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
904 (HOST_WIDEST_INT) bb->count);
905 fprintf (stats_file, "\n");
906 fprintf (stats_file, "SMS preheader-count ");
907 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
908 (HOST_WIDEST_INT) pre_header_edge->count);
909 fprintf (stats_file, "\n");
910 fprintf (stats_file, "SMS profile-sum-max ");
911 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
912 (HOST_WIDEST_INT) profile_info->sum_max);
913 fprintf (stats_file, "\n");
914 }
915 }
916 continue;
917 }
918
919 /* Make sure this is a doloop. */
920 if ( !(count_reg = doloop_register_get (tail, &comp)))
921 continue;
922
923 e = EDGE_PRED (bb, 0);
924 if (e->src == bb)
925 pre_header = EDGE_PRED (bb, 1)->src;
926 else
927 pre_header = e->src;
928
929 /* Don't handle BBs with calls or barriers, or !single_set insns. */
930 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
931 if (CALL_P (insn)
932 || BARRIER_P (insn)
933 || (INSN_P (insn) && !JUMP_P (insn)
934 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
935 break;
936
937 if (insn != NEXT_INSN (tail))
938 {
939 if (stats_file)
940 {
941 if (CALL_P (insn))
942 fprintf (stats_file, "SMS loop-with-call\n");
943 else if (BARRIER_P (insn))
944 fprintf (stats_file, "SMS loop-with-barrier\n");
945 else
946 fprintf (stats_file, "SMS loop-with-not-single-set\n");
947 print_rtl_single (stats_file, insn);
948 }
949
950 continue;
951 }
952
953 if (! (g = create_ddg (bb, df, 0)))
954 {
955 if (stats_file)
956 fprintf (stats_file, "SMS doloop\n");
957 continue;
958 }
959
960 g_arr[bb->index] = g;
961 }
962
963 /* Release Data Flow analysis data structures. */
964 df_finish (df);
965
  /* Go over the built DDGs and perform SMS for each one of them.  */
967 for (i = 0; i < max_bb_index; i++)
968 {
969 rtx head, tail;
970 rtx count_reg, count_init, comp;
971 edge pre_header_edge;
972 int mii, rec_mii;
973 int stage_count = 0;
974 HOST_WIDEST_INT loop_count = 0;
975
976 if (! (g = g_arr[i]))
977 continue;
978
979 if (dump_file)
980 print_ddg (dump_file, g);
981
982 get_block_head_tail (g->bb->index, &head, &tail);
983
984 pre_header_edge = EDGE_PRED (g->bb, 0);
985 if (EDGE_PRED (g->bb, 0)->src != g->bb)
986 pre_header_edge = EDGE_PRED (g->bb, 1);
987
988 if (stats_file)
989 {
990 rtx line_note = find_line_note (tail);
991
992 if (line_note)
993 {
994 expanded_location xloc;
995 NOTE_EXPANDED_LOCATION (xloc, line_note);
996 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
997 xloc.file, xloc.line);
998 }
999 fprintf (stats_file, "SMS single-bb-loop\n");
1000 if (profile_info && flag_branch_probabilities)
1001 {
1002 fprintf (stats_file, "SMS loop-count ");
1003 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1004 (HOST_WIDEST_INT) bb->count);
1005 fprintf (stats_file, "\n");
1006 fprintf (stats_file, "SMS preheader-count ");
1007 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1008 (HOST_WIDEST_INT) pre_header_edge->count);
1009 fprintf (stats_file, "\n");
1010 fprintf (stats_file, "SMS profile-sum-max ");
1011 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1012 (HOST_WIDEST_INT) profile_info->sum_max);
1013 fprintf (stats_file, "\n");
1014 }
1015 fprintf (stats_file, "SMS doloop\n");
1016 fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1017 fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1018 fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1019 }
1020
1021 /* Make sure this is a doloop. */
1022 if ( !(count_reg = doloop_register_get (tail, &comp)))
1023 abort ();
1024
1025 /* This should be NULL_RTX if the count is unknown at compile time. */
1026 count_init = const_iteration_count (count_reg, pre_header, &loop_count);
1027
1028 if (stats_file && count_init)
1029 {
1030 fprintf (stats_file, "SMS const-doloop ");
1031 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1032 fprintf (stats_file, "\n");
1033 }
1034
1035 node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1036
1037 mii = 1; /* Need to pass some estimate of mii. */
1038 rec_mii = sms_order_nodes (g, mii, node_order);
1039 mii = MAX (res_MII (g), rec_mii);
1040 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1041
1042 if (stats_file)
1043 fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1044 rec_mii, mii, maxii);
1045
1046 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1047 ASAP. */
1048 set_node_sched_params (g);
1049
1050 ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1051
1052 if (ps)
1053 stage_count = PS_STAGE_COUNT (ps);
1054
1055 if (stage_count == 0 || (count_init && (stage_count > loop_count)))
1056 {
1057 if (dump_file)
1058 fprintf (dump_file, "SMS failed... \n");
1059 if (stats_file)
1060 fprintf (stats_file, "SMS sched-failed %d\n", stage_count);
1061 }
1062 else
1063 {
1064 rtx orig_loop_beg = NULL_RTX;
1065 rtx orig_loop_end = NULL_RTX;
1066
1067 if (stats_file)
1068 {
1069 fprintf (stats_file,
1070 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1071 stage_count);
1072 print_partial_schedule (ps, dump_file);
1073 fprintf (dump_file,
1074 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1075 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1076 }
1077
1078 /* Save the original loop if we want to do loop preconditioning in
1079 case the BCT count is not known. */
1080 if (! count_init)
1081 {
1082 int i;
1083
1084 start_sequence ();
1085 /* Copy the original loop code before modifying it -
1086 so we can use it later. */
1087 for (i = 0; i < ps->g->num_nodes; i++)
1088 duplicate_insn_chain (ps->g->nodes[i].first_note,
1089 ps->g->nodes[i].insn);
1090
1091 orig_loop_beg = get_insns ();
1092 orig_loop_end = get_last_insn ();
1093 end_sequence ();
1094 }
1095 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1096 the closing_branch was scheduled and should appear in the last (ii-1)
1097 row. Otherwise, we are free to schedule the branch, and we let nodes
1098 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1099 row; this should reduce stage_count to minimum. */
1100 normalize_sched_times (ps);
1101 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1102 set_columns_for_ps (ps);
1103
1104 permute_partial_schedule (ps, g->closing_branch->first_note);
1105
1106 /* Mark this loop as software pipelined so the later
1107 scheduling passes doesn't touch it. */
1108 if (! flag_resched_modulo_sched)
1109 g->bb->flags |= BB_DISABLE_SCHEDULE;
1110
1111 generate_reg_moves (ps);
1112 if (dump_file)
1113 print_node_sched_params (dump_file, g->num_nodes);
1114
1115 /* Set new iteration count of loop kernel. */
1116 if (count_init)
1117 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1118 - stage_count + 1);
1119
1120 /* Generate prolog and epilog. */
1121 generate_prolog_epilog (ps, orig_loop_beg, orig_loop_end,
1122 count_init ? 0 : 1);
1123 }
1124 free_partial_schedule (ps);
1125 free (node_sched_params);
1126 free (node_order);
1127 free_ddg (g);
1128 }
1129
1130 /* Release scheduler data, needed until now because of DFA. */
1131 sched_finish ();
1132 }
1133
1134 /* The SMS scheduling algorithm itself
1135 -----------------------------------
1136 Input: 'O' an ordered list of insns of a loop.
1137 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1138
1139 'Q' is the empty Set
1140 'PS' is the partial schedule; it holds the currently scheduled nodes with
1141 their cycle/slot.
1142 'PSP' previously scheduled predecessors.
1143 'PSS' previously scheduled successors.
1144 't(u)' the cycle where u is scheduled.
1145 'l(u)' is the latency of u.
1146 'd(v,u)' is the dependence distance from v to u.
1147 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1148 the node ordering phase.
1149 'check_hardware_resources_conflicts(u, PS, c)'
1150 run a trace around cycle/slot through DFA model
1151 to check resource conflicts involving instruction u
1152 at cycle c given the partial schedule PS.
1153 'add_to_partial_schedule_at_time(u, PS, c)'
1154 Add the node/instruction u to the partial schedule
1155 PS at time c.
1156 'calculate_register_pressure(PS)'
1157 Given a schedule of instructions, calculate the register
1158 pressure it implies. One implementation could be the
1159 maximum number of overlapping live ranges.
   'maxRP' The maximum allowed register pressure, it is usually derived from the
           number of registers available in the hardware.
1162
1163 1. II = MII.
1164 2. PS = empty list
1165 3. for each node u in O in pre-computed order
1166 4. if (PSP(u) != Q && PSS(u) == Q) then
1167 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1168 6. start = Early_start; end = Early_start + II - 1; step = 1
1169 11. else if (PSP(u) == Q && PSS(u) != Q) then
1170 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1171 13. start = Late_start; end = Late_start - II + 1; step = -1
1172 14. else if (PSP(u) != Q && PSS(u) != Q) then
1173 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1174 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1175 17. start = Early_start;
1176 18. end = min(Early_start + II - 1 , Late_start);
1177 19. step = 1
1178 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1179 21. start = ASAP(u); end = start + II - 1; step = 1
1180 22. endif
1181
1182 23. success = false
1183 24. for (c = start ; c != end ; c += step)
1184 25. if check_hardware_resources_conflicts(u, PS, c) then
1185 26. add_to_partial_schedule_at_time(u, PS, c)
1186 27. success = true
1187 28. break
1188 29. endif
1189 30. endfor
1190 31. if (success == false) then
1191 32. II = II + 1
1192 33. if (II > maxII) then
1193 34. finish - failed to schedule
1194 35. endif
1195 36. goto 2.
1196 37. endif
1197 38. endfor
1198 39. if (calculate_register_pressure(PS) > maxRP) then
1199 40. goto 32.
1200 41. endif
1201 42. compute epilogue & prologue
1202 43. finish - succeeded to schedule
1203 */
1204
1205 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1206 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1207 set to 0 to save compile time. */
1208 #define DFA_HISTORY SMS_DFA_HISTORY
1209
1210 static partial_schedule_ptr
1211 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1212 {
1213 int ii = mii;
1214 int i, c, success;
1215 int try_again_with_larger_ii = true;
1216 int num_nodes = g->num_nodes;
1217 ddg_edge_ptr e;
1218 int start, end, step; /* Place together into one struct? */
1219 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1220 sbitmap must_precede = sbitmap_alloc (num_nodes);
1221 sbitmap must_follow = sbitmap_alloc (num_nodes);
1222
1223 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1224
1225 while (try_again_with_larger_ii && ii < maxii)
1226 {
1227 if (dump_file)
1228 fprintf(dump_file, "Starting with ii=%d\n", ii);
1229 try_again_with_larger_ii = false;
1230 sbitmap_zero (sched_nodes);
1231
1232 for (i = 0; i < num_nodes; i++)
1233 {
1234 int u = nodes_order[i];
1235 ddg_node_ptr u_node = &g->nodes[u];
1236 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1237 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1238 int psp_not_empty;
1239 int pss_not_empty;
1240 rtx insn = u_node->insn;
1241
1242 if (!INSN_P (insn))
1243 continue;
1244
1245 if (JUMP_P (insn)) /* Closing branch handled later. */
1246 continue;
1247
1248 /* 1. compute sched window for u (start, end, step). */
1249 psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes);
1250 pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes);
1251
1252 if (psp_not_empty && !pss_not_empty)
1253 {
1254 int early_start = 0;
1255
1256 end = INT_MAX;
1257 for (e = u_node->in; e != 0; e = e->next_in)
1258 {
1259 ddg_node_ptr v_node = e->src;
1260 if (TEST_BIT (sched_nodes, v_node->cuid))
1261 {
1262 int node_st = SCHED_TIME (v_node)
1263 + e->latency - (e->distance * ii);
1264
1265 early_start = MAX (early_start, node_st);
1266
1267 if (e->data_type == MEM_DEP)
1268 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1269 }
1270 }
1271 start = early_start;
1272 end = MIN (end, early_start + ii);
1273 step = 1;
1274 }
1275
1276 else if (!psp_not_empty && pss_not_empty)
1277 {
1278 int late_start = INT_MAX;
1279
1280 end = INT_MIN;
1281 for (e = u_node->out; e != 0; e = e->next_out)
1282 {
1283 ddg_node_ptr v_node = e->dest;
1284 if (TEST_BIT (sched_nodes, v_node->cuid))
1285 {
1286 late_start = MIN (late_start,
1287 SCHED_TIME (v_node) - e->latency
1288 + (e->distance * ii));
1289 if (e->data_type == MEM_DEP)
1290 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1291 }
1292 }
1293 start = late_start;
1294 end = MAX (end, late_start - ii);
1295 step = -1;
1296 }
1297
1298 else if (psp_not_empty && pss_not_empty)
1299 {
1300 int early_start = 0;
1301 int late_start = INT_MAX;
1302
1303 start = INT_MIN;
1304 end = INT_MAX;
1305 for (e = u_node->in; e != 0; e = e->next_in)
1306 {
1307 ddg_node_ptr v_node = e->src;
1308
1309 if (TEST_BIT (sched_nodes, v_node->cuid))
1310 {
1311 early_start = MAX (early_start,
1312 SCHED_TIME (v_node) + e->latency
1313 - (e->distance * ii));
1314 if (e->data_type == MEM_DEP)
1315 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1316 }
1317 }
1318 for (e = u_node->out; e != 0; e = e->next_out)
1319 {
1320 ddg_node_ptr v_node = e->dest;
1321
1322 if (TEST_BIT (sched_nodes, v_node->cuid))
1323 {
1324 late_start = MIN (late_start,
1325 SCHED_TIME (v_node) - e->latency
1326 + (e->distance * ii));
1327 if (e->data_type == MEM_DEP)
1328 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1329 }
1330 }
1331 start = MAX (start, early_start);
1332 end = MIN (end, MIN (early_start + ii, late_start + 1));
1333 step = 1;
1334 }
1335 else /* psp is empty && pss is empty. */
1336 {
1337 start = SCHED_ASAP (u_node);
1338 end = start + ii;
1339 step = 1;
1340 }
1341
1342 /* 2. Try scheduling u in window. */
1343 if (dump_file)
1344 fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1345 u, start, end, step);
1346
1347 /* use must_follow & must_precede bitmaps to determine order
1348 of nodes within the cycle. */
1349 sbitmap_zero (must_precede);
1350 sbitmap_zero (must_follow);
1351 for (e = u_node->in; e != 0; e = e->next_in)
1352 if (TEST_BIT (sched_nodes, e->src->cuid)
1353 && e->latency == (ii * e->distance)
1354 && start == SCHED_TIME (e->src))
1355 SET_BIT (must_precede, e->src->cuid);
1356
1357 for (e = u_node->out; e != 0; e = e->next_out)
1358 if (TEST_BIT (sched_nodes, e->dest->cuid)
1359 && e->latency == (ii * e->distance)
1360 && end == SCHED_TIME (e->dest))
1361 SET_BIT (must_follow, e->dest->cuid);
1362
1363 success = 0;
1364 if ((step > 0 && start < end) || (step < 0 && start > end))
1365 for (c = start; c != end; c += step)
1366 {
1367 ps_insn_ptr psi;
1368
1369 psi = ps_add_node_check_conflicts (ps, u_node, c,
1370 must_precede,
1371 must_follow);
1372
1373 if (psi)
1374 {
1375 SCHED_TIME (u_node) = c;
1376 SET_BIT (sched_nodes, u);
1377 success = 1;
1378 if (dump_file)
1379 fprintf(dump_file, "Schedule in %d\n", c);
1380 break;
1381 }
1382 }
1383 if (!success)
1384 {
1385 /* ??? Try backtracking instead of immediately ii++? */
1386 ii++;
1387 try_again_with_larger_ii = true;
1388 reset_partial_schedule (ps, ii);
1389 break;
1390 }
1391 /* ??? If (success), check register pressure estimates. */
1392 } /* Continue with next node. */
1393 } /* While try_again_with_larger_ii. */
1394
1395 sbitmap_free (sched_nodes);
1396
1397 if (ii >= maxii)
1398 {
1399 free_partial_schedule (ps);
1400 ps = NULL;
1401 }
1402 return ps;
1403 }
1404
1405 \f
1406 /* This page implements the algorithm for ordering the nodes of a DDG
1407 for modulo scheduling, activated through the
1408 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1409
1410 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1411 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1412 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1413 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1414 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1415 #define DEPTH(x) (ASAP ((x)))
1416
1417 typedef struct node_order_params * nopa;
1418
1419 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1420 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1421 static nopa calculate_order_params (ddg_ptr, int mii);
1422 static int find_max_asap (ddg_ptr, sbitmap);
1423 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1424 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1425
1426 enum sms_direction {BOTTOMUP, TOPDOWN};
1427
/* Per-node ordering parameters, computed by calculate_order_params and
   reached from each DDG node through its aux.info pointer (see
   ORDER_PARAMS above).  */
struct node_order_params
{
  int asap;	/* Earliest start cycle, over zero-distance in-edges.  */
  int alap;	/* Latest start cycle, over zero-distance out-edges.  */
  int height;	/* Longest latency path to a leaf of the (acyclic) DDG.  */
};
1434
1435 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1436 static void
1437 check_nodes_order (int *node_order, int num_nodes)
1438 {
1439 int i;
1440 sbitmap tmp = sbitmap_alloc (num_nodes);
1441
1442 sbitmap_zero (tmp);
1443
1444 for (i = 0; i < num_nodes; i++)
1445 {
1446 int u = node_order[i];
1447
1448 if (u >= num_nodes || u < 0 || TEST_BIT (tmp, u))
1449 abort ();
1450
1451 SET_BIT (tmp, u);
1452 }
1453
1454 sbitmap_free (tmp);
1455 }
1456
1457 /* Order the nodes of G for scheduling and pass the result in
1458 NODE_ORDER. Also set aux.count of each node to ASAP.
1459 Return the recMII for the given DDG. */
1460 static int
1461 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1462 {
1463 int i;
1464 int rec_mii = 0;
1465 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1466
1467 nopa nops = calculate_order_params (g, mii);
1468
1469 order_nodes_of_sccs (sccs, node_order);
1470
1471 if (sccs->num_sccs > 0)
1472 /* First SCC has the largest recurrence_length. */
1473 rec_mii = sccs->sccs[0]->recurrence_length;
1474
1475 /* Save ASAP before destroying node_order_params. */
1476 for (i = 0; i < g->num_nodes; i++)
1477 {
1478 ddg_node_ptr v = &g->nodes[i];
1479 v->aux.count = ASAP (v);
1480 }
1481
1482 free (nops);
1483 free_ddg_all_sccs (sccs);
1484 check_nodes_order (node_order, g->num_nodes);
1485
1486 return rec_mii;
1487 }
1488
/* Order all nodes of the DDG held by ALL_SCCS into NODE_ORDER: visit
   the SCCs from highest recMII down, each extended with the nodes that
   lie on paths between it and the already-ordered nodes, and finally
   pick up any nodes that belong to no SCC.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);	/* Nodes ordered so far.  */
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);		/* Current extended SCC.  */
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
1536
1537 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1538 static struct node_order_params *
1539 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1540 {
1541 int u;
1542 int max_asap;
1543 int num_nodes = g->num_nodes;
1544 ddg_edge_ptr e;
1545 /* Allocate a place to hold ordering params for each node in the DDG. */
1546 nopa node_order_params_arr;
1547
1548 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1549 node_order_params_arr = (nopa) xcalloc (num_nodes,
1550 sizeof (struct node_order_params));
1551
1552 /* Set the aux pointer of each node to point to its order_params structure. */
1553 for (u = 0; u < num_nodes; u++)
1554 g->nodes[u].aux.info = &node_order_params_arr[u];
1555
1556 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1557 calculate ASAP, ALAP, mobility, distance, and height for each node
1558 in the dependence (direct acyclic) graph. */
1559
1560 /* We assume that the nodes in the array are in topological order. */
1561
1562 max_asap = 0;
1563 for (u = 0; u < num_nodes; u++)
1564 {
1565 ddg_node_ptr u_node = &g->nodes[u];
1566
1567 ASAP (u_node) = 0;
1568 for (e = u_node->in; e; e = e->next_in)
1569 if (e->distance == 0)
1570 ASAP (u_node) = MAX (ASAP (u_node),
1571 ASAP (e->src) + e->latency);
1572 max_asap = MAX (max_asap, ASAP (u_node));
1573 }
1574
1575 for (u = num_nodes - 1; u > -1; u--)
1576 {
1577 ddg_node_ptr u_node = &g->nodes[u];
1578
1579 ALAP (u_node) = max_asap;
1580 HEIGHT (u_node) = 0;
1581 for (e = u_node->out; e; e = e->next_out)
1582 if (e->distance == 0)
1583 {
1584 ALAP (u_node) = MIN (ALAP (u_node),
1585 ALAP (e->dest) - e->latency);
1586 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1587 HEIGHT (e->dest) + e->latency);
1588 }
1589 }
1590
1591 return node_order_params_arr;
1592 }
1593
1594 static int
1595 find_max_asap (ddg_ptr g, sbitmap nodes)
1596 {
1597 int u;
1598 int max_asap = -1;
1599 int result = -1;
1600
1601 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1602 {
1603 ddg_node_ptr u_node = &g->nodes[u];
1604
1605 if (max_asap < ASAP (u_node))
1606 {
1607 max_asap = ASAP (u_node);
1608 result = u;
1609 }
1610 });
1611 return result;
1612 }
1613
1614 static int
1615 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1616 {
1617 int u;
1618 int max_hv = -1;
1619 int min_mob = INT_MAX;
1620 int result = -1;
1621
1622 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1623 {
1624 ddg_node_ptr u_node = &g->nodes[u];
1625
1626 if (max_hv < HEIGHT (u_node))
1627 {
1628 max_hv = HEIGHT (u_node);
1629 min_mob = MOB (u_node);
1630 result = u;
1631 }
1632 else if ((max_hv == HEIGHT (u_node))
1633 && (min_mob > MOB (u_node)))
1634 {
1635 min_mob = MOB (u_node);
1636 result = u;
1637 }
1638 });
1639 return result;
1640 }
1641
1642 static int
1643 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1644 {
1645 int u;
1646 int max_dv = -1;
1647 int min_mob = INT_MAX;
1648 int result = -1;
1649
1650 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1651 {
1652 ddg_node_ptr u_node = &g->nodes[u];
1653
1654 if (max_dv < DEPTH (u_node))
1655 {
1656 max_dv = DEPTH (u_node);
1657 min_mob = MOB (u_node);
1658 result = u;
1659 }
1660 else if ((max_dv == DEPTH (u_node))
1661 && (min_mob > MOB (u_node)))
1662 {
1663 min_mob = MOB (u_node);
1664 result = u;
1665 }
1666 });
1667 return result;
1668 }
1669
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.
   The algorithm alternates direction: TOPDOWN sweeps drain the workset
   by max-height/min-mobility, BOTTOMUP sweeps by max-depth/min-mobility,
   switching whenever the current workset empties.  Returns the updated
   position POS.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset:  prefer predecessors of already-ordered nodes that
     are inside SCC, then successors; if neither intersects SCC, start
     from the node of SCC with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Drain the workset top-down, expanding it with each ordered
	     node's not-yet-ordered successors inside SCC.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Reverse direction: restart from the SCC predecessors of
	     everything ordered so far.  */
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Drain the workset bottom-up, expanding it with each ordered
	     node's not-yet-ordered predecessors inside SCC.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Reverse direction: restart from the SCC successors of
	     everything ordered so far.  */
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
1771
1772 \f
1773 /* This page contains functions for manipulating partial-schedules during
1774 modulo scheduling. */
1775
1776 /* Create a partial schedule and allocate a memory to hold II rows. */
1777 static partial_schedule_ptr
1778 create_partial_schedule (int ii, ddg_ptr g, int history)
1779 {
1780 partial_schedule_ptr ps = (partial_schedule_ptr)
1781 xmalloc (sizeof (struct partial_schedule));
1782 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1783 ps->ii = ii;
1784 ps->history = history;
1785 ps->min_cycle = INT_MAX;
1786 ps->max_cycle = INT_MIN;
1787 ps->g = g;
1788
1789 return ps;
1790 }
1791
1792 /* Free the PS_INSNs in rows array of the given partial schedule.
1793 ??? Consider caching the PS_INSN's. */
1794 static void
1795 free_ps_insns (partial_schedule_ptr ps)
1796 {
1797 int i;
1798
1799 for (i = 0; i < ps->ii; i++)
1800 {
1801 while (ps->rows[i])
1802 {
1803 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1804
1805 free (ps->rows[i]);
1806 ps->rows[i] = ps_insn;
1807 }
1808 ps->rows[i] = NULL;
1809 }
1810 }
1811
1812 /* Free all the memory allocated to the partial schedule. */
1813 static void
1814 free_partial_schedule (partial_schedule_ptr ps)
1815 {
1816 if (!ps)
1817 return;
1818 free_ps_insns (ps);
1819 free (ps->rows);
1820 free (ps);
1821 }
1822
1823 /* Clear the rows array with its PS_INSNs, and create a new one with
1824 NEW_II rows. */
1825 static void
1826 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1827 {
1828 if (!ps)
1829 return;
1830 free_ps_insns (ps);
1831 if (new_ii == ps->ii)
1832 return;
1833 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
1834 * sizeof (ps_insn_ptr));
1835 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
1836 ps->ii = new_ii;
1837 ps->min_cycle = INT_MAX;
1838 ps->max_cycle = INT_MIN;
1839 }
1840
1841 /* Prints the partial schedule as an ii rows array, for each rows
1842 print the ids of the insns in it. */
1843 void
1844 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
1845 {
1846 int i;
1847
1848 for (i = 0; i < ps->ii; i++)
1849 {
1850 ps_insn_ptr ps_i = ps->rows[i];
1851
1852 fprintf (dump, "\n[CYCLE %d ]: ", i);
1853 while (ps_i)
1854 {
1855 fprintf (dump, "%d, ",
1856 INSN_UID (ps_i->node->insn));
1857 ps_i = ps_i->next_in_row;
1858 }
1859 }
1860 }
1861
1862 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1863 static ps_insn_ptr
1864 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
1865 {
1866 ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
1867
1868 ps_i->node = node;
1869 ps_i->next_in_row = NULL;
1870 ps_i->prev_in_row = NULL;
1871 ps_i->row_rest_count = rest_count;
1872 ps_i->cycle = cycle;
1873
1874 return ps_i;
1875 }
1876
1877
1878 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1879 node is not found in the partial schedule, else returns true. */
1880 static int
1881 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
1882 {
1883 int row;
1884
1885 if (!ps || !ps_i)
1886 return false;
1887
1888 row = SMODULO (ps_i->cycle, ps->ii);
1889 if (! ps_i->prev_in_row)
1890 {
1891 if (ps_i != ps->rows[row])
1892 return false;
1893
1894 ps->rows[row] = ps_i->next_in_row;
1895 if (ps->rows[row])
1896 ps->rows[row]->prev_in_row = NULL;
1897 }
1898 else
1899 {
1900 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
1901 if (ps_i->next_in_row)
1902 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
1903 }
1904 free (ps_i);
1905 return true;
1906 }
1907
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Returns false if the constraints are unsatisfiable (a must-follow
   insn sits before a must-precede insn in the row).  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
        first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
        {
          /* If we have already met a node that must follow, then
	     there is no possible column.  */
  	  if (first_must_follow)
            return false;
	  else
            last_must_precede = next_ps_i;
        }
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede insn: PS_I becomes the new head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
    	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Link PS_I immediately after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
        ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
1970
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already last in its row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     After the swap the order is PREV -> NEXT -> PS_I.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I heads its row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2019
2020 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2021 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2022 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2023 before/after (respectively) the node pointed to by PS_I when scheduled
2024 in the same cycle. */
2025 static ps_insn_ptr
2026 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2027 sbitmap must_precede, sbitmap must_follow)
2028 {
2029 ps_insn_ptr ps_i;
2030 int rest_count = 1;
2031 int row = SMODULO (cycle, ps->ii);
2032
2033 if (ps->rows[row]
2034 && ps->rows[row]->row_rest_count >= issue_rate)
2035 return NULL;
2036
2037 if (ps->rows[row])
2038 rest_count += ps->rows[row]->row_rest_count;
2039
2040 ps_i = create_ps_insn (node, rest_count, cycle);
2041
2042 /* Finds and inserts PS_I according to MUST_FOLLOW and
2043 MUST_PRECEDE. */
2044 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2045 {
2046 free (ps_i);
2047 return NULL;
2048 }
2049
2050 return ps_i;
2051 }
2052
2053 /* Advance time one cycle. Assumes DFA is being used. */
2054 static void
2055 advance_one_cycle (void)
2056 {
2057 if (targetm.sched.dfa_pre_cycle_insn)
2058 state_transition (curr_state,
2059 (*targetm.sched.dfa_pre_cycle_insn) ());
2060
2061 state_transition (curr_state, NULL);
2062
2063 if (targetm.sched.dfa_post_cycle_insn)
2064 state_transition (curr_state,
2065 (*targetm.sched.dfa_post_cycle_insn) ());
2066 }
2067
2068 /* Checks if PS has resource conflicts according to DFA, starting from
2069 FROM cycle to TO cycle; returns true if there are conflicts and false
2070 if there are no conflicts. Assumes DFA is being used. */
2071 static int
2072 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2073 {
2074 int cycle;
2075
2076 state_reset (curr_state);
2077
2078 for (cycle = from; cycle <= to; cycle++)
2079 {
2080 ps_insn_ptr crr_insn;
2081 /* Holds the remaining issue slots in the current row. */
2082 int can_issue_more = issue_rate;
2083
2084 /* Walk through the DFA for the current row. */
2085 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2086 crr_insn;
2087 crr_insn = crr_insn->next_in_row)
2088 {
2089 rtx insn = crr_insn->node->insn;
2090
2091 if (!INSN_P (insn))
2092 continue;
2093
2094 /* Check if there is room for the current insn. */
2095 if (!can_issue_more || state_dead_lock_p (curr_state))
2096 return true;
2097
2098 /* Update the DFA state and return with failure if the DFA found
2099 recource conflicts. */
2100 if (state_transition (curr_state, insn) >= 0)
2101 return true;
2102
2103 if (targetm.sched.variable_issue)
2104 can_issue_more =
2105 (*targetm.sched.variable_issue) (sched_dump, sched_verbose,
2106 insn, can_issue_more);
2107 /* A naked CLOBBER or USE generates no instruction, so don't
2108 let them consume issue slots. */
2109 else if (GET_CODE (PATTERN (insn)) != USE
2110 && GET_CODE (PATTERN (insn)) != CLOBBER)
2111 can_issue_more--;
2112 }
2113
2114 /* Advance the DFA to the next cycle. */
2115 advance_one_cycle ();
2116 }
2117 return false;
2118 }
2119
2120 /* Checks if the given node causes resource conflicts when added to PS at
2121 cycle C. If not the node is added to PS and returned; otherwise zero
2122 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2123 cuid N must be come before/after (respectively) the node pointed to by
2124 PS_I when scheduled in the same cycle. */
2125 static ps_insn_ptr
2126 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2127 int c, sbitmap must_precede,
2128 sbitmap must_follow)
2129 {
2130 int has_conflicts = 0;
2131 ps_insn_ptr ps_i;
2132
2133 /* First add the node to the PS, if this succeeds check for
2134 conflicts, trying different issue slots in the same row. */
2135 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2136 return NULL; /* Failed to insert the node at the given cycle. */
2137
2138 has_conflicts = ps_has_conflicts (ps, c, c)
2139 || (ps->history > 0
2140 && ps_has_conflicts (ps,
2141 c - ps->history,
2142 c + ps->history));
2143
2144 /* Try different issue slots to find one that the given node can be
2145 scheduled in without conflicts. */
2146 while (has_conflicts)
2147 {
2148 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2149 break;
2150 has_conflicts = ps_has_conflicts (ps, c, c)
2151 || (ps->history > 0
2152 && ps_has_conflicts (ps,
2153 c - ps->history,
2154 c + ps->history));
2155 }
2156
2157 if (has_conflicts)
2158 {
2159 remove_node_from_ps (ps, ps_i);
2160 return NULL;
2161 }
2162
2163 ps->min_cycle = MIN (ps->min_cycle, c);
2164 ps->max_cycle = MAX (ps->max_cycle, c);
2165 return ps_i;
2166 }
2167
2168 /* Rotate the rows of PS such that insns scheduled at time
2169 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2170 static void
2171 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2172 {
2173 int i, row, backward_rotates;
2174 int last_row = ps->ii - 1;
2175
2176 if (start_cycle == 0)
2177 return;
2178
2179 backward_rotates = SMODULO (start_cycle, ps->ii);
2180
2181 /* Revisit later and optimize this into a single loop. */
2182 for (i = 0; i < backward_rotates; i++)
2183 {
2184 ps_insn_ptr first_row = ps->rows[0];
2185
2186 for (row = 0; row < last_row; row++)
2187 ps->rows[row] = ps->rows[row+1];
2188
2189 ps->rows[last_row] = first_row;
2190 }
2191
2192 ps->max_cycle -= start_cycle;
2193 ps->min_cycle -= start_cycle;
2194 }
2195
2196 #endif /* INSN_SCHEDULING*/