gcc/sched-rgn.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
23
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
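/* Illustrative only: a minimal schematic of the three levels described
   above, in plain C with made-up names (sketch_*).  It is not the real
   driver and is kept under "#if 0" so it is never compiled.  */
#if 0
static void
sketch_schedule_insns (int sketch_nr_regions)
{
  int rgn, bb;

  /* Function level: the control flow graph has been built and the
     regions computed (after reload every region is a single block).  */
  for (rgn = 0; rgn < sketch_nr_regions; rgn++)
    {
      /* Region level: dominators, reachability, probabilities, data
	 dependences and priorities would be computed here.  */
      int sketch_nr_blocks = 1;		/* placeholder */

      /* Block level: list-schedule each block, letting insns move "up"
	 from source blocks into the target block of the region.  */
      for (bb = 0; bb < sketch_nr_blocks; bb++)
	;
    }
}
#endif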
47 \f
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68
69 /* Define this when we want to count REG_DEAD notes before and after scheduling
70 for sanity checking.  We can't do that when conditional execution is used,
71 as REG_DEAD notes exist only for unconditional deaths. */
72
73 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
74 #define CHECK_DEAD_NOTES 1
75 #else
76 #define CHECK_DEAD_NOTES 0
77 #endif
78
79
80 #ifdef INSN_SCHEDULING
81 /* Some accessor macros for h_i_d members only used within this file. */
82 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
83 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
84 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
85
86 /* nr_inter/spec counts interblock/speculative motion for the function. */
87 static int nr_inter, nr_spec;
88
89 static int is_cfg_nonregular (void);
90 static bool sched_is_disabled_for_current_region_p (void);
91
92 /* A region is the main entity for interblock scheduling: insns
93 are allowed to move between blocks in the same region, along
94 control flow graph edges, in the 'up' direction. */
95 typedef struct
96 {
97 int rgn_nr_blocks; /* Number of blocks in region. */
98 int rgn_blocks; /* Index into rgn_bb_table of the region's first block. */
99 }
100 region;
101
102 /* Number of regions in the procedure. */
103 static int nr_regions;
104
105 /* Table of region descriptions. */
106 static region *rgn_table;
107
108 /* Array of lists of regions' blocks. */
109 static int *rgn_bb_table;
110
111 /* Topological order of blocks in the region (if b2 is reachable from
112 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
113 always referred to by either block or b, while its topological
114 order name (in the region) is referred to by bb. */
115 static int *block_to_bb;
116
117 /* The number of the region containing a block. */
118 static int *containing_rgn;
119
120 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
121 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
122 #define BLOCK_TO_BB(block) (block_to_bb[block])
123 #define CONTAINING_RGN(block) (containing_rgn[block])
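/* Illustrative only: a standalone sketch (plain C, toy data, names
   prefixed toy_) of how the flat tables above fit together; the real
   layout is rgn_table / rgn_bb_table indexed through RGN_BLOCKS and
   RGN_NR_BLOCKS.  Kept under "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

struct toy_region { int nr_blocks; int first; };

int
main (void)
{
  /* Region 0 = blocks {3, 5, 7}; region 1 = blocks {2, 4}; the block
     lists sit back to back in one flat table.  */
  int toy_rgn_bb_table[] = { 3, 5, 7, 2, 4 };
  struct toy_region toy_rgn_table[] = { { 3, 0 }, { 2, 3 } };
  int rgn, bb;

  for (rgn = 0; rgn < 2; rgn++)
    for (bb = 0; bb < toy_rgn_table[rgn].nr_blocks; bb++)
      printf ("region %d, bb %d -> block %d\n", rgn, bb,
	      toy_rgn_bb_table[toy_rgn_table[rgn].first + bb]);
  return 0;
}
#endif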
124
125 void debug_regions (void);
126 static void find_single_block_region (void);
127 static void find_rgns (void);
128 static bool too_large (int, int *, int *);
129
130 extern void debug_live (int, int);
131
132 /* Blocks of the current region being scheduled. */
133 static int current_nr_blocks;
134 static int current_blocks;
135
136 /* The mapping from bb to block. */
137 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
138
139 /* Target info declarations.
140
141 The block currently being scheduled is referred to as the "target" block,
142 while other blocks in the region from which insns can be moved to the
143 target are called "source" blocks. The candidate structure holds info
144 about such sources: are they valid? Speculative? Etc. */
145 typedef struct
146 {
147 basic_block *first_member;
148 int nr_members;
149 }
150 bblst;
151
152 typedef struct
153 {
154 char is_valid;
155 char is_speculative;
156 int src_prob;
157 bblst split_bbs;
158 bblst update_bbs;
159 }
160 candidate;
161
162 static candidate *candidate_table;
163
164 /* A speculative motion requires checking live information on the path
165 from 'source' to 'target'. The split blocks are those to be checked.
166 After a speculative motion, live information should be modified in
167 the 'update' blocks.
168
169 Lists of split and update blocks for each candidate of the current
170 target are in array bblst_table. */
171 static basic_block *bblst_table;
172 static int bblst_size, bblst_last;
173
174 #define IS_VALID(src) ( candidate_table[src].is_valid )
175 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
176 #define SRC_PROB(src) ( candidate_table[src].src_prob )
177
178 /* The bb being currently scheduled. */
179 static int target_bb;
180
181 /* List of edges. */
182 typedef struct
183 {
184 edge *first_member;
185 int nr_members;
186 }
187 edgelst;
188
189 static edge *edgelst_table;
190 static int edgelst_last;
191
192 static void extract_edgelst (sbitmap, edgelst *);
193
194
195 /* Target info functions. */
196 static void split_edges (int, int, edgelst *);
197 static void compute_trg_info (int);
198 void debug_candidate (int);
199 void debug_candidates (int);
200
201 /* Dominators array: dom[i] contains the sbitmap of dominators of
202 bb i in the region. */
203 static sbitmap *dom;
204
205 /* bb 0 is the only region entry. */
206 #define IS_RGN_ENTRY(bb) (!bb)
207
208 /* Is bb_src dominated by bb_trg?  */
209 #define IS_DOMINATED(bb_src, bb_trg) \
210 ( TEST_BIT (dom[bb_src], bb_trg) )
211
212 /* Probability: Prob[i] is a float in [0, 1] which is the probability
213 of bb i relative to the region entry. */
214 static float *prob;
215
216 /* The probability of bb_src, relative to bb_trg.  Note that while
217 prob[bb] is a float in [0, 1], this macro returns an integer
218 in [0, 100]. */
219 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
220 prob[bb_trg])))
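/* Illustrative only: the arithmetic behind GET_SRC_PROB as a standalone
   sketch (plain C, made-up names).  prob[] is relative to the region
   entry, so the chance that SRC executes given that TRG does is the
   ratio of the two, scaled to an integer percentage.  Kept under
   "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

static int
toy_src_prob (float prob_src, float prob_trg)
{
  return (int) (100.0 * (prob_src / prob_trg));
}

int
main (void)
{
  /* The entry reaches trg with probability 0.8 and src with 0.6, so
     src runs on about 75% of the executions of trg.  */
  printf ("%d\n", toy_src_prob (0.6f, 0.8f));	/* prints 75 */
  return 0;
}
#endif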
221
222 /* Bit-set of edges, where bit i stands for edge i. */
223 typedef sbitmap edgeset;
224
225 /* Number of edges in the region. */
226 static int rgn_nr_edges;
227
228 /* Array of size rgn_nr_edges. */
229 static edge *rgn_edges;
230
231 /* Mapping from each edge in the graph to its number in the rgn. */
232 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
233 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
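/* Illustrative only: the pointer/integer round trip used by EDGE_TO_BIT
   and SET_EDGE_TO_BIT, shown with a stand-in struct rather than GCC's
   edge type.  An otherwise unused 'aux' pointer temporarily stores a
   small integer index by casting through size_t.  Kept under "#if 0"
   so it is never compiled.  */
#if 0
#include <stdio.h>
#include <stddef.h>

struct toy_edge { void *aux; };

#define TOY_EDGE_TO_BIT(e) ((int) (size_t) (e)->aux)
#define TOY_SET_EDGE_TO_BIT(e, nr) ((e)->aux = (void *) (size_t) (nr))

int
main (void)
{
  struct toy_edge e = { 0 };

  TOY_SET_EDGE_TO_BIT (&e, 42);
  printf ("%d\n", TOY_EDGE_TO_BIT (&e));	/* prints 42 */
  return 0;
}
#endif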
234
235 /* The split edges of a source bb are different for each target
236 bb.  In order to compute this efficiently, the 'potential-split edges'
237 are computed for each bb prior to scheduling a region. This is actually
238 the split edges of each bb relative to the region entry.
239
240 pot_split[bb] is the set of potential split edges of bb. */
241 static edgeset *pot_split;
242
243 /* For every bb, a set of its ancestor edges. */
244 static edgeset *ancestor_edges;
245
246 static void compute_dom_prob_ps (int);
247
248 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
249 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
250 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
251
252 /* Parameters affecting the decision of rank_for_schedule().
253 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
254 #define MIN_PROBABILITY 40
255
256 /* Speculative scheduling functions. */
257 static int check_live_1 (int, rtx);
258 static void update_live_1 (int, rtx);
259 static int check_live (rtx, int);
260 static void update_live (rtx, int);
261 static void set_spec_fed (rtx);
262 static int is_pfree (rtx, int, int);
263 static int find_conditional_protection (rtx, int);
264 static int is_conditionally_protected (rtx, int, int);
265 static int is_prisky (rtx, int, int);
266 static int is_exception_free (rtx, int, int);
267
268 static bool sets_likely_spilled (rtx);
269 static void sets_likely_spilled_1 (rtx, rtx, void *);
270 static void add_branch_dependences (rtx, rtx);
271 static void compute_block_backward_dependences (int);
272 void debug_dependencies (void);
273
274 static void init_regions (void);
275 static void schedule_region (int);
276 static rtx concat_INSN_LIST (rtx, rtx);
277 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
278 static void propagate_deps (int, struct deps *);
279 static void free_pending_lists (void);
280
281 /* Functions for construction of the control flow graph. */
282
283 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
284
285 We decide not to build the control flow graph if there is possibly more
286 than one entry to the function, if computed branches exist, if we
287 have nonlocal gotos, or if we have an unreachable loop. */
288
289 static int
290 is_cfg_nonregular (void)
291 {
292 basic_block b;
293 rtx insn;
294 RTX_CODE code;
295
296 /* If we have a label that could be the target of a nonlocal goto, then
297 the cfg is not well structured. */
298 if (nonlocal_goto_handler_labels)
299 return 1;
300
301 /* If we have any forced labels, then the cfg is not well structured. */
302 if (forced_labels)
303 return 1;
304
305 /* If this function has a computed jump, then we consider the cfg
306 not well structured. */
307 if (current_function_has_computed_jump)
308 return 1;
309
310 /* If we have exception handlers, then we consider the cfg not well
311 structured. ?!? We should be able to handle this now that flow.c
312 computes an accurate cfg for EH. */
313 if (current_function_has_exception_handlers ())
314 return 1;
315
316 /* If we have non-jumping insns which refer to labels, then we consider
317 the cfg not well structured. */
318 /* Check for labels referred to by something other than jumps. */
319 FOR_EACH_BB (b)
320 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
321 {
322 code = GET_CODE (insn);
323 if (INSN_P (insn) && code != JUMP_INSN)
324 {
325 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
326
327 if (note
328 && ! (JUMP_P (NEXT_INSN (insn))
329 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
330 XEXP (note, 0))))
331 return 1;
332 }
333
334 if (insn == BB_END (b))
335 break;
336 }
337
338 /* Unreachable loops with more than one basic block are detected
339 during the DFS traversal in find_rgns.
340
341 Unreachable loops with a single block are detected here. This
342 test is redundant with the one in find_rgns, but it's much
343 cheaper to go ahead and catch the trivial case here. */
344 FOR_EACH_BB (b)
345 {
346 if (EDGE_COUNT (b->preds) == 0
347 || (EDGE_PRED (b, 0)->src == b
348 && EDGE_COUNT (b->preds) == 1))
349 return 1;
350 }
351
352 /* All the tests passed. Consider the cfg well structured. */
353 return 0;
354 }
355
356 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
357
358 static void
359 extract_edgelst (sbitmap set, edgelst *el)
360 {
361 int i;
362
363 /* edgelst table space is reused in each call to extract_edgelst. */
364 edgelst_last = 0;
365
366 el->first_member = &edgelst_table[edgelst_last];
367 el->nr_members = 0;
368
369 /* Iterate over each word in the bitset. */
370 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
371 {
372 edgelst_table[edgelst_last++] = rgn_edges[i];
373 el->nr_members++;
374 });
375 }
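/* Illustrative only: the pattern used by extract_edgelst, reduced to a
   single machine word (plain C, toy data).  Walk the set bits of a mask
   and append the corresponding table entries to a scratch array, just
   as the sbitmap walk above fills edgelst_table from rgn_edges.  Kept
   under "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int toy_rgn_edges[] = { 100, 101, 102, 103, 104 };	/* edge "ids" */
  unsigned int set = 0x15;	/* bits 0, 2 and 4 are set */
  int out[5], n = 0, i;

  for (i = 0; i < 5; i++)
    if (set & (1u << i))
      out[n++] = toy_rgn_edges[i];

  for (i = 0; i < n; i++)
    printf ("%d ", out[i]);	/* prints 100 102 104 */
  printf ("\n");
  return 0;
}
#endif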
376
377 /* Functions for the construction of regions. */
378
379 /* Print the regions, for debugging purposes. Callable from debugger. */
380
381 void
382 debug_regions (void)
383 {
384 int rgn, bb;
385
386 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
387 for (rgn = 0; rgn < nr_regions; rgn++)
388 {
389 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
390 rgn_table[rgn].rgn_nr_blocks);
391 fprintf (sched_dump, ";;\tbb/block: ");
392
393 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
394 {
395 current_blocks = RGN_BLOCKS (rgn);
396
397 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
398 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
399 }
400
401 fprintf (sched_dump, "\n\n");
402 }
403 }
404
405 /* Build a single block region for each basic block in the function.
406 This allows for using the same code for interblock and basic block
407 scheduling. */
408
409 static void
410 find_single_block_region (void)
411 {
412 basic_block bb;
413
414 nr_regions = 0;
415
416 FOR_EACH_BB (bb)
417 {
418 rgn_bb_table[nr_regions] = bb->index;
419 RGN_NR_BLOCKS (nr_regions) = 1;
420 RGN_BLOCKS (nr_regions) = nr_regions;
421 CONTAINING_RGN (bb->index) = nr_regions;
422 BLOCK_TO_BB (bb->index) = 0;
423 nr_regions++;
424 }
425 }
426
427 /* Update number of blocks and the estimate for number of insns
428 in the region. Return true if the region is "too large" for interblock
429 scheduling (compile time considerations). */
430
431 static bool
432 too_large (int block, int *num_bbs, int *num_insns)
433 {
434 (*num_bbs)++;
435 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
436 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
437
438 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
439 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
440 }
441
442 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
443 is still an inner loop.  Put in max_hdr[blk] the header of the innermost
444 loop containing blk. */
445 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
446 { \
447 if (max_hdr[blk] == -1) \
448 max_hdr[blk] = hdr; \
449 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
450 RESET_BIT (inner, hdr); \
451 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
452 { \
453 RESET_BIT (inner,max_hdr[blk]); \
454 max_hdr[blk] = hdr; \
455 } \
456 }
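/* Illustrative only: the bookkeeping done by UPDATE_LOOP_RELATIONS,
   rewritten as a standalone function over plain arrays instead of
   sbitmaps (names prefixed toy_ are made up).  A header discovered
   later in the DFS (larger dfs number) is nested more deeply, so it
   becomes the block's innermost header and the shallower header loses
   its "inner" mark.  Kept under "#if 0" so it is never compiled.  */
#if 0
static void
toy_update_loop_relations (int blk, int hdr, int *max_hdr,
			   const int *dfs_nr, char *inner)
{
  if (max_hdr[blk] == -1)
    max_hdr[blk] = hdr;
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])
    /* HDR encloses the loop already recorded, so HDR is not inner.  */
    inner[hdr] = 0;
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])
    {
      /* HDR is nested inside the recorded loop: demote the old header
	 and remember the deeper one.  */
      inner[max_hdr[blk]] = 0;
      max_hdr[blk] = hdr;
    }
}
#endif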
457
458 /* Find regions for interblock scheduling.
459
460 A region for scheduling can be:
461
462 * A loop-free procedure, or
463
464 * A reducible inner loop, or
465
466 * A basic block not contained in any other region.
467
468 ?!? In theory we could build other regions based on extended basic
469 blocks or reverse extended basic blocks. Is it worth the trouble?
470
471 Loop blocks that form a region are put into the region's block list
472 in topological order.
473
474 This procedure stores its results into the following global (ick) variables
475
476 * nr_regions
477 * rgn_table
478 * rgn_bb_table
479 * block_to_bb
480 * containing_rgn
481
482 We use dominator relationships to avoid making regions out of non-reducible
483 loops.
484
485 This procedure needs to be converted to work on pred/succ lists instead
486 of edge tables. That would simplify it somewhat. */
487
488 static void
489 find_rgns (void)
490 {
491 int *max_hdr, *dfs_nr, *degree;
492 char no_loops = 1;
493 int node, child, loop_head, i, head, tail;
494 int count = 0, sp, idx = 0;
495 edge_iterator current_edge;
496 edge_iterator *stack;
497 int num_bbs, num_insns, unreachable;
498 int too_large_failure;
499 basic_block bb;
500
501 /* Note if a block is a natural loop header. */
502 sbitmap header;
503
504 /* Note if a block is a natural inner loop header. */
505 sbitmap inner;
506
507 /* Note if a block is in the block queue. */
508 sbitmap in_queue;
509
510 /* Note if a block is on the DFS stack. */
511 sbitmap in_stack;
512
513 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
514 and a mapping from block to its loop header (if the block is contained
515 in a loop, else -1).
516
517 Store results in HEADER, INNER, and MAX_HDR respectively, these will
518 be used as inputs to the second traversal.
519
520 STACK, SP and DFS_NR are only used during the first traversal. */
521
522 /* Allocate and initialize variables for the first traversal. */
523 max_hdr = xmalloc (last_basic_block * sizeof (int));
524 dfs_nr = xcalloc (last_basic_block, sizeof (int));
525 stack = xmalloc (n_edges * sizeof (edge_iterator));
526
527 inner = sbitmap_alloc (last_basic_block);
528 sbitmap_ones (inner);
529
530 header = sbitmap_alloc (last_basic_block);
531 sbitmap_zero (header);
532
533 in_queue = sbitmap_alloc (last_basic_block);
534 sbitmap_zero (in_queue);
535
536 in_stack = sbitmap_alloc (last_basic_block);
537 sbitmap_zero (in_stack);
538
539 for (i = 0; i < last_basic_block; i++)
540 max_hdr[i] = -1;
541
542 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
543 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
544
545 /* DFS traversal to find inner loops in the cfg. */
546
547 current_edge = ei_start (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest->succs);
548 sp = -1;
549
550 while (1)
551 {
552 if (EDGE_PASSED (current_edge))
553 {
554 /* We have reached a leaf node or a node that was already
555 processed. Pop edges off the stack until we find
556 an edge that has not yet been processed. */
557 while (sp >= 0 && EDGE_PASSED (current_edge))
558 {
559 /* Pop entry off the stack. */
560 current_edge = stack[sp--];
561 node = ei_edge (current_edge)->src->index;
562 gcc_assert (node != ENTRY_BLOCK);
563 child = ei_edge (current_edge)->dest->index;
564 gcc_assert (child != EXIT_BLOCK);
565 RESET_BIT (in_stack, child);
566 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
567 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
568 ei_next (&current_edge);
569 }
570
571 /* See if we have finished the DFS tree traversal. */
572 if (sp < 0 && EDGE_PASSED (current_edge))
573 break;
574
575 /* Nope, continue the traversal with the popped node. */
576 continue;
577 }
578
579 /* Process a node. */
580 node = ei_edge (current_edge)->src->index;
581 gcc_assert (node != ENTRY_BLOCK);
582 SET_BIT (in_stack, node);
583 dfs_nr[node] = ++count;
584
585 /* We don't traverse to the exit block. */
586 child = ei_edge (current_edge)->dest->index;
587 if (child == EXIT_BLOCK)
588 {
589 SET_EDGE_PASSED (current_edge);
590 ei_next (&current_edge);
591 continue;
592 }
593
594 /* If the successor is in the stack, then we've found a loop.
595 Mark the loop; if it is not a natural loop, then it will
596 be rejected during the second traversal. */
597 if (TEST_BIT (in_stack, child))
598 {
599 no_loops = 0;
600 SET_BIT (header, child);
601 UPDATE_LOOP_RELATIONS (node, child);
602 SET_EDGE_PASSED (current_edge);
603 ei_next (&current_edge);
604 continue;
605 }
606
607 /* If the child was already visited, then there is no need to visit
608 it again. Just update the loop relationships and restart
609 with a new edge. */
610 if (dfs_nr[child])
611 {
612 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
613 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
614 SET_EDGE_PASSED (current_edge);
615 ei_next (&current_edge);
616 continue;
617 }
618
619 /* Push an entry on the stack and continue DFS traversal. */
620 stack[++sp] = current_edge;
621 SET_EDGE_PASSED (current_edge);
622 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
623 }
624
625 /* Reset ->aux field used by EDGE_PASSED. */
626 FOR_ALL_BB (bb)
627 {
628 edge_iterator ei;
629 edge e;
630 FOR_EACH_EDGE (e, ei, bb->succs)
631 e->aux = NULL;
632 }
633
634
635 /* Another check for unreachable blocks. The earlier test in
636 is_cfg_nonregular only finds unreachable blocks that do not
637 form a loop.
638
639 The DFS traversal will mark every block that is reachable from
640 the entry node by placing a nonzero value in dfs_nr. Thus if
641 dfs_nr is zero for any block, then it must be unreachable. */
642 unreachable = 0;
643 FOR_EACH_BB (bb)
644 if (dfs_nr[bb->index] == 0)
645 {
646 unreachable = 1;
647 break;
648 }
649
650 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
651 to hold degree counts. */
652 degree = dfs_nr;
653
654 FOR_EACH_BB (bb)
655 degree[bb->index] = EDGE_COUNT (bb->preds);
656
657 /* Do not perform region scheduling if there are any unreachable
658 blocks. */
659 if (!unreachable)
660 {
661 int *queue;
662
663 if (no_loops)
664 SET_BIT (header, 0);
665
666 /* Second traversal: find reducible inner loops and topologically sort
667 the blocks of each region. */
668
669 queue = xmalloc (n_basic_blocks * sizeof (int));
670
671 /* Find blocks which are inner loop headers. We still have non-reducible
672 loops to consider at this point. */
673 FOR_EACH_BB (bb)
674 {
675 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
676 {
677 edge e;
678 edge_iterator ei;
679 basic_block jbb;
680
681 /* Now check that the loop is reducible.  We do this separately
682 from finding inner loops so that we do not find a reducible
683 loop which contains an inner non-reducible loop.
684
685 A simple way to find reducible/natural loops is to verify
686 that each block in the loop is dominated by the loop
687 header.
688
689 If there exists a block that is not dominated by the loop
690 header, then the block is reachable from outside the loop
691 and thus the loop is not a natural loop. */
692 FOR_EACH_BB (jbb)
693 {
694 /* First identify blocks in the loop, except for the loop
695 entry block. */
696 if (bb->index == max_hdr[jbb->index] && bb != jbb)
697 {
698 /* Now verify that the block is dominated by the loop
699 header. */
700 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
701 break;
702 }
703 }
704
705 /* If we exited the loop early, then BB is the header of
706 a non-reducible loop and we should quit processing it
707 now. */
708 if (jbb != EXIT_BLOCK_PTR)
709 continue;
710
711 /* BB is the header of an inner loop, or block 0 in a subroutine
712 with no loops at all. */
713 head = tail = -1;
714 too_large_failure = 0;
715 loop_head = max_hdr[bb->index];
716
717 /* Decrease degree of all BB's successors for topological
718 ordering. */
719 FOR_EACH_EDGE (e, ei, bb->succs)
720 if (e->dest != EXIT_BLOCK_PTR)
721 --degree[e->dest->index];
722
723 /* Estimate # insns, and count # blocks in the region. */
724 num_bbs = 1;
725 num_insns = (INSN_LUID (BB_END (bb))
726 - INSN_LUID (BB_HEAD (bb)));
727
728 /* Find all loop latches (blocks with back edges to the loop
729 header), or all the leaf blocks if the cfg has no loops.
730
731 Place those blocks into the queue. */
732 if (no_loops)
733 {
734 FOR_EACH_BB (jbb)
735 /* Leaf nodes have only a single successor which must
736 be EXIT_BLOCK. */
737 if (EDGE_COUNT (jbb->succs) == 1
738 && EDGE_SUCC (jbb, 0)->dest == EXIT_BLOCK_PTR)
739 {
740 queue[++tail] = jbb->index;
741 SET_BIT (in_queue, jbb->index);
742
743 if (too_large (jbb->index, &num_bbs, &num_insns))
744 {
745 too_large_failure = 1;
746 break;
747 }
748 }
749 }
750 else
751 {
752 edge e;
753
754 FOR_EACH_EDGE (e, ei, bb->preds)
755 {
756 if (e->src == ENTRY_BLOCK_PTR)
757 continue;
758
759 node = e->src->index;
760
761 if (max_hdr[node] == loop_head && node != bb->index)
762 {
763 /* This is a loop latch. */
764 queue[++tail] = node;
765 SET_BIT (in_queue, node);
766
767 if (too_large (node, &num_bbs, &num_insns))
768 {
769 too_large_failure = 1;
770 break;
771 }
772 }
773 }
774 }
775
776 /* Now add all the blocks in the loop to the queue.
777
778 We know the loop is a natural loop; however the algorithm
779 above will not always mark certain blocks as being in the
780 loop. Consider:
781 node children
782 a b,c
783 b c
784 c a,d
785 d b
786
787 The algorithm in the DFS traversal may not mark B & D as part
788 of the loop (i.e. they will not have max_hdr set to A).
789
790 We know they cannot be loop latches (else they would have
791 had max_hdr set since they'd have a backedge to a dominator
792 block). So we don't need them on the initial queue.
793
794 We know they are part of the loop because they are dominated
795 by the loop header and can be reached by a backwards walk of
796 the edges starting with nodes on the initial queue.
797
798 It is safe and desirable to include those nodes in the
799 loop/scheduling region. To do so we would need to decrease
800 the degree of a node if it is the target of a backedge
801 within the loop itself as the node is placed in the queue.
802
803 We do not do this because I'm not sure that the actual
804 scheduling code will properly handle this case. ?!? */
805
806 while (head < tail && !too_large_failure)
807 {
808 edge e;
809 child = queue[++head];
810
811 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
812 {
813 node = e->src->index;
814
815 /* See discussion above about nodes not marked as in
816 this loop during the initial DFS traversal. */
817 if (e->src == ENTRY_BLOCK_PTR
818 || max_hdr[node] != loop_head)
819 {
820 tail = -1;
821 break;
822 }
823 else if (!TEST_BIT (in_queue, node) && node != bb->index)
824 {
825 queue[++tail] = node;
826 SET_BIT (in_queue, node);
827
828 if (too_large (node, &num_bbs, &num_insns))
829 {
830 too_large_failure = 1;
831 break;
832 }
833 }
834 }
835 }
836
837 if (tail >= 0 && !too_large_failure)
838 {
839 /* Place the loop header into list of region blocks. */
840 degree[bb->index] = -1;
841 rgn_bb_table[idx] = bb->index;
842 RGN_NR_BLOCKS (nr_regions) = num_bbs;
843 RGN_BLOCKS (nr_regions) = idx++;
844 CONTAINING_RGN (bb->index) = nr_regions;
845 BLOCK_TO_BB (bb->index) = count = 0;
846
847 /* Remove blocks from queue[] when their in degree
848 becomes zero. Repeat until no blocks are left on the
849 list. This produces a topological list of blocks in
850 the region. */
851 while (tail >= 0)
852 {
853 if (head < 0)
854 head = tail;
855 child = queue[head];
856 if (degree[child] == 0)
857 {
858 edge e;
859
860 degree[child] = -1;
861 rgn_bb_table[idx++] = child;
862 BLOCK_TO_BB (child) = ++count;
863 CONTAINING_RGN (child) = nr_regions;
864 queue[head] = queue[tail--];
865
866 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
867 if (e->dest != EXIT_BLOCK_PTR)
868 --degree[e->dest->index];
869 }
870 else
871 --head;
872 }
873 ++nr_regions;
874 }
875 }
876 }
877 free (queue);
878 }
879
880 /* Any block that did not end up in a region is placed into a region
881 by itself. */
882 FOR_EACH_BB (bb)
883 if (degree[bb->index] >= 0)
884 {
885 rgn_bb_table[idx] = bb->index;
886 RGN_NR_BLOCKS (nr_regions) = 1;
887 RGN_BLOCKS (nr_regions) = idx++;
888 CONTAINING_RGN (bb->index) = nr_regions++;
889 BLOCK_TO_BB (bb->index) = 0;
890 }
891
892 free (max_hdr);
893 free (dfs_nr);
894 free (stack);
895 sbitmap_free (header);
896 sbitmap_free (inner);
897 sbitmap_free (in_queue);
898 sbitmap_free (in_stack);
899 }
900
901 /* Functions for regions scheduling information. */
902
903 /* Compute dominators, probability, and potential-split-edges of bb.
904 Assume that these values were already computed for bb's predecessors. */
905
906 static void
907 compute_dom_prob_ps (int bb)
908 {
909 int pred_bb;
910 int nr_out_edges, nr_rgn_out_edges;
911 edge_iterator in_ei, out_ei;
912 edge in_edge, out_edge;
913
914 prob[bb] = 0.0;
915 if (IS_RGN_ENTRY (bb))
916 {
917 SET_BIT (dom[bb], 0);
918 prob[bb] = 1.0;
919 return;
920 }
921
922 /* Initialize dom[bb] to '111..1'. */
923 sbitmap_ones (dom[bb]);
924
925 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
926 {
927 if (in_edge->src == ENTRY_BLOCK_PTR)
928 continue;
929
930 pred_bb = BLOCK_TO_BB (in_edge->src->index);
931 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
932 sbitmap_a_or_b (ancestor_edges[bb],
933 ancestor_edges[bb], ancestor_edges[pred_bb]);
934
935 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
936
937 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
938
939 nr_out_edges = 0;
940 nr_rgn_out_edges = 0;
941
942 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
943 {
944 ++nr_out_edges;
945
946 /* The successor doesn't belong in the region? */
947 if (out_edge->dest != EXIT_BLOCK_PTR
948 && CONTAINING_RGN (out_edge->dest->index)
949 != CONTAINING_RGN (BB_TO_BLOCK (bb)))
950 ++nr_rgn_out_edges;
951
952 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
953 }
954
955 /* Now nr_rgn_out_edges is the number of region-exit edges from
956 pred, and nr_out_edges will be the number of pred out edges
957 not leaving the region. */
958 nr_out_edges -= nr_rgn_out_edges;
959 if (nr_rgn_out_edges > 0)
960 prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
961 else
962 prob[bb] += prob[pred_bb] / nr_out_edges;
963 }
964
965 SET_BIT (dom[bb], bb);
966 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
967
968 if (sched_verbose >= 2)
969 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
970 (int) (100.0 * prob[bb]));
971 }
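/* Illustrative only: the incremental dominator update performed by
   compute_dom_prob_ps, reduced to one machine word per block (plain C,
   toy diamond CFG).  Visiting blocks in topological order, a block's
   dominator set is the intersection of its predecessors' sets plus the
   block itself, and the region entry dominates only itself.  Kept
   under "#if 0" so it is never compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Diamond: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3 (-1 means "no pred").  */
  int preds[4][2] = { { -1, -1 }, { 0, -1 }, { 0, -1 }, { 1, 2 } };
  unsigned int dom[4];
  int bb, p;

  for (bb = 0; bb < 4; bb++)
    {
      if (bb == 0)
	{
	  dom[bb] = 1u << 0;		/* the entry dominates only itself */
	  continue;
	}
      dom[bb] = ~0u;			/* start from "all blocks" */
      for (p = 0; p < 2; p++)
	if (preds[bb][p] >= 0)
	  dom[bb] &= dom[preds[bb][p]];
      dom[bb] |= 1u << bb;		/* every block dominates itself */
    }

  for (bb = 0; bb < 4; bb++)
    printf ("dom[%d] = 0x%x\n", bb, dom[bb]);	/* dom[3] = 0x9 */
  return 0;
}
#endif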
972
973 /* Functions for target info. */
974
975 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
976 Note that bb_trg dominates bb_src. */
977
978 static void
979 split_edges (int bb_src, int bb_trg, edgelst *bl)
980 {
981 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
982 sbitmap_copy (src, pot_split[bb_src]);
983
984 sbitmap_difference (src, src, pot_split[bb_trg]);
985 extract_edgelst (src, bl);
986 sbitmap_free (src);
987 }
988
989 /* Find the valid candidate-source-blocks for the target block TRG, compute
990 their probability, and check if they are speculative or not.
991 For speculative sources, compute their update-blocks and split-blocks. */
992
993 static void
994 compute_trg_info (int trg)
995 {
996 candidate *sp;
997 edgelst el;
998 int i, j, k, update_idx;
999 basic_block block;
1000 sbitmap visited;
1001 edge_iterator ei;
1002 edge e;
1003
1004 /* Define some of the fields for the target bb as well. */
1005 sp = candidate_table + trg;
1006 sp->is_valid = 1;
1007 sp->is_speculative = 0;
1008 sp->src_prob = 100;
1009
1010 visited = sbitmap_alloc (last_basic_block - (INVALID_BLOCK + 1));
1011
1012 for (i = trg + 1; i < current_nr_blocks; i++)
1013 {
1014 sp = candidate_table + i;
1015
1016 sp->is_valid = IS_DOMINATED (i, trg);
1017 if (sp->is_valid)
1018 {
1019 sp->src_prob = GET_SRC_PROB (i, trg);
1020 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1021 }
1022
1023 if (sp->is_valid)
1024 {
1025 split_edges (i, trg, &el);
1026 sp->is_speculative = (el.nr_members) ? 1 : 0;
1027 if (sp->is_speculative && !flag_schedule_speculative)
1028 sp->is_valid = 0;
1029 }
1030
1031 if (sp->is_valid)
1032 {
1033 /* Compute split blocks and store them in bblst_table.
1034 The TO block of every split edge is a split block. */
1035 sp->split_bbs.first_member = &bblst_table[bblst_last];
1036 sp->split_bbs.nr_members = el.nr_members;
1037 for (j = 0; j < el.nr_members; bblst_last++, j++)
1038 bblst_table[bblst_last] = el.first_member[j]->dest;
1039 sp->update_bbs.first_member = &bblst_table[bblst_last];
1040
1041 /* Compute update blocks and store them in bblst_table.
1042 For every split edge, look at the FROM block, and check
1043 all out edges. For each out edge that is not a split edge,
1044 add the TO block to the update block list. This list can end
1045 up with a lot of duplicates. We need to weed them out to avoid
1046 overrunning the end of the bblst_table. */
1047
1048 update_idx = 0;
1049 sbitmap_zero (visited);
1050 for (j = 0; j < el.nr_members; j++)
1051 {
1052 block = el.first_member[j]->src;
1053 FOR_EACH_EDGE (e, ei, block->succs)
1054 {
1055 if (!TEST_BIT (visited,
1056 e->dest->index - (INVALID_BLOCK + 1)))
1057 {
1058 for (k = 0; k < el.nr_members; k++)
1059 if (e == el.first_member[k])
1060 break;
1061
1062 if (k >= el.nr_members)
1063 {
1064 bblst_table[bblst_last++] = e->dest;
1065 SET_BIT (visited,
1066 e->dest->index - (INVALID_BLOCK + 1));
1067 update_idx++;
1068 }
1069 }
1070 }
1071 }
1072 sp->update_bbs.nr_members = update_idx;
1073
1074 /* Make sure we didn't overrun the end of bblst_table. */
1075 gcc_assert (bblst_last <= bblst_size);
1076 }
1077 else
1078 {
1079 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1080
1081 sp->is_speculative = 0;
1082 sp->src_prob = 0;
1083 }
1084 }
1085
1086 sbitmap_free (visited);
1087 }
1088
1089 /* Print candidates info, for debugging purposes. Callable from debugger. */
1090
1091 void
1092 debug_candidate (int i)
1093 {
1094 if (!candidate_table[i].is_valid)
1095 return;
1096
1097 if (candidate_table[i].is_speculative)
1098 {
1099 int j;
1100 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1101
1102 fprintf (sched_dump, "split path: ");
1103 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1104 {
1105 int b = candidate_table[i].split_bbs.first_member[j]->index;
1106
1107 fprintf (sched_dump, " %d ", b);
1108 }
1109 fprintf (sched_dump, "\n");
1110
1111 fprintf (sched_dump, "update path: ");
1112 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1113 {
1114 int b = candidate_table[i].update_bbs.first_member[j]->index;
1115
1116 fprintf (sched_dump, " %d ", b);
1117 }
1118 fprintf (sched_dump, "\n");
1119 }
1120 else
1121 {
1122 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1123 }
1124 }
1125
1126 /* Print candidates info, for debugging purposes. Callable from debugger. */
1127
1128 void
1129 debug_candidates (int trg)
1130 {
1131 int i;
1132
1133 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1134 BB_TO_BLOCK (trg), trg);
1135 for (i = trg + 1; i < current_nr_blocks; i++)
1136 debug_candidate (i);
1137 }
1138
1139 /* Functions for speculative scheduling. */
1140
1141 /* Return 0 if x is a set of a register live at the beginning of one
1142 of the split-blocks of src, otherwise return 1. */
1143
1144 static int
1145 check_live_1 (int src, rtx x)
1146 {
1147 int i;
1148 int regno;
1149 rtx reg = SET_DEST (x);
1150
1151 if (reg == 0)
1152 return 1;
1153
1154 while (GET_CODE (reg) == SUBREG
1155 || GET_CODE (reg) == ZERO_EXTRACT
1156 || GET_CODE (reg) == STRICT_LOW_PART)
1157 reg = XEXP (reg, 0);
1158
1159 if (GET_CODE (reg) == PARALLEL)
1160 {
1161 int i;
1162
1163 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1164 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1165 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1166 return 1;
1167
1168 return 0;
1169 }
1170
1171 if (!REG_P (reg))
1172 return 1;
1173
1174 regno = REGNO (reg);
1175
1176 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1177 {
1178 /* Global registers are assumed live. */
1179 return 0;
1180 }
1181 else
1182 {
1183 if (regno < FIRST_PSEUDO_REGISTER)
1184 {
1185 /* Check for hard registers. */
1186 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1187 while (--j >= 0)
1188 {
1189 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1190 {
1191 basic_block b = candidate_table[src].split_bbs.first_member[i];
1192
1193 if (REGNO_REG_SET_P (b->global_live_at_start, regno + j))
1194 {
1195 return 0;
1196 }
1197 }
1198 }
1199 }
1200 else
1201 {
1202 /* Check for pseudo registers. */
1203 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1204 {
1205 basic_block b = candidate_table[src].split_bbs.first_member[i];
1206
1207 if (REGNO_REG_SET_P (b->global_live_at_start, regno))
1208 {
1209 return 0;
1210 }
1211 }
1212 }
1213 }
1214
1215 return 1;
1216 }
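/* Illustrative only: the shape of the hard-register test above, as a
   standalone sketch (plain C, made-up names, live sets reduced to one
   machine word per block and fewer than 32 registers assumed).  A hard
   register may span several consecutive register numbers, and the set
   is unsafe if any of those registers is live at the start of any
   split block.  Kept under "#if 0" so it is never compiled.  */
#if 0
static int
toy_set_is_safe (int regno, int nregs,
		 const unsigned int *live_at_start, int nr_split_blocks)
{
  int j, b;

  for (j = 0; j < nregs; j++)
    for (b = 0; b < nr_split_blocks; b++)
      if (live_at_start[b] & (1u << (regno + j)))
	return 0;	/* some word of the register is live: unsafe */

  return 1;		/* no split block needs the register: safe */
}
#endif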
1217
1218 /* If x is a set of a register R, mark that R is alive in the beginning
1219 of every update-block of src. */
1220
1221 static void
1222 update_live_1 (int src, rtx x)
1223 {
1224 int i;
1225 int regno;
1226 rtx reg = SET_DEST (x);
1227
1228 if (reg == 0)
1229 return;
1230
1231 while (GET_CODE (reg) == SUBREG
1232 || GET_CODE (reg) == ZERO_EXTRACT
1233 || GET_CODE (reg) == STRICT_LOW_PART)
1234 reg = XEXP (reg, 0);
1235
1236 if (GET_CODE (reg) == PARALLEL)
1237 {
1238 int i;
1239
1240 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1241 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1242 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1243
1244 return;
1245 }
1246
1247 if (!REG_P (reg))
1248 return;
1249
1250 /* Global registers are always live, so the code below does not apply
1251 to them. */
1252
1253 regno = REGNO (reg);
1254
1255 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1256 {
1257 if (regno < FIRST_PSEUDO_REGISTER)
1258 {
1259 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1260 while (--j >= 0)
1261 {
1262 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1263 {
1264 basic_block b = candidate_table[src].update_bbs.first_member[i];
1265
1266 SET_REGNO_REG_SET (b->global_live_at_start, regno + j);
1267 }
1268 }
1269 }
1270 else
1271 {
1272 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1273 {
1274 basic_block b = candidate_table[src].update_bbs.first_member[i];
1275
1276 SET_REGNO_REG_SET (b->global_live_at_start, regno);
1277 }
1278 }
1279 }
1280 }
1281
1282 /* Return 1 if insn can be speculatively moved from block src to trg,
1283 otherwise return 0. Called before first insertion of insn to
1284 ready-list or before the scheduling. */
1285
1286 static int
1287 check_live (rtx insn, int src)
1288 {
1289 /* Find the registers set by instruction. */
1290 if (GET_CODE (PATTERN (insn)) == SET
1291 || GET_CODE (PATTERN (insn)) == CLOBBER)
1292 return check_live_1 (src, PATTERN (insn));
1293 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1294 {
1295 int j;
1296 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1297 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1298 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1299 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1300 return 0;
1301
1302 return 1;
1303 }
1304
1305 return 1;
1306 }
1307
1308 /* Update the live registers info after insn was moved speculatively from
1309 block src to trg. */
1310
1311 static void
1312 update_live (rtx insn, int src)
1313 {
1314 /* Find the registers set by instruction. */
1315 if (GET_CODE (PATTERN (insn)) == SET
1316 || GET_CODE (PATTERN (insn)) == CLOBBER)
1317 update_live_1 (src, PATTERN (insn));
1318 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1319 {
1320 int j;
1321 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1322 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1323 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1324 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1325 }
1326 }
1327
1328 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1329 #define IS_REACHABLE(bb_from, bb_to) \
1330 (bb_from == bb_to \
1331 || IS_RGN_ENTRY (bb_from) \
1332 || (TEST_BIT (ancestor_edges[bb_to], \
1333 EDGE_TO_BIT (EDGE_PRED (BASIC_BLOCK (BB_TO_BLOCK (bb_from)), 0)))))
1334
1335 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1336
1337 static void
1338 set_spec_fed (rtx load_insn)
1339 {
1340 rtx link;
1341
1342 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1343 if (GET_MODE (link) == VOIDmode)
1344 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1345 } /* set_spec_fed */
1346
1347 /* On the path from the insn to load_insn_bb, find a conditional
1348 branch depending on insn that guards the speculative load. */
1349
1350 static int
1351 find_conditional_protection (rtx insn, int load_insn_bb)
1352 {
1353 rtx link;
1354
1355 /* Iterate through DEF-USE forward dependences. */
1356 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1357 {
1358 rtx next = XEXP (link, 0);
1359 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1360 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1361 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1362 && load_insn_bb != INSN_BB (next)
1363 && GET_MODE (link) == VOIDmode
1364 && (JUMP_P (next)
1365 || find_conditional_protection (next, load_insn_bb)))
1366 return 1;
1367 }
1368 return 0;
1369 } /* find_conditional_protection */
1370
1371 /* Return 1 if an insn1 that participates in the computation
1372 of load_insn's address also feeds a conditional branch that
1373 guards load_insn.  This is true if we find the two DEF-USE
1374 chains:
1375 insn1 -> ... -> conditional-branch
1376 insn1 -> ... -> load_insn,
1377 and if a flow path exists:
1378 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1379 and if insn1 is on the path
1380 region-entry -> ... -> bb_trg -> ... load_insn.
1381
1382 Locate insn1 by climbing on LOG_LINKS from load_insn.
1383 Locate the branch by following INSN_DEPEND from insn1. */
1384
1385 static int
1386 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1387 {
1388 rtx link;
1389
1390 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1391 {
1392 rtx insn1 = XEXP (link, 0);
1393
1394 /* Must be a DEF-USE dependence upon non-branch. */
1395 if (GET_MODE (link) != VOIDmode
1396 || JUMP_P (insn1))
1397 continue;
1398
1399 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1400 if (INSN_BB (insn1) == bb_src
1401 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1402 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1403 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1404 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1405 continue;
1406
1407 /* Now search for the conditional-branch. */
1408 if (find_conditional_protection (insn1, bb_src))
1409 return 1;
1410
1411 /* Recursive step: search another insn1, "above" current insn1. */
1412 return is_conditionally_protected (insn1, bb_src, bb_trg);
1413 }
1414
1415 /* The chain does not exist. */
1416 return 0;
1417 } /* is_conditionally_protected */
1418
1419 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1420 load_insn can move speculatively from bb_src to bb_trg. All the
1421 following must hold:
1422
1423 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1424 (2) load_insn and load1 have a def-use dependence upon
1425 the same insn 'insn1'.
1426 (3) either load2 is in bb_trg, or:
1427 - there's only one split-block, and
1428 - load1 is on the escape path, and
1429
1430 From all these we can conclude that the two loads access memory
1431 addresses that differ at most by a constant, and hence if moving
1432 load_insn would cause an exception, it would have been caused by
1433 load2 anyhow. */
1434
1435 static int
1436 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1437 {
1438 rtx back_link;
1439 candidate *candp = candidate_table + bb_src;
1440
1441 if (candp->split_bbs.nr_members != 1)
1442 /* Must have exactly one escape block. */
1443 return 0;
1444
1445 for (back_link = LOG_LINKS (load_insn);
1446 back_link; back_link = XEXP (back_link, 1))
1447 {
1448 rtx insn1 = XEXP (back_link, 0);
1449
1450 if (GET_MODE (back_link) == VOIDmode)
1451 {
1452 /* Found a DEF-USE dependence (insn1, load_insn). */
1453 rtx fore_link;
1454
1455 for (fore_link = INSN_DEPEND (insn1);
1456 fore_link; fore_link = XEXP (fore_link, 1))
1457 {
1458 rtx insn2 = XEXP (fore_link, 0);
1459 if (GET_MODE (fore_link) == VOIDmode)
1460 {
1461 /* Found a DEF-USE dependence (insn1, insn2). */
1462 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1463 /* insn2 not guaranteed to be a 1 base reg load. */
1464 continue;
1465
1466 if (INSN_BB (insn2) == bb_trg)
1467 /* insn2 is the similar load, in the target block. */
1468 return 1;
1469
1470 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1471 /* insn2 is a similar load, in a split-block. */
1472 return 1;
1473 }
1474 }
1475 }
1476 }
1477
1478 /* Couldn't find a similar load. */
1479 return 0;
1480 } /* is_pfree */
1481
1482 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1483 a load moved speculatively, or if load_insn is protected by
1484 a compare on load_insn's address). */
1485
1486 static int
1487 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1488 {
1489 if (FED_BY_SPEC_LOAD (load_insn))
1490 return 1;
1491
1492 if (LOG_LINKS (load_insn) == NULL)
1493 /* Dependence may 'hide' out of the region. */
1494 return 1;
1495
1496 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1497 return 1;
1498
1499 return 0;
1500 }
1501
1502 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1503 Return 1 if insn is exception-free (and the motion is valid)
1504 and 0 otherwise. */
1505
1506 static int
1507 is_exception_free (rtx insn, int bb_src, int bb_trg)
1508 {
1509 int insn_class = haifa_classify_insn (insn);
1510
1511 /* Handle non-load insns. */
1512 switch (insn_class)
1513 {
1514 case TRAP_FREE:
1515 return 1;
1516 case TRAP_RISKY:
1517 return 0;
1518 default:;
1519 }
1520
1521 /* Handle loads. */
1522 if (!flag_schedule_speculative_load)
1523 return 0;
1524 IS_LOAD_INSN (insn) = 1;
1525 switch (insn_class)
1526 {
1527 case IFREE:
1528 return (1);
1529 case IRISKY:
1530 return 0;
1531 case PFREE_CANDIDATE:
1532 if (is_pfree (insn, bb_src, bb_trg))
1533 return 1;
1534 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1535 case PRISKY_CANDIDATE:
1536 if (!flag_schedule_speculative_load_dangerous
1537 || is_prisky (insn, bb_src, bb_trg))
1538 return 0;
1539 break;
1540 default:;
1541 }
1542
1543 return flag_schedule_speculative_load_dangerous;
1544 }
1545 \f
1546 /* The number of insns from the current block scheduled so far. */
1547 static int sched_target_n_insns;
1548 /* The number of insns from the current block to be scheduled in total. */
1549 static int target_n_insns;
1550 /* The number of insns from the entire region scheduled so far. */
1551 static int sched_n_insns;
1552 /* Nonzero if the last scheduled insn was a jump. */
1553 static int last_was_jump;
1554
1555 /* Implementations of the sched_info functions for region scheduling. */
1556 static void init_ready_list (struct ready_list *);
1557 static int can_schedule_ready_p (rtx);
1558 static int new_ready (rtx);
1559 static int schedule_more_p (void);
1560 static const char *rgn_print_insn (rtx, int);
1561 static int rgn_rank (rtx, rtx);
1562 static int contributes_to_priority (rtx, rtx);
1563 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1564
1565 /* Return nonzero if there are more insns that should be scheduled. */
1566
1567 static int
1568 schedule_more_p (void)
1569 {
1570 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1571 }
1572
1573 /* Add all insns that are initially ready to the ready list READY. Called
1574 once before scheduling a set of insns. */
1575
1576 static void
1577 init_ready_list (struct ready_list *ready)
1578 {
1579 rtx prev_head = current_sched_info->prev_head;
1580 rtx next_tail = current_sched_info->next_tail;
1581 int bb_src;
1582 rtx insn;
1583
1584 target_n_insns = 0;
1585 sched_target_n_insns = 0;
1586 sched_n_insns = 0;
1587 last_was_jump = 0;
1588
1589 /* Print debugging information. */
1590 if (sched_verbose >= 5)
1591 debug_dependencies ();
1592
1593 /* Prepare current target block info. */
1594 if (current_nr_blocks > 1)
1595 {
1596 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1597
1598 bblst_last = 0;
1599 /* bblst_table holds split blocks and update blocks for each block after
1600 the current one in the region.  Split blocks and update blocks are
1601 the TO blocks of region edges, so there can be at most rgn_nr_edges
1602 of them. */
1603 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1604 bblst_table = xmalloc (bblst_size * sizeof (basic_block));
1605
1606 edgelst_last = 0;
1607 edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge));
1608
1609 compute_trg_info (target_bb);
1610 }
1611
1612 /* Initialize ready list with all 'ready' insns in target block.
1613 Count number of insns in the target block being scheduled. */
1614 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1615 {
1616 if (INSN_DEP_COUNT (insn) == 0)
1617 {
1618 ready_add (ready, insn);
1619
1620 if (targetm.sched.adjust_priority)
1621 INSN_PRIORITY (insn) =
1622 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1623 }
1624 target_n_insns++;
1625 }
1626
1627 /* Add to ready list all 'ready' insns in valid source blocks.
1628 For speculative insns, check-live, exception-free, and
1629 issue-delay. */
1630 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1631 if (IS_VALID (bb_src))
1632 {
1633 rtx src_head;
1634 rtx src_next_tail;
1635 rtx tail, head;
1636
1637 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1638 src_next_tail = NEXT_INSN (tail);
1639 src_head = head;
1640
1641 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1642 {
1643 if (! INSN_P (insn))
1644 continue;
1645
1646 if (!CANT_MOVE (insn)
1647 && (!IS_SPECULATIVE_INSN (insn)
1648 || ((recog_memoized (insn) < 0
1649 || min_insn_conflict_delay (curr_state,
1650 insn, insn) <= 3)
1651 && check_live (insn, bb_src)
1652 && is_exception_free (insn, bb_src, target_bb))))
1653 if (INSN_DEP_COUNT (insn) == 0)
1654 {
1655 ready_add (ready, insn);
1656
1657 if (targetm.sched.adjust_priority)
1658 INSN_PRIORITY (insn) =
1659 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1660 }
1661 }
1662 }
1663 }
1664
1665 /* Called after taking INSN from the ready list.  Return nonzero if this
1666 insn can be scheduled, zero if we should silently discard it. */
1667
1668 static int
1669 can_schedule_ready_p (rtx insn)
1670 {
1671 if (JUMP_P (insn))
1672 last_was_jump = 1;
1673
1674 /* An interblock motion? */
1675 if (INSN_BB (insn) != target_bb)
1676 {
1677 basic_block b1;
1678
1679 if (IS_SPECULATIVE_INSN (insn))
1680 {
1681 if (!check_live (insn, INSN_BB (insn)))
1682 return 0;
1683 update_live (insn, INSN_BB (insn));
1684
1685 /* For speculative load, mark insns fed by it. */
1686 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1687 set_spec_fed (insn);
1688
1689 nr_spec++;
1690 }
1691 nr_inter++;
1692
1693 /* Update source block boundaries. */
1694 b1 = BLOCK_FOR_INSN (insn);
1695 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1696 {
1697 /* We moved all the insns in the basic block.
1698 Emit a note after the last insn and update the
1699 begin/end boundaries to point to the note. */
1700 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1701 BB_HEAD (b1) = note;
1702 BB_END (b1) = note;
1703 }
1704 else if (insn == BB_END (b1))
1705 {
1706 /* We took insns from the end of the basic block,
1707 so update the end of block boundary so that it
1708 points to the first insn we did not move. */
1709 BB_END (b1) = PREV_INSN (insn);
1710 }
1711 else if (insn == BB_HEAD (b1))
1712 {
1713 /* We took insns from the start of the basic block,
1714 so update the start of block boundary so that
1715 it points to the first insn we did not move. */
1716 BB_HEAD (b1) = NEXT_INSN (insn);
1717 }
1718 }
1719 else
1720 {
1721 /* In block motion. */
1722 sched_target_n_insns++;
1723 }
1724 sched_n_insns++;
1725
1726 return 1;
1727 }
1728
1729 /* Called after INSN has all its dependencies resolved. Return nonzero
1730 if it should be moved to the ready list or the queue, or zero if we
1731 should silently discard it. */
1732 static int
1733 new_ready (rtx next)
1734 {
1735 /* For speculative insns, before inserting to ready/queue,
1736 check live, exception-free, and issue-delay. */
1737 if (INSN_BB (next) != target_bb
1738 && (!IS_VALID (INSN_BB (next))
1739 || CANT_MOVE (next)
1740 || (IS_SPECULATIVE_INSN (next)
1741 && ((recog_memoized (next) >= 0
1742 && min_insn_conflict_delay (curr_state, next, next) > 3)
1743 || !check_live (next, INSN_BB (next))
1744 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1745 return 0;
1746 return 1;
1747 }
1748
1749 /* Return a string that contains the insn uid and optionally anything else
1750 necessary to identify this insn in an output. It's valid to use a
1751 static buffer for this. The ALIGNED parameter should cause the string
1752 to be formatted so that multiple output lines will line up nicely. */
1753
1754 static const char *
1755 rgn_print_insn (rtx insn, int aligned)
1756 {
1757 static char tmp[80];
1758
1759 if (aligned)
1760 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1761 else
1762 {
1763 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1764 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1765 else
1766 sprintf (tmp, "%d", INSN_UID (insn));
1767 }
1768 return tmp;
1769 }
1770
1771 /* Compare priority of two insns. Return a positive number if the second
1772 insn is to be preferred for scheduling, and a negative one if the first
1773 is to be preferred. Zero if they are equally good. */
1774
1775 static int
1776 rgn_rank (rtx insn1, rtx insn2)
1777 {
1778 /* Some comparisons make sense in interblock scheduling only. */
1779 if (INSN_BB (insn1) != INSN_BB (insn2))
1780 {
1781 int spec_val, prob_val;
1782
1783 /* Prefer an inblock motion over an interblock motion. */
1784 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1785 return 1;
1786 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1787 return -1;
1788
1789 /* Prefer a useful motion over a speculative one. */
1790 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1791 if (spec_val)
1792 return spec_val;
1793
1794 /* Prefer a more probable (speculative) insn. */
1795 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1796 if (prob_val)
1797 return prob_val;
1798 }
1799 return 0;
1800 }
1801
1802 /* NEXT is an instruction that depends on INSN (a backward dependence);
1803 return nonzero if we should include this dependence in priority
1804 calculations. */
1805
1806 static int
1807 contributes_to_priority (rtx next, rtx insn)
1808 {
1809 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1810 }
1811
1812 /* INSN is a JUMP_INSN, COND_EXEC is the set of registers that are
1813 conditionally set before INSN. Store the set of registers that
1814 must be considered as used by this jump in USED and that of
1815 registers that must be considered as set in SET. */
1816
1817 static void
1818 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1819 regset cond_exec ATTRIBUTE_UNUSED,
1820 regset used ATTRIBUTE_UNUSED,
1821 regset set ATTRIBUTE_UNUSED)
1822 {
1823 /* Nothing to do here, since we postprocess jumps in
1824 add_branch_dependences. */
1825 }
1826
1827 /* Used in schedule_insns to initialize current_sched_info for scheduling
1828 regions (or single basic blocks). */
1829
1830 static struct sched_info region_sched_info =
1831 {
1832 init_ready_list,
1833 can_schedule_ready_p,
1834 schedule_more_p,
1835 new_ready,
1836 rgn_rank,
1837 rgn_print_insn,
1838 contributes_to_priority,
1839 compute_jump_reg_dependencies,
1840
1841 NULL, NULL,
1842 NULL, NULL,
1843 0, 0, 0
1844 };
1845
1846 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1847
1848 static bool
1849 sets_likely_spilled (rtx pat)
1850 {
1851 bool ret = false;
1852 note_stores (pat, sets_likely_spilled_1, &ret);
1853 return ret;
1854 }
1855
1856 static void
1857 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1858 {
1859 bool *ret = (bool *) data;
1860
1861 if (GET_CODE (pat) == SET
1862 && REG_P (x)
1863 && REGNO (x) < FIRST_PSEUDO_REGISTER
1864 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
1865 *ret = true;
1866 }
1867
1868 /* Add dependences so that branches are scheduled to run last in their
1869 block. */
1870
1871 static void
1872 add_branch_dependences (rtx head, rtx tail)
1873 {
1874 rtx insn, last;
1875
1876 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
1877 that can throw exceptions, force them to remain in order at the end of
1878 the block by adding dependencies and giving the last a high priority.
1879 There may be notes present, and prev_head may also be a note.
1880
1881 Branches must obviously remain at the end. Calls should remain at the
1882 end since moving them results in worse register allocation. Uses remain
1883 at the end to ensure proper register allocation.
1884
1885 cc0 setters remain at the end because they can't be moved away from
1886 their cc0 user.
1887
1888 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
1889 are not moved before reload because we can wind up with register
1890 allocation failures. */
1891
1892 insn = tail;
1893 last = 0;
1894 while (CALL_P (insn)
1895 || JUMP_P (insn)
1896 || (NONJUMP_INSN_P (insn)
1897 && (GET_CODE (PATTERN (insn)) == USE
1898 || GET_CODE (PATTERN (insn)) == CLOBBER
1899 || can_throw_internal (insn)
1900 #ifdef HAVE_cc0
1901 || sets_cc0_p (PATTERN (insn))
1902 #endif
1903 || (!reload_completed
1904 && sets_likely_spilled (PATTERN (insn)))))
1905 || NOTE_P (insn))
1906 {
1907 if (!NOTE_P (insn))
1908 {
1909 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
1910 {
1911 add_dependence (last, insn, REG_DEP_ANTI);
1912 INSN_REF_COUNT (insn)++;
1913 }
1914
1915 CANT_MOVE (insn) = 1;
1916
1917 last = insn;
1918 }
1919
1920 /* Don't overrun the bounds of the basic block. */
1921 if (insn == head)
1922 break;
1923
1924 insn = PREV_INSN (insn);
1925 }
1926
1927 /* Make sure these insns are scheduled last in their block. */
1928 insn = last;
1929 if (insn != 0)
1930 while (insn != head)
1931 {
1932 insn = prev_nonnote_insn (insn);
1933
1934 if (INSN_REF_COUNT (insn) != 0)
1935 continue;
1936
1937 add_dependence (last, insn, REG_DEP_ANTI);
1938 INSN_REF_COUNT (insn) = 1;
1939 }
1940 }
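
/* A small illustration with hypothetical insns: suppose a block before
   reload ends with

       i1: sets a CLASS_LIKELY_SPILLED_P hard register
       i2: a USE of that register
       i3: the block's jump

   The first loop above visits i3, i2, i1, marks each CANT_MOVE, and adds
   the anti dependences "i3 depends on i2" and "i2 depends on i1", so the
   three must be emitted last and in that order.  The second loop then
   makes i1, the head of that trailing group, anti-depend on every earlier
   insn in the block whose INSN_REF_COUNT is still zero, so no body insn
   can be scheduled after the group.  */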
1941
1942 /* Data structures for the computation of data dependences in a region. We
1943 keep one `deps' structure for every basic block. Before analyzing the
1944 data dependences for a bb, its variables are initialized as a function of
1945 the variables of its predecessors. When the analysis for a bb completes,
1946 we save the contents to the corresponding bb_deps[bb] variable. */
1947
1948 static struct deps *bb_deps;
1949
1950 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
1951
1952 static rtx
1953 concat_INSN_LIST (rtx copy, rtx old)
1954 {
1955 rtx new = old;
1956 for (; copy ; copy = XEXP (copy, 1))
1957 new = alloc_INSN_LIST (XEXP (copy, 0), new);
1958 return new;
1959 }
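
/* For illustration: if COPY is the list (a b) and OLD is (c d), the loop
   prepends a and then b, yielding (b a c d); the copied elements come out
   in reverse order.  That is harmless for the dependence lists built this
   way, since they are only ever scanned in full.  A sketch of the
   equivalent by-hand construction, with hypothetical names:

       rtx l = old_list;
       l = alloc_INSN_LIST (insn_a, l);
       l = alloc_INSN_LIST (insn_b, l);
   */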
1960
1961 static void
1962 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
1963 rtx *old_mems_p)
1964 {
1965 rtx new_insns = *old_insns_p;
1966 rtx new_mems = *old_mems_p;
1967
1968 while (copy_insns)
1969 {
1970 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
1971 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
1972 copy_insns = XEXP (copy_insns, 1);
1973 copy_mems = XEXP (copy_mems, 1);
1974 }
1975
1976 *old_insns_p = new_insns;
1977 *old_mems_p = new_mems;
1978 }
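
/* For illustration: the two COPY_* arguments are parallel lists maintained
   by sched_analyze, one INSN_LIST node per pending memory access and one
   EXPR_LIST node holding the matching MEM, so the loop can walk them in
   lockstep; it assumes the lists have equal length.  The effect is the
   same element-wise prepend that concat_INSN_LIST performs, applied to
   both lists at once.  */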
1979
1980 /* After computing the dependencies for block BB, propagate the dependencies
1981 found in PRED_DEPS to the successors of the block. */
1982 static void
1983 propagate_deps (int bb, struct deps *pred_deps)
1984 {
1985 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
1986 edge_iterator ei;
1987 edge e;
1988
1989 /* bb's structures are inherited by its successors. */
1990 FOR_EACH_EDGE (e, ei, block->succs)
1991 {
1992 struct deps *succ_deps;
1993 unsigned reg;
1994 reg_set_iterator rsi;
1995
1996 /* Only bbs "below" bb, in the same region, are interesting. */
1997 if (e->dest == EXIT_BLOCK_PTR
1998 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
1999 || BLOCK_TO_BB (e->dest->index) <= bb)
2000 continue;
2001
2002 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2003
2004 /* The reg_last lists are inherited by successor. */
2005 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2006 {
2007 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2008 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2009
2010 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2011 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2012 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2013 succ_rl->clobbers);
2014 succ_rl->uses_length += pred_rl->uses_length;
2015 succ_rl->clobbers_length += pred_rl->clobbers_length;
2016 }
2017 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2018
2019 /* Mem read/write lists are inherited by successor. */
2020 concat_insn_mem_list (pred_deps->pending_read_insns,
2021 pred_deps->pending_read_mems,
2022 &succ_deps->pending_read_insns,
2023 &succ_deps->pending_read_mems);
2024 concat_insn_mem_list (pred_deps->pending_write_insns,
2025 pred_deps->pending_write_mems,
2026 &succ_deps->pending_write_insns,
2027 &succ_deps->pending_write_mems);
2028
2029 succ_deps->last_pending_memory_flush
2030 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2031 succ_deps->last_pending_memory_flush);
2032
2033 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2034 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2035
2036 /* last_function_call is inherited by successor. */
2037 succ_deps->last_function_call
2038 = concat_INSN_LIST (pred_deps->last_function_call,
2039 succ_deps->last_function_call);
2040
2041 /* sched_before_next_call is inherited by successor. */
2042 succ_deps->sched_before_next_call
2043 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2044 succ_deps->sched_before_next_call);
2045 }
2046
2047 /* These lists should point to the right place, for correct
2048 freeing later. */
2049 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2050 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2051 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2052 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2053
2054 /* Can't allow these to be freed twice. */
2055 pred_deps->pending_read_insns = 0;
2056 pred_deps->pending_read_mems = 0;
2057 pred_deps->pending_write_insns = 0;
2058 pred_deps->pending_write_mems = 0;
2059 }
2060
2061 /* Compute backward dependences inside bb. In a multiple blocks region:
2062 (1) a bb is analyzed after its predecessors, and (2) the lists in
2063 effect at the end of bb (after analyzing for bb) are inherited by
2064 bb's successors.
2065
2066 Specifically for reg-reg data dependences, the block insns are
2067 scanned by sched_analyze () top-to-bottom. Two lists are
2068 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2069 and reg_last[].uses for register USEs.
2070
2071 When analysis is completed for bb, we update for its successors:
2072 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2073 ; - USES[succ] = Union (USES [succ], USES [bb])
2074
2075 The mechanism for computing mem-mem data dependence is very
2076 similar, and the result is interblock dependences in the region. */
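
/* A worked example of the unions above, with hypothetical insns: if bb's
   analysis ends with reg_last[r].sets = (i5) and reg_last[r].uses = (i6),
   and a successor already has reg_last[r].sets = (i9), then after
   propagate_deps the successor starts from sets = (i5 i9) and
   uses = (i6), so an insn in the successor that touches r picks up
   dependences on i5 and i6 much as if the two blocks had been analyzed
   as a single block.  */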
2077
2078 static void
2079 compute_block_backward_dependences (int bb)
2080 {
2081 rtx head, tail;
2082 struct deps tmp_deps;
2083
2084 tmp_deps = bb_deps[bb];
2085
2086 /* Do the analysis for this block. */
2087 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2088 sched_analyze (&tmp_deps, head, tail);
2089 add_branch_dependences (head, tail);
2090
2091 if (current_nr_blocks > 1)
2092 propagate_deps (bb, &tmp_deps);
2093
2094 /* Free up the INSN_LISTs. */
2095 free_deps (&tmp_deps);
2096 }
2097
2098 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2099 them to the unused_*_list variables, so that they can be reused. */
2100
2101 static void
2102 free_pending_lists (void)
2103 {
2104 int bb;
2105
2106 for (bb = 0; bb < current_nr_blocks; bb++)
2107 {
2108 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2109 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2110 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2111 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2112 }
2113 }
2114 \f
2115 /* Print dependences for debugging, callable from debugger. */
2116
2117 void
2118 debug_dependencies (void)
2119 {
2120 int bb;
2121
2122 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2123 for (bb = 0; bb < current_nr_blocks; bb++)
2124 {
2125 rtx head, tail;
2126 rtx next_tail;
2127 rtx insn;
2128
2129 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2130 next_tail = NEXT_INSN (tail);
2131 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2132 BB_TO_BLOCK (bb), bb);
2133
2134 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2135 "insn", "code", "bb", "dep", "prio", "cost",
2136 "reservation");
2137 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2138 "----", "----", "--", "---", "----", "----",
2139 "-----------");
2140
2141 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2142 {
2143 rtx link;
2144
2145 if (! INSN_P (insn))
2146 {
2147 int n;
2148 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2149 if (NOTE_P (insn))
2150 {
2151 n = NOTE_LINE_NUMBER (insn);
2152 if (n < 0)
2153 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2154 else
2155 {
2156 expanded_location xloc;
2157 NOTE_EXPANDED_LOCATION (xloc, insn);
2158 fprintf (sched_dump, "line %d, file %s\n",
2159 xloc.line, xloc.file);
2160 }
2161 }
2162 else
2163 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2164 continue;
2165 }
2166
2167 fprintf (sched_dump,
2168 ";; %s%5d%6d%6d%6d%6d%6d ",
2169 (SCHED_GROUP_P (insn) ? "+" : " "),
2170 INSN_UID (insn),
2171 INSN_CODE (insn),
2172 INSN_BB (insn),
2173 INSN_DEP_COUNT (insn),
2174 INSN_PRIORITY (insn),
2175 insn_cost (insn, 0, 0));
2176
2177 if (recog_memoized (insn) < 0)
2178 fprintf (sched_dump, "nothing");
2179 else
2180 print_reservation (sched_dump, insn);
2181
2182 fprintf (sched_dump, "\t: ");
2183 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2184 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2185 fprintf (sched_dump, "\n");
2186 }
2187 }
2188 fprintf (sched_dump, "\n");
2189 }
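
/* Roughly what the dump looks like, with made-up values:

   ;;   --- Region Dependences --- b 12 bb 0
   ;;      insn  code    bb   dep  prio  cost    reservation
   ;;      ----  ----    --   ---  ----  ----    -----------
   ;;       23    95     0     0     2     1     nothing   : 27 31

   The trailing numbers are the INSN_UIDs of the insns that depend on insn
   23, taken from its INSN_DEPEND list; "nothing" appears when the insn has
   no recognized reservation.  */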
2190 \f
2191 /* Returns true if all the basic blocks of the current region have the
2192 BB_DISABLE_SCHEDULE flag set, which means the region must not be scheduled. */
2193 static bool
2194 sched_is_disabled_for_current_region_p (void)
2195 {
2196 int bb;
2197
2198 for (bb = 0; bb < current_nr_blocks; bb++)
2199 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2200 return false;
2201
2202 return true;
2203 }
2204
2205 /* Schedule a region. A region is either an inner loop, a loop-free
2206 subroutine, or a single basic block. Each bb in the region is
2207 scheduled after its flow predecessors. */
2208
2209 static void
2210 schedule_region (int rgn)
2211 {
2212 basic_block block;
2213 edge_iterator ei;
2214 edge e;
2215 int bb;
2216 int rgn_n_insns = 0;
2217 int sched_rgn_n_insns = 0;
2218
2219 /* Set variables for the current region. */
2220 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2221 current_blocks = RGN_BLOCKS (rgn);
2222
2223 /* Don't schedule a region whose blocks are all marked with
2224 BB_DISABLE_SCHEDULE. */
2225 if (sched_is_disabled_for_current_region_p ())
2226 return;
2227
2228 init_deps_global ();
2229
2230 /* Initializations for region data dependence analysis. */
2231 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2232 for (bb = 0; bb < current_nr_blocks; bb++)
2233 init_deps (bb_deps + bb);
2234
2235 /* Compute LOG_LINKS. */
2236 for (bb = 0; bb < current_nr_blocks; bb++)
2237 compute_block_backward_dependences (bb);
2238
2239 /* Compute INSN_DEPEND. */
2240 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2241 {
2242 rtx head, tail;
2243 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2244
2245 compute_forward_dependences (head, tail);
2246
2247 if (targetm.sched.dependencies_evaluation_hook)
2248 targetm.sched.dependencies_evaluation_hook (head, tail);
2249
2250 }
2251
2252 /* Set priorities. */
2253 for (bb = 0; bb < current_nr_blocks; bb++)
2254 {
2255 rtx head, tail;
2256 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2257
2258 rgn_n_insns += set_priorities (head, tail);
2259 }
2260
2261 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2262 if (current_nr_blocks > 1)
2263 {
2264 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2265
2266 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2267 sbitmap_vector_zero (dom, current_nr_blocks);
2268
2269 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2270 rgn_nr_edges = 0;
2271 FOR_EACH_BB (block)
2272 {
2273 if (CONTAINING_RGN (block->index) != rgn)
2274 continue;
2275 FOR_EACH_EDGE (e, ei, block->succs)
2276 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2277 }
2278
2279 rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge));
2280 rgn_nr_edges = 0;
2281 FOR_EACH_BB (block)
2282 {
2283 if (CONTAINING_RGN (block->index) != rgn)
2284 continue;
2285 FOR_EACH_EDGE (e, ei, block->succs)
2286 rgn_edges[rgn_nr_edges++] = e;
2287 }
2288
2289 /* Split edges. */
2290 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2291 sbitmap_vector_zero (pot_split, current_nr_blocks);
2292 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2293 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2294
2295 /* Compute probabilities, dominators, split_edges. */
2296 for (bb = 0; bb < current_nr_blocks; bb++)
2297 compute_dom_prob_ps (bb);
2298 }
2299
2300 /* Now we can schedule all blocks. */
2301 for (bb = 0; bb < current_nr_blocks; bb++)
2302 {
2303 rtx head, tail;
2304 int b = BB_TO_BLOCK (bb);
2305
2306 get_block_head_tail (b, &head, &tail);
2307
2308 if (no_real_insns_p (head, tail))
2309 continue;
2310
2311 current_sched_info->prev_head = PREV_INSN (head);
2312 current_sched_info->next_tail = NEXT_INSN (tail);
2313
2314 if (write_symbols != NO_DEBUG)
2315 {
2316 save_line_notes (b, head, tail);
2317 rm_line_notes (head, tail);
2318 }
2319
2320 /* rm_other_notes only removes notes which are _inside_ the
2321 block---that is, it won't remove notes before the first real insn
2322 or after the last real insn of the block. So if the first insn
2323 has a REG_SAVE_NOTE which would otherwise be emitted before the
2324 insn, it is redundant with the note before the start of the
2325 block, and so we have to take it out. */
2326 if (INSN_P (head))
2327 {
2328 rtx note;
2329
2330 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2331 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2332 remove_note (head, note);
2333 }
2334
2335 /* Remove remaining note insns from the block, save them in
2336 note_list. These notes are restored at the end of
2337 schedule_block (). */
2338 rm_other_notes (head, tail);
2339
2340 target_bb = bb;
2341
2342 current_sched_info->queue_must_finish_empty
2343 = current_nr_blocks > 1 && !flag_schedule_interblock;
2344
2345 schedule_block (b, rgn_n_insns);
2346 sched_rgn_n_insns += sched_n_insns;
2347
2348 /* Update target block boundaries. */
2349 if (head == BB_HEAD (BASIC_BLOCK (b)))
2350 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2351 if (tail == BB_END (BASIC_BLOCK (b)))
2352 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2353
2354 /* Clean up. */
2355 if (current_nr_blocks > 1)
2356 {
2357 free (candidate_table);
2358 free (bblst_table);
2359 free (edgelst_table);
2360 }
2361 }
2362
2363 /* Sanity check: verify that all region insns were scheduled. */
2364 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2365
2366 /* Restore line notes. */
2367 if (write_symbols != NO_DEBUG)
2368 {
2369 for (bb = 0; bb < current_nr_blocks; bb++)
2370 {
2371 rtx head, tail;
2372 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2373 restore_line_notes (head, tail);
2374 }
2375 }
2376
2377 /* Done with this region. */
2378 free_pending_lists ();
2379
2380 finish_deps_global ();
2381
2382 free (bb_deps);
2383
2384 if (current_nr_blocks > 1)
2385 {
2386 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2387 FOR_EACH_BB (block)
2388 {
2389 if (CONTAINING_RGN (block->index) != rgn)
2390 continue;
2391 FOR_EACH_EDGE (e, ei, block->succs)
2392 e->aux = NULL;
2393 }
2394
2395 free (prob);
2396 sbitmap_vector_free (dom);
2397 sbitmap_vector_free (pot_split);
2398 sbitmap_vector_free (ancestor_edges);
2399 free (rgn_edges);
2400 }
2401 }
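
/* A note on the ->aux trick used above, sketched rather than copied from
   the real consumers: while a multi-block region is scheduled, every
   intra-region edge gets a dense index via SET_EDGE_TO_BIT (e, n), stored
   in e->aux, and that index is what the pot_split and ancestor_edges
   bitmaps are keyed on, conceptually

       FOR_EACH_EDGE (e, ei, block->succs)
         SET_EDGE_TO_BIT (e, rgn_nr_edges++);
       ...
       if (TEST_BIT (pot_split[bb], EDGE_TO_BIT (e)))
         ...the edge is a potential split edge of bb...

   The cleanup loop at the end of schedule_region resets e->aux so that
   later passes see an unannotated CFG.  */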
2402
2403 /* Indexed by region, holds the number of death notes found in that region.
2404 Used for consistency checks. */
2405 static int *deaths_in_region;
2406
2407 /* Initialize data structures for region scheduling. */
2408
2409 static void
2410 init_regions (void)
2411 {
2412 sbitmap blocks;
2413 int rgn;
2414
2415 nr_regions = 0;
2416 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2417 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2418 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2419 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2420
2421 /* Compute regions for scheduling. */
2422 if (reload_completed
2423 || n_basic_blocks == 1
2424 || !flag_schedule_interblock
2425 || is_cfg_nonregular ())
2426 {
2427 find_single_block_region ();
2428 }
2429 else
2430 {
2431 /* Compute the dominators and post dominators. */
2432 calculate_dominance_info (CDI_DOMINATORS);
2433
2434 /* Find regions. */
2435 find_rgns ();
2436
2437 if (sched_verbose >= 3)
2438 debug_regions ();
2439
2440 /* For now. This will move as more and more of haifa is converted
2441 to using the cfg code in flow.c. */
2442 free_dominance_info (CDI_DOMINATORS);
2443 }
2444
2445
2446 if (CHECK_DEAD_NOTES)
2447 {
2448 blocks = sbitmap_alloc (last_basic_block);
2449 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2450 /* Remove all death notes from the subroutine. */
2451 for (rgn = 0; rgn < nr_regions; rgn++)
2452 {
2453 int b;
2454
2455 sbitmap_zero (blocks);
2456 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2457 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2458
2459 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2460 }
2461 sbitmap_free (blocks);
2462 }
2463 else
2464 count_or_remove_death_notes (NULL, 1);
2465 }
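
/* Layout reminder, as a sketch of the tables allocated above: region rgn
   owns the blocks

       rgn_bb_table[RGN_BLOCKS (rgn)]
         ...
       rgn_bb_table[RGN_BLOCKS (rgn) + RGN_NR_BLOCKS (rgn) - 1]

   block_to_bb[b] gives a block's position inside its region,
   containing_rgn[b] gives the region it belongs to, and with
   find_single_block_region every region holds exactly one block.  */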
2466
2467 /* The one entry point in this file. DUMP_FILE is the dump file for
2468 this pass. */
2469
2470 void
2471 schedule_insns (FILE *dump_file)
2472 {
2473 sbitmap large_region_blocks, blocks;
2474 int rgn;
2475 int any_large_regions;
2476 basic_block bb;
2477
2478 /* Taking care of this degenerate case makes the rest of
2479 this code simpler. */
2480 if (n_basic_blocks == 0)
2481 return;
2482
2483 nr_inter = 0;
2484 nr_spec = 0;
2485
2486 sched_init (dump_file);
2487
2488 init_regions ();
2489
2490 current_sched_info = &region_sched_info;
2491
2492 /* Schedule every region in the subroutine. */
2493 for (rgn = 0; rgn < nr_regions; rgn++)
2494 schedule_region (rgn);
2495
2496 /* Update life analysis for the subroutine. Do single block regions
2497 first so that we can verify that live_at_start didn't change. Then
2498 do all other blocks. */
2499 /* ??? There is an outside possibility that update_life_info, or more
2500 to the point propagate_block, could get called with nonzero flags
2501 more than once for one basic block. This would be kinda bad if it
2502 were to happen, since REG_INFO would be accumulated twice for the
2503 block, and we'd have twice the REG_DEAD notes.
2504
2505 I'm fairly certain that this _shouldn't_ happen, since I don't think
2506 that live_at_start should change at region heads. Not sure what the
2507 best way to test for this kind of thing... */
2508
2509 allocate_reg_life_data ();
2510 compute_bb_for_insn ();
2511
2512 any_large_regions = 0;
2513 large_region_blocks = sbitmap_alloc (last_basic_block);
2514 sbitmap_zero (large_region_blocks);
2515 FOR_EACH_BB (bb)
2516 SET_BIT (large_region_blocks, bb->index);
2517
2518 blocks = sbitmap_alloc (last_basic_block);
2519 sbitmap_zero (blocks);
2520
2521 /* Update life information. For regions consisting of multiple blocks
2522 we've possibly done interblock scheduling that affects global liveness.
2523 For regions consisting of single blocks we need to do only local
2524 liveness. */
2525 for (rgn = 0; rgn < nr_regions; rgn++)
2526 if (RGN_NR_BLOCKS (rgn) > 1)
2527 any_large_regions = 1;
2528 else
2529 {
2530 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2531 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2532 }
2533
2534 /* Don't update reg info after reload, since that affects
2535 regs_ever_live, which should not change after reload. */
2536 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2537 (reload_completed ? PROP_DEATH_NOTES
2538 : PROP_DEATH_NOTES | PROP_REG_INFO));
2539 if (any_large_regions)
2540 {
2541 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2542 PROP_DEATH_NOTES | PROP_REG_INFO);
2543 }
2544
2545 if (CHECK_DEAD_NOTES)
2546 {
2547 /* Verify the counts of death notes in the single basic block
2548 regions. */
2549 for (rgn = 0; rgn < nr_regions; rgn++)
2550 if (RGN_NR_BLOCKS (rgn) == 1)
2551 {
2552 sbitmap_zero (blocks);
2553 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2554
2555 gcc_assert (deaths_in_region[rgn]
2556 == count_or_remove_death_notes (blocks, 0));
2557 }
2558 free (deaths_in_region);
2559 }
2560
2561 /* Reposition the prologue and epilogue notes in case we moved the
2562 prologue/epilogue insns. */
2563 if (reload_completed)
2564 reposition_prologue_and_epilogue_notes (get_insns ());
2565
2566 /* Delete redundant line notes. */
2567 if (write_symbols != NO_DEBUG)
2568 rm_redundant_line_notes ();
2569
2570 if (sched_verbose)
2571 {
2572 if (reload_completed == 0 && flag_schedule_interblock)
2573 {
2574 fprintf (sched_dump,
2575 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2576 nr_inter, nr_spec);
2577 }
2578 else
2579 gcc_assert (nr_inter <= 0);
2580 fprintf (sched_dump, "\n\n");
2581 }
2582
2583 /* Clean up. */
2584 free (rgn_table);
2585 free (rgn_bb_table);
2586 free (block_to_bb);
2587 free (containing_rgn);
2588
2589 sched_finish ();
2590
2591 sbitmap_free (blocks);
2592 sbitmap_free (large_region_blocks);
2593 }
2594 #endif