PR middle-end/18718
[gcc.git] / gcc / sched-rgn.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com). Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
23
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
47 \f
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68
69 /* Define when we want to count REG_DEAD notes before and after scheduling
70 for sanity checking. We can't do that when conditional execution is used,
71 as REG_DEAD notes exist only for unconditional deaths. */
72
73 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
74 #define CHECK_DEAD_NOTES 1
75 #else
76 #define CHECK_DEAD_NOTES 0
77 #endif
78
79
80 #ifdef INSN_SCHEDULING
81 /* Some accessor macros for h_i_d members only used within this file. */
82 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
83 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
84 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
85
86 /* nr_inter/spec count interblock/speculative motions for the function. */
87 static int nr_inter, nr_spec;
88
89 static int is_cfg_nonregular (void);
90 static bool sched_is_disabled_for_current_region_p (void);
91
92 /* A region is the main entity for interblock scheduling: insns
93 are allowed to move between blocks in the same region, along
94 control flow graph edges, in the 'up' direction. */
95 typedef struct
96 {
97 int rgn_nr_blocks; /* Number of blocks in region. */
98 int rgn_blocks; /* Blocks in the region (actually the index in rgn_bb_table). */
99 }
100 region;
101
102 /* Number of regions in the procedure. */
103 static int nr_regions;
104
105 /* Table of region descriptions. */
106 static region *rgn_table;
107
108 /* Array of lists of regions' blocks. */
109 static int *rgn_bb_table;
110
111 /* Topological order of blocks in the region (if b2 is reachable from
112 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
113 always referred to by either block or b, while its topological
114 order name (in the region) is referred to by bb. */
115 static int *block_to_bb;
116
117 /* The number of the region containing a block. */
118 static int *containing_rgn;
119
120 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
121 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
122 #define BLOCK_TO_BB(block) (block_to_bb[block])
123 #define CONTAINING_RGN(block) (containing_rgn[block])
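/* Illustration only (a sketch, not part of the original pass): with the
   tables and accessors above, the blocks of region RGN can be visited in
   the order in which they were stored, e.g. to dump them.  The helper name
   `dump_region_blocks' is hypothetical; compare debug_regions below, which
   does essentially this.  */
#if 0
static void
dump_region_blocks (int rgn)
{
  int bb;

  /* rgn_bb_table holds each region's blocks contiguously, starting at
     index RGN_BLOCKS (rgn) and spanning RGN_NR_BLOCKS (rgn) entries.  */
  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    fprintf (sched_dump, " %d", rgn_bb_table[RGN_BLOCKS (rgn) + bb]);
  fprintf (sched_dump, "\n");
}
#endif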
124
125 void debug_regions (void);
126 static void find_single_block_region (void);
127 static void find_rgns (void);
128 static bool too_large (int, int *, int *);
129
130 extern void debug_live (int, int);
131
132 /* Blocks of the current region being scheduled. */
133 static int current_nr_blocks;
134 static int current_blocks;
135
136 /* The mapping from bb to block. */
137 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
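/* Illustration: for the region currently being scheduled, BB_TO_BLOCK and
   BLOCK_TO_BB are inverse mappings, i.e. for every 0 <= bb < current_nr_blocks
   we have BLOCK_TO_BB (BB_TO_BLOCK (bb)) == bb; debug_regions below asserts
   exactly this.  */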
138
139 /* Target info declarations.
140
141 The block currently being scheduled is referred to as the "target" block,
142 while other blocks in the region from which insns can be moved to the
143 target are called "source" blocks. The candidate structure holds info
144 about such sources: are they valid? Speculative? Etc. */
145 typedef struct
146 {
147 basic_block *first_member;
148 int nr_members;
149 }
150 bblst;
151
152 typedef struct
153 {
154 char is_valid;
155 char is_speculative;
156 int src_prob;
157 bblst split_bbs;
158 bblst update_bbs;
159 }
160 candidate;
161
162 static candidate *candidate_table;
163
164 /* A speculative motion requires checking live information on the path
165 from 'source' to 'target'. The split blocks are those to be checked.
166 After a speculative motion, live information should be modified in
167 the 'update' blocks.
168
169 Lists of split and update blocks for each candidate of the current
170 target are in array bblst_table. */
171 static basic_block *bblst_table;
172 static int bblst_size, bblst_last;
173
174 #define IS_VALID(src) ( candidate_table[src].is_valid )
175 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
176 #define SRC_PROB(src) ( candidate_table[src].src_prob )
177
178 /* The bb being currently scheduled. */
179 static int target_bb;
180
181 /* List of edges. */
182 typedef struct
183 {
184 edge *first_member;
185 int nr_members;
186 }
187 edgelst;
188
189 static edge *edgelst_table;
190 static int edgelst_last;
191
192 static void extract_edgelst (sbitmap, edgelst *);
193
194
195 /* Target info functions. */
196 static void split_edges (int, int, edgelst *);
197 static void compute_trg_info (int);
198 void debug_candidate (int);
199 void debug_candidates (int);
200
201 /* Dominators array: dom[i] contains the sbitmap of dominators of
202 bb i in the region. */
203 static sbitmap *dom;
204
205 /* bb 0 is the only region entry. */
206 #define IS_RGN_ENTRY(bb) (!bb)
207
208 /* Is bb_src dominated by bb_trg? */
209 #define IS_DOMINATED(bb_src, bb_trg) \
210 ( TEST_BIT (dom[bb_src], bb_trg) )
211
212 /* Probability: Prob[i] is a float in [0, 1] which is the probability
213 of bb i relative to the region entry. */
214 static float *prob;
215
216 /* The probability of bb_src, relative to bb_trg. Note that while
217 'prob[bb]' is a float in [0, 1], this macro returns an integer
218 in [0, 100]. */
219 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
220 prob[bb_trg])))
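/* For example, if prob[bb_src] == 0.30 and prob[bb_trg] == 0.60, then
   GET_SRC_PROB (bb_src, bb_trg) is (int) (100.0 * 0.5) == 50, i.e. bb_src
   is executed in about half of the cases in which bb_trg is executed.  */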
221
222 /* Bit-set of edges, where bit i stands for edge i. */
223 typedef sbitmap edgeset;
224
225 /* Number of edges in the region. */
226 static int rgn_nr_edges;
227
228 /* Array of size rgn_nr_edges. */
229 static edge *rgn_edges;
230
231 /* Mapping from each edge in the graph to its number in the rgn. */
232 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
233 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
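/* Illustration only: the numbering is established when a region is set up
   for scheduling, roughly as in this sketch, after which
   EDGE_TO_BIT (rgn_edges[i]) == i and edge sets (edgeset above) can be
   indexed by these numbers:

     for (i = 0; i < rgn_nr_edges; i++)
       SET_EDGE_TO_BIT (rgn_edges[i], i);
*/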
234
235 /* The split edges of a source bb are different for each target
236 bb. In order to compute this efficiently, the 'potential-split edges'
237 are computed for each bb prior to scheduling a region. This is actually
238 the split edges of each bb relative to the region entry.
239
240 pot_split[bb] is the set of potential split edges of bb. */
241 static edgeset *pot_split;
242
243 /* For every bb, a set of its ancestor edges. */
244 static edgeset *ancestor_edges;
245
246 static void compute_dom_prob_ps (int);
247
248 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
249 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
250 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
251
252 /* Parameters affecting the decision of rank_for_schedule().
253 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
254 #define MIN_PROBABILITY 40
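/* For instance, with MIN_PROBABILITY == 40, a source block whose
   GET_SRC_PROB relative to the target block falls below 40 is rejected as
   a candidate in compute_trg_info below.  */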
255
256 /* Speculative scheduling functions. */
257 static int check_live_1 (int, rtx);
258 static void update_live_1 (int, rtx);
259 static int check_live (rtx, int);
260 static void update_live (rtx, int);
261 static void set_spec_fed (rtx);
262 static int is_pfree (rtx, int, int);
263 static int find_conditional_protection (rtx, int);
264 static int is_conditionally_protected (rtx, int, int);
265 static int is_prisky (rtx, int, int);
266 static int is_exception_free (rtx, int, int);
267
268 static bool sets_likely_spilled (rtx);
269 static void sets_likely_spilled_1 (rtx, rtx, void *);
270 static void add_branch_dependences (rtx, rtx);
271 static void compute_block_backward_dependences (int);
272 void debug_dependencies (void);
273
274 static void init_regions (void);
275 static void schedule_region (int);
276 static rtx concat_INSN_LIST (rtx, rtx);
277 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
278 static void propagate_deps (int, struct deps *);
279 static void free_pending_lists (void);
280
281 /* Functions for construction of the control flow graph. */
282
283 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
284
285 We decide not to build the control flow graph if there is possibly more
286 than one entry to the function, if computed branches exist, if we
287 have nonlocal gotos, or if we have an unreachable loop. */
288
289 static int
290 is_cfg_nonregular (void)
291 {
292 basic_block b;
293 rtx insn;
294 RTX_CODE code;
295
296 /* If we have a label that could be the target of a nonlocal goto, then
297 the cfg is not well structured. */
298 if (nonlocal_goto_handler_labels)
299 return 1;
300
301 /* If we have any forced labels, then the cfg is not well structured. */
302 if (forced_labels)
303 return 1;
304
305 /* If this function has a computed jump, then we consider the cfg
306 not well structured. */
307 if (current_function_has_computed_jump)
308 return 1;
309
310 /* If we have exception handlers, then we consider the cfg not well
311 structured. ?!? We should be able to handle this now that flow.c
312 computes an accurate cfg for EH. */
313 if (current_function_has_exception_handlers ())
314 return 1;
315
316 /* If we have non-jumping insns which refer to labels, then we consider
317 the cfg not well structured. */
318 /* Check for labels referred to other than by jumps. */
319 FOR_EACH_BB (b)
320 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
321 {
322 code = GET_CODE (insn);
323 if (INSN_P (insn) && code != JUMP_INSN)
324 {
325 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
326
327 if (note
328 && ! (JUMP_P (NEXT_INSN (insn))
329 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
330 XEXP (note, 0))))
331 return 1;
332 }
333
334 if (insn == BB_END (b))
335 break;
336 }
337
338 /* Unreachable loops with more than one basic block are detected
339 during the DFS traversal in find_rgns.
340
341 Unreachable loops with a single block are detected here. This
342 test is redundant with the one in find_rgns, but it's much
343 cheaper to go ahead and catch the trivial case here. */
344 FOR_EACH_BB (b)
345 {
346 if (EDGE_COUNT (b->preds) == 0
347 || (EDGE_PRED (b, 0)->src == b
348 && EDGE_COUNT (b->preds) == 1))
349 return 1;
350 }
351
352 /* All the tests passed. Consider the cfg well structured. */
353 return 0;
354 }
355
356 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
357
358 static void
359 extract_edgelst (sbitmap set, edgelst *el)
360 {
361 int i;
362
363 /* edgelst table space is reused in each call to extract_edgelst. */
364 edgelst_last = 0;
365
366 el->first_member = &edgelst_table[edgelst_last];
367 el->nr_members = 0;
368
369 /* Iterate over each word in the bitset. */
370 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
371 {
372 edgelst_table[edgelst_last++] = rgn_edges[i];
373 el->nr_members++;
374 });
375 }
376
377 /* Functions for the construction of regions. */
378
379 /* Print the regions, for debugging purposes. Callable from debugger. */
380
381 void
382 debug_regions (void)
383 {
384 int rgn, bb;
385
386 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
387 for (rgn = 0; rgn < nr_regions; rgn++)
388 {
389 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
390 rgn_table[rgn].rgn_nr_blocks);
391 fprintf (sched_dump, ";;\tbb/block: ");
392
393 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
394 {
395 current_blocks = RGN_BLOCKS (rgn);
396
397 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
398 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
399 }
400
401 fprintf (sched_dump, "\n\n");
402 }
403 }
404
405 /* Build a single block region for each basic block in the function.
406 This allows for using the same code for interblock and basic block
407 scheduling. */
408
409 static void
410 find_single_block_region (void)
411 {
412 basic_block bb;
413
414 nr_regions = 0;
415
416 FOR_EACH_BB (bb)
417 {
418 rgn_bb_table[nr_regions] = bb->index;
419 RGN_NR_BLOCKS (nr_regions) = 1;
420 RGN_BLOCKS (nr_regions) = nr_regions;
421 CONTAINING_RGN (bb->index) = nr_regions;
422 BLOCK_TO_BB (bb->index) = 0;
423 nr_regions++;
424 }
425 }
426
427 /* Update number of blocks and the estimate for number of insns
428 in the region. Return true if the region is "too large" for interblock
429 scheduling (compile time considerations). */
430
431 static bool
432 too_large (int block, int *num_bbs, int *num_insns)
433 {
434 (*num_bbs)++;
435 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
436 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
437
438 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
439 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
440 }
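/* Both limits come from --param values (max-sched-region-blocks and
   max-sched-region-insns); see params.def for their defaults.  */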
441
442 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
443 is still an inner loop. Put in max_hdr[blk] the header of the innermost
444 loop containing blk. */
445 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
446 { \
447 if (max_hdr[blk] == -1) \
448 max_hdr[blk] = hdr; \
449 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
450 RESET_BIT (inner, hdr); \
451 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
452 { \
453 RESET_BIT (inner,max_hdr[blk]); \
454 max_hdr[blk] = hdr; \
455 } \
456 }
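/* Illustration: dfs_nr grows as the DFS descends, so a header with a larger
   dfs_nr was discovered later, i.e. is nested more deeply.  If blk already
   has max_hdr[blk] == h1 and a new header h2 is seen, then
   dfs_nr[h1] > dfs_nr[h2] means h2 encloses h1, so h2 cannot be an inner
   loop header; dfs_nr[h1] < dfs_nr[h2] means h2 is nested inside h1, so h1
   loses its inner-loop status and h2 becomes max_hdr[blk].  */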
457
458 /* Find regions for interblock scheduling.
459
460 A region for scheduling can be:
461
462 * A loop-free procedure, or
463
464 * A reducible inner loop, or
465
466 * A basic block not contained in any other region.
467
468 ?!? In theory we could build other regions based on extended basic
469 blocks or reverse extended basic blocks. Is it worth the trouble?
470
471 Loop blocks that form a region are put into the region's block list
472 in topological order.
473
474 This procedure stores its results into the following global (ick) variables
475
476 * nr_regions
477 * rgn_table
478 * rgn_bb_table
479 * block_to_bb
480 * containing_rgn
481
482 We use dominator relationships to avoid making regions out of non-reducible
483 loops.
484
485 This procedure needs to be converted to work on pred/succ lists instead
486 of edge tables. That would simplify it somewhat. */
487
488 static void
489 find_rgns (void)
490 {
491 int *max_hdr, *dfs_nr, *degree;
492 char no_loops = 1;
493 int node, child, loop_head, i, head, tail;
494 int count = 0, sp, idx = 0;
495 edge_iterator current_edge;
496 edge_iterator *stack;
497 int num_bbs, num_insns, unreachable;
498 int too_large_failure;
499 basic_block bb;
500
501 /* Note if a block is a natural loop header. */
502 sbitmap header;
503
504 /* Note if a block is a natural inner loop header. */
505 sbitmap inner;
506
507 /* Note if a block is in the block queue. */
508 sbitmap in_queue;
509
510 /* Note if a block is on the DFS stack. */
511 sbitmap in_stack;
512
513 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
514 and a mapping from block to its loop header (if the block is contained
515 in a loop, else -1).
516
517 Store results in HEADER, INNER, and MAX_HDR respectively, these will
518 be used as inputs to the second traversal.
519
520 STACK, SP and DFS_NR are only used during the first traversal. */
521
522 /* Allocate and initialize variables for the first traversal. */
523 max_hdr = xmalloc (last_basic_block * sizeof (int));
524 dfs_nr = xcalloc (last_basic_block, sizeof (int));
525 stack = xmalloc (n_edges * sizeof (edge_iterator));
526
527 inner = sbitmap_alloc (last_basic_block);
528 sbitmap_ones (inner);
529
530 header = sbitmap_alloc (last_basic_block);
531 sbitmap_zero (header);
532
533 in_queue = sbitmap_alloc (last_basic_block);
534 sbitmap_zero (in_queue);
535
536 in_stack = sbitmap_alloc (last_basic_block);
537 sbitmap_zero (in_stack);
538
539 for (i = 0; i < last_basic_block; i++)
540 max_hdr[i] = -1;
541
542 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
543 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
544
545 /* DFS traversal to find inner loops in the cfg. */
546
547 current_edge = ei_start (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest->succs);
548 sp = -1;
549
550 while (1)
551 {
552 if (EDGE_PASSED (current_edge))
553 {
554 /* We have reached a leaf node or a node that was already
555 processed. Pop edges off the stack until we find
556 an edge that has not yet been processed. */
557 while (sp >= 0 && EDGE_PASSED (current_edge))
558 {
559 /* Pop entry off the stack. */
560 current_edge = stack[sp--];
561 node = ei_edge (current_edge)->src->index;
562 gcc_assert (node != ENTRY_BLOCK);
563 child = ei_edge (current_edge)->dest->index;
564 gcc_assert (child != EXIT_BLOCK);
565 RESET_BIT (in_stack, child);
566 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
567 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
568 ei_next (&current_edge);
569 }
570
571 /* See if we have finished the DFS tree traversal. */
572 if (sp < 0 && EDGE_PASSED (current_edge))
573 break;
574
575 /* Nope, continue the traversal with the popped node. */
576 continue;
577 }
578
579 /* Process a node. */
580 node = ei_edge (current_edge)->src->index;
581 gcc_assert (node != ENTRY_BLOCK);
582 SET_BIT (in_stack, node);
583 dfs_nr[node] = ++count;
584
585 /* We don't traverse to the exit block. */
586 child = ei_edge (current_edge)->dest->index;
587 if (child == EXIT_BLOCK)
588 {
589 SET_EDGE_PASSED (current_edge);
590 ei_next (&current_edge);
591 continue;
592 }
593
594 /* If the successor is in the stack, then we've found a loop.
595 Mark the loop, if it is not a natural loop, then it will
596 be rejected during the second traversal. */
597 if (TEST_BIT (in_stack, child))
598 {
599 no_loops = 0;
600 SET_BIT (header, child);
601 UPDATE_LOOP_RELATIONS (node, child);
602 SET_EDGE_PASSED (current_edge);
603 ei_next (&current_edge);
604 continue;
605 }
606
607 /* If the child was already visited, then there is no need to visit
608 it again. Just update the loop relationships and restart
609 with a new edge. */
610 if (dfs_nr[child])
611 {
612 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
613 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
614 SET_EDGE_PASSED (current_edge);
615 ei_next (&current_edge);
616 continue;
617 }
618
619 /* Push an entry on the stack and continue DFS traversal. */
620 stack[++sp] = current_edge;
621 SET_EDGE_PASSED (current_edge);
622 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
623 }
624
625 /* Reset ->aux field used by EDGE_PASSED. */
626 FOR_ALL_BB (bb)
627 {
628 edge_iterator ei;
629 edge e;
630 FOR_EACH_EDGE (e, ei, bb->succs)
631 e->aux = NULL;
632 }
633
634
635 /* Another check for unreachable blocks. The earlier test in
636 is_cfg_nonregular only finds unreachable blocks that do not
637 form a loop.
638
639 The DFS traversal will mark every block that is reachable from
640 the entry node by placing a nonzero value in dfs_nr. Thus if
641 dfs_nr is zero for any block, then it must be unreachable. */
642 unreachable = 0;
643 FOR_EACH_BB (bb)
644 if (dfs_nr[bb->index] == 0)
645 {
646 unreachable = 1;
647 break;
648 }
649
650 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
651 to hold degree counts. */
652 degree = dfs_nr;
653
654 FOR_EACH_BB (bb)
655 degree[bb->index] = EDGE_COUNT (bb->preds);
656
657 /* Do not perform region scheduling if there are any unreachable
658 blocks. */
659 if (!unreachable)
660 {
661 int *queue;
662
663 if (no_loops)
664 SET_BIT (header, 0);
665
666 /* Second traversal: find reducible inner loops and topologically sort
667 blocks of each region. */
668
669 queue = xmalloc (n_basic_blocks * sizeof (int));
670
671 /* Find blocks which are inner loop headers. We still have non-reducible
672 loops to consider at this point. */
673 FOR_EACH_BB (bb)
674 {
675 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
676 {
677 edge e;
678 edge_iterator ei;
679 basic_block jbb;
680
681 /* Now check that the loop is reducible. We do this separate
682 from finding inner loops so that we do not find a reducible
683 loop which contains an inner non-reducible loop.
684
685 A simple way to find reducible/natural loops is to verify
686 that each block in the loop is dominated by the loop
687 header.
688
689 If there exists a block that is not dominated by the loop
690 header, then the block is reachable from outside the loop
691 and thus the loop is not a natural loop. */
692 FOR_EACH_BB (jbb)
693 {
694 /* First identify blocks in the loop, except for the loop
695 entry block. */
696 if (bb->index == max_hdr[jbb->index] && bb != jbb)
697 {
698 /* Now verify that the block is dominated by the loop
699 header. */
700 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
701 break;
702 }
703 }
704
705 /* If we exited the loop early, then BB is the header of
706 a non-reducible loop and we should quit processing it
707 now. */
708 if (jbb != EXIT_BLOCK_PTR)
709 continue;
710
711 /* BB is a header of an inner loop, or block 0 in a subroutine
712 with no loops at all. */
713 head = tail = -1;
714 too_large_failure = 0;
715 loop_head = max_hdr[bb->index];
716
717 /* Decrease degree of all of BB's successors for topological
718 ordering. */
719 FOR_EACH_EDGE (e, ei, bb->succs)
720 if (e->dest != EXIT_BLOCK_PTR)
721 --degree[e->dest->index];
722
723 /* Estimate # insns, and count # blocks in the region. */
724 num_bbs = 1;
725 num_insns = (INSN_LUID (BB_END (bb))
726 - INSN_LUID (BB_HEAD (bb)));
727
728 /* Find all loop latches (blocks with back edges to the loop
729 header), or all the leaf blocks if the cfg has no loops.
730
731 Place those blocks into the queue. */
732 if (no_loops)
733 {
734 FOR_EACH_BB (jbb)
735 /* Leaf nodes have only a single successor which must
736 be EXIT_BLOCK. */
737 if (EDGE_COUNT (jbb->succs) == 1
738 && EDGE_SUCC (jbb, 0)->dest == EXIT_BLOCK_PTR)
739 {
740 queue[++tail] = jbb->index;
741 SET_BIT (in_queue, jbb->index);
742
743 if (too_large (jbb->index, &num_bbs, &num_insns))
744 {
745 too_large_failure = 1;
746 break;
747 }
748 }
749 }
750 else
751 {
752 edge e;
753
754 FOR_EACH_EDGE (e, ei, bb->preds)
755 {
756 if (e->src == ENTRY_BLOCK_PTR)
757 continue;
758
759 node = e->src->index;
760
761 if (max_hdr[node] == loop_head && node != bb->index)
762 {
763 /* This is a loop latch. */
764 queue[++tail] = node;
765 SET_BIT (in_queue, node);
766
767 if (too_large (node, &num_bbs, &num_insns))
768 {
769 too_large_failure = 1;
770 break;
771 }
772 }
773 }
774 }
775
776 /* Now add all the blocks in the loop to the queue.
777
778 We know the loop is a natural loop; however the algorithm
779 above will not always mark certain blocks as being in the
780 loop. Consider:
781 node children
782 a b,c
783 b c
784 c a,d
785 d b
786
787 The algorithm in the DFS traversal may not mark B & D as part
788 of the loop (i.e. they will not have max_hdr set to A).
789
790 We know they cannot be loop latches (else they would have
791 had max_hdr set since they'd have a backedge to a dominator
792 block). So we don't need them on the initial queue.
793
794 We know they are part of the loop because they are dominated
795 by the loop header and can be reached by a backwards walk of
796 the edges starting with nodes on the initial queue.
797
798 It is safe and desirable to include those nodes in the
799 loop/scheduling region. To do so we would need to decrease
800 the degree of a node if it is the target of a backedge
801 within the loop itself as the node is placed in the queue.
802
803 We do not do this because I'm not sure that the actual
804 scheduling code will properly handle this case. ?!? */
805
806 while (head < tail && !too_large_failure)
807 {
808 edge e;
809 child = queue[++head];
810
811 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
812 {
813 node = e->src->index;
814
815 /* See discussion above about nodes not marked as in
816 this loop during the initial DFS traversal. */
817 if (e->src == ENTRY_BLOCK_PTR
818 || max_hdr[node] != loop_head)
819 {
820 tail = -1;
821 break;
822 }
823 else if (!TEST_BIT (in_queue, node) && node != bb->index)
824 {
825 queue[++tail] = node;
826 SET_BIT (in_queue, node);
827
828 if (too_large (node, &num_bbs, &num_insns))
829 {
830 too_large_failure = 1;
831 break;
832 }
833 }
834 }
835 }
836
837 if (tail >= 0 && !too_large_failure)
838 {
839 /* Place the loop header into list of region blocks. */
840 degree[bb->index] = -1;
841 rgn_bb_table[idx] = bb->index;
842 RGN_NR_BLOCKS (nr_regions) = num_bbs;
843 RGN_BLOCKS (nr_regions) = idx++;
844 CONTAINING_RGN (bb->index) = nr_regions;
845 BLOCK_TO_BB (bb->index) = count = 0;
846
847 /* Remove blocks from queue[] when their in degree
848 becomes zero. Repeat until no blocks are left on the
849 list. This produces a topological list of blocks in
850 the region. */
851 while (tail >= 0)
852 {
853 if (head < 0)
854 head = tail;
855 child = queue[head];
856 if (degree[child] == 0)
857 {
858 edge e;
859
860 degree[child] = -1;
861 rgn_bb_table[idx++] = child;
862 BLOCK_TO_BB (child) = ++count;
863 CONTAINING_RGN (child) = nr_regions;
864 queue[head] = queue[tail--];
865
866 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
867 if (e->dest != EXIT_BLOCK_PTR)
868 --degree[e->dest->index];
869 }
870 else
871 --head;
872 }
873 ++nr_regions;
874 }
875 }
876 }
877 free (queue);
878 }
879
880 /* Any block that did not end up in a region is placed into a region
881 by itself. */
882 FOR_EACH_BB (bb)
883 if (degree[bb->index] >= 0)
884 {
885 rgn_bb_table[idx] = bb->index;
886 RGN_NR_BLOCKS (nr_regions) = 1;
887 RGN_BLOCKS (nr_regions) = idx++;
888 CONTAINING_RGN (bb->index) = nr_regions++;
889 BLOCK_TO_BB (bb->index) = 0;
890 }
891
892 free (max_hdr);
893 free (dfs_nr);
894 free (stack);
895 sbitmap_free (header);
896 sbitmap_free (inner);
897 sbitmap_free (in_queue);
898 sbitmap_free (in_stack);
899 }
900
901 /* Functions for region scheduling information. */
902
903 /* Compute dominators, probability, and potential-split-edges of bb.
904 Assume that these values were already computed for bb's predecessors. */
905
906 static void
907 compute_dom_prob_ps (int bb)
908 {
909 int pred_bb;
910 int nr_out_edges, nr_rgn_out_edges;
911 edge_iterator in_ei, out_ei;
912 edge in_edge, out_edge;
913
914 prob[bb] = 0.0;
915 if (IS_RGN_ENTRY (bb))
916 {
917 SET_BIT (dom[bb], 0);
918 prob[bb] = 1.0;
919 return;
920 }
921
922 /* Initialize dom[bb] to '111..1'. */
923 sbitmap_ones (dom[bb]);
924
925 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
926 {
927 if (in_edge->src == ENTRY_BLOCK_PTR)
928 continue;
929
930 pred_bb = BLOCK_TO_BB (in_edge->src->index);
931 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
932 sbitmap_a_or_b (ancestor_edges[bb],
933 ancestor_edges[bb], ancestor_edges[pred_bb]);
934
935 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
936
937 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
938
939 nr_out_edges = 0;
940 nr_rgn_out_edges = 0;
941
942 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
943 {
944 ++nr_out_edges;
945
946 /* The successor doesn't belong in the region? */
947 if (out_edge->dest != EXIT_BLOCK_PTR
948 && CONTAINING_RGN (out_edge->dest->index)
949 != CONTAINING_RGN (BB_TO_BLOCK (bb)))
950 ++nr_rgn_out_edges;
951
952 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
953 }
954
955 /* Now nr_rgn_out_edges is the number of region-exit edges from
956 pred, and nr_out_edges will be the number of pred out edges
957 not leaving the region. */
958 nr_out_edges -= nr_rgn_out_edges;
959 if (nr_rgn_out_edges > 0)
960 prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
961 else
962 prob[bb] += prob[pred_bb] / nr_out_edges;
963 }
964
965 SET_BIT (dom[bb], bb);
966 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
967
968 if (sched_verbose >= 2)
969 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
970 (int) (100.0 * prob[bb]));
971 }
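/* Worked example (illustration only): suppose bb has two region
   predecessors, p and q.  p has prob[p] == 0.60 and two successor edges,
   both staying in the region, so it contributes 0.60 / 2 == 0.30.  q has
   prob[q] == 0.40 and two successor edges of which one leaves the region,
   so its single remaining in-region edge gets the 0.9 factor and
   contributes 0.9 * 0.40 / 1 == 0.36.  Hence prob[bb] == 0.66.  */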
972
973 /* Functions for target info. */
974
975 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
976 Note that bb_trg dominates bb_src. */
977
978 static void
979 split_edges (int bb_src, int bb_trg, edgelst *bl)
980 {
981 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
982 sbitmap_copy (src, pot_split[bb_src]);
983
984 sbitmap_difference (src, src, pot_split[bb_trg]);
985 extract_edgelst (src, bl);
986 sbitmap_free (src);
987 }
988
989 /* Find the valid candidate-source-blocks for the target block TRG, compute
990 their probability, and check if they are speculative or not.
991 For speculative sources, compute their update-blocks and split-blocks. */
992
993 static void
994 compute_trg_info (int trg)
995 {
996 candidate *sp;
997 edgelst el;
998 int i, j, k, update_idx;
999 basic_block block;
1000 edge_iterator ei;
1001 edge e;
1002
1003 /* Define some of the fields for the target bb as well. */
1004 sp = candidate_table + trg;
1005 sp->is_valid = 1;
1006 sp->is_speculative = 0;
1007 sp->src_prob = 100;
1008
1009 for (i = trg + 1; i < current_nr_blocks; i++)
1010 {
1011 sp = candidate_table + i;
1012
1013 sp->is_valid = IS_DOMINATED (i, trg);
1014 if (sp->is_valid)
1015 {
1016 sp->src_prob = GET_SRC_PROB (i, trg);
1017 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1018 }
1019
1020 if (sp->is_valid)
1021 {
1022 split_edges (i, trg, &el);
1023 sp->is_speculative = (el.nr_members) ? 1 : 0;
1024 if (sp->is_speculative && !flag_schedule_speculative)
1025 sp->is_valid = 0;
1026 }
1027
1028 if (sp->is_valid)
1029 {
1030 /* Compute split blocks and store them in bblst_table.
1031 The TO block of every split edge is a split block. */
1032 sp->split_bbs.first_member = &bblst_table[bblst_last];
1033 sp->split_bbs.nr_members = el.nr_members;
1034 for (j = 0; j < el.nr_members; bblst_last++, j++)
1035 bblst_table[bblst_last] = el.first_member[j]->dest;
1036 sp->update_bbs.first_member = &bblst_table[bblst_last];
1037
1038 /* Compute update blocks and store them in bblst_table.
1039 For every split edge, look at the FROM block, and check
1040 all out edges. For each out edge that is not a split edge,
1041 add the TO block to the update block list. This list can end
1042 up with a lot of duplicates. We need to weed them out to avoid
1043 overrunning the end of the bblst_table. */
1044
1045 update_idx = 0;
1046 for (j = 0; j < el.nr_members; j++)
1047 {
1048 block = el.first_member[j]->src;
1049 FOR_EACH_EDGE (e, ei, block->succs)
1050 {
1051 if (!(e->dest->flags & BB_VISITED))
1052 {
1053 for (k = 0; k < el.nr_members; k++)
1054 if (e == el.first_member[k])
1055 break;
1056
1057 if (k >= el.nr_members)
1058 {
1059 bblst_table[bblst_last++] = e->dest;
1060 e->dest->flags |= BB_VISITED;
1061 update_idx++;
1062 }
1063 }
1064 }
1065 }
1066 sp->update_bbs.nr_members = update_idx;
1067
1068 FOR_ALL_BB (block)
1069 block->flags &= ~BB_VISITED;
1070
1071 /* Make sure we didn't overrun the end of bblst_table. */
1072 gcc_assert (bblst_last <= bblst_size);
1073 }
1074 else
1075 {
1076 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1077
1078 sp->is_speculative = 0;
1079 sp->src_prob = 0;
1080 }
1081 }
1082 }
1083
1084 /* Print a candidate's info, for debugging purposes. Callable from debugger. */
1085
1086 void
1087 debug_candidate (int i)
1088 {
1089 if (!candidate_table[i].is_valid)
1090 return;
1091
1092 if (candidate_table[i].is_speculative)
1093 {
1094 int j;
1095 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1096
1097 fprintf (sched_dump, "split path: ");
1098 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1099 {
1100 int b = candidate_table[i].split_bbs.first_member[j]->index;
1101
1102 fprintf (sched_dump, " %d ", b);
1103 }
1104 fprintf (sched_dump, "\n");
1105
1106 fprintf (sched_dump, "update path: ");
1107 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1108 {
1109 int b = candidate_table[i].update_bbs.first_member[j]->index;
1110
1111 fprintf (sched_dump, " %d ", b);
1112 }
1113 fprintf (sched_dump, "\n");
1114 }
1115 else
1116 {
1117 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1118 }
1119 }
1120
1121 /* Print candidates info, for debugging purposes. Callable from debugger. */
1122
1123 void
1124 debug_candidates (int trg)
1125 {
1126 int i;
1127
1128 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1129 BB_TO_BLOCK (trg), trg);
1130 for (i = trg + 1; i < current_nr_blocks; i++)
1131 debug_candidate (i);
1132 }
1133
1134 /* Functions for speculative scheduling. */
1135
1136 /* Return 0 if x is a set of a register alive in the beginning of one
1137 of the split-blocks of src, otherwise return 1. */
1138
1139 static int
1140 check_live_1 (int src, rtx x)
1141 {
1142 int i;
1143 int regno;
1144 rtx reg = SET_DEST (x);
1145
1146 if (reg == 0)
1147 return 1;
1148
1149 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1150 || GET_CODE (reg) == SIGN_EXTRACT
1151 || GET_CODE (reg) == STRICT_LOW_PART)
1152 reg = XEXP (reg, 0);
1153
1154 if (GET_CODE (reg) == PARALLEL)
1155 {
1156 int i;
1157
1158 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1159 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1160 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1161 return 1;
1162
1163 return 0;
1164 }
1165
1166 if (!REG_P (reg))
1167 return 1;
1168
1169 regno = REGNO (reg);
1170
1171 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1172 {
1173 /* Global registers are assumed live. */
1174 return 0;
1175 }
1176 else
1177 {
1178 if (regno < FIRST_PSEUDO_REGISTER)
1179 {
1180 /* Check for hard registers. */
1181 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1182 while (--j >= 0)
1183 {
1184 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1185 {
1186 basic_block b = candidate_table[src].split_bbs.first_member[i];
1187
1188 if (REGNO_REG_SET_P (b->global_live_at_start, regno + j))
1189 {
1190 return 0;
1191 }
1192 }
1193 }
1194 }
1195 else
1196 {
1197 /* Check for pseudo registers. */
1198 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1199 {
1200 basic_block b = candidate_table[src].split_bbs.first_member[i];
1201
1202 if (REGNO_REG_SET_P (b->global_live_at_start, regno))
1203 {
1204 return 0;
1205 }
1206 }
1207 }
1208 }
1209
1210 return 1;
1211 }
1212
1213 /* If x is a set of a register R, mark that R is alive in the beginning
1214 of every update-block of src. */
1215
1216 static void
1217 update_live_1 (int src, rtx x)
1218 {
1219 int i;
1220 int regno;
1221 rtx reg = SET_DEST (x);
1222
1223 if (reg == 0)
1224 return;
1225
1226 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1227 || GET_CODE (reg) == SIGN_EXTRACT
1228 || GET_CODE (reg) == STRICT_LOW_PART)
1229 reg = XEXP (reg, 0);
1230
1231 if (GET_CODE (reg) == PARALLEL)
1232 {
1233 int i;
1234
1235 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1236 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1237 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1238
1239 return;
1240 }
1241
1242 if (!REG_P (reg))
1243 return;
1244
1245 /* Global registers are always live, so the code below does not apply
1246 to them. */
1247
1248 regno = REGNO (reg);
1249
1250 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1251 {
1252 if (regno < FIRST_PSEUDO_REGISTER)
1253 {
1254 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1255 while (--j >= 0)
1256 {
1257 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1258 {
1259 basic_block b = candidate_table[src].update_bbs.first_member[i];
1260
1261 SET_REGNO_REG_SET (b->global_live_at_start, regno + j);
1262 }
1263 }
1264 }
1265 else
1266 {
1267 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1268 {
1269 basic_block b = candidate_table[src].update_bbs.first_member[i];
1270
1271 SET_REGNO_REG_SET (b->global_live_at_start, regno);
1272 }
1273 }
1274 }
1275 }
1276
1277 /* Return 1 if insn can be speculatively moved from block src to trg,
1278 otherwise return 0. Called before first insertion of insn to
1279 ready-list or before the scheduling. */
1280
1281 static int
1282 check_live (rtx insn, int src)
1283 {
1284 /* Find the registers set by instruction. */
1285 if (GET_CODE (PATTERN (insn)) == SET
1286 || GET_CODE (PATTERN (insn)) == CLOBBER)
1287 return check_live_1 (src, PATTERN (insn));
1288 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1289 {
1290 int j;
1291 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1292 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1293 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1294 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1295 return 0;
1296
1297 return 1;
1298 }
1299
1300 return 1;
1301 }
1302
1303 /* Update the live registers info after insn was moved speculatively from
1304 block src to trg. */
1305
1306 static void
1307 update_live (rtx insn, int src)
1308 {
1309 /* Find the registers set by instruction. */
1310 if (GET_CODE (PATTERN (insn)) == SET
1311 || GET_CODE (PATTERN (insn)) == CLOBBER)
1312 update_live_1 (src, PATTERN (insn));
1313 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1314 {
1315 int j;
1316 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1317 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1318 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1319 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1320 }
1321 }
1322
1323 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1324 #define IS_REACHABLE(bb_from, bb_to) \
1325 (bb_from == bb_to \
1326 || IS_RGN_ENTRY (bb_from) \
1327 || (TEST_BIT (ancestor_edges[bb_to], \
1328 EDGE_TO_BIT (EDGE_PRED (BASIC_BLOCK (BB_TO_BLOCK (bb_from)), 0)))))
1329
1330 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1331
1332 static void
1333 set_spec_fed (rtx load_insn)
1334 {
1335 rtx link;
1336
1337 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1338 if (GET_MODE (link) == VOIDmode)
1339 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1340 } /* set_spec_fed */
1341
1342 /* On the path from the insn to load_insn_bb, find a conditional
1343 branch depending on insn, that guards the speculative load. */
1344
1345 static int
1346 find_conditional_protection (rtx insn, int load_insn_bb)
1347 {
1348 rtx link;
1349
1350 /* Iterate through DEF-USE forward dependences. */
1351 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1352 {
1353 rtx next = XEXP (link, 0);
1354 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1355 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1356 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1357 && load_insn_bb != INSN_BB (next)
1358 && GET_MODE (link) == VOIDmode
1359 && (JUMP_P (next)
1360 || find_conditional_protection (next, load_insn_bb)))
1361 return 1;
1362 }
1363 return 0;
1364 } /* find_conditional_protection */
1365
1366 /* Returns 1 if the same insn1 that participates in the computation
1367 of load_insn's address also feeds a conditional branch
1368 guarding load_insn. This is true if we find the two DEF-USE
1369 chains:
1370 insn1 -> ... -> conditional-branch
1371 insn1 -> ... -> load_insn,
1372 and if a flow path exists:
1373 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1374 and if insn1 is on the path
1375 region-entry -> ... -> bb_trg -> ... load_insn.
1376
1377 Locate insn1 by climbing on LOG_LINKS from load_insn.
1378 Locate the branch by following INSN_DEPEND from insn1. */
1379
1380 static int
1381 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1382 {
1383 rtx link;
1384
1385 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1386 {
1387 rtx insn1 = XEXP (link, 0);
1388
1389 /* Must be a DEF-USE dependence upon non-branch. */
1390 if (GET_MODE (link) != VOIDmode
1391 || JUMP_P (insn1))
1392 continue;
1393
1394 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1395 if (INSN_BB (insn1) == bb_src
1396 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1397 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1398 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1399 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1400 continue;
1401
1402 /* Now search for the conditional-branch. */
1403 if (find_conditional_protection (insn1, bb_src))
1404 return 1;
1405
1406 /* Recursive step: search another insn1, "above" current insn1. */
1407 return is_conditionally_protected (insn1, bb_src, bb_trg);
1408 }
1409
1410 /* The chain does not exist. */
1411 return 0;
1412 } /* is_conditionally_protected */
1413
1414 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1415 load_insn can move speculatively from bb_src to bb_trg. All the
1416 following must hold:
1417
1418 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1419 (2) load_insn and insn2 have a def-use dependence upon
1420 the same insn 'insn1'.
1421 (3) either insn2 is in bb_trg, or:
1422 - there's only one split-block, and
1423 - insn2 is in that split-block (on the escape path).
1424
1425 From all these we can conclude that the two loads access memory
1426 addresses that differ at most by a constant, and hence if moving
1427 load_insn would cause an exception, it would have been caused by
1428 insn2 anyhow. */
1429
1430 static int
1431 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1432 {
1433 rtx back_link;
1434 candidate *candp = candidate_table + bb_src;
1435
1436 if (candp->split_bbs.nr_members != 1)
1437 /* Must have exactly one escape block. */
1438 return 0;
1439
1440 for (back_link = LOG_LINKS (load_insn);
1441 back_link; back_link = XEXP (back_link, 1))
1442 {
1443 rtx insn1 = XEXP (back_link, 0);
1444
1445 if (GET_MODE (back_link) == VOIDmode)
1446 {
1447 /* Found a DEF-USE dependence (insn1, load_insn). */
1448 rtx fore_link;
1449
1450 for (fore_link = INSN_DEPEND (insn1);
1451 fore_link; fore_link = XEXP (fore_link, 1))
1452 {
1453 rtx insn2 = XEXP (fore_link, 0);
1454 if (GET_MODE (fore_link) == VOIDmode)
1455 {
1456 /* Found a DEF-USE dependence (insn1, insn2). */
1457 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1458 /* insn2 not guaranteed to be a 1 base reg load. */
1459 continue;
1460
1461 if (INSN_BB (insn2) == bb_trg)
1462 /* insn2 is the similar load, in the target block. */
1463 return 1;
1464
1465 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1466 /* insn2 is a similar load, in a split-block. */
1467 return 1;
1468 }
1469 }
1470 }
1471 }
1472
1473 /* Couldn't find a similar load. */
1474 return 0;
1475 } /* is_pfree */
1476
1477 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1478 a load moved speculatively, or if load_insn is protected by
1479 a compare on load_insn's address). */
1480
1481 static int
1482 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1483 {
1484 if (FED_BY_SPEC_LOAD (load_insn))
1485 return 1;
1486
1487 if (LOG_LINKS (load_insn) == NULL)
1488 /* Dependence may 'hide' out of the region. */
1489 return 1;
1490
1491 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1492 return 1;
1493
1494 return 0;
1495 }
1496
1497 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1498 Return 1 if insn is exception-free (and the motion is valid)
1499 and 0 otherwise. */
1500
1501 static int
1502 is_exception_free (rtx insn, int bb_src, int bb_trg)
1503 {
1504 int insn_class = haifa_classify_insn (insn);
1505
1506 /* Handle non-load insns. */
1507 switch (insn_class)
1508 {
1509 case TRAP_FREE:
1510 return 1;
1511 case TRAP_RISKY:
1512 return 0;
1513 default:;
1514 }
1515
1516 /* Handle loads. */
1517 if (!flag_schedule_speculative_load)
1518 return 0;
1519 IS_LOAD_INSN (insn) = 1;
1520 switch (insn_class)
1521 {
1522 case IFREE:
1523 return (1);
1524 case IRISKY:
1525 return 0;
1526 case PFREE_CANDIDATE:
1527 if (is_pfree (insn, bb_src, bb_trg))
1528 return 1;
1529 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1530 case PRISKY_CANDIDATE:
1531 if (!flag_schedule_speculative_load_dangerous
1532 || is_prisky (insn, bb_src, bb_trg))
1533 return 0;
1534 break;
1535 default:;
1536 }
1537
1538 return flag_schedule_speculative_load_dangerous;
1539 }
1540 \f
1541 /* The number of insns from the current block scheduled so far. */
1542 static int sched_target_n_insns;
1543 /* The number of insns from the current block to be scheduled in total. */
1544 static int target_n_insns;
1545 /* The number of insns from the entire region scheduled so far. */
1546 static int sched_n_insns;
1547 /* Nonzero if the last scheduled insn was a jump. */
1548 static int last_was_jump;
1549
1550 /* Implementations of the sched_info functions for region scheduling. */
1551 static void init_ready_list (struct ready_list *);
1552 static int can_schedule_ready_p (rtx);
1553 static int new_ready (rtx);
1554 static int schedule_more_p (void);
1555 static const char *rgn_print_insn (rtx, int);
1556 static int rgn_rank (rtx, rtx);
1557 static int contributes_to_priority (rtx, rtx);
1558 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1559
1560 /* Return nonzero if there are more insns that should be scheduled. */
1561
1562 static int
1563 schedule_more_p (void)
1564 {
1565 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1566 }
1567
1568 /* Add all insns that are initially ready to the ready list READY. Called
1569 once before scheduling a set of insns. */
1570
1571 static void
1572 init_ready_list (struct ready_list *ready)
1573 {
1574 rtx prev_head = current_sched_info->prev_head;
1575 rtx next_tail = current_sched_info->next_tail;
1576 int bb_src;
1577 rtx insn;
1578
1579 target_n_insns = 0;
1580 sched_target_n_insns = 0;
1581 sched_n_insns = 0;
1582 last_was_jump = 0;
1583
1584 /* Print debugging information. */
1585 if (sched_verbose >= 5)
1586 debug_dependencies ();
1587
1588 /* Prepare current target block info. */
1589 if (current_nr_blocks > 1)
1590 {
1591 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1592
1593 bblst_last = 0;
1594 /* bblst_table holds split blocks and update blocks for each block after
1595 the current one in the region. Split blocks and update blocks are
1596 the TO blocks of region edges, so there can be at most rgn_nr_edges
1597 of them. */
1598 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1599 bblst_table = xmalloc (bblst_size * sizeof (basic_block));
1600
1601 edgelst_last = 0;
1602 edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge));
1603
1604 compute_trg_info (target_bb);
1605 }
1606
1607 /* Initialize ready list with all 'ready' insns in target block.
1608 Count number of insns in the target block being scheduled. */
1609 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1610 {
1611 if (INSN_DEP_COUNT (insn) == 0)
1612 {
1613 ready_add (ready, insn);
1614
1615 if (targetm.sched.adjust_priority)
1616 INSN_PRIORITY (insn) =
1617 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1618 }
1619 target_n_insns++;
1620 }
1621
1622 /* Add to ready list all 'ready' insns in valid source blocks.
1623 For speculative insns, check-live, exception-free, and
1624 issue-delay. */
1625 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1626 if (IS_VALID (bb_src))
1627 {
1628 rtx src_head;
1629 rtx src_next_tail;
1630 rtx tail, head;
1631
1632 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1633 src_next_tail = NEXT_INSN (tail);
1634 src_head = head;
1635
1636 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1637 {
1638 if (! INSN_P (insn))
1639 continue;
1640
1641 if (!CANT_MOVE (insn)
1642 && (!IS_SPECULATIVE_INSN (insn)
1643 || ((recog_memoized (insn) < 0
1644 || min_insn_conflict_delay (curr_state,
1645 insn, insn) <= 3)
1646 && check_live (insn, bb_src)
1647 && is_exception_free (insn, bb_src, target_bb))))
1648 if (INSN_DEP_COUNT (insn) == 0)
1649 {
1650 ready_add (ready, insn);
1651
1652 if (targetm.sched.adjust_priority)
1653 INSN_PRIORITY (insn) =
1654 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1655 }
1656 }
1657 }
1658 }
1659
1660 /* Called after taking INSN from the ready list. Returns nonzero if this
1661 insn can be scheduled, zero if we should silently discard it. */
1662
1663 static int
1664 can_schedule_ready_p (rtx insn)
1665 {
1666 if (JUMP_P (insn))
1667 last_was_jump = 1;
1668
1669 /* An interblock motion? */
1670 if (INSN_BB (insn) != target_bb)
1671 {
1672 basic_block b1;
1673
1674 if (IS_SPECULATIVE_INSN (insn))
1675 {
1676 if (!check_live (insn, INSN_BB (insn)))
1677 return 0;
1678 update_live (insn, INSN_BB (insn));
1679
1680 /* For speculative load, mark insns fed by it. */
1681 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1682 set_spec_fed (insn);
1683
1684 nr_spec++;
1685 }
1686 nr_inter++;
1687
1688 /* Update source block boundaries. */
1689 b1 = BLOCK_FOR_INSN (insn);
1690 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1691 {
1692 /* We moved all the insns in the basic block.
1693 Emit a note after the last insn and update the
1694 begin/end boundaries to point to the note. */
1695 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1696 BB_HEAD (b1) = note;
1697 BB_END (b1) = note;
1698 }
1699 else if (insn == BB_END (b1))
1700 {
1701 /* We took insns from the end of the basic block,
1702 so update the end of block boundary so that it
1703 points to the first insn we did not move. */
1704 BB_END (b1) = PREV_INSN (insn);
1705 }
1706 else if (insn == BB_HEAD (b1))
1707 {
1708 /* We took insns from the start of the basic block,
1709 so update the start of block boundary so that
1710 it points to the first insn we did not move. */
1711 BB_HEAD (b1) = NEXT_INSN (insn);
1712 }
1713 }
1714 else
1715 {
1716 /* In block motion. */
1717 sched_target_n_insns++;
1718 }
1719 sched_n_insns++;
1720
1721 return 1;
1722 }
1723
1724 /* Called after INSN has all its dependencies resolved. Return nonzero
1725 if it should be moved to the ready list or the queue, or zero if we
1726 should silently discard it. */
1727 static int
1728 new_ready (rtx next)
1729 {
1730 /* For speculative insns, before inserting to ready/queue,
1731 check live, exception-free, and issue-delay. */
1732 if (INSN_BB (next) != target_bb
1733 && (!IS_VALID (INSN_BB (next))
1734 || CANT_MOVE (next)
1735 || (IS_SPECULATIVE_INSN (next)
1736 && ((recog_memoized (next) >= 0
1737 && min_insn_conflict_delay (curr_state, next, next) > 3)
1738 || !check_live (next, INSN_BB (next))
1739 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1740 return 0;
1741 return 1;
1742 }
1743
1744 /* Return a string that contains the insn uid and optionally anything else
1745 necessary to identify this insn in an output. It's valid to use a
1746 static buffer for this. The ALIGNED parameter should cause the string
1747 to be formatted so that multiple output lines will line up nicely. */
1748
1749 static const char *
1750 rgn_print_insn (rtx insn, int aligned)
1751 {
1752 static char tmp[80];
1753
1754 if (aligned)
1755 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1756 else
1757 {
1758 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1759 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1760 else
1761 sprintf (tmp, "%d", INSN_UID (insn));
1762 }
1763 return tmp;
1764 }
1765
1766 /* Compare priority of two insns. Return a positive number if the second
1767 insn is to be preferred for scheduling, and a negative one if the first
1768 is to be preferred. Zero if they are equally good. */
1769
1770 static int
1771 rgn_rank (rtx insn1, rtx insn2)
1772 {
1773 /* Some comparisons make sense only in interblock scheduling. */
1774 if (INSN_BB (insn1) != INSN_BB (insn2))
1775 {
1776 int spec_val, prob_val;
1777
1778 /* Prefer an inblock motion over an interblock motion. */
1779 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1780 return 1;
1781 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1782 return -1;
1783
1784 /* Prefer a useful motion over a speculative one. */
1785 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1786 if (spec_val)
1787 return spec_val;
1788
1789 /* Prefer a more probable (speculative) insn. */
1790 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1791 if (prob_val)
1792 return prob_val;
1793 }
1794 return 0;
1795 }
1796
1797 /* NEXT is an instruction that depends on INSN (a backward dependence);
1798 return nonzero if we should include this dependence in priority
1799 calculations. */
1800
1801 static int
1802 contributes_to_priority (rtx next, rtx insn)
1803 {
1804 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1805 }
1806
1807 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1808 conditionally set before INSN. Store the set of registers that
1809 must be considered as used by this jump in USED and that of
1810 registers that must be considered as set in SET. */
1811
1812 static void
1813 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1814 regset cond_exec ATTRIBUTE_UNUSED,
1815 regset used ATTRIBUTE_UNUSED,
1816 regset set ATTRIBUTE_UNUSED)
1817 {
1818 /* Nothing to do here, since we postprocess jumps in
1819 add_branch_dependences. */
1820 }
1821
1822 /* Used in schedule_insns to initialize current_sched_info for scheduling
1823 regions (or single basic blocks). */
1824
1825 static struct sched_info region_sched_info =
1826 {
1827 init_ready_list,
1828 can_schedule_ready_p,
1829 schedule_more_p,
1830 new_ready,
1831 rgn_rank,
1832 rgn_print_insn,
1833 contributes_to_priority,
1834 compute_jump_reg_dependencies,
1835
1836 NULL, NULL,
1837 NULL, NULL,
1838 0, 0, 0
1839 };
1840
1841 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1842
1843 static bool
1844 sets_likely_spilled (rtx pat)
1845 {
1846 bool ret = false;
1847 note_stores (pat, sets_likely_spilled_1, &ret);
1848 return ret;
1849 }
1850
1851 static void
1852 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1853 {
1854 bool *ret = (bool *) data;
1855
1856 if (GET_CODE (pat) == SET
1857 && REG_P (x)
1858 && REGNO (x) < FIRST_PSEUDO_REGISTER
1859 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
1860 *ret = true;
1861 }
1862
1863 /* Add dependences so that branches are scheduled to run last in their
1864 block. */
1865
1866 static void
1867 add_branch_dependences (rtx head, rtx tail)
1868 {
1869 rtx insn, last;
1870
1871 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
1872 that can throw exceptions, force them to remain in order at the end of
1873 the block by adding dependencies and giving the last a high priority.
1874 There may be notes present, and prev_head may also be a note.
1875
1876 Branches must obviously remain at the end. Calls should remain at the
1877 end since moving them results in worse register allocation. Uses remain
1878 at the end to ensure proper register allocation.
1879
1880 cc0 setters remain at the end because they can't be moved away from
1881 their cc0 user.
1882
1883 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
1884 are not moved before reload because we can wind up with register
1885 allocation failures. */
1886
1887 insn = tail;
1888 last = 0;
1889 while (CALL_P (insn)
1890 || JUMP_P (insn)
1891 || (NONJUMP_INSN_P (insn)
1892 && (GET_CODE (PATTERN (insn)) == USE
1893 || GET_CODE (PATTERN (insn)) == CLOBBER
1894 || can_throw_internal (insn)
1895 #ifdef HAVE_cc0
1896 || sets_cc0_p (PATTERN (insn))
1897 #endif
1898 || (!reload_completed
1899 && sets_likely_spilled (PATTERN (insn)))))
1900 || NOTE_P (insn))
1901 {
1902 if (!NOTE_P (insn))
1903 {
1904 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
1905 {
1906 add_dependence (last, insn, REG_DEP_ANTI);
1907 INSN_REF_COUNT (insn)++;
1908 }
1909
1910 CANT_MOVE (insn) = 1;
1911
1912 last = insn;
1913 }
1914
1915 /* Don't overrun the bounds of the basic block. */
1916 if (insn == head)
1917 break;
1918
1919 insn = PREV_INSN (insn);
1920 }
1921
1922 /* Make sure these insns are scheduled last in their block. */
1923 insn = last;
1924 if (insn != 0)
1925 while (insn != head)
1926 {
1927 insn = prev_nonnote_insn (insn);
1928
1929 if (INSN_REF_COUNT (insn) != 0)
1930 continue;
1931
1932 add_dependence (last, insn, REG_DEP_ANTI);
1933 INSN_REF_COUNT (insn) = 1;
1934 }
1935 }
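
/* Worked example (illustrative only).  Suppose a block ends with

       insn A   (ordinary arithmetic)
       insn U   (USE of the return value register)
       insn J   (conditional jump)

   The first loop above walks back from J, marks J and U with CANT_MOVE and
   makes J depend on U (REG_DEP_ANTI), so U stays before J.  The second loop
   then makes U, the first insn of that end group, depend on every earlier
   insn whose INSN_REF_COUNT is still zero (here A), which pins the branch
   group at the end of the block.  */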
1936
1937 /* Data structures for the computation of data dependences in a region.  We
1938 keep one `deps' structure for every basic block. Before analyzing the
1939 data dependences for a bb, its variables are initialized as a function of
1940 the variables of its predecessors. When the analysis for a bb completes,
1941 we save the contents to the corresponding bb_deps[bb] variable. */
1942
1943 static struct deps *bb_deps;
1944
1945 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
1946
1947 static rtx
1948 concat_INSN_LIST (rtx copy, rtx old)
1949 {
1950 rtx new = old;
1951 for (; copy ; copy = XEXP (copy, 1))
1952 new = alloc_INSN_LIST (XEXP (copy, 0), new);
1953 return new;
1954 }
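
/* Illustrative example with hypothetical insns i1, i2 and i3: for
   COPY = (i1 i2) and OLD = (i3) the loop above prepends i1 and then i2,
   yielding (i2 i1 i3).  The copied elements therefore end up in reverse
   order in front of OLD, which does not matter for the dependence analysis
   that walks these lists.  */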
1955
1956 static void
1957 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
1958 rtx *old_mems_p)
1959 {
1960 rtx new_insns = *old_insns_p;
1961 rtx new_mems = *old_mems_p;
1962
1963 while (copy_insns)
1964 {
1965 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
1966 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
1967 copy_insns = XEXP (copy_insns, 1);
1968 copy_mems = XEXP (copy_mems, 1);
1969 }
1970
1971 *old_insns_p = new_insns;
1972 *old_mems_p = new_mems;
1973 }
1974
1975 /* After computing the dependencies for block BB, propagate the dependencies
1976 found in TMP_DEPS to the successors of the block. */
1977 static void
1978 propagate_deps (int bb, struct deps *pred_deps)
1979 {
1980 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
1981 edge_iterator ei;
1982 edge e;
1983
1984 /* bb's structures are inherited by its successors. */
1985 FOR_EACH_EDGE (e, ei, block->succs)
1986 {
1987 struct deps *succ_deps;
1988 unsigned reg;
1989 reg_set_iterator rsi;
1990
1991 /* Only bbs "below" bb, in the same region, are interesting. */
1992 if (e->dest == EXIT_BLOCK_PTR
1993 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
1994 || BLOCK_TO_BB (e->dest->index) <= bb)
1995 continue;
1996
1997 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
1998
1999       /* The reg_last lists are inherited by the successor.  */
2000 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2001 {
2002 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2003 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2004
2005 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2006 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2007 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2008 succ_rl->clobbers);
2009 succ_rl->uses_length += pred_rl->uses_length;
2010 succ_rl->clobbers_length += pred_rl->clobbers_length;
2011 }
2012 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2013
2014       /* Mem read/write lists are inherited by the successor.  */
2015 concat_insn_mem_list (pred_deps->pending_read_insns,
2016 pred_deps->pending_read_mems,
2017 &succ_deps->pending_read_insns,
2018 &succ_deps->pending_read_mems);
2019 concat_insn_mem_list (pred_deps->pending_write_insns,
2020 pred_deps->pending_write_mems,
2021 &succ_deps->pending_write_insns,
2022 &succ_deps->pending_write_mems);
2023
2024 succ_deps->last_pending_memory_flush
2025 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2026 succ_deps->last_pending_memory_flush);
2027
2028 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2029 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2030
2031       /* last_function_call is inherited by the successor.  */
2032 succ_deps->last_function_call
2033 = concat_INSN_LIST (pred_deps->last_function_call,
2034 succ_deps->last_function_call);
2035
2036       /* sched_before_next_call is inherited by the successor.  */
2037 succ_deps->sched_before_next_call
2038 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2039 succ_deps->sched_before_next_call);
2040 }
2041
2042 /* These lists should point to the right place, for correct
2043 freeing later. */
2044 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2045 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2046 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2047 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2048
2049 /* Can't allow these to be freed twice. */
2050 pred_deps->pending_read_insns = 0;
2051 pred_deps->pending_read_mems = 0;
2052 pred_deps->pending_write_insns = 0;
2053 pred_deps->pending_write_mems = 0;
2054 }
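
/* Note on memory management in propagate_deps: concat_insn_mem_list gives
   each successor freshly allocated copies of the pending lists, while the
   original nodes are handed over to bb_deps[bb] and the pointers in
   PRED_DEPS are cleared.  free_deps (&tmp_deps) in
   compute_block_backward_dependences therefore does not touch them, and
   they are released exactly once by free_pending_lists at the end of the
   region.  */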
2055
2056 /* Compute backward dependences inside bb. In a multiple blocks region:
2057 (1) a bb is analyzed after its predecessors, and (2) the lists in
2058 effect at the end of bb (after analyzing for bb) are inherited by
2059 bb's successors.
2060
2061 Specifically for reg-reg data dependences, the block insns are
2062 scanned by sched_analyze () top-to-bottom. Two lists are
2063 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2064 and reg_last[].uses for register USEs.
2065
2066 When analysis is completed for bb, we update for its successors:
2067      - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2068      - USES[succ] = Union (USES [succ], USES [bb])
2069
2070 The mechanism for computing mem-mem data dependence is very
2071 similar, and the result is interblock dependences in the region. */
2072
2073 static void
2074 compute_block_backward_dependences (int bb)
2075 {
2076 rtx head, tail;
2077 struct deps tmp_deps;
2078
2079 tmp_deps = bb_deps[bb];
2080
2081 /* Do the analysis for this block. */
2082 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2083 sched_analyze (&tmp_deps, head, tail);
2084 add_branch_dependences (head, tail);
2085
2086 if (current_nr_blocks > 1)
2087 propagate_deps (bb, &tmp_deps);
2088
2089 /* Free up the INSN_LISTs. */
2090 free_deps (&tmp_deps);
2091 }
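
/* Worked illustration with hypothetical pseudo registers: if block bb0
   contains "r70 = ..." and "... = r71", and bb1 is an in-region successor
   scheduled after it, then once compute_block_backward_dependences (0) has
   run, bb_deps[1].reg_last[70].sets holds the bb0 setter and
   bb_deps[1].reg_last[71].uses holds the bb0 use.  When sched_analyze later
   scans bb1, a use or redefinition of r70 there depends on the bb0 setter,
   and a redefinition of r71 gets an anti dependence on the bb0 use, giving
   the interblock dependences mentioned above.  */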
2092
2093 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2094 them to the unused_*_list variables, so that they can be reused. */
2095
2096 static void
2097 free_pending_lists (void)
2098 {
2099 int bb;
2100
2101 for (bb = 0; bb < current_nr_blocks; bb++)
2102 {
2103 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2104 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2105 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2106 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2107 }
2108 }
2109 \f
2110 /* Print dependences for debugging, callable from debugger. */
2111
2112 void
2113 debug_dependencies (void)
2114 {
2115 int bb;
2116
2117 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2118 for (bb = 0; bb < current_nr_blocks; bb++)
2119 {
2120 rtx head, tail;
2121 rtx next_tail;
2122 rtx insn;
2123
2124 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2125 next_tail = NEXT_INSN (tail);
2126 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2127 BB_TO_BLOCK (bb), bb);
2128
2129 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2130 "insn", "code", "bb", "dep", "prio", "cost",
2131 "reservation");
2132 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2133 "----", "----", "--", "---", "----", "----",
2134 "-----------");
2135
2136 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2137 {
2138 rtx link;
2139
2140 if (! INSN_P (insn))
2141 {
2142 int n;
2143 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2144 if (NOTE_P (insn))
2145 {
2146 n = NOTE_LINE_NUMBER (insn);
2147 if (n < 0)
2148 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2149 else
2150 {
2151 expanded_location xloc;
2152 NOTE_EXPANDED_LOCATION (xloc, insn);
2153 fprintf (sched_dump, "line %d, file %s\n",
2154 xloc.line, xloc.file);
2155 }
2156 }
2157 else
2158 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2159 continue;
2160 }
2161
2162 fprintf (sched_dump,
2163 ";; %s%5d%6d%6d%6d%6d%6d ",
2164 (SCHED_GROUP_P (insn) ? "+" : " "),
2165 INSN_UID (insn),
2166 INSN_CODE (insn),
2167 INSN_BB (insn),
2168 INSN_DEP_COUNT (insn),
2169 INSN_PRIORITY (insn),
2170 insn_cost (insn, 0, 0));
2171
2172 if (recog_memoized (insn) < 0)
2173 fprintf (sched_dump, "nothing");
2174 else
2175 print_reservation (sched_dump, insn);
2176
2177 fprintf (sched_dump, "\t: ");
2178 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2179 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2180 fprintf (sched_dump, "\n");
2181 }
2182 }
2183 fprintf (sched_dump, "\n");
2184 }
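
/* Typical use: with the compiler stopped in a debugger somewhere inside the
   scheduler (so that sched_dump and the current region are set up), the
   dependence table above can be printed with

     (gdb) call debug_dependencies ()

   The gdb session shown is only an example of how such a debugging hook is
   normally invoked.  */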
2185 \f
2186 /* Returns true if all the basic blocks of the current region have the
2187 BB_DISABLE_SCHEDULE flag set, which means the region must not be scheduled.  */
2188 static bool
2189 sched_is_disabled_for_current_region_p (void)
2190 {
2191 int bb;
2192
2193 for (bb = 0; bb < current_nr_blocks; bb++)
2194 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2195 return false;
2196
2197 return true;
2198 }
2199
2200 /* Schedule a region. A region is either an inner loop, a loop-free
2201 subroutine, or a single basic block. Each bb in the region is
2202 scheduled after its flow predecessors. */
2203
2204 static void
2205 schedule_region (int rgn)
2206 {
2207 basic_block block;
2208 edge_iterator ei;
2209 edge e;
2210 int bb;
2211 int rgn_n_insns = 0;
2212 int sched_rgn_n_insns = 0;
2213
2214 /* Set variables for the current region. */
2215 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2216 current_blocks = RGN_BLOCKS (rgn);
2217
2218   /* Don't schedule a region in which every block is marked with
2219      BB_DISABLE_SCHEDULE.  */
2220 if (sched_is_disabled_for_current_region_p ())
2221 return;
2222
2223 init_deps_global ();
2224
2225 /* Initializations for region data dependence analysis. */
2226 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2227 for (bb = 0; bb < current_nr_blocks; bb++)
2228 init_deps (bb_deps + bb);
2229
2230 /* Compute LOG_LINKS. */
2231 for (bb = 0; bb < current_nr_blocks; bb++)
2232 compute_block_backward_dependences (bb);
2233
2234 /* Compute INSN_DEPEND. */
2235 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2236 {
2237 rtx head, tail;
2238 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2239
2240 compute_forward_dependences (head, tail);
2241
2242 if (targetm.sched.dependencies_evaluation_hook)
2243 targetm.sched.dependencies_evaluation_hook (head, tail);
2244
2245 }
2246
2247 /* Set priorities. */
2248 for (bb = 0; bb < current_nr_blocks; bb++)
2249 {
2250 rtx head, tail;
2251 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2252
2253 rgn_n_insns += set_priorities (head, tail);
2254 }
2255
2256 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2257 if (current_nr_blocks > 1)
2258 {
2259 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2260
2261 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2262 sbitmap_vector_zero (dom, current_nr_blocks);
2263
2264 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2265 rgn_nr_edges = 0;
2266 FOR_EACH_BB (block)
2267 {
2268 if (CONTAINING_RGN (block->index) != rgn)
2269 continue;
2270 FOR_EACH_EDGE (e, ei, block->succs)
2271 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2272 }
2273
2274 rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge));
2275 rgn_nr_edges = 0;
2276 FOR_EACH_BB (block)
2277 {
2278 if (CONTAINING_RGN (block->index) != rgn)
2279 continue;
2280 FOR_EACH_EDGE (e, ei, block->succs)
2281 rgn_edges[rgn_nr_edges++] = e;
2282 }
2283
2284 /* Split edges. */
2285 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2286 sbitmap_vector_zero (pot_split, current_nr_blocks);
2287 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2288 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2289
2290 /* Compute probabilities, dominators, split_edges. */
2291 for (bb = 0; bb < current_nr_blocks; bb++)
2292 compute_dom_prob_ps (bb);
2293 }
2294
2295 /* Now we can schedule all blocks. */
2296 for (bb = 0; bb < current_nr_blocks; bb++)
2297 {
2298 rtx head, tail;
2299 int b = BB_TO_BLOCK (bb);
2300
2301 get_block_head_tail (b, &head, &tail);
2302
2303 if (no_real_insns_p (head, tail))
2304 continue;
2305
2306 current_sched_info->prev_head = PREV_INSN (head);
2307 current_sched_info->next_tail = NEXT_INSN (tail);
2308
2309 if (write_symbols != NO_DEBUG)
2310 {
2311 save_line_notes (b, head, tail);
2312 rm_line_notes (head, tail);
2313 }
2314
2315 /* rm_other_notes only removes notes which are _inside_ the
2316 block---that is, it won't remove notes before the first real insn
2317 or after the last real insn of the block. So if the first insn
2318 has a REG_SAVE_NOTE which would otherwise be emitted before the
2319 insn, it is redundant with the note before the start of the
2320 block, and so we have to take it out. */
2321 if (INSN_P (head))
2322 {
2323 rtx note;
2324
2325 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2326 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2327 remove_note (head, note);
2328 }
2329
2330 /* Remove remaining note insns from the block, save them in
2331 note_list. These notes are restored at the end of
2332 schedule_block (). */
2333 rm_other_notes (head, tail);
2334
2335 target_bb = bb;
2336
2337 current_sched_info->queue_must_finish_empty
2338 = current_nr_blocks > 1 && !flag_schedule_interblock;
2339
2340 schedule_block (b, rgn_n_insns);
2341 sched_rgn_n_insns += sched_n_insns;
2342
2343 /* Update target block boundaries. */
2344 if (head == BB_HEAD (BASIC_BLOCK (b)))
2345 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2346 if (tail == BB_END (BASIC_BLOCK (b)))
2347 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2348
2349 /* Clean up. */
2350 if (current_nr_blocks > 1)
2351 {
2352 free (candidate_table);
2353 free (bblst_table);
2354 free (edgelst_table);
2355 }
2356 }
2357
2358 /* Sanity check: verify that all region insns were scheduled. */
2359 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2360
2361 /* Restore line notes. */
2362 if (write_symbols != NO_DEBUG)
2363 {
2364 for (bb = 0; bb < current_nr_blocks; bb++)
2365 {
2366 rtx head, tail;
2367 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2368 restore_line_notes (head, tail);
2369 }
2370 }
2371
2372 /* Done with this region. */
2373 free_pending_lists ();
2374
2375 finish_deps_global ();
2376
2377 free (bb_deps);
2378
2379 if (current_nr_blocks > 1)
2380 {
2381 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2382 FOR_EACH_BB (block)
2383 {
2384 if (CONTAINING_RGN (block->index) != rgn)
2385 continue;
2386 FOR_EACH_EDGE (e, ei, block->succs)
2387 e->aux = NULL;
2388 }
2389
2390 free (prob);
2391 sbitmap_vector_free (dom);
2392 sbitmap_vector_free (pot_split);
2393 sbitmap_vector_free (ancestor_edges);
2394 free (rgn_edges);
2395 }
2396 }
2397
2398 /* Indexed by region, holds the number of death notes found in that region.
2399 Used for consistency checks. */
2400 static int *deaths_in_region;
2401
2402 /* Initialize data structures for region scheduling. */
2403
2404 static void
2405 init_regions (void)
2406 {
2407 sbitmap blocks;
2408 int rgn;
2409
2410 nr_regions = 0;
2411 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2412 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2413 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2414 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2415
2416 /* Compute regions for scheduling. */
2417 if (reload_completed
2418 || n_basic_blocks == 1
2419 || !flag_schedule_interblock
2420 || is_cfg_nonregular ())
2421 {
2422 find_single_block_region ();
2423 }
2424 else
2425 {
2426 /* Compute the dominators and post dominators. */
2427 calculate_dominance_info (CDI_DOMINATORS);
2428
2429 /* Find regions. */
2430 find_rgns ();
2431
2432 if (sched_verbose >= 3)
2433 debug_regions ();
2434
2435 /* For now. This will move as more and more of haifa is converted
2436 to using the cfg code in flow.c. */
2437 free_dominance_info (CDI_DOMINATORS);
2438 }
2439
2440
2441 if (CHECK_DEAD_NOTES)
2442 {
2443 blocks = sbitmap_alloc (last_basic_block);
2444 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2445 /* Remove all death notes from the subroutine. */
2446 for (rgn = 0; rgn < nr_regions; rgn++)
2447 {
2448 int b;
2449
2450 sbitmap_zero (blocks);
2451 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2452 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2453
2454 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2455 }
2456 sbitmap_free (blocks);
2457 }
2458 else
2459 count_or_remove_death_notes (NULL, 1);
2460 }
2461
2462 /* The one entry point in this file. DUMP_FILE is the dump file for
2463 this pass. */
2464
2465 void
2466 schedule_insns (FILE *dump_file)
2467 {
2468 sbitmap large_region_blocks, blocks;
2469 int rgn;
2470 int any_large_regions;
2471 basic_block bb;
2472
2473 /* Taking care of this degenerate case makes the rest of
2474 this code simpler. */
2475 if (n_basic_blocks == 0)
2476 return;
2477
2478 nr_inter = 0;
2479 nr_spec = 0;
2480
2481 sched_init (dump_file);
2482
2483 init_regions ();
2484
2485 current_sched_info = &region_sched_info;
2486
2487 /* Schedule every region in the subroutine. */
2488 for (rgn = 0; rgn < nr_regions; rgn++)
2489 schedule_region (rgn);
2490
2491 /* Update life analysis for the subroutine. Do single block regions
2492 first so that we can verify that live_at_start didn't change. Then
2493 do all other blocks. */
2494 /* ??? There is an outside possibility that update_life_info, or more
2495 to the point propagate_block, could get called with nonzero flags
2496 more than once for one basic block. This would be kinda bad if it
2497 were to happen, since REG_INFO would be accumulated twice for the
2498 block, and we'd have twice the REG_DEAD notes.
2499
2500 I'm fairly certain that this _shouldn't_ happen, since I don't think
2501 that live_at_start should change at region heads. Not sure what the
2502 best way to test for this kind of thing... */
2503
2504 allocate_reg_life_data ();
2505 compute_bb_for_insn ();
2506
2507 any_large_regions = 0;
2508 large_region_blocks = sbitmap_alloc (last_basic_block);
2509 sbitmap_zero (large_region_blocks);
2510 FOR_EACH_BB (bb)
2511 SET_BIT (large_region_blocks, bb->index);
2512
2513 blocks = sbitmap_alloc (last_basic_block);
2514 sbitmap_zero (blocks);
2515
2516 /* Update life information. For regions consisting of multiple blocks
2517 we've possibly done interblock scheduling that affects global liveness.
2518 For regions consisting of single blocks we need to do only local
2519 liveness. */
2520 for (rgn = 0; rgn < nr_regions; rgn++)
2521 if (RGN_NR_BLOCKS (rgn) > 1)
2522 any_large_regions = 1;
2523 else
2524 {
2525 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2526 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2527 }
2528
2529 /* Don't update reg info after reload, since that affects
2530 regs_ever_live, which should not change after reload. */
2531 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2532 (reload_completed ? PROP_DEATH_NOTES
2533 : PROP_DEATH_NOTES | PROP_REG_INFO));
2534 if (any_large_regions)
2535 {
2536 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2537 PROP_DEATH_NOTES | PROP_REG_INFO);
2538 }
2539
2540 if (CHECK_DEAD_NOTES)
2541 {
2542       /* Verify the counts of death notes in the single basic block
2543 	 regions.  */
2544 for (rgn = 0; rgn < nr_regions; rgn++)
2545 if (RGN_NR_BLOCKS (rgn) == 1)
2546 {
2547 sbitmap_zero (blocks);
2548 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2549
2550 gcc_assert (deaths_in_region[rgn]
2551 == count_or_remove_death_notes (blocks, 0));
2552 }
2553 free (deaths_in_region);
2554 }
2555
2556 /* Reposition the prologue and epilogue notes in case we moved the
2557 prologue/epilogue insns. */
2558 if (reload_completed)
2559 reposition_prologue_and_epilogue_notes (get_insns ());
2560
2561 /* Delete redundant line notes. */
2562 if (write_symbols != NO_DEBUG)
2563 rm_redundant_line_notes ();
2564
2565 if (sched_verbose)
2566 {
2567 if (reload_completed == 0 && flag_schedule_interblock)
2568 {
2569 fprintf (sched_dump,
2570 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2571 nr_inter, nr_spec);
2572 }
2573 else
2574 gcc_assert (nr_inter <= 0);
2575 fprintf (sched_dump, "\n\n");
2576 }
2577
2578 /* Clean up. */
2579 free (rgn_table);
2580 free (rgn_bb_table);
2581 free (block_to_bb);
2582 free (containing_rgn);
2583
2584 sched_finish ();
2585
2586 sbitmap_free (blocks);
2587 sbitmap_free (large_region_blocks);
2588 }
2589 #endif