gcc/haifa-sched.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com). Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6
7 This file is part of GNU CC.
8
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13
14 GNU CC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
23
24
25 /* Instruction scheduling pass.
26
27 This pass implements list scheduling within basic blocks. It is
28 run twice: (1) after flow analysis, but before register allocation,
29 and (2) after register allocation.
30
31 The first run performs interblock scheduling, moving insns between
32 different blocks in the same "region", and the second runs only
33 basic block scheduling.
34
35 Interblock motions performed are useful motions and speculative
36 motions, including speculative loads. Motions requiring code
37 duplication are not supported. The identification of motion type
38 and the check for validity of speculative motions requires
39 construction and analysis of the function's control flow graph.
40 The scheduler works as follows:
41
42 We compute insn priorities based on data dependencies. Flow
43 analysis only creates a fraction of the data-dependencies we must
44 observe: namely, only those dependencies which the combiner can be
45 expected to use. For this pass, we must therefore create the
46 remaining dependencies we need to observe: register dependencies,
47 memory dependencies, dependencies to keep function calls in order,
48 and the dependence between a conditional branch and the setting of
49 condition codes are all dealt with here.
50
51 The scheduler first traverses the data flow graph, starting with
52 the last instruction, and proceeding to the first, assigning values
53 to insn_priority as it goes. This sorts the instructions
54 topologically by data dependence.
55
56 Once priorities have been established, we order the insns using
57 list scheduling. This works as follows: starting with a list of
58 all the ready insns, and sorted according to priority number, we
59 schedule the insn from the end of the list by placing its
60 predecessors in the list according to their priority order. We
61 consider this insn scheduled by setting the pointer to the "end" of
62 the list to point to the previous insn. When an insn has no
63 predecessors, we either queue it until sufficient time has elapsed
64 or add it to the ready list. As the instructions are scheduled or
65 when stalls are introduced, the queue advances and dumps insns into
66 the ready list. When all insns down to the lowest priority have
67 been scheduled, the critical path of the basic block has been made
68 as short as possible. The remaining insns are then scheduled in
69 remaining slots.
70
71 Function unit conflicts are resolved during forward list scheduling
72 by tracking the time when each insn is committed to the schedule
73 and from that, the time the function units it uses must be free.
74 As insns on the ready list are considered for scheduling, those
75 that would result in a blockage of the already committed insns are
76 queued until no blockage will result.
77
78 The following list shows the order in which we want to break ties
79 among insns in the ready list:
80
81 1. choose insn with the longest path to end of bb, ties
82 broken by
83 2. choose insn with least contribution to register pressure,
84 ties broken by
85 3. prefer in-block over interblock motion, ties broken by
86 4. prefer useful over speculative motion, ties broken by
87 5. choose insn with largest control flow probability, ties
88 broken by
89 6. choose insn with the least dependences upon the previously
90 scheduled insn, or finally
91 7. choose the insn which has the most insns dependent on it, or finally
92 8. choose insn with lowest UID.
93
94 Memory references complicate matters. Only if we can be certain
95 that memory references are not part of the data dependency graph
96 (via true, anti, or output dependence), can we move operations past
97 memory references. To first approximation, reads can be done
98 independently, while writes introduce dependencies. Better
99 approximations will yield fewer dependencies.
100
101 Before reload, an extended analysis of interblock data dependences
102 is required for interblock scheduling. This is performed in
103 compute_block_backward_dependences ().
104
105 Dependencies set up by memory references are treated in exactly the
106 same way as other dependencies, by using LOG_LINKS backward
107 dependences. LOG_LINKS are translated into INSN_DEPEND forward
108 dependences for the purpose of forward list scheduling.
109
110 Having optimized the critical path, we may have also unduly
111 extended the lifetimes of some registers. If an operation requires
112 that constants be loaded into registers, it is certainly desirable
113 to load those constants as early as necessary, but no earlier.
114 I.e., it will not do to load up a bunch of registers at the
115 beginning of a basic block only to use them at the end, if they
116 could be loaded later, since this may result in excessive register
117 utilization.
118
119 Note that since branches are never in basic blocks, but only end
120 basic blocks, this pass will not move branches. But that is ok,
121 since we can use GNU's delayed branch scheduling pass to take care
122 of this case.
123
124 Also note that no further optimizations based on algebraic
125 identities are performed, so this pass would be a good one to
126 perform instruction splitting, such as breaking up a multiply
127 instruction into shifts and adds where that is profitable.
128
129 Given the memory aliasing analysis that this pass should perform,
130 it should be possible to remove redundant stores to memory, and to
131 load values from registers instead of hitting memory.
132
133 Before reload, speculative insns are moved only if a 'proof' exists
134 that no exception will be caused by this, and if no live registers
135 exist that inhibit the motion (live registers constraints are not
136 represented by data dependence edges).
137
138 This pass must update information that subsequent passes expect to
139 be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
140 reg_n_calls_crossed, and reg_live_length. Also, BLOCK_HEAD,
141 BLOCK_END.
142
143 The information in the line number notes is carefully retained by
144 this pass. Notes that refer to the starting and ending of
145 exception regions are also carefully retained by this pass. All
146 other NOTE insns are grouped in their same relative order at the
147 beginning of basic blocks and regions that have been scheduled.
148
149 The main entry point for this pass is schedule_insns(), called for
150 each function. The work of the scheduler is organized in three
151 levels: (1) function level: insns are subject to splitting,
152 control-flow-graph is constructed, regions are computed (after
153 reload, each region is of one block), (2) region level: control
154 flow graph attributes required for interblock scheduling are
155 computed (dominators, reachability, etc.), data dependences and
156 priorities are computed, and (3) block level: insns in the block
157 are actually scheduled. */
158 \f
159 #include "config.h"
160 #include "system.h"
161 #include "toplev.h"
162 #include "rtl.h"
163 #include "tm_p.h"
164 #include "hard-reg-set.h"
165 #include "basic-block.h"
166 #include "regs.h"
167 #include "function.h"
168 #include "flags.h"
169 #include "insn-config.h"
170 #include "insn-attr.h"
171 #include "except.h"
172 #include "toplev.h"
173 #include "recog.h"
174
175 extern char *reg_known_equiv_p;
176 extern rtx *reg_known_value;
177
178 #ifdef INSN_SCHEDULING
179
180 /* target_units bitmask has 1 for each unit in the cpu. It should be
181 possible to compute this variable from the machine description.
182 But currently it is computed by examining the insn list. Since
183 this is only needed for visualization, it seems an acceptable
184 solution. (For understanding the mapping of bits to units, see
185 definition of function_units[] in "insn-attrtab.c".) */
186
187 static int target_units = 0;
188
189 /* issue_rate is the number of insns that can be scheduled in the same
190 machine cycle. It can be defined in the config/mach/mach.h file,
191 otherwise we set it to 1. */
192
193 static int issue_rate;
194
195 #ifndef ISSUE_RATE
196 #define ISSUE_RATE 1
197 #endif
198
199 /* sched-verbose controls the amount of debugging output the
200 scheduler prints. It is controlled by -fsched-verbose=N:
201 N>0 and no -DSR : the output is directed to stderr.
202 N>=10 will direct the printouts to stderr (regardless of -dSR).
203 N=1: same as -dSR.
204 N=2: bb's probabilities, detailed ready list info, unit/insn info.
205 N=3: rtl at abort point, control-flow, regions info.
206 N=5: dependences info. */
207
208 #define MAX_RGN_BLOCKS 10
209 #define MAX_RGN_INSNS 100
210
211 static int sched_verbose_param = 0;
212 static int sched_verbose = 0;
213
214 /* nr_inter/spec counts interblock/speculative motion for the function. */
215 static int nr_inter, nr_spec;
216
217
218 /* Debugging file. All printouts are sent to dump, which is always set,
219 either to stderr, or to the dump listing file (-dRS). */
220 static FILE *dump = 0;
221
222 /* fix_sched_param() is called from toplev.c upon detection
223 of the -fsched-verbose=N option. */
224
225 void
226 fix_sched_param (param, val)
227 const char *param, *val;
228 {
229 if (!strcmp (param, "verbose"))
230 sched_verbose_param = atoi (val);
231 else
232 warning ("fix_sched_param: unknown param: %s", param);
233 }
234
235 /* Describe state of dependencies used during sched_analyze phase. */
236 struct deps
237 {
238 /* The *_insns and *_mems are paired lists. Each pending memory operation
239 will have a pointer to the MEM rtx on one list and a pointer to the
240 containing insn on the other list in the same place in the list. */
241
242 /* We can't use add_dependence like the old code did, because a single insn
243 may have multiple memory accesses, and hence needs to be on the list
244 once for each memory access. Add_dependence won't let you add an insn
245 to a list more than once. */
246
247 /* An INSN_LIST containing all insns with pending read operations. */
248 rtx pending_read_insns;
249
250 /* An EXPR_LIST containing all MEM rtx's which are pending reads. */
251 rtx pending_read_mems;
252
253 /* An INSN_LIST containing all insns with pending write operations. */
254 rtx pending_write_insns;
255
256 /* An EXPR_LIST containing all MEM rtx's which are pending writes. */
257 rtx pending_write_mems;
258
259 /* Indicates the combined length of the two pending lists. We must prevent
260 these lists from ever growing too large since the number of dependencies
261 produced is at least O(N*N), and execution time is at least O(4*N*N), as
262 a function of the length of these pending lists. */
263 int pending_lists_length;
264
265 /* The last insn upon which all memory references must depend.
266 This is an insn which flushed the pending lists, creating a dependency
267 between it and all previously pending memory references. This creates
268 a barrier (or a checkpoint) which no memory reference is allowed to cross.
269
270 This includes all non constant CALL_INSNs. When we do interprocedural
271 alias analysis, this restriction can be relaxed.
272 This may also be an INSN that writes memory if the pending lists grow
273 too large. */
274 rtx last_pending_memory_flush;
275
276 /* The last function call we have seen. All hard regs, and, of course,
277 the last function call, must depend on this. */
278 rtx last_function_call;
279
280 /* The LOG_LINKS field of this is a list of insns which use a pseudo register
281 that does not already cross a call. We create dependencies between each
282 of those insns and the next call insn, to ensure that they won't cross a call
283 after scheduling is done. */
284 rtx sched_before_next_call;
285
286 /* Element N is the next insn that sets (hard or pseudo) register
287 N within the current basic block; or zero, if there is no
288 such insn. Needed for new registers which may be introduced
289 by splitting insns. */
290 rtx *reg_last_uses;
291 rtx *reg_last_sets;
292 rtx *reg_last_clobbers;
293 };
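
/* For example (a sketch only, of what add_insn_mem_dependence below
   effectively does), recording a pending read keeps the two paired
   lists in step:  */
#if 0
  deps->pending_read_insns
    = alloc_INSN_LIST (insn, deps->pending_read_insns);
  deps->pending_read_mems
    = alloc_EXPR_LIST (VOIDmode, mem, deps->pending_read_mems);
  deps->pending_lists_length++;
#endif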
294
295 static regset reg_pending_sets;
296 static regset reg_pending_clobbers;
297 static int reg_pending_sets_all;
298
299 /* To speed up the test for duplicate dependency links we keep a record
300 of true dependencies created by add_dependence when the average number
301 of instructions in a basic block is very large.
302
303 Studies have shown that there are typically around 5 instructions between
304 branches for typical C code. So we can make a guess that the average
305 basic block is approximately 5 instructions long; we will choose 100X
306 the average size as a very large basic block.
307
308 Each insn has an associated bitmap for its dependencies. Each bitmap
309 has enough entries to represent a dependency on any other insn in the
310 insn chain. */
311 static sbitmap *true_dependency_cache;
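
/* The cache is one sbitmap per insn, each wide enough to hold a bit for
   every other insn.  A sketch of its setup (the real allocation is keyed
   off the luid count computed when scheduling starts):  */
#if 0
  true_dependency_cache = sbitmap_vector_alloc (luid, luid);
  sbitmap_vector_zero (true_dependency_cache, luid);
#endif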
312
313 /* Indexed by INSN_UID, the collection of all data associated with
314 a single instruction. */
315
316 struct haifa_insn_data
317 {
318 /* A list of insns which depend on the instruction. Unlike LOG_LINKS,
319 it represents forward dependencies. */
320 rtx depend;
321
322 /* The line number note in effect for each insn. For line number
323 notes, this indicates whether the note may be reused. */
324 rtx line_note;
325
326 /* Logical uid gives the original ordering of the insns. */
327 int luid;
328
329 /* A priority for each insn. */
330 int priority;
331
332 /* The number of incoming edges in the forward dependency graph.
333 As scheduling proceeds, counts are decreased. An insn moves to
334 the ready queue when its counter reaches zero. */
335 int dep_count;
336
337 /* An encoding of the blockage range function. Both unit and range
338 are coded. */
339 unsigned int blockage;
340
341 /* Number of instructions referring to this insn. */
342 int ref_count;
343
344 /* The minimum clock tick at which the insn becomes ready. This is
345 used to note timing constraints for the insns in the pending list. */
346 int tick;
347
348 short cost;
349
350 /* An encoding of the function units used. */
351 short units;
352
353 /* This weight is an estimation of the insn's contribution to
354 register pressure. */
355 short reg_weight;
356
357 /* Some insns (e.g. call) are not allowed to move across blocks. */
358 unsigned int cant_move : 1;
359
360 /* Set if there's DEF-USE dependence between some speculatively
361 moved load insn and this one. */
362 unsigned int fed_by_spec_load : 1;
363 unsigned int is_load_insn : 1;
364 };
365
366 static struct haifa_insn_data *h_i_d;
367
368 #define INSN_DEPEND(INSN) (h_i_d[INSN_UID (INSN)].depend)
369 #define INSN_LUID(INSN) (h_i_d[INSN_UID (INSN)].luid)
370 #define INSN_PRIORITY(INSN) (h_i_d[INSN_UID (INSN)].priority)
371 #define INSN_DEP_COUNT(INSN) (h_i_d[INSN_UID (INSN)].dep_count)
372 #define INSN_COST(INSN) (h_i_d[INSN_UID (INSN)].cost)
373 #define INSN_UNIT(INSN) (h_i_d[INSN_UID (INSN)].units)
374 #define INSN_REG_WEIGHT(INSN) (h_i_d[INSN_UID (INSN)].reg_weight)
375
376 #define INSN_BLOCKAGE(INSN) (h_i_d[INSN_UID (INSN)].blockage)
377 #define UNIT_BITS 5
378 #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
379 #define ENCODE_BLOCKAGE(U, R) \
380 (((U) << BLOCKAGE_BITS \
381 | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \
382 | MAX_BLOCKAGE_COST (R))
383 #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
384 #define BLOCKAGE_RANGE(B) \
385 (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
386 | ((B) & BLOCKAGE_MASK))
387
388 /* Encodings of the `<name>_unit_blockage_range' function. */
389 #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
390 #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
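
/* A worked illustration of the encoding, taking BLOCKAGE_BITS to be 5
   and HOST_BITS_PER_INT to be 32 (assumptions for the example only):
   a range R with MIN_BLOCKAGE_COST (R) == 2 and MAX_BLOCKAGE_COST (R) == 7
   is R == (2 << 16) | 7, and for unit U == 3

	ENCODE_BLOCKAGE (3, R)  == ((3 << 5 | 2) << 5) | 7 == 3143
	UNIT_BLOCKED (3143)     == 3143 >> 10 == 3
	BLOCKAGE_RANGE (3143)   == (2 << 16) | 7 == R

   i.e. the unit number sits in the top bits, the minimum cost in the
   middle BLOCKAGE_BITS bits, and the maximum cost in the low
   BLOCKAGE_BITS bits.  */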
391
392 #define DONE_PRIORITY -1
393 #define MAX_PRIORITY 0x7fffffff
394 #define TAIL_PRIORITY 0x7ffffffe
395 #define LAUNCH_PRIORITY 0x7f000001
396 #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
397 #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
398
399 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
400 #define LINE_NOTE(INSN) (h_i_d[INSN_UID (INSN)].line_note)
401 #define INSN_TICK(INSN) (h_i_d[INSN_UID (INSN)].tick)
402 #define CANT_MOVE(insn) (h_i_d[INSN_UID (insn)].cant_move)
403 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
404 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
405
406 /* Vector indexed by basic block number giving the starting line-number
407 for each basic block. */
408 static rtx *line_note_head;
409
410 /* List of important notes we must keep around. This is a pointer to the
411 last element in the list. */
412 static rtx note_list;
413
414 /* Queues, etc. */
415
416 /* An instruction is ready to be scheduled when all insns preceding it
417 have already been scheduled. It is important to ensure that all
418 insns which use its result will not be executed until its result
419 has been computed. An insn is maintained in one of four structures:
420
421 (P) the "Pending" set of insns which cannot be scheduled until
422 their dependencies have been satisfied.
423 (Q) the "Queued" set of insns that can be scheduled when sufficient
424 time has passed.
425 (R) the "Ready" list of unscheduled, uncommitted insns.
426 (S) the "Scheduled" list of insns.
427
428 Initially, all insns are either "Pending" or "Ready" depending on
429 whether their dependencies are satisfied.
430
431 Insns move from the "Ready" list to the "Scheduled" list as they
432 are committed to the schedule. As this occurs, the insns in the
433 "Pending" list have their dependencies satisfied and move to either
434 the "Ready" list or the "Queued" set depending on whether
435 sufficient time has passed to make them ready. As time passes,
436 insns move from the "Queued" set to the "Ready" list. Insns may
437 move from the "Ready" list to the "Queued" set if they are blocked
438 due to a function unit conflict.
439
440 The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
441 insns, i.e., those that are ready, queued, and pending.
442 The "Queued" set (Q) is implemented by the variable `insn_queue'.
443 The "Ready" list (R) is implemented by the variables `ready' and
444 `n_ready'.
445 The "Scheduled" list (S) is the new insn chain built by this pass.
446
447 The transition (R->S) is implemented in the scheduling loop in
448 `schedule_block' when the best insn to schedule is chosen.
449 The transition (R->Q) is implemented in `queue_insn' when an
450 insn is found to have a function unit conflict with the already
451 committed insns.
452 The transitions (P->R and P->Q) are implemented in `schedule_insn' as
453 insns move from the ready list to the scheduled list.
454 The transition (Q->R) is implemented in 'queue_to_ready' as time
455 passes or stalls are introduced. */
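
/* As a rough illustration of the transitions described above, a
   simplified sketch (this is NOT the code of schedule_block below; it
   assumes `ready', `n_ready', `insn' and `clock_var' are set up as in
   that function, and the P transitions happen inside schedule_insn):  */
#if 0
  while (n_ready > 0 || q_size > 0)
    {
      /* Move insns whose delay has elapsed from the queue (Q) to the
	 ready list (R).  */
      n_ready = queue_to_ready (ready, n_ready);

      /* Order the ready list by the tie-breaking heuristics.  */
      qsort (ready, n_ready, sizeof (rtx), rank_for_schedule);

      /* Commit the best insn (R->S), unless a function unit conflict
	 sends it back to the queue (R->Q).  */
      insn = ready[--n_ready];
      if (actual_hazard (insn_unit (insn), insn, clock_var, 0))
	queue_insn (insn, insn_issue_delay (insn));
      else
	n_ready = schedule_insn (insn, ready, n_ready, clock_var);

      clock_var++;
    }
#endif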
456
457 /* Implement a circular buffer to delay instructions until sufficient
458 time has passed. INSN_QUEUE_SIZE is a power of two larger than
459 MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
460 longest time an insn may be queued. */
461 static rtx insn_queue[INSN_QUEUE_SIZE];
462 static int q_ptr = 0;
463 static int q_size = 0;
464 #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
465 #define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
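
/* For example, if INSN_QUEUE_SIZE were 16 then NEXT_Q (15) == 0 and
   NEXT_Q_AFTER (13, 5) == (18 & 15) == 2: an insn queued from slot 13
   with a 5 cycle delay lands in slot 2, the pointer simply wrapping
   around the buffer. (Illustration only; the real INSN_QUEUE_SIZE is
   computed by genattr.)  */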
466
467 /* Forward declarations. */
468 static void add_dependence PARAMS ((rtx, rtx, enum reg_note));
469 #ifdef HAVE_cc0
470 static void remove_dependence PARAMS ((rtx, rtx));
471 #endif
472 static rtx find_insn_list PARAMS ((rtx, rtx));
473 static int insn_unit PARAMS ((rtx));
474 static unsigned int blockage_range PARAMS ((int, rtx));
475 static void clear_units PARAMS ((void));
476 static int actual_hazard_this_instance PARAMS ((int, int, rtx, int, int));
477 static void schedule_unit PARAMS ((int, rtx, int));
478 static int actual_hazard PARAMS ((int, rtx, int, int));
479 static int potential_hazard PARAMS ((int, rtx, int));
480 static int insn_cost PARAMS ((rtx, rtx, rtx));
481 static int priority PARAMS ((rtx));
482 static void free_pending_lists PARAMS ((void));
483 static void add_insn_mem_dependence PARAMS ((struct deps *, rtx *, rtx *, rtx,
484 rtx));
485 static void flush_pending_lists PARAMS ((struct deps *, rtx, int));
486 static void sched_analyze_1 PARAMS ((struct deps *, rtx, rtx));
487 static void sched_analyze_2 PARAMS ((struct deps *, rtx, rtx));
488 static void sched_analyze_insn PARAMS ((struct deps *, rtx, rtx, rtx));
489 static void sched_analyze PARAMS ((struct deps *, rtx, rtx));
490 static int rank_for_schedule PARAMS ((const PTR, const PTR));
491 static void swap_sort PARAMS ((rtx *, int));
492 static void queue_insn PARAMS ((rtx, int));
493 static int schedule_insn PARAMS ((rtx, rtx *, int, int));
494 static void find_insn_reg_weight PARAMS ((int));
495 static int schedule_block PARAMS ((int, int));
496 static char *safe_concat PARAMS ((char *, char *, const char *));
497 static int insn_issue_delay PARAMS ((rtx));
498 static void adjust_priority PARAMS ((rtx));
499
500 /* Control flow graph edges are kept in circular lists. */
501 typedef struct
502 {
503 int from_block;
504 int to_block;
505 int next_in;
506 int next_out;
507 }
508 haifa_edge;
509 static haifa_edge *edge_table;
510
511 #define NEXT_IN(edge) (edge_table[edge].next_in)
512 #define NEXT_OUT(edge) (edge_table[edge].next_out)
513 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
514 #define TO_BLOCK(edge) (edge_table[edge].to_block)
515
516 /* Number of edges in the control flow graph. (In fact, larger than
517 that by 1, since edge 0 is unused.) */
518 static int nr_edges;
519
520 /* Circular list of incoming/outgoing edges of a block. */
521 static int *in_edges;
522 static int *out_edges;
523
524 #define IN_EDGES(block) (in_edges[block])
525 #define OUT_EDGES(block) (out_edges[block])
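
/* Because the lists are circular and edge 0 is unused, an empty list is
   simply 0, and walking all successors of a block follows this idiom
   (a sketch of the loop used in new_edge below):  */
#if 0
  fst_edge = curr_edge = OUT_EDGES (block);
  while (curr_edge)
    {
      /* ... use FROM_BLOCK (curr_edge) and TO_BLOCK (curr_edge) ...  */
      curr_edge = NEXT_OUT (curr_edge);
      if (curr_edge == fst_edge)
	break;
    }
#endif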
526
527
528
529 static int is_cfg_nonregular PARAMS ((void));
530 static int build_control_flow PARAMS ((struct edge_list *));
531 static void new_edge PARAMS ((int, int));
532
533
534 /* A region is the main entity for interblock scheduling: insns
535 are allowed to move between blocks in the same region, along
536 control flow graph edges, in the 'up' direction. */
537 typedef struct
538 {
539 int rgn_nr_blocks; /* Number of blocks in region. */
540 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
541 }
542 region;
543
544 /* Number of regions in the procedure. */
545 static int nr_regions;
546
547 /* Table of region descriptions. */
548 static region *rgn_table;
549
550 /* Array of lists of regions' blocks. */
551 static int *rgn_bb_table;
552
553 /* Topological order of blocks in the region (if b2 is reachable from
554 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
555 always referred to by either block or b, while its topological
556 order name (in the region) is referred to by bb. */
557 static int *block_to_bb;
558
559 /* The number of the region containing a block. */
560 static int *containing_rgn;
561
562 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
563 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
564 #define BLOCK_TO_BB(block) (block_to_bb[block])
565 #define CONTAINING_RGN(block) (containing_rgn[block])
566
567 void debug_regions PARAMS ((void));
568 static void find_single_block_region PARAMS ((void));
569 static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
570 static int too_large PARAMS ((int, int *, int *));
571
572 extern void debug_live PARAMS ((int, int));
573
574 /* Blocks of the current region being scheduled. */
575 static int current_nr_blocks;
576 static int current_blocks;
577
578 /* The mapping from bb to block. */
579 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
580
581
582 /* Bit vectors and bitset operations are needed for computations on
583 the control flow graph. */
584
585 typedef unsigned HOST_WIDE_INT *bitset;
586 typedef struct
587 {
588 int *first_member; /* Pointer to the list start in bitlst_table. */
589 int nr_members; /* The number of members of the bit list. */
590 }
591 bitlst;
592
593 static int bitlst_table_last;
594 static int bitlst_table_size;
595 static int *bitlst_table;
596
597 static char bitset_member PARAMS ((bitset, int, int));
598 static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
599
600 /* Target info declarations.
601
602 The block currently being scheduled is referred to as the "target" block,
603 while other blocks in the region from which insns can be moved to the
604 target are called "source" blocks. The candidate structure holds info
605 about such sources: are they valid? Speculative? Etc. */
606 typedef bitlst bblst;
607 typedef struct
608 {
609 char is_valid;
610 char is_speculative;
611 int src_prob;
612 bblst split_bbs;
613 bblst update_bbs;
614 }
615 candidate;
616
617 static candidate *candidate_table;
618
619 /* A speculative motion requires checking live information on the path
620 from 'source' to 'target'. The split blocks are those to be checked.
621 After a speculative motion, live information should be modified in
622 the 'update' blocks.
623
624 Lists of split and update blocks for each candidate of the current
625 target are in array bblst_table. */
626 static int *bblst_table, bblst_size, bblst_last;
627
628 #define IS_VALID(src) ( candidate_table[src].is_valid )
629 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
630 #define SRC_PROB(src) ( candidate_table[src].src_prob )
631
632 /* The bb being currently scheduled. */
633 static int target_bb;
634
635 /* List of edges. */
636 typedef bitlst edgelst;
637
638 /* Target info functions. */
639 static void split_edges PARAMS ((int, int, edgelst *));
640 static void compute_trg_info PARAMS ((int));
641 void debug_candidate PARAMS ((int));
642 void debug_candidates PARAMS ((int));
643
644
645 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
646 typedef bitset bbset;
647
648 /* Number of words of the bbset. */
649 static int bbset_size;
650
651 /* Dominators array: dom[i] contains the bbset of dominators of
652 bb i in the region. */
653 static bbset *dom;
654
655 /* bb 0 is the only region entry. */
656 #define IS_RGN_ENTRY(bb) (!bb)
657
658 /* Is bb_src dominated by bb_trg. */
659 #define IS_DOMINATED(bb_src, bb_trg) \
660 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
661
662 /* Probability: Prob[i] is a float in [0, 1] which is the probability
663 of bb i relative to the region entry. */
664 static float *prob;
665
666 /* The probability of bb_src, relative to bb_trg. Note that while the
667 'prob[bb]' is a float in [0, 1], this macro returns an integer
668 in [0, 100]. */
669 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
670 prob[bb_trg])))
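
/* For example, if prob[bb_src] == 0.3 and prob[bb_trg] == 0.6, then
   GET_SRC_PROB (bb_src, bb_trg) == (int) (100.0 * 0.5) == 50, i.e. the
   source block is estimated to execute about half as often as the
   target block.  */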
671
672 /* Bit-set of edges, where bit i stands for edge i. */
673 typedef bitset edgeset;
674
675 /* Number of edges in the region. */
676 static int rgn_nr_edges;
677
678 /* Array of size rgn_nr_edges. */
679 static int *rgn_edges;
680
681 /* Number of words in an edgeset. */
682 static int edgeset_size;
683
684 /* Number of bits in an edgeset. */
685 static int edgeset_bitsize;
686
687 /* Mapping from each edge in the graph to its number in the rgn. */
688 static int *edge_to_bit;
689 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
690
691 /* The split edges of a source bb are different for each target
692 bb. In order to compute this efficiently, the 'potential-split edges'
693 are computed for each bb prior to scheduling a region. This is actually
694 the split edges of each bb relative to the region entry.
695
696 pot_split[bb] is the set of potential split edges of bb. */
697 static edgeset *pot_split;
698
699 /* For every bb, a set of its ancestor edges. */
700 static edgeset *ancestor_edges;
701
702 static void compute_dom_prob_ps PARAMS ((int));
703
704 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
705 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
706 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
707 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
708
709 /* Parameters affecting the decision of rank_for_schedule(). */
710 #define MIN_DIFF_PRIORITY 2
711 #define MIN_PROBABILITY 40
712 #define MIN_PROB_DIFF 10
713
714 /* Speculative scheduling functions. */
715 static int check_live_1 PARAMS ((int, rtx));
716 static void update_live_1 PARAMS ((int, rtx));
717 static int check_live PARAMS ((rtx, int));
718 static void update_live PARAMS ((rtx, int));
719 static void set_spec_fed PARAMS ((rtx));
720 static int is_pfree PARAMS ((rtx, int, int));
721 static int find_conditional_protection PARAMS ((rtx, int));
722 static int is_conditionally_protected PARAMS ((rtx, int, int));
723 static int may_trap_exp PARAMS ((rtx, int));
724 static int haifa_classify_insn PARAMS ((rtx));
725 static int is_prisky PARAMS ((rtx, int, int));
726 static int is_exception_free PARAMS ((rtx, int, int));
727
728 static char find_insn_mem_list PARAMS ((rtx, rtx, rtx, rtx));
729 static void compute_block_forward_dependences PARAMS ((int));
730 static void add_branch_dependences PARAMS ((rtx, rtx));
731 static void compute_block_backward_dependences PARAMS ((int));
732 void debug_dependencies PARAMS ((void));
733
734 /* Notes handling mechanism:
735 =========================
736 Generally, NOTES are saved before scheduling and restored after scheduling.
737 The scheduler distinguishes between three types of notes:
738
739 (1) LINE_NUMBER notes, generated and used for debugging. Here,
740 before scheduling a region, a pointer to the LINE_NUMBER note is
741 added to the insn following it (in save_line_notes()), and the note
742 is removed (in rm_line_notes() and unlink_line_notes()). After
743 scheduling the region, this pointer is used for regeneration of
744 the LINE_NUMBER note (in restore_line_notes()).
745
746 (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
747 Before scheduling a region, a pointer to the note is added to the insn
748 that follows or precedes it. (This happens as part of the data dependence
749 computation). After scheduling an insn, the pointer contained in it is
750 used for regenerating the corresponding note (in reemit_notes).
751
752 (3) All other notes (e.g. INSN_DELETED): Before scheduling a block,
753 these notes are put in a list (in rm_other_notes() and
754 unlink_other_notes ()). After scheduling the block, these notes are
755 inserted at the beginning of the block (in schedule_block()). */
756
757 static rtx unlink_other_notes PARAMS ((rtx, rtx));
758 static rtx unlink_line_notes PARAMS ((rtx, rtx));
759 static void rm_line_notes PARAMS ((int));
760 static void save_line_notes PARAMS ((int));
761 static void restore_line_notes PARAMS ((int));
762 static void rm_redundant_line_notes PARAMS ((void));
763 static void rm_other_notes PARAMS ((rtx, rtx));
764 static rtx reemit_notes PARAMS ((rtx, rtx));
765
766 static void get_block_head_tail PARAMS ((int, rtx *, rtx *));
767 static void get_bb_head_tail PARAMS ((int, rtx *, rtx *));
768
769 static int queue_to_ready PARAMS ((rtx [], int));
770
771 static void debug_ready_list PARAMS ((rtx[], int));
772 static void init_target_units PARAMS ((void));
773 static void insn_print_units PARAMS ((rtx));
774 static int get_visual_tbl_length PARAMS ((void));
775 static void init_block_visualization PARAMS ((void));
776 static void print_block_visualization PARAMS ((int, const char *));
777 static void visualize_scheduled_insns PARAMS ((int, int));
778 static void visualize_no_unit PARAMS ((rtx));
779 static void visualize_stall_cycles PARAMS ((int, int));
780 static void print_exp PARAMS ((char *, rtx, int));
781 static void print_value PARAMS ((char *, rtx, int));
782 static void print_pattern PARAMS ((char *, rtx, int));
783 static void print_insn PARAMS ((char *, rtx, int));
784 void debug_reg_vector PARAMS ((regset));
785
786 static rtx move_insn1 PARAMS ((rtx, rtx));
787 static rtx move_insn PARAMS ((rtx, rtx));
788 static rtx group_leader PARAMS ((rtx));
789 static int set_priorities PARAMS ((int));
790 static void init_deps PARAMS ((struct deps *));
791 static void schedule_region PARAMS ((int));
792 static void propagate_deps PARAMS ((int, struct deps *, int));
793
794 #endif /* INSN_SCHEDULING */
795 \f
796 #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
797
798 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
799 LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type
800 of dependence that this link represents. */
801
802 static void
803 add_dependence (insn, elem, dep_type)
804 rtx insn;
805 rtx elem;
806 enum reg_note dep_type;
807 {
808 rtx link, next;
809
810 /* Don't depend an insn on itself. */
811 if (insn == elem)
812 return;
813
814 /* We can get a dependency on deleted insns due to optimizations in
815 the register allocation and reloading or due to splitting. Any
816 such dependency is useless and can be ignored. */
817 if (GET_CODE (elem) == NOTE)
818 return;
819
820 /* If elem is part of a sequence that must be scheduled together, then
821 make the dependence point to the last insn of the sequence.
822 When HAVE_cc0, it is possible for NOTEs to exist between users and
823 setters of the condition codes, so we must skip past notes here.
824 Otherwise, NOTEs are impossible here. */
825
826 next = NEXT_INSN (elem);
827
828 #ifdef HAVE_cc0
829 while (next && GET_CODE (next) == NOTE)
830 next = NEXT_INSN (next);
831 #endif
832
833 if (next && SCHED_GROUP_P (next)
834 && GET_CODE (next) != CODE_LABEL)
835 {
836 /* Notes will never intervene here though, so don't bother checking
837 for them. */
838 /* We must reject CODE_LABELs, so that we don't get confused by one
839 that has LABEL_PRESERVE_P set, which is represented by the same
840 bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be
841 SCHED_GROUP_P. */
842 while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next))
843 && GET_CODE (NEXT_INSN (next)) != CODE_LABEL)
844 next = NEXT_INSN (next);
845
846 /* Again, don't depend an insn on itself. */
847 if (insn == next)
848 return;
849
850 /* Make the dependence to NEXT, the last insn of the group, instead
851 of the original ELEM. */
852 elem = next;
853 }
854
855 #ifdef INSN_SCHEDULING
856 /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
857 No need for interblock dependences with calls, since
858 calls are not moved between blocks. Note: the edge where
859 elem is a CALL is still required. */
860 if (GET_CODE (insn) == CALL_INSN
861 && (INSN_BB (elem) != INSN_BB (insn)))
862 return;
863
864
865 /* If we already have a true dependency for ELEM, then we do not
866 need to do anything. Avoiding the list walk below can cut
867 compile times dramatically for some code. */
868 if (true_dependency_cache
869 && TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
870 return;
871 #endif
872
873 /* Check that we don't already have this dependence. */
874 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
875 if (XEXP (link, 0) == elem)
876 {
877 /* If this is a more restrictive type of dependence than the existing
878 one, then change the existing dependence to this type. */
879 if ((int) dep_type < (int) REG_NOTE_KIND (link))
880 PUT_REG_NOTE_KIND (link, dep_type);
881
882 #ifdef INSN_SCHEDULING
883 /* If we are adding a true dependency to INSN's LOG_LINKs, then
884 note that in the bitmap cache of true dependency information. */
885 if ((int)dep_type == 0 && true_dependency_cache)
886 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
887 #endif
888 return;
889 }
890 /* Might want to check one level of transitivity to save conses. */
891
892 link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
893 LOG_LINKS (insn) = link;
894
895 /* Insn dependency, not data dependency. */
896 PUT_REG_NOTE_KIND (link, dep_type);
897
898 #ifdef INSN_SCHEDULING
899 /* If we are adding a true dependency to INSN's LOG_LINKs, then
900 note that in the bitmap cache of true dependency information. */
901 if ((int)dep_type == 0 && true_dependency_cache)
902 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
903 #endif
904 }
905
906 #ifdef HAVE_cc0
907 /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
908 of INSN. Abort if not found. */
909
910 static void
911 remove_dependence (insn, elem)
912 rtx insn;
913 rtx elem;
914 {
915 rtx prev, link, next;
916 int found = 0;
917
918 for (prev = 0, link = LOG_LINKS (insn); link; link = next)
919 {
920 next = XEXP (link, 1);
921 if (XEXP (link, 0) == elem)
922 {
923 if (prev)
924 XEXP (prev, 1) = next;
925 else
926 LOG_LINKS (insn) = next;
927
928 #ifdef INSN_SCHEDULING
929 /* If we are removing a true dependency from the LOG_LINKS list,
930 make sure to remove it from the cache too. */
931 if (REG_NOTE_KIND (link) == 0 && true_dependency_cache)
932 RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
933 INSN_LUID (elem));
934 #endif
935
936 free_INSN_LIST_node (link);
937
938 found = 1;
939 }
940 else
941 prev = link;
942 }
943
944 if (!found)
945 abort ();
946 return;
947 }
948 #endif /* HAVE_cc0 */
949 \f
950 #ifndef INSN_SCHEDULING
951 void
952 schedule_insns (dump_file)
953 FILE *dump_file ATTRIBUTE_UNUSED;
954 {
955 }
956 #else
957 #ifndef __GNUC__
958 #define __inline
959 #endif
960
961 #ifndef HAIFA_INLINE
962 #define HAIFA_INLINE __inline
963 #endif
964
965 /* Computation of memory dependencies. */
966
967 /* Data structures for the computation of data dependences in a region. We
968 keep one mem_deps structure for every basic block. Before analyzing the
969 data dependences for a bb, its variables are initialized as a function of
970 the variables of its predecessors. When the analysis for a bb completes,
971 we save the contents to the corresponding bb_deps[bb] variable. */
972
973 static struct deps *bb_deps;
974
975 /* Pointer to the last instruction scheduled. Used by rank_for_schedule,
976 so that insns independent of the last scheduled insn will be preferred
977 over dependent instructions. */
978
979 static rtx last_scheduled_insn;
980
981 /* Functions for construction of the control flow graph. */
982
983 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
984
985 We decide not to build the control flow graph if there is possibly more
986 than one entry to the function, if computed branches exist, or if we
987 have nonlocal gotos. */
988
989 static int
990 is_cfg_nonregular ()
991 {
992 int b;
993 rtx insn;
994 RTX_CODE code;
995
996 /* If we have a label that could be the target of a nonlocal goto, then
997 the cfg is not well structured. */
998 if (nonlocal_goto_handler_labels)
999 return 1;
1000
1001 /* If we have any forced labels, then the cfg is not well structured. */
1002 if (forced_labels)
1003 return 1;
1004
1005 /* If this function has a computed jump, then we consider the cfg
1006 not well structured. */
1007 if (current_function_has_computed_jump)
1008 return 1;
1009
1010 /* If we have exception handlers, then we consider the cfg not well
1011 structured. ?!? We should be able to handle this now that flow.c
1012 computes an accurate cfg for EH. */
1013 if (exception_handler_labels)
1014 return 1;
1015
1016 /* If we have non-jumping insns which refer to labels, then we consider
1017 the cfg not well structured. */
1018 /* Check for labels referred to other than by jumps. */
1019 for (b = 0; b < n_basic_blocks; b++)
1020 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
1021 {
1022 code = GET_CODE (insn);
1023 if (GET_RTX_CLASS (code) == 'i')
1024 {
1025 rtx note;
1026
1027 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
1028 if (REG_NOTE_KIND (note) == REG_LABEL)
1029 return 1;
1030 }
1031
1032 if (insn == BLOCK_END (b))
1033 break;
1034 }
1035
1036 /* All the tests passed. Consider the cfg well structured. */
1037 return 0;
1038 }
1039
1040 /* Build the control flow graph and set nr_edges.
1041
1042 Instead of trying to build a cfg ourselves, we rely on flow to
1043 do it for us. Stamp out useless code (and bug) duplication.
1044
1045 Return nonzero if an irregularity in the cfg is found which would
1046 prevent cross block scheduling. */
1047
1048 static int
1049 build_control_flow (edge_list)
1050 struct edge_list *edge_list;
1051 {
1052 int i, unreachable, num_edges;
1053
1054 /* This already accounts for entry/exit edges. */
1055 num_edges = NUM_EDGES (edge_list);
1056
1057 /* Unreachable loops with more than one basic block are detected
1058 during the DFS traversal in find_rgns.
1059
1060 Unreachable loops with a single block are detected here. This
1061 test is redundant with the one in find_rgns, but it's much
1062 cheaper to go ahead and catch the trivial case here. */
1063 unreachable = 0;
1064 for (i = 0; i < n_basic_blocks; i++)
1065 {
1066 basic_block b = BASIC_BLOCK (i);
1067
1068 if (b->pred == NULL
1069 || (b->pred->src == b
1070 && b->pred->pred_next == NULL))
1071 unreachable = 1;
1072 }
1073
1074 /* ??? We can kill these soon. */
1075 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1076 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1077 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
1078
1079 nr_edges = 0;
1080 for (i = 0; i < num_edges; i++)
1081 {
1082 edge e = INDEX_EDGE (edge_list, i);
1083
1084 if (e->dest != EXIT_BLOCK_PTR
1085 && e->src != ENTRY_BLOCK_PTR)
1086 new_edge (e->src->index, e->dest->index);
1087 }
1088
1089 /* Increment by 1, since edge 0 is unused. */
1090 nr_edges++;
1091
1092 return unreachable;
1093 }
1094
1095
1096 /* Record an edge in the control flow graph from SOURCE to TARGET.
1097
1098 In theory, this is redundant with the s_succs computed above, but
1099 we have not converted all of haifa to use information from the
1100 integer lists. */
1101
1102 static void
1103 new_edge (source, target)
1104 int source, target;
1105 {
1106 int e, next_edge;
1107 int curr_edge, fst_edge;
1108
1109 /* Check for duplicates. */
1110 fst_edge = curr_edge = OUT_EDGES (source);
1111 while (curr_edge)
1112 {
1113 if (FROM_BLOCK (curr_edge) == source
1114 && TO_BLOCK (curr_edge) == target)
1115 {
1116 return;
1117 }
1118
1119 curr_edge = NEXT_OUT (curr_edge);
1120
1121 if (fst_edge == curr_edge)
1122 break;
1123 }
1124
1125 e = ++nr_edges;
1126
1127 FROM_BLOCK (e) = source;
1128 TO_BLOCK (e) = target;
1129
1130 if (OUT_EDGES (source))
1131 {
1132 next_edge = NEXT_OUT (OUT_EDGES (source));
1133 NEXT_OUT (OUT_EDGES (source)) = e;
1134 NEXT_OUT (e) = next_edge;
1135 }
1136 else
1137 {
1138 OUT_EDGES (source) = e;
1139 NEXT_OUT (e) = e;
1140 }
1141
1142 if (IN_EDGES (target))
1143 {
1144 next_edge = NEXT_IN (IN_EDGES (target));
1145 NEXT_IN (IN_EDGES (target)) = e;
1146 NEXT_IN (e) = next_edge;
1147 }
1148 else
1149 {
1150 IN_EDGES (target) = e;
1151 NEXT_IN (e) = e;
1152 }
1153 }
1154
1155
1156 /* BITSET macros for operations on the control flow graph. */
1157
1158 /* Compute bitwise union of two bitsets. */
1159 #define BITSET_UNION(set1, set2, len) \
1160 do { register bitset tp = set1, sp = set2; \
1161 register int i; \
1162 for (i = 0; i < len; i++) \
1163 *(tp++) |= *(sp++); } while (0)
1164
1165 /* Compute bitwise intersection of two bitsets. */
1166 #define BITSET_INTER(set1, set2, len) \
1167 do { register bitset tp = set1, sp = set2; \
1168 register int i; \
1169 for (i = 0; i < len; i++) \
1170 *(tp++) &= *(sp++); } while (0)
1171
1172 /* Compute bitwise difference of two bitsets. */
1173 #define BITSET_DIFFER(set1, set2, len) \
1174 do { register bitset tp = set1, sp = set2; \
1175 register int i; \
1176 for (i = 0; i < len; i++) \
1177 *(tp++) &= ~*(sp++); } while (0)
1178
1179 /* Inverts every bit of bitset 'set'. */
1180 #define BITSET_INVERT(set, len) \
1181 do { register bitset tmpset = set; \
1182 register int i; \
1183 for (i = 0; i < len; i++, tmpset++) \
1184 *tmpset = ~*tmpset; } while (0)
1185
1186 /* Turn on the index'th bit in bitset set. */
1187 #define BITSET_ADD(set, index, len) \
1188 { \
1189 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1190 abort (); \
1191 else \
1192 set[index/HOST_BITS_PER_WIDE_INT] |= \
1193 1 << (index % HOST_BITS_PER_WIDE_INT); \
1194 }
1195
1196 /* Turn off the index'th bit in set. */
1197 #define BITSET_REMOVE(set, index, len) \
1198 { \
1199 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1200 abort (); \
1201 else \
1202 set[index/HOST_BITS_PER_WIDE_INT] &= \
1203 ~(1 << (index%HOST_BITS_PER_WIDE_INT)); \
1204 }
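
/* A typical use of these macros (a sketch; the real bbset/edgeset
   allocation and sizing happen when a region is scheduled):  */
#if 0
  bitset s = (bitset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
  BITSET_ADD (s, 3, bbset_size);
  if (bitset_member (s, 3, bbset_size))
    /* ... bb 3 is in the set ...  */;
  free (s);
#endif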
1205
1206
1207 /* Check if the index'th bit in bitset set is on. */
1208
1209 static char
1210 bitset_member (set, index, len)
1211 bitset set;
1212 int index, len;
1213 {
1214 if (index >= HOST_BITS_PER_WIDE_INT * len)
1215 abort ();
1216 return (set[index / HOST_BITS_PER_WIDE_INT] &
1217 1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
1218 }
1219
1220
1221 /* Translate a bit-set SET to a list BL of the bit-set members. */
1222
1223 static void
1224 extract_bitlst (set, len, bitlen, bl)
1225 bitset set;
1226 int len;
1227 int bitlen;
1228 bitlst *bl;
1229 {
1230 int i, j, offset;
1231 unsigned HOST_WIDE_INT word;
1232
1233 /* bblst table space is reused in each call to extract_bitlst. */
1234 bitlst_table_last = 0;
1235
1236 bl->first_member = &bitlst_table[bitlst_table_last];
1237 bl->nr_members = 0;
1238
1239 /* Iterate over each word in the bitset. */
1240 for (i = 0; i < len; i++)
1241 {
1242 word = set[i];
1243 offset = i * HOST_BITS_PER_WIDE_INT;
1244
1245 /* Iterate over each bit in the word, but do not
1246 go beyond the end of the defined bits. */
1247 for (j = 0; offset < bitlen && word; j++)
1248 {
1249 if (word & 1)
1250 {
1251 bitlst_table[bitlst_table_last++] = offset;
1252 (bl->nr_members)++;
1253 }
1254 word >>= 1;
1255 ++offset;
1256 }
1257 }
1258
1259 }
1260
1261
1262 /* Functions for the construction of regions. */
1263
1264 /* Print the regions, for debugging purposes. Callable from debugger. */
1265
1266 void
1267 debug_regions ()
1268 {
1269 int rgn, bb;
1270
1271 fprintf (dump, "\n;; ------------ REGIONS ----------\n\n");
1272 for (rgn = 0; rgn < nr_regions; rgn++)
1273 {
1274 fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
1275 rgn_table[rgn].rgn_nr_blocks);
1276 fprintf (dump, ";;\tbb/block: ");
1277
1278 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
1279 {
1280 current_blocks = RGN_BLOCKS (rgn);
1281
1282 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
1283 abort ();
1284
1285 fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
1286 }
1287
1288 fprintf (dump, "\n\n");
1289 }
1290 }
1291
1292
1293 /* Build a single block region for each basic block in the function.
1294 This allows for using the same code for interblock and basic block
1295 scheduling. */
1296
1297 static void
1298 find_single_block_region ()
1299 {
1300 int i;
1301
1302 for (i = 0; i < n_basic_blocks; i++)
1303 {
1304 rgn_bb_table[i] = i;
1305 RGN_NR_BLOCKS (i) = 1;
1306 RGN_BLOCKS (i) = i;
1307 CONTAINING_RGN (i) = i;
1308 BLOCK_TO_BB (i) = 0;
1309 }
1310 nr_regions = n_basic_blocks;
1311 }
1312
1313
1314 /* Update number of blocks and the estimate for number of insns
1315 in the region. Return 1 if the region is "too large" for interblock
1316 scheduling (compile time considerations), otherwise return 0. */
1317
1318 static int
1319 too_large (block, num_bbs, num_insns)
1320 int block, *num_bbs, *num_insns;
1321 {
1322 (*num_bbs)++;
1323 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
1324 INSN_LUID (BLOCK_HEAD (block)));
1325 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
1326 return 1;
1327 else
1328 return 0;
1329 }
1330
1331
1332 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
1333 is still an inner loop. Put in max_hdr[blk] the header of the innermost
1334 loop containing blk. */
1335 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
1336 { \
1337 if (max_hdr[blk] == -1) \
1338 max_hdr[blk] = hdr; \
1339 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
1340 RESET_BIT (inner, hdr); \
1341 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
1342 { \
1343 RESET_BIT (inner,max_hdr[blk]); \
1344 max_hdr[blk] = hdr; \
1345 } \
1346 }
1347
1348
1349 /* Find regions for interblock scheduling.
1350
1351 A region for scheduling can be:
1352
1353 * A loop-free procedure, or
1354
1355 * A reducible inner loop, or
1356
1357 * A basic block not contained in any other region.
1358
1359
1360 ?!? In theory we could build other regions based on extended basic
1361 blocks or reverse extended basic blocks. Is it worth the trouble?
1362
1363 Loop blocks that form a region are put into the region's block list
1364 in topological order.
1365
1366 This procedure stores its results into the following global (ick) variables
1367
1368 * rgn_nr
1369 * rgn_table
1370 * rgn_bb_table
1371 * block_to_bb
1372 * containing region
1373
1374
1375 We use dominator relationships to avoid making regions out of non-reducible
1376 loops.
1377
1378 This procedure needs to be converted to work on pred/succ lists instead
1379 of edge tables. That would simplify it somewhat. */
1380
1381 static void
1382 find_rgns (edge_list, dom)
1383 struct edge_list *edge_list;
1384 sbitmap *dom;
1385 {
1386 int *max_hdr, *dfs_nr, *stack, *degree;
1387 char no_loops = 1;
1388 int node, child, loop_head, i, head, tail;
1389 int count = 0, sp, idx = 0, current_edge = out_edges[0];
1390 int num_bbs, num_insns, unreachable;
1391 int too_large_failure;
1392
1393 /* Note if an edge has been passed. */
1394 sbitmap passed;
1395
1396 /* Note if a block is a natural loop header. */
1397 sbitmap header;
1398
1399 /* Note if a block is a natural inner loop header. */
1400 sbitmap inner;
1401
1402 /* Note if a block is in the block queue. */
1403 sbitmap in_queue;
1404
1405 /* Note if a block is on the DFS stack. */
1406 sbitmap in_stack;
1407
1408 int num_edges = NUM_EDGES (edge_list);
1409
1410 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
1411 and a mapping from block to its loop header (if the block is contained
1412 in a loop, else -1).
1413
1414 Store results in HEADER, INNER, and MAX_HDR respectively, these will
1415 be used as inputs to the second traversal.
1416
1417 STACK, SP and DFS_NR are only used during the first traversal. */
1418
1419 /* Allocate and initialize variables for the first traversal. */
1420 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
1421 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
1422 stack = (int *) xmalloc (nr_edges * sizeof (int));
1423
1424 inner = sbitmap_alloc (n_basic_blocks);
1425 sbitmap_ones (inner);
1426
1427 header = sbitmap_alloc (n_basic_blocks);
1428 sbitmap_zero (header);
1429
1430 passed = sbitmap_alloc (nr_edges);
1431 sbitmap_zero (passed);
1432
1433 in_queue = sbitmap_alloc (n_basic_blocks);
1434 sbitmap_zero (in_queue);
1435
1436 in_stack = sbitmap_alloc (n_basic_blocks);
1437 sbitmap_zero (in_stack);
1438
1439 for (i = 0; i < n_basic_blocks; i++)
1440 max_hdr[i] = -1;
1441
1442 /* DFS traversal to find inner loops in the cfg. */
1443
1444 sp = -1;
1445 while (1)
1446 {
1447 if (current_edge == 0 || TEST_BIT (passed, current_edge))
1448 {
1449 /* We have reached a leaf node or a node that was already
1450 processed. Pop edges off the stack until we find
1451 an edge that has not yet been processed. */
1452 while (sp >= 0
1453 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
1454 {
1455 /* Pop entry off the stack. */
1456 current_edge = stack[sp--];
1457 node = FROM_BLOCK (current_edge);
1458 child = TO_BLOCK (current_edge);
1459 RESET_BIT (in_stack, child);
1460 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1461 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1462 current_edge = NEXT_OUT (current_edge);
1463 }
1464
1465 /* See if we have finished the DFS tree traversal. */
1466 if (sp < 0 && TEST_BIT (passed, current_edge))
1467 break;
1468
1469 /* Nope, continue the traversal with the popped node. */
1470 continue;
1471 }
1472
1473 /* Process a node. */
1474 node = FROM_BLOCK (current_edge);
1475 child = TO_BLOCK (current_edge);
1476 SET_BIT (in_stack, node);
1477 dfs_nr[node] = ++count;
1478
1479 /* If the successor is in the stack, then we've found a loop.
1480 Mark the loop; if it is not a natural loop, it will
1481 be rejected during the second traversal. */
1482 if (TEST_BIT (in_stack, child))
1483 {
1484 no_loops = 0;
1485 SET_BIT (header, child);
1486 UPDATE_LOOP_RELATIONS (node, child);
1487 SET_BIT (passed, current_edge);
1488 current_edge = NEXT_OUT (current_edge);
1489 continue;
1490 }
1491
1492 /* If the child was already visited, then there is no need to visit
1493 it again. Just update the loop relationships and restart
1494 with a new edge. */
1495 if (dfs_nr[child])
1496 {
1497 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1498 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1499 SET_BIT (passed, current_edge);
1500 current_edge = NEXT_OUT (current_edge);
1501 continue;
1502 }
1503
1504 /* Push an entry on the stack and continue DFS traversal. */
1505 stack[++sp] = current_edge;
1506 SET_BIT (passed, current_edge);
1507 current_edge = OUT_EDGES (child);
1508
1509 /* This is temporary until haifa is converted to use rth's new
1510 cfg routines which have true entry/exit blocks and the
1511 appropriate edges from/to those blocks.
1512
1513 Generally we update dfs_nr for a node when we process its
1514 out edge. However, if the node has no out edge then we will
1515 not set dfs_nr for that node. This can confuse the scheduler
1516 into thinking that we have unreachable blocks, which in turn
1517 disables cross block scheduling.
1518
1519 So, if we have a node with no out edges, go ahead and mark it
1520 as reachable now. */
1521 if (current_edge == 0)
1522 dfs_nr[child] = ++count;
1523 }
1524
1525 /* Another check for unreachable blocks. The earlier test in
1526 is_cfg_nonregular only finds unreachable blocks that do not
1527 form a loop.
1528
1529 The DFS traversal will mark every block that is reachable from
1530 the entry node by placing a nonzero value in dfs_nr. Thus if
1531 dfs_nr is zero for any block, then it must be unreachable. */
1532 unreachable = 0;
1533 for (i = 0; i < n_basic_blocks; i++)
1534 if (dfs_nr[i] == 0)
1535 {
1536 unreachable = 1;
1537 break;
1538 }
1539
1540 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
1541 to hold degree counts. */
1542 degree = dfs_nr;
1543
1544 for (i = 0; i < n_basic_blocks; i++)
1545 degree[i] = 0;
1546 for (i = 0; i < num_edges; i++)
1547 {
1548 edge e = INDEX_EDGE (edge_list, i);
1549
1550 if (e->dest != EXIT_BLOCK_PTR)
1551 degree[e->dest->index]++;
1552 }
1553
1554 /* Do not perform region scheduling if there are any unreachable
1555 blocks. */
1556 if (!unreachable)
1557 {
1558 int *queue;
1559
1560 if (no_loops)
1561 SET_BIT (header, 0);
1562
1563 /* Second traversal: find reducible inner loops and topologically sort
1564 blocks of each region. */
1565
1566 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
1567
1568 /* Find blocks which are inner loop headers. We still have non-reducible
1569 loops to consider at this point. */
1570 for (i = 0; i < n_basic_blocks; i++)
1571 {
1572 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
1573 {
1574 edge e;
1575 int j;
1576
1577 /* Now check that the loop is reducible. We do this separately
1578 from finding inner loops so that we do not find a reducible
1579 loop which contains an inner non-reducible loop.
1580
1581 A simple way to find reducible/natural loops is to verify
1582 that each block in the loop is dominated by the loop
1583 header.
1584
1585 If there exists a block that is not dominated by the loop
1586 header, then the block is reachable from outside the loop
1587 and thus the loop is not a natural loop. */
1588 for (j = 0; j < n_basic_blocks; j++)
1589 {
1590 /* First identify blocks in the loop, except for the loop
1591 entry block. */
1592 if (i == max_hdr[j] && i != j)
1593 {
1594 /* Now verify that the block is dominated by the loop
1595 header. */
1596 if (!TEST_BIT (dom[j], i))
1597 break;
1598 }
1599 }
1600
1601 /* If we exited the loop early, then I is the header of
1602 a non-reducible loop and we should quit processing it
1603 now. */
1604 if (j != n_basic_blocks)
1605 continue;
1606
1607 /* I is a header of an inner loop, or block 0 in a subroutine
1608 with no loops at all. */
1609 head = tail = -1;
1610 too_large_failure = 0;
1611 loop_head = max_hdr[i];
1612
1613 /* Decrease degree of all I's successors for topological
1614 ordering. */
1615 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
1616 if (e->dest != EXIT_BLOCK_PTR)
1617 --degree[e->dest->index];
1618
1619 /* Estimate # insns, and count # blocks in the region. */
1620 num_bbs = 1;
1621 num_insns = (INSN_LUID (BLOCK_END (i))
1622 - INSN_LUID (BLOCK_HEAD (i)));
1623
1624
1625 /* Find all loop latches (blocks with back edges to the loop
1626 header) or all the leaf blocks if the cfg has no loops.
1627
1628 Place those blocks into the queue. */
1629 if (no_loops)
1630 {
1631 for (j = 0; j < n_basic_blocks; j++)
1632 /* Leaf nodes have only a single successor which must
1633 be EXIT_BLOCK. */
1634 if (BASIC_BLOCK (j)->succ
1635 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
1636 && BASIC_BLOCK (j)->succ->succ_next == NULL)
1637 {
1638 queue[++tail] = j;
1639 SET_BIT (in_queue, j);
1640
1641 if (too_large (j, &num_bbs, &num_insns))
1642 {
1643 too_large_failure = 1;
1644 break;
1645 }
1646 }
1647 }
1648 else
1649 {
1650 edge e;
1651
1652 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
1653 {
1654 if (e->src == ENTRY_BLOCK_PTR)
1655 continue;
1656
1657 node = e->src->index;
1658
1659 if (max_hdr[node] == loop_head && node != i)
1660 {
1661 /* This is a loop latch. */
1662 queue[++tail] = node;
1663 SET_BIT (in_queue, node);
1664
1665 if (too_large (node, &num_bbs, &num_insns))
1666 {
1667 too_large_failure = 1;
1668 break;
1669 }
1670 }
1671
1672 }
1673 }
1674
1675 /* Now add all the blocks in the loop to the queue.
1676
1677 We know the loop is a natural loop; however the algorithm
1678 above will not always mark certain blocks as being in the
1679 loop. Consider:
1680 node children
1681 a b,c
1682 b c
1683 c a,d
1684 d b
1685
1686
1687 The algorithm in the DFS traversal may not mark B & D as part
1688 of the loop (i.e. they will not have max_hdr set to A).
1689
1690 We know they cannot be loop latches (else they would have
1691 had max_hdr set since they'd have a backedge to a dominator
1692 block). So we don't need them on the initial queue.
1693
1694 We know they are part of the loop because they are dominated
1695 by the loop header and can be reached by a backwards walk of
1696 the edges starting with nodes on the initial queue.
1697
1698 It is safe and desirable to include those nodes in the
1699 loop/scheduling region. To do so we would need to decrease
1700 the degree of a node if it is the target of a backedge
1701 within the loop itself as the node is placed in the queue.
1702
1703 We do not do this because I'm not sure that the actual
1704 scheduling code will properly handle this case. ?!? */
1705
1706 while (head < tail && !too_large_failure)
1707 {
1708 edge e;
1709 child = queue[++head];
1710
1711 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1712 {
1713 node = e->src->index;
1714
1715 /* See discussion above about nodes not marked as in
1716 this loop during the initial DFS traversal. */
1717 if (e->src == ENTRY_BLOCK_PTR
1718 || max_hdr[node] != loop_head)
1719 {
1720 tail = -1;
1721 break;
1722 }
1723 else if (!TEST_BIT (in_queue, node) && node != i)
1724 {
1725 queue[++tail] = node;
1726 SET_BIT (in_queue, node);
1727
1728 if (too_large (node, &num_bbs, &num_insns))
1729 {
1730 too_large_failure = 1;
1731 break;
1732 }
1733 }
1734 }
1735 }
1736
1737 if (tail >= 0 && !too_large_failure)
1738 {
1739 /* Place the loop header into list of region blocks. */
1740 degree[i] = -1;
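/* A degree of -1 marks a block as already assigned to a region; any
   block whose degree is still non-negative at the end is given a
   region of its own below.  */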
1741 rgn_bb_table[idx] = i;
1742 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1743 RGN_BLOCKS (nr_regions) = idx++;
1744 CONTAINING_RGN (i) = nr_regions;
1745 BLOCK_TO_BB (i) = count = 0;
1746
1747 /* Remove blocks from queue[] when their in degree
1748 becomes zero. Repeat until no blocks are left on the
1749 list. This produces a topological list of blocks in
1750 the region. */
1751 while (tail >= 0)
1752 {
1753 if (head < 0)
1754 head = tail;
1755 child = queue[head];
1756 if (degree[child] == 0)
1757 {
1758 edge e;
1759
1760 degree[child] = -1;
1761 rgn_bb_table[idx++] = child;
1762 BLOCK_TO_BB (child) = ++count;
1763 CONTAINING_RGN (child) = nr_regions;
1764 queue[head] = queue[tail--];
1765
1766 for (e = BASIC_BLOCK (child)->succ;
1767 e;
1768 e = e->succ_next)
1769 if (e->dest != EXIT_BLOCK_PTR)
1770 --degree[e->dest->index];
1771 }
1772 else
1773 --head;
1774 }
1775 ++nr_regions;
1776 }
1777 }
1778 }
1779 free (queue);
1780 }
1781
1782 /* Any block that did not end up in a region is placed into a region
1783 by itself. */
1784 for (i = 0; i < n_basic_blocks; i++)
1785 if (degree[i] >= 0)
1786 {
1787 rgn_bb_table[idx] = i;
1788 RGN_NR_BLOCKS (nr_regions) = 1;
1789 RGN_BLOCKS (nr_regions) = idx++;
1790 CONTAINING_RGN (i) = nr_regions++;
1791 BLOCK_TO_BB (i) = 0;
1792 }
1793
1794 free (max_hdr);
1795 free (dfs_nr);
1796 free (stack);
1797 free (passed);
1798 free (header);
1799 free (inner);
1800 free (in_queue);
1801 free (in_stack);
1802 }
1803
1804
1805 /* Functions for region scheduling information. */
1806
1807 /* Compute dominators, probability, and potential-split-edges of bb.
1808 Assume that these values were already computed for bb's predecessors. */
1809
1810 static void
1811 compute_dom_prob_ps (bb)
1812 int bb;
1813 {
1814 int nxt_in_edge, fst_in_edge, pred;
1815 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1816
1817 prob[bb] = 0.0;
1818 if (IS_RGN_ENTRY (bb))
1819 {
1820 BITSET_ADD (dom[bb], 0, bbset_size);
1821 prob[bb] = 1.0;
1822 return;
1823 }
1824
1825 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1826
1827 /* Initialize dom[bb] to '111..1'. */
1828 BITSET_INVERT (dom[bb], bbset_size);
1829
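/* Walk all edges entering bb.  dom[bb] becomes the intersection of the
   predecessors' dominator sets (bb itself is added after the loop), the
   ancestor_edges and pot_split sets are accumulated along the way, and
   prob[bb] sums each predecessor's probability divided by its number of
   in-region out edges, scaled by 0.9 when the predecessor also has
   edges leaving the region.  */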
1830 do
1831 {
1832 pred = FROM_BLOCK (nxt_in_edge);
1833 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1834
1835 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1836 edgeset_size);
1837
1838 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1839
1840 nr_out_edges = 1;
1841 nr_rgn_out_edges = 0;
1842 fst_out_edge = OUT_EDGES (pred);
1843 nxt_out_edge = NEXT_OUT (fst_out_edge);
1844 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1845 edgeset_size);
1846
1847 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1848
1849 /* The successor doesn't belong in the region? */
1850 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1851 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1852 ++nr_rgn_out_edges;
1853
1854 while (fst_out_edge != nxt_out_edge)
1855 {
1856 ++nr_out_edges;
1857 /* The successor doesn't belong in the region? */
1858 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1859 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1860 ++nr_rgn_out_edges;
1861 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1862 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1863
1864 }
1865
1866 /* Now nr_rgn_out_edges is the number of region-exit edges from
1867 pred, and nr_out_edges will be the number of pred out edges
1868 not leaving the region. */
1869 nr_out_edges -= nr_rgn_out_edges;
1870 if (nr_rgn_out_edges > 0)
1871 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1872 else
1873 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1874 nxt_in_edge = NEXT_IN (nxt_in_edge);
1875 }
1876 while (fst_in_edge != nxt_in_edge);
1877
1878 BITSET_ADD (dom[bb], bb, bbset_size);
1879 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1880
1881 if (sched_verbose >= 2)
1882 fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]));
1883 } /* compute_dom_prob_ps */
1884
1885 /* Functions for target info. */
1886
1887 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1888 Note that bb_trg dominates bb_src. */
1889
1890 static void
1891 split_edges (bb_src, bb_trg, bl)
1892 int bb_src;
1893 int bb_trg;
1894 edgelst *bl;
1895 {
1896 int es = edgeset_size;
1897 edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));
1898
1899 while (es--)
1900 src[es] = (pot_split[bb_src])[es];
1901 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1902 extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
1903 free (src);
1904 }
1905
1906
1907 /* Find the valid candidate-source-blocks for the target block TRG, compute
1908 their probability, and check if they are speculative or not.
1909 For speculative sources, compute their update-blocks and split-blocks. */
1910
1911 static void
1912 compute_trg_info (trg)
1913 int trg;
1914 {
1915 register candidate *sp;
1916 edgelst el;
1917 int check_block, update_idx;
1918 int i, j, k, fst_edge, nxt_edge;
1919
1920 /* Define some of the fields for the target bb as well. */
1921 sp = candidate_table + trg;
1922 sp->is_valid = 1;
1923 sp->is_speculative = 0;
1924 sp->src_prob = 100;
1925
1926 for (i = trg + 1; i < current_nr_blocks; i++)
1927 {
1928 sp = candidate_table + i;
1929
1930 sp->is_valid = IS_DOMINATED (i, trg);
1931 if (sp->is_valid)
1932 {
1933 sp->src_prob = GET_SRC_PROB (i, trg);
1934 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1935 }
1936
1937 if (sp->is_valid)
1938 {
1939 split_edges (i, trg, &el);
1940 sp->is_speculative = (el.nr_members) ? 1 : 0;
1941 if (sp->is_speculative && !flag_schedule_speculative)
1942 sp->is_valid = 0;
1943 }
1944
1945 if (sp->is_valid)
1946 {
1947 sp->split_bbs.first_member = &bblst_table[bblst_last];
1948 sp->split_bbs.nr_members = el.nr_members;
1949 for (j = 0; j < el.nr_members; bblst_last++, j++)
1950 bblst_table[bblst_last] =
1951 TO_BLOCK (rgn_edges[el.first_member[j]]);
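/* The update blocks are the targets of those out edges of the split
   edges' source blocks which are not themselves split edges; registers
   set by a speculatively moved insn must later be marked live at the
   start of these blocks (see update_live_1).  */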
1952 sp->update_bbs.first_member = &bblst_table[bblst_last];
1953 update_idx = 0;
1954 for (j = 0; j < el.nr_members; j++)
1955 {
1956 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1957 fst_edge = nxt_edge = OUT_EDGES (check_block);
1958 do
1959 {
1960 for (k = 0; k < el.nr_members; k++)
1961 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1962 break;
1963
1964 if (k >= el.nr_members)
1965 {
1966 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1967 update_idx++;
1968 }
1969
1970 nxt_edge = NEXT_OUT (nxt_edge);
1971 }
1972 while (fst_edge != nxt_edge);
1973 }
1974 sp->update_bbs.nr_members = update_idx;
1975
1976 }
1977 else
1978 {
1979 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1980
1981 sp->is_speculative = 0;
1982 sp->src_prob = 0;
1983 }
1984 }
1985 } /* compute_trg_info */
1986
1987
1988 /* Print candidates info, for debugging purposes. Callable from debugger. */
1989
1990 void
1991 debug_candidate (i)
1992 int i;
1993 {
1994 if (!candidate_table[i].is_valid)
1995 return;
1996
1997 if (candidate_table[i].is_speculative)
1998 {
1999 int j;
2000 fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
2001
2002 fprintf (dump, "split path: ");
2003 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
2004 {
2005 int b = candidate_table[i].split_bbs.first_member[j];
2006
2007 fprintf (dump, " %d ", b);
2008 }
2009 fprintf (dump, "\n");
2010
2011 fprintf (dump, "update path: ");
2012 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
2013 {
2014 int b = candidate_table[i].update_bbs.first_member[j];
2015
2016 fprintf (dump, " %d ", b);
2017 }
2018 fprintf (dump, "\n");
2019 }
2020 else
2021 {
2022 fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
2023 }
2024 }
2025
2026
2027 /* Print candidates info, for debugging purposes. Callable from debugger. */
2028
2029 void
2030 debug_candidates (trg)
2031 int trg;
2032 {
2033 int i;
2034
2035 fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
2036 BB_TO_BLOCK (trg), trg);
2037 for (i = trg + 1; i < current_nr_blocks; i++)
2038 debug_candidate (i);
2039 }
2040
2041
2042 /* Functions for speculative scheduling. */
2043
2044 /* Return 0 if x is a set of a register that is live at the beginning of
2045 one of the split-blocks of src, otherwise return 1. */
2046
2047 static int
2048 check_live_1 (src, x)
2049 int src;
2050 rtx x;
2051 {
2052 register int i;
2053 register int regno;
2054 register rtx reg = SET_DEST (x);
2055
2056 if (reg == 0)
2057 return 1;
2058
2059 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2060 || GET_CODE (reg) == SIGN_EXTRACT
2061 || GET_CODE (reg) == STRICT_LOW_PART)
2062 reg = XEXP (reg, 0);
2063
2064 if (GET_CODE (reg) == PARALLEL
2065 && GET_MODE (reg) == BLKmode)
2066 {
2067 register int i;
2068 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2069 if (check_live_1 (src, XVECEXP (reg, 0, i)))
2070 return 1;
2071 return 0;
2072 }
2073
2074 if (GET_CODE (reg) != REG)
2075 return 1;
2076
2077 regno = REGNO (reg);
2078
2079 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
2080 {
2081 /* Global registers are assumed live. */
2082 return 0;
2083 }
2084 else
2085 {
2086 if (regno < FIRST_PSEUDO_REGISTER)
2087 {
2088 /* Check for hard registers. */
2089 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
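/* A multi-register hard reg blocks the motion (return 0) as soon as
   any of its constituent registers is live at the start of any split
   block.  */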
2090 while (--j >= 0)
2091 {
2092 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2093 {
2094 int b = candidate_table[src].split_bbs.first_member[i];
2095
2096 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
2097 regno + j))
2098 {
2099 return 0;
2100 }
2101 }
2102 }
2103 }
2104 else
2105 {
2106 /* Check for pseudo registers. */
2107 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2108 {
2109 int b = candidate_table[src].split_bbs.first_member[i];
2110
2111 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
2112 {
2113 return 0;
2114 }
2115 }
2116 }
2117 }
2118
2119 return 1;
2120 }
2121
2122
2123 /* If x is a set of a register R, mark that R is live at the beginning
2124 of every update-block of src. */
2125
2126 static void
2127 update_live_1 (src, x)
2128 int src;
2129 rtx x;
2130 {
2131 register int i;
2132 register int regno;
2133 register rtx reg = SET_DEST (x);
2134
2135 if (reg == 0)
2136 return;
2137
2138 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2139 || GET_CODE (reg) == SIGN_EXTRACT
2140 || GET_CODE (reg) == STRICT_LOW_PART)
2141 reg = XEXP (reg, 0);
2142
2143 if (GET_CODE (reg) == PARALLEL
2144 && GET_MODE (reg) == BLKmode)
2145 {
2146 register int i;
2147 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2148 update_live_1 (src, XVECEXP (reg, 0, i));
2149 return;
2150 }
2151
2152 if (GET_CODE (reg) != REG)
2153 return;
2154
2155 /* Global registers are always live, so the code below does not apply
2156 to them. */
2157
2158 regno = REGNO (reg);
2159
2160 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
2161 {
2162 if (regno < FIRST_PSEUDO_REGISTER)
2163 {
2164 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2165 while (--j >= 0)
2166 {
2167 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2168 {
2169 int b = candidate_table[src].update_bbs.first_member[i];
2170
2171 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
2172 regno + j);
2173 }
2174 }
2175 }
2176 else
2177 {
2178 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2179 {
2180 int b = candidate_table[src].update_bbs.first_member[i];
2181
2182 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
2183 }
2184 }
2185 }
2186 }
2187
2188
2189 /* Return 1 if insn can be speculatively moved from block src to trg,
2190 otherwise return 0. Called before first insertion of insn to
2191 ready-list or before the scheduling. */
2192
2193 static int
2194 check_live (insn, src)
2195 rtx insn;
2196 int src;
2197 {
2198 /* Find the registers set by instruction. */
2199 if (GET_CODE (PATTERN (insn)) == SET
2200 || GET_CODE (PATTERN (insn)) == CLOBBER)
2201 return check_live_1 (src, PATTERN (insn));
2202 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2203 {
2204 int j;
2205 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2206 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2207 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2208 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
2209 return 0;
2210
2211 return 1;
2212 }
2213
2214 return 1;
2215 }
2216
2217
2218 /* Update the live registers info after insn was moved speculatively from
2219 block src to trg. */
2220
2221 static void
2222 update_live (insn, src)
2223 rtx insn;
2224 int src;
2225 {
2226 /* Find the registers set by instruction. */
2227 if (GET_CODE (PATTERN (insn)) == SET
2228 || GET_CODE (PATTERN (insn)) == CLOBBER)
2229 update_live_1 (src, PATTERN (insn));
2230 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2231 {
2232 int j;
2233 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2234 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2235 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2236 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
2237 }
2238 }
2239
2240 /* Exception Free Loads:
2241
2242 We define five classes of speculative loads: IFREE, IRISKY,
2243 PFREE, PRISKY, and MFREE.
2244
2245 IFREE loads are loads that are proved to be exception-free, just
2246 by examining the load insn. Examples for such loads are loads
2247 from TOC and loads of global data.
2248
2249 IRISKY loads are loads that are proved to be exception-risky,
2250 just by examining the load insn. Examples for such loads are
2251 volatile loads and loads from shared memory.
2252
2253 PFREE loads are loads for which we can prove, by examining other
2254 insns, that they are exception-free. Currently, this class consists
2255 of loads for which we are able to find a "similar load", either in
2256 the target block, or, if only one split-block exists, in that split
2257 block. Load2 is similar to load1 if both have same single base
2258 register. We identify only part of the similar loads, by finding
2259 an insn upon which both load1 and load2 have a DEF-USE dependence.
2260
2261 PRISKY loads are loads for which we can prove, by examining other
2262 insns, that they are exception-risky. Currently we have two proofs for
2263 such loads. The first proof detects loads that are probably guarded by a
2264 test on the memory address. This proof is based on the
2265 backward and forward data dependence information for the region.
2266 Let load-insn be the examined load.
2267 Load-insn is PRISKY iff ALL the following hold:
2268
2269 - insn1 is not in the same block as load-insn
2270 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
2271 - test-insn is either a compare or a branch, not in the same block
2272 as load-insn
2273 - load-insn is reachable from test-insn
2274 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2275
2276 This proof might fail when the compare and the load are fed
2277 by an insn not in the region. To solve this, we will add to this
2278 group all loads that have no input DEF-USE dependence.
2279
2280 The second proof detects loads that are directly or indirectly
2281 fed by a speculative load. This proof is affected by the
2282 scheduling process. We will use the flag fed_by_spec_load.
2283 Initially, all insns have this flag reset. After a speculative
2284 motion of an insn, if insn is either a load, or marked as
2285 fed_by_spec_load, we will also mark as fed_by_spec_load every
2286 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2287 load which is fed_by_spec_load is also PRISKY.
2288
2289 MFREE (maybe-free) loads are all the remaining loads. They may be
2290 exception-free, but we cannot prove it.
2291
2292 Now, all loads in IFREE and PFREE classes are considered
2293 exception-free, while all loads in IRISKY and PRISKY classes are
2294 considered exception-risky. As for loads in the MFREE class,
2295 these are considered either exception-free or exception-risky,
2296 depending on whether we are pessimistic or optimistic. We have
2297 to take the pessimistic approach to assure the safety of
2298 speculative scheduling, but we can take the optimistic approach
2299 by invoking the -fsched_spec_load_dangerous option. */
2300
2301 enum INSN_TRAP_CLASS
2302 {
2303 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
2304 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
2305 };
2306
2307 #define WORST_CLASS(class1, class2) \
2308 ((class1 > class2) ? class1 : class2)
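/* The trap classes above are ordered by increasing risk, so WORST_CLASS
   simply picks the riskier of two classifications; e.g. combining an
   IFREE load with a TRAP_RISKY store yields TRAP_RISKY.  */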
2309
2310 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
2311 #define IS_REACHABLE(bb_from, bb_to) \
2312 (bb_from == bb_to \
2313 || IS_RGN_ENTRY (bb_from) \
2314 || (bitset_member (ancestor_edges[bb_to], \
2315 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
2316 edgeset_size)))
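/* Reachability is approximated using the ancestor_edges sets computed in
   compute_dom_prob_ps: bb_to is considered reachable from bb_from when the
   two are equal, when bb_from is the region entry, or when bb_from's
   incoming edge (as given by IN_EDGES) is an ancestor edge of bb_to.  */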
2317
2318 /* Non-zero iff the address is composed of at most one register. */
2319 #define CONST_BASED_ADDRESS_P(x) \
2320 (GET_CODE (x) == REG \
2321 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
2322 || (GET_CODE (x) == LO_SUM)) \
2323 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
2324 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
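/* For example, (reg 65) and (plus (reg 65) (const_int 4)) match, while an
   address formed from two registers does not.  */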
2325
2326 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2327
2328 static void
2329 set_spec_fed (load_insn)
2330 rtx load_insn;
2331 {
2332 rtx link;
2333
2334 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
2335 if (GET_MODE (link) == VOIDmode)
2336 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
2337 } /* set_spec_fed */
2338
2339 /* On the path from the insn to load_insn_bb, find a conditional
2340 branch depending on insn, that guards the speculative load. */
2341
2342 static int
2343 find_conditional_protection (insn, load_insn_bb)
2344 rtx insn;
2345 int load_insn_bb;
2346 {
2347 rtx link;
2348
2349 /* Iterate through DEF-USE forward dependences. */
2350 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2351 {
2352 rtx next = XEXP (link, 0);
2353 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
2354 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
2355 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
2356 && load_insn_bb != INSN_BB (next)
2357 && GET_MODE (link) == VOIDmode
2358 && (GET_CODE (next) == JUMP_INSN
2359 || find_conditional_protection (next, load_insn_bb)))
2360 return 1;
2361 }
2362 return 0;
2363 } /* find_conditional_protection */
2364
2365 /* Returns 1 if the same insn1 that participates in the computation
2366 of load_insn's address is feeding a conditional branch that is
2367 guarding load_insn. This is true if we find the two DEF-USE
2368 chains:
2369 insn1 -> ... -> conditional-branch
2370 insn1 -> ... -> load_insn,
2371 and if a flow path exists:
2372 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2373 and if insn1 is on the path
2374 region-entry -> ... -> bb_trg -> ... load_insn.
2375
2376 Locate insn1 by climbing on LOG_LINKS from load_insn.
2377 Locate the branch by following INSN_DEPEND from insn1. */
2378
2379 static int
2380 is_conditionally_protected (load_insn, bb_src, bb_trg)
2381 rtx load_insn;
2382 int bb_src, bb_trg;
2383 {
2384 rtx link;
2385
2386 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
2387 {
2388 rtx insn1 = XEXP (link, 0);
2389
2390 /* Must be a DEF-USE dependence upon non-branch. */
2391 if (GET_MODE (link) != VOIDmode
2392 || GET_CODE (insn1) == JUMP_INSN)
2393 continue;
2394
2395 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2396 if (INSN_BB (insn1) == bb_src
2397 || (CONTAINING_RGN (BLOCK_NUM (insn1))
2398 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
2399 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
2400 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
2401 continue;
2402
2403 /* Now search for the conditional-branch. */
2404 if (find_conditional_protection (insn1, bb_src))
2405 return 1;
2406
2407 /* Recursive step: search another insn1, "above" current insn1. */
2408 return is_conditionally_protected (insn1, bb_src, bb_trg);
2409 }
2410
2411 /* The chain does not exist. */
2412 return 0;
2413 } /* is_conditionally_protected */
2414
2415 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2416 load_insn can move speculatively from bb_src to bb_trg. All the
2417 following must hold:
2418
2419 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2420 (2) load_insn and load2 have a def-use dependence upon
2421 the same insn 'insn1'.
2422 (3) either load2 is in bb_trg, or:
2423 - there's only one split-block, and
2424 - load2 is on the escape path (i.e. in that split block).
2425
2426 From all these we can conclude that the two loads access memory
2427 addresses that differ at most by a constant, and hence if moving
2428 load_insn would cause an exception, it would have been caused by
2429 load2 anyhow. */
2430
2431 static int
2432 is_pfree (load_insn, bb_src, bb_trg)
2433 rtx load_insn;
2434 int bb_src, bb_trg;
2435 {
2436 rtx back_link;
2437 register candidate *candp = candidate_table + bb_src;
2438
2439 if (candp->split_bbs.nr_members != 1)
2440 /* Must have exactly one escape block. */
2441 return 0;
2442
2443 for (back_link = LOG_LINKS (load_insn);
2444 back_link; back_link = XEXP (back_link, 1))
2445 {
2446 rtx insn1 = XEXP (back_link, 0);
2447
2448 if (GET_MODE (back_link) == VOIDmode)
2449 {
2450 /* Found a DEF-USE dependence (insn1, load_insn). */
2451 rtx fore_link;
2452
2453 for (fore_link = INSN_DEPEND (insn1);
2454 fore_link; fore_link = XEXP (fore_link, 1))
2455 {
2456 rtx insn2 = XEXP (fore_link, 0);
2457 if (GET_MODE (fore_link) == VOIDmode)
2458 {
2459 /* Found a DEF-USE dependence (insn1, insn2). */
2460 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
2461 /* insn2 not guaranteed to be a 1 base reg load. */
2462 continue;
2463
2464 if (INSN_BB (insn2) == bb_trg)
2465 /* insn2 is the similar load, in the target block. */
2466 return 1;
2467
2468 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
2469 /* insn2 is a similar load, in a split-block. */
2470 return 1;
2471 }
2472 }
2473 }
2474 }
2475
2476 /* Couldn't find a similar load. */
2477 return 0;
2478 } /* is_pfree */
2479
2480 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2481 as found by analyzing insn's expression. */
2482
2483 static int
2484 may_trap_exp (x, is_store)
2485 rtx x;
2486 int is_store;
2487 {
2488 enum rtx_code code;
2489
2490 if (x == 0)
2491 return TRAP_FREE;
2492 code = GET_CODE (x);
2493 if (is_store)
2494 {
2495 if (code == MEM)
2496 return TRAP_RISKY;
2497 else
2498 return TRAP_FREE;
2499 }
2500 if (code == MEM)
2501 {
2502 /* The insn uses memory: a volatile load. */
2503 if (MEM_VOLATILE_P (x))
2504 return IRISKY;
2505 /* An exception-free load. */
2506 if (!may_trap_p (x))
2507 return IFREE;
2508 /* A load with 1 base register, to be further checked. */
2509 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
2510 return PFREE_CANDIDATE;
2511 /* No info on the load, to be further checked. */
2512 return PRISKY_CANDIDATE;
2513 }
2514 else
2515 {
2516 const char *fmt;
2517 int i, insn_class = TRAP_FREE;
2518
2519 /* Neither store nor load, check if it may cause a trap. */
2520 if (may_trap_p (x))
2521 return TRAP_RISKY;
2522 /* Recursive step: walk the insn... */
2523 fmt = GET_RTX_FORMAT (code);
2524 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2525 {
2526 if (fmt[i] == 'e')
2527 {
2528 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
2529 insn_class = WORST_CLASS (insn_class, tmp_class);
2530 }
2531 else if (fmt[i] == 'E')
2532 {
2533 int j;
2534 for (j = 0; j < XVECLEN (x, i); j++)
2535 {
2536 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
2537 insn_class = WORST_CLASS (insn_class, tmp_class);
2538 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2539 break;
2540 }
2541 }
2542 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2543 break;
2544 }
2545 return insn_class;
2546 }
2547 } /* may_trap_exp */
2548
2549
2550 /* Classifies insn for the purpose of verifying that it can be
2551 moved speculatively, by examining its pattern, returning:
2552 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2553 TRAP_FREE: non-load insn.
2554 IFREE: load from a globally safe location.
2555 IRISKY: volatile load.
2556 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
2557 being either PFREE or PRISKY. */
2558
2559 static int
2560 haifa_classify_insn (insn)
2561 rtx insn;
2562 {
2563 rtx pat = PATTERN (insn);
2564 int tmp_class = TRAP_FREE;
2565 int insn_class = TRAP_FREE;
2566 enum rtx_code code;
2567
2568 if (GET_CODE (pat) == PARALLEL)
2569 {
2570 int i, len = XVECLEN (pat, 0);
2571
2572 for (i = len - 1; i >= 0; i--)
2573 {
2574 code = GET_CODE (XVECEXP (pat, 0, i));
2575 switch (code)
2576 {
2577 case CLOBBER:
2578 /* Test if it is a 'store'. */
2579 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
2580 break;
2581 case SET:
2582 /* Test if it is a store. */
2583 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
2584 if (tmp_class == TRAP_RISKY)
2585 break;
2586 /* Test if it is a load. */
2587 tmp_class =
2588 WORST_CLASS (tmp_class,
2589 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
2590 break;
2591 case COND_EXEC:
2592 case TRAP_IF:
2593 tmp_class = TRAP_RISKY;
2594 break;
2595 default:;
2596 }
2597 insn_class = WORST_CLASS (insn_class, tmp_class);
2598 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2599 break;
2600 }
2601 }
2602 else
2603 {
2604 code = GET_CODE (pat);
2605 switch (code)
2606 {
2607 case CLOBBER:
2608 /* Test if it is a 'store'. */
2609 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
2610 break;
2611 case SET:
2612 /* Test if it is a store. */
2613 tmp_class = may_trap_exp (SET_DEST (pat), 1);
2614 if (tmp_class == TRAP_RISKY)
2615 break;
2616 /* Test if it is a load. */
2617 tmp_class =
2618 WORST_CLASS (tmp_class,
2619 may_trap_exp (SET_SRC (pat), 0));
2620 break;
2621 case COND_EXEC:
2622 case TRAP_IF:
2623 tmp_class = TRAP_RISKY;
2624 break;
2625 default:;
2626 }
2627 insn_class = tmp_class;
2628 }
2629
2630 return insn_class;
2631
2632 } /* haifa_classify_insn */
2633
2634 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2635 a load moved speculatively, or if load_insn is protected by
2636 a compare on load_insn's address). */
2637
2638 static int
2639 is_prisky (load_insn, bb_src, bb_trg)
2640 rtx load_insn;
2641 int bb_src, bb_trg;
2642 {
2643 if (FED_BY_SPEC_LOAD (load_insn))
2644 return 1;
2645
2646 if (LOG_LINKS (load_insn) == NULL)
2647 /* The dependence may 'hide' outside the region. */
2648 return 1;
2649
2650 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2651 return 1;
2652
2653 return 0;
2654 } /* is_prisky */
2655
2656 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2657 Return 1 if insn is exception-free (and the motion is valid)
2658 and 0 otherwise. */
2659
2660 static int
2661 is_exception_free (insn, bb_src, bb_trg)
2662 rtx insn;
2663 int bb_src, bb_trg;
2664 {
2665 int insn_class = haifa_classify_insn (insn);
2666
2667 /* Handle non-load insns. */
2668 switch (insn_class)
2669 {
2670 case TRAP_FREE:
2671 return 1;
2672 case TRAP_RISKY:
2673 return 0;
2674 default:;
2675 }
2676
2677 /* Handle loads. */
2678 if (!flag_schedule_speculative_load)
2679 return 0;
2680 IS_LOAD_INSN (insn) = 1;
2681 switch (insn_class)
2682 {
2683 case IFREE:
2684 return (1);
2685 case IRISKY:
2686 return 0;
2687 case PFREE_CANDIDATE:
2688 if (is_pfree (insn, bb_src, bb_trg))
2689 return 1;
2690 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2691 case PRISKY_CANDIDATE:
2692 if (!flag_schedule_speculative_load_dangerous
2693 || is_prisky (insn, bb_src, bb_trg))
2694 return 0;
2695 break;
2696 default:;
2697 }
2698
2699 return flag_schedule_speculative_load_dangerous;
2700 } /* is_exception_free */
2701
2702
2703 /* Process an insn's memory dependencies. There are four kinds of
2704 dependencies:
2705
2706 (0) read dependence: read follows read
2707 (1) true dependence: read follows write
2708 (2) anti dependence: write follows read
2709 (3) output dependence: write follows write
2710
2711 We are careful to build only dependencies which actually exist, and
2712 use transitivity to avoid building too many links. */
2713 \f
2714 /* Return the INSN_LIST containing INSN in LIST, or NULL
2715 if LIST does not contain INSN. */
2716
2717 HAIFA_INLINE static rtx
2718 find_insn_list (insn, list)
2719 rtx insn;
2720 rtx list;
2721 {
2722 while (list)
2723 {
2724 if (XEXP (list, 0) == insn)
2725 return list;
2726 list = XEXP (list, 1);
2727 }
2728 return 0;
2729 }
2730
2731
2732 /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
2733 otherwise. */
2734
2735 HAIFA_INLINE static char
2736 find_insn_mem_list (insn, x, list, list1)
2737 rtx insn, x;
2738 rtx list, list1;
2739 {
2740 while (list)
2741 {
2742 if (XEXP (list, 0) == insn
2743 && XEXP (list1, 0) == x)
2744 return 1;
2745 list = XEXP (list, 1);
2746 list1 = XEXP (list1, 1);
2747 }
2748 return 0;
2749 }
2750
2751
2752 /* Compute the function units used by INSN. This caches the value
2753 returned by function_units_used. A function unit is encoded as the
2754 unit number if the value is non-negative and the complement of a
2755 mask if the value is negative. A function unit index is the
2756 non-negative encoding. */
2757
2758 HAIFA_INLINE static int
2759 insn_unit (insn)
2760 rtx insn;
2761 {
2762 register int unit = INSN_UNIT (insn);
2763
2764 if (unit == 0)
2765 {
2766 recog_memoized (insn);
2767
2768 /* A USE insn, or something else we don't need to understand.
2769 We can't pass these directly to function_units_used because it will
2770 trigger a fatal error for unrecognizable insns. */
2771 if (INSN_CODE (insn) < 0)
2772 unit = -1;
2773 else
2774 {
2775 unit = function_units_used (insn);
2776 /* Increment non-negative values so we can cache zero. */
2777 if (unit >= 0)
2778 unit++;
2779 }
2780 /* We only cache 16 bits of the result, so if the value is out of
2781 range, don't cache it. */
2782 if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
2783 || unit >= 0
2784 || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
2785 INSN_UNIT (insn) = unit;
2786 }
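/* Undo the increment applied before caching: non-negative units were
   stored as unit + 1, while negative (complemented mask) values are
   returned unchanged.  */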
2787 return (unit > 0 ? unit - 1 : unit);
2788 }
2789
2790 /* Compute the blockage range for executing INSN on UNIT. This caches
2791 the value returned by the blockage_range_function for the unit.
2792 These values are encoded in an int where the upper half gives the
2793 minimum value and the lower half gives the maximum value. */
2794
2795 HAIFA_INLINE static unsigned int
2796 blockage_range (unit, insn)
2797 int unit;
2798 rtx insn;
2799 {
2800 unsigned int blockage = INSN_BLOCKAGE (insn);
2801 unsigned int range;
2802
2803 if ((int) UNIT_BLOCKED (blockage) != unit + 1)
2804 {
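/* Nothing cached for this unit (the blocked unit is stored as unit + 1,
   so zero means no cached value); compute the range now.  */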
2805 range = function_units[unit].blockage_range_function (insn);
2806 /* We only cache the blockage range for one unit and then only if
2807 the values fit. */
2808 if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
2809 INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
2810 }
2811 else
2812 range = BLOCKAGE_RANGE (blockage);
2813
2814 return range;
2815 }
2816
2817 /* A vector indexed by function unit instance giving the last insn to use
2818 the unit. The value of the function unit instance index for unit U
2819 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2820 static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2821
2822 /* A vector indexed by function unit instance giving the minimum time when
2823 the unit will unblock based on the maximum blockage cost. */
2824 static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2825
2826 /* A vector indexed by function unit number giving the number of insns
2827 that remain to use the unit. */
2828 static int unit_n_insns[FUNCTION_UNITS_SIZE];
2829
2830 /* Reset the function unit state to the null state. */
2831
2832 static void
2833 clear_units ()
2834 {
2835 bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
2836 bzero ((char *) unit_tick, sizeof (unit_tick));
2837 bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
2838 }
2839
2840 /* Return the issue-delay of an insn. */
2841
2842 HAIFA_INLINE static int
2843 insn_issue_delay (insn)
2844 rtx insn;
2845 {
2846 int i, delay = 0;
2847 int unit = insn_unit (insn);
2848
2849 /* Efficiency note: in fact, we are working 'hard' to compute a
2850 value that was available in the md file, and is not available in
2851 the function_units[] structure. It would be nice to have this
2852 value there, too. */
2853 if (unit >= 0)
2854 {
2855 if (function_units[unit].blockage_range_function &&
2856 function_units[unit].blockage_function)
2857 delay = function_units[unit].blockage_function (insn, insn);
2858 }
2859 else
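/* A negative unit value is the complement of a mask of function units;
   check every unit in the mask and keep the worst delay.  */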
2860 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2861 if ((unit & 1) != 0 && function_units[i].blockage_range_function
2862 && function_units[i].blockage_function)
2863 delay = MAX (delay, function_units[i].blockage_function (insn, insn));
2864
2865 return delay;
2866 }
2867
2868 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2869 instance INSTANCE at time CLOCK if the previous actual hazard cost
2870 was COST. */
2871
2872 HAIFA_INLINE static int
2873 actual_hazard_this_instance (unit, instance, insn, clock, cost)
2874 int unit, instance, clock, cost;
2875 rtx insn;
2876 {
2877 int tick = unit_tick[instance]; /* Issue time of the last issued insn. */
2878
2879 if (tick - clock > cost)
2880 {
2881 /* The scheduler is operating forward, so unit's last insn is the
2882 executing insn and INSN is the candidate insn. We want a
2883 more exact measure of the blockage if we execute INSN at CLOCK
2884 given when we committed the execution of the unit's last insn.
2885
2886 The blockage value is given by either the unit's max blockage
2887 constant, blockage range function, or blockage function. Use
2888 the most exact form for the given unit. */
2889
2890 if (function_units[unit].blockage_range_function)
2891 {
2892 if (function_units[unit].blockage_function)
2893 tick += (function_units[unit].blockage_function
2894 (unit_last_insn[instance], insn)
2895 - function_units[unit].max_blockage);
2896 else
2897 tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
2898 - function_units[unit].max_blockage);
2899 }
2900 if (tick - clock > cost)
2901 cost = tick - clock;
2902 }
2903 return cost;
2904 }
2905
2906 /* Record INSN as having begun execution on the units encoded by UNIT at
2907 time CLOCK. */
2908
2909 HAIFA_INLINE static void
2910 schedule_unit (unit, insn, clock)
2911 int unit, clock;
2912 rtx insn;
2913 {
2914 int i;
2915
2916 if (unit >= 0)
2917 {
2918 int instance = unit;
2919 #if MAX_MULTIPLICITY > 1
2920 /* Find the first free instance of the function unit and use that
2921 one. We assume that one is free. */
2922 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2923 {
2924 if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
2925 break;
2926 instance += FUNCTION_UNITS_SIZE;
2927 }
2928 #endif
2929 unit_last_insn[instance] = insn;
2930 unit_tick[instance] = (clock + function_units[unit].max_blockage);
2931 }
2932 else
2933 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2934 if ((unit & 1) != 0)
2935 schedule_unit (i, insn, clock);
2936 }
2937
2938 /* Return the actual hazard cost of executing INSN on the units encoded by
2939 UNIT at time CLOCK if the previous actual hazard cost was COST. */
2940
2941 HAIFA_INLINE static int
2942 actual_hazard (unit, insn, clock, cost)
2943 int unit, clock, cost;
2944 rtx insn;
2945 {
2946 int i;
2947
2948 if (unit >= 0)
2949 {
2950 /* Find the instance of the function unit with the minimum hazard. */
2951 int instance = unit;
2952 int best_cost = actual_hazard_this_instance (unit, instance, insn,
2953 clock, cost);
2954 #if MAX_MULTIPLICITY > 1
2955 int this_cost;
2956
2957 if (best_cost > cost)
2958 {
2959 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2960 {
2961 instance += FUNCTION_UNITS_SIZE;
2962 this_cost = actual_hazard_this_instance (unit, instance, insn,
2963 clock, cost);
2964 if (this_cost < best_cost)
2965 {
2966 best_cost = this_cost;
2967 if (this_cost <= cost)
2968 break;
2969 }
2970 }
2971 }
2972 #endif
2973 cost = MAX (cost, best_cost);
2974 }
2975 else
2976 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2977 if ((unit & 1) != 0)
2978 cost = actual_hazard (i, insn, clock, cost);
2979
2980 return cost;
2981 }
2982
2983 /* Return the potential hazard cost of executing an instruction on the
2984 units encoded by UNIT if the previous potential hazard cost was COST.
2985 An insn with a large blockage time is chosen in preference to one
2986 with a smaller time; an insn that uses a unit that is more likely
2987 to be used is chosen in preference to one with a unit that is less
2988 used. We are trying to minimize a subsequent actual hazard. */
2989
2990 HAIFA_INLINE static int
2991 potential_hazard (unit, insn, cost)
2992 int unit, cost;
2993 rtx insn;
2994 {
2995 int i, ncost;
2996 unsigned int minb, maxb;
2997
2998 if (unit >= 0)
2999 {
3000 minb = maxb = function_units[unit].max_blockage;
3001 if (maxb > 1)
3002 {
3003 if (function_units[unit].blockage_range_function)
3004 {
3005 maxb = minb = blockage_range (unit, insn);
3006 maxb = MAX_BLOCKAGE_COST (maxb);
3007 minb = MIN_BLOCKAGE_COST (minb);
3008 }
3009
3010 if (maxb > 1)
3011 {
3012 /* Make the number of instructions left dominate. Make the
3013 minimum delay dominate the maximum delay. If all these
3014 are the same, use the unit number to add an arbitrary
3015 ordering. Other terms can be added. */
3016 ncost = minb * 0x40 + maxb;
3017 ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
3018 if (ncost > cost)
3019 cost = ncost;
3020 }
3021 }
3022 }
3023 else
3024 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
3025 if ((unit & 1) != 0)
3026 cost = potential_hazard (i, insn, cost);
3027
3028 return cost;
3029 }
3030
3031 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3032 This is the number of cycles between instruction issue and
3033 instruction results. */
3034
3035 HAIFA_INLINE static int
3036 insn_cost (insn, link, used)
3037 rtx insn, link, used;
3038 {
3039 register int cost = INSN_COST (insn);
3040
3041 if (cost == 0)
3042 {
3043 recog_memoized (insn);
3044
3045 /* A USE insn, or something else we don't need to understand.
3046 We can't pass these directly to result_ready_cost because it will
3047 trigger a fatal error for unrecognizable insns. */
3048 if (INSN_CODE (insn) < 0)
3049 {
3050 INSN_COST (insn) = 1;
3051 return 1;
3052 }
3053 else
3054 {
3055 cost = result_ready_cost (insn);
3056
3057 if (cost < 1)
3058 cost = 1;
3059
3060 INSN_COST (insn) = cost;
3061 }
3062 }
3063
3064 /* In this case estimate cost without caring how insn is used. */
3065 if (link == 0 && used == 0)
3066 return cost;
3067
3068 /* A USE insn should never require the value used to be computed. This
3069 allows the computation of a function's result and parameter values to
3070 overlap the return and call. */
3071 recog_memoized (used);
3072 if (INSN_CODE (used) < 0)
3073 LINK_COST_FREE (link) = 1;
3074
3075 /* If some dependencies vary the cost, compute the adjustment. Most
3076 commonly, the adjustment is complete: either the cost is ignored
3077 (in the case of an output- or anti-dependence), or the cost is
3078 unchanged. These values are cached in the link as LINK_COST_FREE
3079 and LINK_COST_ZERO. */
3080
3081 if (LINK_COST_FREE (link))
3082 cost = 0;
3083 #ifdef ADJUST_COST
3084 else if (!LINK_COST_ZERO (link))
3085 {
3086 int ncost = cost;
3087
3088 ADJUST_COST (used, link, insn, ncost);
3089 if (ncost < 1)
3090 {
3091 LINK_COST_FREE (link) = 1;
3092 ncost = 0;
3093 }
3094 if (cost == ncost)
3095 LINK_COST_ZERO (link) = 1;
3096 cost = ncost;
3097 }
3098 #endif
3099 return cost;
3100 }
3101
3102 /* Compute the priority number for INSN. */
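/* The priority is the length, in cycles, of the longest data-dependence
   path from INSN to the end of its block: the maximum over INSN's forward
   dependences of insn_cost plus the priority of the dependent insn.  The
   result is cached in INSN_PRIORITY.  */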
3103
3104 static int
3105 priority (insn)
3106 rtx insn;
3107 {
3108 int this_priority;
3109 rtx link;
3110
3111 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3112 return 0;
3113
3114 if ((this_priority = INSN_PRIORITY (insn)) == 0)
3115 {
3116 if (INSN_DEPEND (insn) == 0)
3117 this_priority = insn_cost (insn, 0, 0);
3118 else
3119 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
3120 {
3121 rtx next;
3122 int next_priority;
3123
3124 if (RTX_INTEGRATED_P (link))
3125 continue;
3126
3127 next = XEXP (link, 0);
3128
3129 /* Critical path is meaningful in block boundaries only. */
3130 if (BLOCK_NUM (next) != BLOCK_NUM (insn))
3131 continue;
3132
3133 next_priority = insn_cost (insn, link, next) + priority (next);
3134 if (next_priority > this_priority)
3135 this_priority = next_priority;
3136 }
3137 INSN_PRIORITY (insn) = this_priority;
3138 }
3139 return this_priority;
3140 }
3141 \f
3142
3143 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3144 them to the unused_*_list variables, so that they can be reused. */
3145
3146 static void
3147 free_pending_lists ()
3148 {
3149 int bb;
3150
3151 for (bb = 0; bb < current_nr_blocks; bb++)
3152 {
3153 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
3154 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
3155 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
3156 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
3157 }
3158 }
3159
3160 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3161 The MEM is a memory reference contained within INSN, which we are saving
3162 so that we can do memory aliasing on it. */
3163
3164 static void
3165 add_insn_mem_dependence (deps, insn_list, mem_list, insn, mem)
3166 struct deps *deps;
3167 rtx *insn_list, *mem_list, insn, mem;
3168 {
3169 register rtx link;
3170
3171 link = alloc_INSN_LIST (insn, *insn_list);
3172 *insn_list = link;
3173
3174 link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
3175 *mem_list = link;
3176
3177 deps->pending_lists_length++;
3178 }
3179 \f
3180 /* Make a dependency between every memory reference on the pending lists
3181 and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush
3182 the read list. */
3183
3184 static void
3185 flush_pending_lists (deps, insn, only_write)
3186 struct deps *deps;
3187 rtx insn;
3188 int only_write;
3189 {
3190 rtx u;
3191 rtx link;
3192
3193 while (deps->pending_read_insns && ! only_write)
3194 {
3195 add_dependence (insn, XEXP (deps->pending_read_insns, 0),
3196 REG_DEP_ANTI);
3197
3198 link = deps->pending_read_insns;
3199 deps->pending_read_insns = XEXP (deps->pending_read_insns, 1);
3200 free_INSN_LIST_node (link);
3201
3202 link = deps->pending_read_mems;
3203 deps->pending_read_mems = XEXP (deps->pending_read_mems, 1);
3204 free_EXPR_LIST_node (link);
3205 }
3206 while (deps->pending_write_insns)
3207 {
3208 add_dependence (insn, XEXP (deps->pending_write_insns, 0),
3209 REG_DEP_ANTI);
3210
3211 link = deps->pending_write_insns;
3212 deps->pending_write_insns = XEXP (deps->pending_write_insns, 1);
3213 free_INSN_LIST_node (link);
3214
3215 link = deps->pending_write_mems;
3216 deps->pending_write_mems = XEXP (deps->pending_write_mems, 1);
3217 free_EXPR_LIST_node (link);
3218 }
3219 deps->pending_lists_length = 0;
3220
3221 /* last_pending_memory_flush is now a list of insns. */
3222 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3223 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3224
3225 free_INSN_LIST_list (&deps->last_pending_memory_flush);
3226 deps->last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
3227 }
3228
3229 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3230 rtx, X, creating all dependencies generated by the write to the
3231 destination of X, and reads of everything mentioned. */
3232
3233 static void
3234 sched_analyze_1 (deps, x, insn)
3235 struct deps *deps;
3236 rtx x;
3237 rtx insn;
3238 {
3239 register int regno;
3240 register rtx dest = XEXP (x, 0);
3241 enum rtx_code code = GET_CODE (x);
3242
3243 if (dest == 0)
3244 return;
3245
3246 if (GET_CODE (dest) == PARALLEL
3247 && GET_MODE (dest) == BLKmode)
3248 {
3249 register int i;
3250 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
3251 sched_analyze_1 (deps, XVECEXP (dest, 0, i), insn);
3252 if (GET_CODE (x) == SET)
3253 sched_analyze_2 (deps, SET_SRC (x), insn);
3254 return;
3255 }
3256
3257 while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
3258 || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3259 {
3260 if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3261 {
3262 /* The second and third arguments are values read by this insn. */
3263 sched_analyze_2 (deps, XEXP (dest, 1), insn);
3264 sched_analyze_2 (deps, XEXP (dest, 2), insn);
3265 }
3266 dest = XEXP (dest, 0);
3267 }
3268
3269 if (GET_CODE (dest) == REG)
3270 {
3271 register int i;
3272
3273 regno = REGNO (dest);
3274
3275 /* A hard reg in a wide mode may really be multiple registers.
3276 If so, mark all of them just like the first. */
3277 if (regno < FIRST_PSEUDO_REGISTER)
3278 {
3279 i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
3280 while (--i >= 0)
3281 {
3282 int r = regno + i;
3283 rtx u;
3284
3285 for (u = deps->reg_last_uses[r]; u; u = XEXP (u, 1))
3286 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3287
3288 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3289 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3290
3291 /* Clobbers need not be ordered with respect to one
3292 another, but sets must be ordered with respect to a
3293 pending clobber. */
3294 if (code == SET)
3295 {
3296 free_INSN_LIST_list (&deps->reg_last_uses[r]);
3297 for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
3298 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3299 SET_REGNO_REG_SET (reg_pending_sets, r);
3300 }
3301 else
3302 SET_REGNO_REG_SET (reg_pending_clobbers, r);
3303
3304 /* Function calls clobber all call_used regs. */
3305 if (global_regs[r] || (code == SET && call_used_regs[r]))
3306 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3307 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3308 }
3309 }
3310 else
3311 {
3312 rtx u;
3313
3314 for (u = deps->reg_last_uses[regno]; u; u = XEXP (u, 1))
3315 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3316
3317 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3318 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3319
3320 if (code == SET)
3321 {
3322 free_INSN_LIST_list (&deps->reg_last_uses[regno]);
3323 for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3324 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3325 SET_REGNO_REG_SET (reg_pending_sets, regno);
3326 }
3327 else
3328 SET_REGNO_REG_SET (reg_pending_clobbers, regno);
3329
3330 /* Pseudos that are REG_EQUIV to something may be replaced
3331 by that during reloading. We need only add dependencies for
3332 the address in the REG_EQUIV note. */
3333 if (!reload_completed
3334 && reg_known_equiv_p[regno]
3335 && GET_CODE (reg_known_value[regno]) == MEM)
3336 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3337
3338 /* Don't let it cross a call after scheduling if it doesn't
3339 already cross one. */
3340
3341 if (REG_N_CALLS_CROSSED (regno) == 0)
3342 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3343 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3344 }
3345 }
3346 else if (GET_CODE (dest) == MEM)
3347 {
3348 /* Writing memory. */
3349
3350 if (deps->pending_lists_length > 32)
3351 {
3352 /* Flush all pending reads and writes to prevent the pending lists
3353 from getting any larger. Insn scheduling runs too slowly when
3354 these lists get long. The number 32 was chosen because it
3355 seems like a reasonable number. When compiling GCC with itself,
3356 this flush occurs 8 times for sparc, and 10 times for m88k using
3357 the number 32. */
3358 flush_pending_lists (deps, insn, 0);
3359 }
3360 else
3361 {
3362 rtx u;
3363 rtx pending, pending_mem;
3364
3365 pending = deps->pending_read_insns;
3366 pending_mem = deps->pending_read_mems;
3367 while (pending)
3368 {
3369 if (anti_dependence (XEXP (pending_mem, 0), dest))
3370 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3371
3372 pending = XEXP (pending, 1);
3373 pending_mem = XEXP (pending_mem, 1);
3374 }
3375
3376 pending = deps->pending_write_insns;
3377 pending_mem = deps->pending_write_mems;
3378 while (pending)
3379 {
3380 if (output_dependence (XEXP (pending_mem, 0), dest))
3381 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
3382
3383 pending = XEXP (pending, 1);
3384 pending_mem = XEXP (pending_mem, 1);
3385 }
3386
3387 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3388 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3389
3390 add_insn_mem_dependence (deps, &deps->pending_write_insns,
3391 &deps->pending_write_mems, insn, dest);
3392 }
3393 sched_analyze_2 (deps, XEXP (dest, 0), insn);
3394 }
3395
3396 /* Analyze reads. */
3397 if (GET_CODE (x) == SET)
3398 sched_analyze_2 (deps, SET_SRC (x), insn);
3399 }
3400
3401 /* Analyze the uses of memory and registers in rtx X in INSN. */
3402
3403 static void
3404 sched_analyze_2 (deps, x, insn)
3405 struct deps *deps;
3406 rtx x;
3407 rtx insn;
3408 {
3409 register int i;
3410 register int j;
3411 register enum rtx_code code;
3412 register const char *fmt;
3413
3414 if (x == 0)
3415 return;
3416
3417 code = GET_CODE (x);
3418
3419 switch (code)
3420 {
3421 case CONST_INT:
3422 case CONST_DOUBLE:
3423 case SYMBOL_REF:
3424 case CONST:
3425 case LABEL_REF:
3426 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3427 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3428 this does not mean that this insn is using cc0. */
3429 return;
3430
3431 #ifdef HAVE_cc0
3432 case CC0:
3433 {
3434 rtx link, prev;
3435
3436 /* User of CC0 depends on immediately preceding insn. */
3437 SCHED_GROUP_P (insn) = 1;
3438
3439 /* There may be a note before this insn now, but all notes will
3440 be removed before we actually try to schedule the insns, so
3441 it won't cause a problem later. We must avoid it here though. */
3442 prev = prev_nonnote_insn (insn);
3443
3444 /* Make a copy of all dependencies on the immediately previous insn,
3445 and add to this insn. This is so that all the dependencies will
3446 apply to the group. Remove an explicit dependence on this insn
3447 as SCHED_GROUP_P now represents it. */
3448
3449 if (find_insn_list (prev, LOG_LINKS (insn)))
3450 remove_dependence (insn, prev);
3451
3452 for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
3453 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3454
3455 return;
3456 }
3457 #endif
3458
3459 case REG:
3460 {
3461 rtx u;
3462 int regno = REGNO (x);
3463 if (regno < FIRST_PSEUDO_REGISTER)
3464 {
3465 int i;
3466
3467 i = HARD_REGNO_NREGS (regno, GET_MODE (x));
3468 while (--i >= 0)
3469 {
3470 int r = regno + i;
3471 deps->reg_last_uses[r]
3472 = alloc_INSN_LIST (insn, deps->reg_last_uses[r]);
3473
3474 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3475 add_dependence (insn, XEXP (u, 0), 0);
3476
3477 /* ??? This should never happen. */
3478 for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
3479 add_dependence (insn, XEXP (u, 0), 0);
3480
3481 if (call_used_regs[r] || global_regs[r])
3482 /* Function calls clobber all call_used regs. */
3483 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3484 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3485 }
3486 }
3487 else
3488 {
3489 deps->reg_last_uses[regno]
3490 = alloc_INSN_LIST (insn, deps->reg_last_uses[regno]);
3491
3492 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3493 add_dependence (insn, XEXP (u, 0), 0);
3494
3495 /* ??? This should never happen. */
3496 for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3497 add_dependence (insn, XEXP (u, 0), 0);
3498
3499 /* Pseudos that are REG_EQUIV to something may be replaced
3500 by that during reloading. We need only add dependencies for
3501 the address in the REG_EQUIV note. */
3502 if (!reload_completed
3503 && reg_known_equiv_p[regno]
3504 && GET_CODE (reg_known_value[regno]) == MEM)
3505 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3506
3507 /* If the register does not already cross any calls, then add this
3508 insn to the sched_before_next_call list so that it will still
3509 not cross calls after scheduling. */
3510 if (REG_N_CALLS_CROSSED (regno) == 0)
3511 add_dependence (deps->sched_before_next_call, insn,
3512 REG_DEP_ANTI);
3513 }
3514 return;
3515 }
3516
3517 case MEM:
3518 {
3519 /* Reading memory. */
3520 rtx u;
3521 rtx pending, pending_mem;
3522
3523 pending = deps->pending_read_insns;
3524 pending_mem = deps->pending_read_mems;
3525 while (pending)
3526 {
3527 if (read_dependence (XEXP (pending_mem, 0), x))
3528 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3529
3530 pending = XEXP (pending, 1);
3531 pending_mem = XEXP (pending_mem, 1);
3532 }
3533
3534 pending = deps->pending_write_insns;
3535 pending_mem = deps->pending_write_mems;
3536 while (pending)
3537 {
3538 if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
3539 x, rtx_varies_p))
3540 add_dependence (insn, XEXP (pending, 0), 0);
3541
3542 pending = XEXP (pending, 1);
3543 pending_mem = XEXP (pending_mem, 1);
3544 }
3545
3546 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3547 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3548
3549 /* Always add these dependencies to pending_reads, since
3550 this insn may be followed by a write. */
3551 add_insn_mem_dependence (deps, &deps->pending_read_insns,
3552 &deps->pending_read_mems, insn, x);
3553
3554 /* Take advantage of tail recursion here. */
3555 sched_analyze_2 (deps, XEXP (x, 0), insn);
3556 return;
3557 }
3558
3559 /* Force pending stores to memory in case a trap handler needs them. */
3560 case TRAP_IF:
3561 flush_pending_lists (deps, insn, 1);
3562 break;
3563
3564 case ASM_OPERANDS:
3565 case ASM_INPUT:
3566 case UNSPEC_VOLATILE:
3567 {
3568 rtx u;
3569
3570 /* Traditional and volatile asm instructions must be considered to use
3571 and clobber all hard registers, all pseudo-registers and all of
3572 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3573
3574 Consider for instance a volatile asm that changes the fpu rounding
3575 mode. An insn should not be moved across this even if it only uses
3576 pseudo-regs because it might give an incorrectly rounded result. */
3577 if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
3578 {
3579 int max_reg = max_reg_num ();
3580 for (i = 0; i < max_reg; i++)
3581 {
3582 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3583 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3584 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3585
3586 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3587 add_dependence (insn, XEXP (u, 0), 0);
3588
3589 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3590 add_dependence (insn, XEXP (u, 0), 0);
3591 }
3592 reg_pending_sets_all = 1;
3593
3594 flush_pending_lists (deps, insn, 0);
3595 }
3596
3597 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3598 We cannot just fall through here since then we would be confused
3599 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
3600 a traditional asm, unlike its normal usage. */
3601
3602 if (code == ASM_OPERANDS)
3603 {
3604 for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
3605 sched_analyze_2 (deps, ASM_OPERANDS_INPUT (x, j), insn);
3606 return;
3607 }
3608 break;
3609 }
3610
3611 case PRE_DEC:
3612 case POST_DEC:
3613 case PRE_INC:
3614 case POST_INC:
3615 /* These both read and modify the result. We must handle them as writes
3616 to get proper dependencies for following instructions. We must handle
3617 them as reads to get proper dependencies from this to previous
3618 instructions. Thus we need to pass them to both sched_analyze_1
3619 and sched_analyze_2. We must call sched_analyze_2 first in order
3620 to get the proper antecedent for the read. */
3621 sched_analyze_2 (deps, XEXP (x, 0), insn);
3622 sched_analyze_1 (deps, x, insn);
3623 return;
3624
3625 default:
3626 break;
3627 }
3628
3629 /* Other cases: walk the insn. */
3630 fmt = GET_RTX_FORMAT (code);
3631 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3632 {
3633 if (fmt[i] == 'e')
3634 sched_analyze_2 (deps, XEXP (x, i), insn);
3635 else if (fmt[i] == 'E')
3636 for (j = 0; j < XVECLEN (x, i); j++)
3637 sched_analyze_2 (deps, XVECEXP (x, i, j), insn);
3638 }
3639 }
3640
3641 /* Analyze an INSN with pattern X to find all dependencies. */
3642
3643 static void
3644 sched_analyze_insn (deps, x, insn, loop_notes)
3645 struct deps *deps;
3646 rtx x, insn;
3647 rtx loop_notes;
3648 {
3649 register RTX_CODE code = GET_CODE (x);
3650 rtx link;
3651 int maxreg = max_reg_num ();
3652 int i;
3653
3654 if (code == COND_EXEC)
3655 {
3656 sched_analyze_2 (deps, COND_EXEC_TEST (x), insn);
3657
3658 /* ??? Should be recording conditions so we reduce the number of
3659 false dependencies. */
3660 x = COND_EXEC_CODE (x);
3661 code = GET_CODE (x);
3662 }
3663 if (code == SET || code == CLOBBER)
3664 sched_analyze_1 (deps, x, insn);
3665 else if (code == PARALLEL)
3666 {
3667 register int i;
3668 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
3669 {
3670 rtx sub = XVECEXP (x, 0, i);
3671 code = GET_CODE (sub);
3672
3673 if (code == COND_EXEC)
3674 {
3675 sched_analyze_2 (deps, COND_EXEC_TEST (sub), insn);
3676 sub = COND_EXEC_CODE (sub);
3677 code = GET_CODE (sub);
3678 }
3679 if (code == SET || code == CLOBBER)
3680 sched_analyze_1 (deps, sub, insn);
3681 else
3682 sched_analyze_2 (deps, sub, insn);
3683 }
3684 }
3685 else
3686 sched_analyze_2 (deps, x, insn);
3687
3688 /* Mark registers CLOBBERED or used by called function. */
3689 if (GET_CODE (insn) == CALL_INSN)
3690 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
3691 {
3692 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
3693 sched_analyze_1 (deps, XEXP (link, 0), insn);
3694 else
3695 sched_analyze_2 (deps, XEXP (link, 0), insn);
3696 }
3697
3698 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3699 block, then we must be sure that no instructions are scheduled across it.
3700 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3701 become incorrect. */
3702
3703 if (loop_notes)
3704 {
3705 int max_reg = max_reg_num ();
3706 int schedule_barrier_found = 0;
3707 rtx link;
3708
3709 /* Update loop_notes with any notes from this insn. Also determine
3710 if any of the notes on the list correspond to instruction scheduling
3711 barriers (loop, eh & setjmp notes, but not range notes). */
3712 link = loop_notes;
3713 while (XEXP (link, 1))
3714 {
3715 if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
3716 || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
3717 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
3718 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
3719 || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
3720 schedule_barrier_found = 1;
3721
3722 link = XEXP (link, 1);
3723 }
3724 XEXP (link, 1) = REG_NOTES (insn);
3725 REG_NOTES (insn) = loop_notes;
3726
3727 /* Add dependencies if a scheduling barrier was found. */
3728 if (schedule_barrier_found)
3729 {
3730 for (i = 0; i < max_reg; i++)
3731 {
3732 rtx u;
3733 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3734 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3735 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3736
3737 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3738 add_dependence (insn, XEXP (u, 0), 0);
3739
3740 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3741 add_dependence (insn, XEXP (u, 0), 0);
3742 }
3743 reg_pending_sets_all = 1;
3744
3745 flush_pending_lists (deps, insn, 0);
3746 }
3747
3748 }
3749
3750 /* Accumulate clobbers until the next set so that it will be output dependent
3751 on all of them. At the next set we can clear the clobber list, since
3752 subsequent sets will be output dependent on it. */
3753 EXECUTE_IF_SET_IN_REG_SET
3754 (reg_pending_sets, 0, i,
3755 {
3756 free_INSN_LIST_list (&deps->reg_last_sets[i]);
3757 free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
3758 deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3759 });
3760 EXECUTE_IF_SET_IN_REG_SET
3761 (reg_pending_clobbers, 0, i,
3762 {
3763 deps->reg_last_clobbers[i]
3764 = alloc_INSN_LIST (insn, deps->reg_last_clobbers[i]);
3765 });
3766 CLEAR_REG_SET (reg_pending_sets);
3767 CLEAR_REG_SET (reg_pending_clobbers);
3768
3769 if (reg_pending_sets_all)
3770 {
3771 for (i = 0; i < maxreg; i++)
3772 {
3773 free_INSN_LIST_list (&deps->reg_last_sets[i]);
3774 free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
3775 deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3776 }
3777
3778 reg_pending_sets_all = 0;
3779 }
3780
3781 /* Handle function calls and function returns created by the epilogue
3782 threading code. */
3783 if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
3784 {
3785 rtx dep_insn;
3786 rtx prev_dep_insn;
3787
3788 /* When scheduling instructions, we make sure calls don't lose their
3789 accompanying USE insns by making them depend on one another in order.
3790
3791 Also, we must do the same thing for returns created by the epilogue
3792 threading code. Note this code works only in this special case,
3793 because other passes make no guarantee that they will never emit
3794 an instruction between a USE and a RETURN. There is such a guarantee
3795 for USE instructions immediately before a call. */
3796
3797 prev_dep_insn = insn;
3798 dep_insn = PREV_INSN (insn);
3799 while (GET_CODE (dep_insn) == INSN
3800 && GET_CODE (PATTERN (dep_insn)) == USE
3801 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG)
3802 {
3803 SCHED_GROUP_P (prev_dep_insn) = 1;
3804
3805 /* Make a copy of all dependencies on dep_insn, and add to insn.
3806 This is so that all of the dependencies will apply to the
3807 group. */
3808
3809 for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1))
3810 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3811
3812 prev_dep_insn = dep_insn;
3813 dep_insn = PREV_INSN (dep_insn);
3814 }
3815 }
3816 }
3817
3818 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3819 for every dependency. */
3820
3821 static void
3822 sched_analyze (deps, head, tail)
3823 struct deps *deps;
3824 rtx head, tail;
3825 {
3826 register rtx insn;
3827 register rtx u;
3828 rtx loop_notes = 0;
3829
3830 for (insn = head;; insn = NEXT_INSN (insn))
3831 {
3832 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
3833 {
3834 /* Clear out the stale LOG_LINKS from flow. */
3835 free_INSN_LIST_list (&LOG_LINKS (insn));
3836
3837 /* Make each JUMP_INSN a scheduling barrier for memory
3838 references. */
3839 if (GET_CODE (insn) == JUMP_INSN)
3840 deps->last_pending_memory_flush
3841 = alloc_INSN_LIST (insn, deps->last_pending_memory_flush);
3842 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3843 loop_notes = 0;
3844 }
3845 else if (GET_CODE (insn) == CALL_INSN)
3846 {
3847 rtx x;
3848 register int i;
3849
3850 CANT_MOVE (insn) = 1;
3851
3852 /* Clear out the stale LOG_LINKS from flow. */
3853 free_INSN_LIST_list (&LOG_LINKS (insn));
3854
3855 /* Any instruction using a hard register which may get clobbered
3856 by a call needs to be marked as dependent on this call.
3857 This prevents a use of a hard return reg from being moved
3858 past a void call (i.e. it does not explicitly set the hard
3859 return reg). */
3860
3861 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
3862 all registers, not just hard registers, may be clobbered by this
3863 call. */
3864
3865 /* Insn, being a CALL_INSN, magically depends on
3866 `last_function_call' already. */
3867
3868 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
3869 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
3870 {
3871 int max_reg = max_reg_num ();
3872 for (i = 0; i < max_reg; i++)
3873 {
3874 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3875 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3876 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3877
3878 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3879 add_dependence (insn, XEXP (u, 0), 0);
3880
3881 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3882 add_dependence (insn, XEXP (u, 0), 0);
3883 }
3884 reg_pending_sets_all = 1;
3885
3886 /* Add a pair of REG_SAVE_NOTEs which we will later
3887 convert back into a NOTE_INSN_SETJMP note. See
3888 reemit_notes for why we use a pair of NOTEs. */
3889 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3890 GEN_INT (0),
3891 REG_NOTES (insn));
3892 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3893 GEN_INT (NOTE_INSN_SETJMP),
3894 REG_NOTES (insn));
3895 }
3896 else
3897 {
3898 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3899 if (call_used_regs[i] || global_regs[i])
3900 {
3901 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3902 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3903
3904 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3905 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3906
3907 SET_REGNO_REG_SET (reg_pending_clobbers, i);
3908 }
3909 }
3910
3911 /* For each insn which shouldn't cross a call, add a dependence
3912 between that insn and this call insn. */
3913 x = LOG_LINKS (deps->sched_before_next_call);
3914 while (x)
3915 {
3916 add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
3917 x = XEXP (x, 1);
3918 }
3919 free_INSN_LIST_list (&LOG_LINKS (deps->sched_before_next_call));
3920
3921 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3922 loop_notes = 0;
3923
3924 /* In the absence of interprocedural alias analysis, we must flush
3925 all pending reads and writes, and start new dependencies
3926 from here. But only flush writes for constant calls (which may
3927 be passed a pointer to something we haven't written yet). */
3928 flush_pending_lists (deps, insn, CONST_CALL_P (insn));
3929
3930 /* Depend this function call (actually, the user of this
3931 function call) on all hard register clobberage. */
3932
3933 /* last_function_call is now a list of insns. */
3934 free_INSN_LIST_list (&deps->last_function_call);
3935 deps->last_function_call = alloc_INSN_LIST (insn, NULL_RTX);
3936 }
3937
3938 /* See comments on reemit_notes as to why we do this.
3939 ??? Actually, the comments on reemit_notes just say what is done, not why. */
3940
3941 else if (GET_CODE (insn) == NOTE
3942 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_BEG
3943 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
3944 {
3945 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
3946 loop_notes);
3947 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3948 GEN_INT (NOTE_LINE_NUMBER (insn)),
3949 loop_notes);
3950 }
3951 else if (GET_CODE (insn) == NOTE
3952 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
3953 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
3954 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3955 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
3956 || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
3957 && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
3958 {
3959 rtx rtx_region;
3960
3961 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3962 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
3963 rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
3964 else
3965 rtx_region = GEN_INT (0);
3966
3967 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3968 rtx_region,
3969 loop_notes);
3970 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3971 GEN_INT (NOTE_LINE_NUMBER (insn)),
3972 loop_notes);
3973 CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
3974 }
3975
3976 if (insn == tail)
3977 return;
3978 }
3979 abort ();
3980 }
3981 \f
3982 /* Macros and functions for keeping the priority queue sorted, and
3983 dealing with queueing and dequeueing of instructions. */
3984
3985 #define SCHED_SORT(READY, N_READY) \
3986 do { if ((N_READY) == 2) \
3987 swap_sort (READY, N_READY); \
3988 else if ((N_READY) > 2) \
3989 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
3990 while (0)
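
/* A minimal sketch of intended use (hypothetical call site, not copied
   from this file):

       SCHED_SORT (ready, n_ready);
       insn = ready[--n_ready];

   Because rank_for_schedule returns a positive value when its first
   argument is preferred, qsort leaves the most preferred insns at the
   high end of READY, which is the end the scheduler issues from.  */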
3991
3992 /* Returns a positive value if x is preferred; returns a negative value if
3993 y is preferred. Should never return 0, since that will make the sort
3994 unstable. */
3995
3996 static int
3997 rank_for_schedule (x, y)
3998 const PTR x;
3999 const PTR y;
4000 {
4001 rtx tmp = *(const rtx *)y;
4002 rtx tmp2 = *(const rtx *)x;
4003 rtx link;
4004 int tmp_class, tmp2_class, depend_count1, depend_count2;
4005 int val, priority_val, spec_val, prob_val, weight_val;
4006
4007
4008 /* Prefer insn with higher priority. */
4009 priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
4010 if (priority_val)
4011 return priority_val;
4012
4013 /* Prefer an insn with a smaller contribution to register pressure. */
4014 if (!reload_completed &&
4015 (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
4016 return (weight_val);
4017
4018 /* Some comparisons make sense in interblock scheduling only. */
4019 if (INSN_BB (tmp) != INSN_BB (tmp2))
4020 {
4021 /* Prefer an inblock motion over an interblock motion. */
4022 if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
4023 return 1;
4024 if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
4025 return -1;
4026
4027 /* Prefer a useful motion over a speculative one. */
4028 if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
4029 return (spec_val);
4030
4031 /* Prefer a more probable (speculative) insn. */
4032 prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
4033 if (prob_val)
4034 return (prob_val);
4035 }
4036
4037 /* Compare insns based on their relation to the last-scheduled-insn. */
4038 if (last_scheduled_insn)
4039 {
4040 /* Classify the instructions into three classes:
4041 1) Data dependent on last scheduled insn.
4042 2) Anti/Output dependent on last scheduled insn.
4043 3) Independent of last scheduled insn, or has latency of one.
4044 Choose the insn from the highest numbered class if different. */
4045 link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
4046 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
4047 tmp_class = 3;
4048 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4049 tmp_class = 1;
4050 else
4051 tmp_class = 2;
4052
4053 link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
4054 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
4055 tmp2_class = 3;
4056 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4057 tmp2_class = 1;
4058 else
4059 tmp2_class = 2;
4060
4061 if ((val = tmp2_class - tmp_class))
4062 return val;
4063 }
4064
4065 /* Prefer the insn which has more later insns that depend on it.
4066 This gives the scheduler more freedom when scheduling later
4067 instructions at the expense of added register pressure. */
4068 depend_count1 = 0;
4069 for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
4070 depend_count1++;
4071
4072 depend_count2 = 0;
4073 for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
4074 depend_count2++;
4075
4076 val = depend_count2 - depend_count1;
4077 if (val)
4078 return val;
4079
4080 /* If insns are equally good, sort by INSN_LUID (original insn order),
4081 so that we make the sort stable. This minimizes instruction movement,
4082 thus minimizing sched's effect on debugging and cross-jumping. */
4083 return INSN_LUID (tmp) - INSN_LUID (tmp2);
4084 }
4085
4086 /* Resort the array A, in which only the last of the N elements may be out of order. */
4087
4088 HAIFA_INLINE static void
4089 swap_sort (a, n)
4090 rtx *a;
4091 int n;
4092 {
4093 rtx insn = a[n - 1];
4094 int i = n - 2;
4095
4096 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
4097 {
4098 a[i + 1] = a[i];
4099 i -= 1;
4100 }
4101 a[i + 1] = insn;
4102 }
4103
4104 static int max_priority;
4105
4106 /* Add INSN to the insn queue so that it can be executed at least
4107 N_CYCLES after the currently executing insn. Preserve the insn
4108 chain for debugging purposes. */
4109
4110 HAIFA_INLINE static void
4111 queue_insn (insn, n_cycles)
4112 rtx insn;
4113 int n_cycles;
4114 {
4115 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4116 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4117 insn_queue[next_q] = link;
4118 q_size += 1;
4119
4120 if (sched_verbose >= 2)
4121 {
4122 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4123
4124 if (INSN_BB (insn) != target_bb)
4125 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4126
4127 fprintf (dump, "queued for %d cycles.\n", n_cycles);
4128 }
4129
4130 }
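
/* A rough picture of the queue mechanics (illustrative): insn_queue is
   a circular array of INSN_QUEUE_SIZE buckets indexed via NEXT_Q and
   NEXT_Q_AFTER.  An insn queued with n_cycles == 3 while q_ptr == 5 is
   linked into bucket NEXT_Q_AFTER (5, 3); queue_to_ready advances
   q_ptr one bucket per clock tick, so that insn reaches the ready list
   three cycles later.  */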
4131
4132 /* PREV is an insn that is ready to execute. Adjust its priority if that
4133 will help shorten or lengthen register lifetimes as appropriate. Also
4134 provide a hook for the target to tweak itself. */
4135
4136 HAIFA_INLINE static void
4137 adjust_priority (prev)
4138 rtx prev ATTRIBUTE_UNUSED;
4139 {
4140 /* ??? There used to be code here to try and estimate how an insn
4141 affected register lifetimes, but it did it by looking at REG_DEAD
4142 notes, which we removed in schedule_region. Nor did it try to
4143 take into account register pressure or anything useful like that.
4144
4145 Revisit when we have a machine model to work with and not before. */
4146
4147 #ifdef ADJUST_PRIORITY
4148 ADJUST_PRIORITY (prev);
4149 #endif
4150 }
4151
4152 /* Clock at which the previous instruction was issued. */
4153 static int last_clock_var;
4154
4155 /* INSN is the "currently executing insn". Launch each insn which was
4156 waiting on INSN. READY is a vector of insns which are ready to fire.
4157 N_READY is the number of elements in READY. CLOCK is the current
4158 cycle. */
4159
4160 static int
4161 schedule_insn (insn, ready, n_ready, clock)
4162 rtx insn;
4163 rtx *ready;
4164 int n_ready;
4165 int clock;
4166 {
4167 rtx link;
4168 int unit;
4169
4170 unit = insn_unit (insn);
4171
4172 if (sched_verbose >= 2)
4173 {
4174 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4175 INSN_UID (insn));
4176 insn_print_units (insn);
4177 fprintf (dump, "\n");
4178 }
4179
4180 if (sched_verbose && unit == -1)
4181 visualize_no_unit (insn);
4182
4183 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4184 schedule_unit (unit, insn, clock);
4185
4186 if (INSN_DEPEND (insn) == 0)
4187 return n_ready;
4188
4189 /* This is used by the function adjust_priority above. */
4190 if (n_ready > 0)
4191 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4192 else
4193 max_priority = INSN_PRIORITY (insn);
4194
4195 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4196 {
4197 rtx next = XEXP (link, 0);
4198 int cost = insn_cost (insn, link, next);
4199
4200 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4201
4202 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4203 {
4204 int effective_cost = INSN_TICK (next) - clock;
4205
4206 /* For speculative insns, before inserting to ready/queue,
4207 check live, exception-free, and issue-delay. */
4208 if (INSN_BB (next) != target_bb
4209 && (!IS_VALID (INSN_BB (next))
4210 || CANT_MOVE (next)
4211 || (IS_SPECULATIVE_INSN (next)
4212 && (insn_issue_delay (next) > 3
4213 || !check_live (next, INSN_BB (next))
4214 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4215 continue;
4216
4217 if (sched_verbose >= 2)
4218 {
4219 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4220 INSN_UID (next));
4221
4222 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4223 fprintf (dump, "/b%d ", BLOCK_NUM (next));
4224
4225 if (effective_cost < 1)
4226 fprintf (dump, "into ready\n");
4227 else
4228 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4229 }
4230
4231 /* Adjust the priority of NEXT and either put it on the ready
4232 list or queue it. */
4233 adjust_priority (next);
4234 if (effective_cost < 1)
4235 ready[n_ready++] = next;
4236 else
4237 queue_insn (next, effective_cost);
4238 }
4239 }
4240
4241 /* Annotate the instruction with issue information -- TImode
4242 indicates that the instruction is expected not to be able
4243 to issue on the same cycle as the previous insn. A machine
4244 may use this information to decide how the instruction should
4245 be aligned. */
4246 if (reload_completed && issue_rate > 1)
4247 {
4248 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4249 last_clock_var = clock;
4250 }
4251
4252 return n_ready;
4253 }
4254
4255 /* Functions for handling of notes. */
4256
4257 /* Delete notes beginning with INSN and put them in the chain
4258 of notes ended by NOTE_LIST.
4259 Returns the insn following the notes. */
4260
4261 static rtx
4262 unlink_other_notes (insn, tail)
4263 rtx insn, tail;
4264 {
4265 rtx prev = PREV_INSN (insn);
4266
4267 while (insn != tail && GET_CODE (insn) == NOTE)
4268 {
4269 rtx next = NEXT_INSN (insn);
4270 /* Delete the note from its current position. */
4271 if (prev)
4272 NEXT_INSN (prev) = next;
4273 if (next)
4274 PREV_INSN (next) = prev;
4275
4276 /* See sched_analyze to see how these are handled. */
4277 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4278 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4279 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4280 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_BEG
4281 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4282 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4283 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4284 {
4285 /* Insert the note at the end of the notes list. */
4286 PREV_INSN (insn) = note_list;
4287 if (note_list)
4288 NEXT_INSN (note_list) = insn;
4289 note_list = insn;
4290 }
4291
4292 insn = next;
4293 }
4294 return insn;
4295 }
4296
4297 /* Delete line notes beginning with INSN. Record line-number notes so
4298 they can be reused. Returns the insn following the notes. */
4299
4300 static rtx
4301 unlink_line_notes (insn, tail)
4302 rtx insn, tail;
4303 {
4304 rtx prev = PREV_INSN (insn);
4305
4306 while (insn != tail && GET_CODE (insn) == NOTE)
4307 {
4308 rtx next = NEXT_INSN (insn);
4309
4310 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4311 {
4312 /* Delete the note from its current position. */
4313 if (prev)
4314 NEXT_INSN (prev) = next;
4315 if (next)
4316 PREV_INSN (next) = prev;
4317
4318 /* Record line-number notes so they can be reused. */
4319 LINE_NOTE (insn) = insn;
4320 }
4321 else
4322 prev = insn;
4323
4324 insn = next;
4325 }
4326 return insn;
4327 }
4328
4329 /* Return the head and tail pointers of BB. */
4330
4331 HAIFA_INLINE static void
4332 get_block_head_tail (b, headp, tailp)
4333 int b;
4334 rtx *headp;
4335 rtx *tailp;
4336 {
4337
4338 rtx head;
4339 rtx tail;
4340
4341 /* HEAD and TAIL delimit the basic block being scheduled. */
4342 head = BLOCK_HEAD (b);
4343 tail = BLOCK_END (b);
4344
4345 /* Don't include any notes or labels at the beginning of the
4346 basic block, or notes at the ends of basic blocks. */
4347 while (head != tail)
4348 {
4349 if (GET_CODE (head) == NOTE)
4350 head = NEXT_INSN (head);
4351 else if (GET_CODE (tail) == NOTE)
4352 tail = PREV_INSN (tail);
4353 else if (GET_CODE (head) == CODE_LABEL)
4354 head = NEXT_INSN (head);
4355 else
4356 break;
4357 }
4358
4359 *headp = head;
4360 *tailp = tail;
4361 }
4362
4363 HAIFA_INLINE static void
4364 get_bb_head_tail (bb, headp, tailp)
4365 int bb;
4366 rtx *headp;
4367 rtx *tailp;
4368 {
4369 get_block_head_tail (BB_TO_BLOCK (bb), headp, tailp);
4370 }
4371
4372 /* Delete line notes from bb. Save them so they can be later restored
4373 (in restore_line_notes ()). */
4374
4375 static void
4376 rm_line_notes (bb)
4377 int bb;
4378 {
4379 rtx next_tail;
4380 rtx tail;
4381 rtx head;
4382 rtx insn;
4383
4384 get_bb_head_tail (bb, &head, &tail);
4385
4386 if (head == tail
4387 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4388 return;
4389
4390 next_tail = NEXT_INSN (tail);
4391 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4392 {
4393 rtx prev;
4394
4395 /* Farm out notes, and maybe save them in NOTE_LIST.
4396 This is needed to keep the debugger from
4397 getting completely deranged. */
4398 if (GET_CODE (insn) == NOTE)
4399 {
4400 prev = insn;
4401 insn = unlink_line_notes (insn, next_tail);
4402
4403 if (prev == tail)
4404 abort ();
4405 if (prev == head)
4406 abort ();
4407 if (insn == next_tail)
4408 abort ();
4409 }
4410 }
4411 }
4412
4413 /* Save line number notes for each insn in bb. */
4414
4415 static void
4416 save_line_notes (bb)
4417 int bb;
4418 {
4419 rtx head, tail;
4420 rtx next_tail;
4421
4422 /* We must use the true line number for the first insn in the block
4423 that was computed and saved at the start of this pass. We can't
4424 use the current line number, because scheduling of the previous
4425 block may have changed the current line number. */
4426
4427 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4428 rtx insn;
4429
4430 get_bb_head_tail (bb, &head, &tail);
4431 next_tail = NEXT_INSN (tail);
4432
4433 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4434 insn != next_tail;
4435 insn = NEXT_INSN (insn))
4436 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4437 line = insn;
4438 else
4439 LINE_NOTE (insn) = line;
4440 }
4441
4442
4443 /* After bb was scheduled, insert line notes into the insns list. */
4444
4445 static void
4446 restore_line_notes (bb)
4447 int bb;
4448 {
4449 rtx line, note, prev, new;
4450 int added_notes = 0;
4451 int b;
4452 rtx head, next_tail, insn;
4453
4454 b = BB_TO_BLOCK (bb);
4455
4456 head = BLOCK_HEAD (b);
4457 next_tail = NEXT_INSN (BLOCK_END (b));
4458
4459 /* Determine the current line-number. We want to know the current
4460 line number of the first insn of the block here, in case it is
4461 different from the true line number that was saved earlier. If
4462 different, then we need a line number note before the first insn
4463 of this block. If it happens to be the same, then we don't want to
4464 emit another line number note here. */
4465 for (line = head; line; line = PREV_INSN (line))
4466 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4467 break;
4468
4469 /* Walk the insns keeping track of the current line-number and inserting
4470 the line-number notes as needed. */
4471 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4472 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4473 line = insn;
4474 /* This used to emit line number notes before every non-deleted note.
4475 However, this confuses a debugger, because line notes not separated
4476 by real instructions all end up at the same address. I can find no
4477 use for line number notes before other notes, so none are emitted. */
4478 else if (GET_CODE (insn) != NOTE
4479 && (note = LINE_NOTE (insn)) != 0
4480 && note != line
4481 && (line == 0
4482 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4483 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4484 {
4485 line = note;
4486 prev = PREV_INSN (insn);
4487 if (LINE_NOTE (note))
4488 {
4489 /* Re-use the original line-number note. */
4490 LINE_NOTE (note) = 0;
4491 PREV_INSN (note) = prev;
4492 NEXT_INSN (prev) = note;
4493 PREV_INSN (insn) = note;
4494 NEXT_INSN (note) = insn;
4495 }
4496 else
4497 {
4498 added_notes++;
4499 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4500 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4501 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4502 }
4503 }
4504 if (sched_verbose && added_notes)
4505 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4506 }
4507
4508 /* After scheduling the function, delete redundant line notes from the
4509 insns list. */
4510
4511 static void
4512 rm_redundant_line_notes ()
4513 {
4514 rtx line = 0;
4515 rtx insn = get_insns ();
4516 int active_insn = 0;
4517 int notes = 0;
4518
4519 /* Walk the insns deleting redundant line-number notes. Many of these
4520 are already present. The remainder tend to occur at basic
4521 block boundaries. */
4522 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4523 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4524 {
4525 /* If there are no active insns following, INSN is redundant. */
4526 if (active_insn == 0)
4527 {
4528 notes++;
4529 NOTE_SOURCE_FILE (insn) = 0;
4530 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4531 }
4532 /* If the line number is unchanged, LINE is redundant. */
4533 else if (line
4534 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4535 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4536 {
4537 notes++;
4538 NOTE_SOURCE_FILE (line) = 0;
4539 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4540 line = insn;
4541 }
4542 else
4543 line = insn;
4544 active_insn = 0;
4545 }
4546 else if (!((GET_CODE (insn) == NOTE
4547 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4548 || (GET_CODE (insn) == INSN
4549 && (GET_CODE (PATTERN (insn)) == USE
4550 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4551 active_insn++;
4552
4553 if (sched_verbose && notes)
4554 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4555 }
4556
4557 /* Delete notes between head and tail and put them in the chain
4558 of notes ended by NOTE_LIST. */
4559
4560 static void
4561 rm_other_notes (head, tail)
4562 rtx head;
4563 rtx tail;
4564 {
4565 rtx next_tail;
4566 rtx insn;
4567
4568 if (head == tail
4569 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4570 return;
4571
4572 next_tail = NEXT_INSN (tail);
4573 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4574 {
4575 rtx prev;
4576
4577 /* Farm out notes, and maybe save them in NOTE_LIST.
4578 This is needed to keep the debugger from
4579 getting completely deranged. */
4580 if (GET_CODE (insn) == NOTE)
4581 {
4582 prev = insn;
4583
4584 insn = unlink_other_notes (insn, next_tail);
4585
4586 if (prev == tail)
4587 abort ();
4588 if (prev == head)
4589 abort ();
4590 if (insn == next_tail)
4591 abort ();
4592 }
4593 }
4594 }
4595
4596 /* Functions for computation of registers live/usage info. */
4597
4598 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4599
4600 static void
4601 find_insn_reg_weight (b)
4602 int b;
4603 {
4604 rtx insn, next_tail, head, tail;
4605
4606 get_block_head_tail (b, &head, &tail);
4607 next_tail = NEXT_INSN (tail);
4608
4609 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4610 {
4611 int reg_weight = 0;
4612 rtx x;
4613
4614 /* Handle register life information. */
4615 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4616 continue;
4617
4618 /* Increment weight for each register born here. */
4619 x = PATTERN (insn);
4620 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4621 && register_operand (SET_DEST (x), VOIDmode))
4622 reg_weight++;
4623 else if (GET_CODE (x) == PARALLEL)
4624 {
4625 int j;
4626 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4627 {
4628 x = XVECEXP (PATTERN (insn), 0, j);
4629 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4630 && register_operand (SET_DEST (x), VOIDmode))
4631 reg_weight++;
4632 }
4633 }
4634
4635 /* Decrement weight for each register that dies here. */
4636 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4637 {
4638 if (REG_NOTE_KIND (x) == REG_DEAD
4639 || REG_NOTE_KIND (x) == REG_UNUSED)
4640 reg_weight--;
4641 }
4642
4643 INSN_REG_WEIGHT (insn) = reg_weight;
4644 }
4645 }
4646
4647 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4648 static int clock_var;
4649
4650 /* Move insns that became ready to fire from queue to ready list. */
4651
4652 static int
4653 queue_to_ready (ready, n_ready)
4654 rtx ready[];
4655 int n_ready;
4656 {
4657 rtx insn;
4658 rtx link;
4659
4660 q_ptr = NEXT_Q (q_ptr);
4661
4662 /* Add all pending insns that can be scheduled without stalls to the
4663 ready list. */
4664 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4665 {
4666
4667 insn = XEXP (link, 0);
4668 q_size -= 1;
4669
4670 if (sched_verbose >= 2)
4671 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4672
4673 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4674 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4675
4676 ready[n_ready++] = insn;
4677 if (sched_verbose >= 2)
4678 fprintf (dump, "moving to ready without stalls\n");
4679 }
4680 insn_queue[q_ptr] = 0;
4681
4682 /* If there are no ready insns, stall until one is ready and add all
4683 of the pending insns at that point to the ready list. */
4684 if (n_ready == 0)
4685 {
4686 register int stalls;
4687
4688 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4689 {
4690 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4691 {
4692 for (; link; link = XEXP (link, 1))
4693 {
4694 insn = XEXP (link, 0);
4695 q_size -= 1;
4696
4697 if (sched_verbose >= 2)
4698 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4699
4700 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4701 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4702
4703 ready[n_ready++] = insn;
4704 if (sched_verbose >= 2)
4705 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4706 }
4707 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4708
4709 if (n_ready)
4710 break;
4711 }
4712 }
4713
4714 if (sched_verbose && stalls)
4715 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4716 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4717 clock_var += stalls;
4718 }
4719 return n_ready;
4720 }
4721
4722 /* Print the ready list for debugging purposes. Callable from debugger. */
4723
4724 static void
4725 debug_ready_list (ready, n_ready)
4726 rtx ready[];
4727 int n_ready;
4728 {
4729 int i;
4730
4731 for (i = 0; i < n_ready; i++)
4732 {
4733 fprintf (dump, " %d", INSN_UID (ready[i]));
4734 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4735 fprintf (dump, "/b%d", BLOCK_NUM (ready[i]));
4736 }
4737 fprintf (dump, "\n");
4738 }
4739
4740 /* Print names of units on which insn can/should execute, for debugging. */
4741
4742 static void
4743 insn_print_units (insn)
4744 rtx insn;
4745 {
4746 int i;
4747 int unit = insn_unit (insn);
4748
4749 if (unit == -1)
4750 fprintf (dump, "none");
4751 else if (unit >= 0)
4752 fprintf (dump, "%s", function_units[unit].name);
4753 else
4754 {
4755 fprintf (dump, "[");
4756 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4757 if (unit & 1)
4758 {
4759 fprintf (dump, "%s", function_units[i].name);
4760 if (unit != 1)
4761 fprintf (dump, " ");
4762 }
4763 fprintf (dump, "]");
4764 }
4765 }
4766
4767 /* MAX_VISUAL_LINES is the maximum number of lines in the visualization table
4768 of a basic block. If more lines are needed, the table is split in two.
4769 n_visual_lines is the number of lines printed so far for a block.
4770 visual_tbl contains the block visualization info.
4771 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4772 #define MAX_VISUAL_LINES 100
4773 #define INSN_LEN 30
4774 int n_visual_lines;
4775 char *visual_tbl;
4776 int n_vis_no_unit;
4777 rtx vis_no_unit[10];
4778
4779 /* Find the units that are in use in this function. Required only
4780 for visualization. */
4781
4782 static void
4783 init_target_units ()
4784 {
4785 rtx insn;
4786 int unit;
4787
4788 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4789 {
4790 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4791 continue;
4792
4793 unit = insn_unit (insn);
4794
4795 if (unit < 0)
4796 target_units |= ~unit;
4797 else
4798 target_units |= (1 << unit);
4799 }
4800 }
4801
4802 /* Return the length of the visualization table. */
4803
4804 static int
4805 get_visual_tbl_length ()
4806 {
4807 int unit, i;
4808 int n, n1;
4809 char *s;
4810
4811 /* Compute length of one field in line. */
4812 s = (char *) alloca (INSN_LEN + 6);
4813 sprintf (s, " %33s", "uname");
4814 n1 = strlen (s);
4815
4816 /* Compute length of one line. */
4817 n = strlen (";; ");
4818 n += n1;
4819 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4820 if (function_units[unit].bitmask & target_units)
4821 for (i = 0; i < function_units[unit].multiplicity; i++)
4822 n += n1;
4823 n += n1;
4824 n += strlen ("\n") + 2;
4825
4826 /* Compute length of visualization string. */
4827 return (MAX_VISUAL_LINES * n);
4828 }
4829
4830 /* Init block visualization debugging info. */
4831
4832 static void
4833 init_block_visualization ()
4834 {
4835 strcpy (visual_tbl, "");
4836 n_visual_lines = 0;
4837 n_vis_no_unit = 0;
4838 }
4839
4840 #define BUF_LEN 2048
4841
4842 static char *
4843 safe_concat (buf, cur, str)
4844 char *buf;
4845 char *cur;
4846 const char *str;
4847 {
4848 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4849 int c;
4850
4851 if (cur > end)
4852 {
4853 *end = '\0';
4854 return end;
4855 }
4856
4857 while (cur < end && (c = *str++) != '\0')
4858 *cur++ = c;
4859
4860 *cur = '\0';
4861 return cur;
4862 }
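
/* Typical use (a sketch modelled on the printers below, not a verbatim
   call site): callers keep a cursor into a BUF_LEN-sized buffer and
   chain calls,

       char buf[BUF_LEN], t[BUF_LEN];
       char *cur = buf;
       print_value (t, XEXP (x, 0), verbose);
       cur = safe_concat (buf, cur, "high(");
       cur = safe_concat (buf, cur, t);
       cur = safe_concat (buf, cur, ")");

   so over-long output is silently truncated instead of overrunning the
   buffer.  */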
4863
4864 /* This recognizes rtxes classified as expressions. These always
4865 represent some action on values or on the results of other expressions,
4866 which may be stored in objects representing values. */
4867
4868 static void
4869 print_exp (buf, x, verbose)
4870 char *buf;
4871 rtx x;
4872 int verbose;
4873 {
4874 char tmp[BUF_LEN];
4875 const char *st[4];
4876 char *cur = buf;
4877 const char *fun = (char *)0;
4878 const char *sep;
4879 rtx op[4];
4880 int i;
4881
4882 for (i = 0; i < 4; i++)
4883 {
4884 st[i] = (char *)0;
4885 op[i] = NULL_RTX;
4886 }
4887
4888 switch (GET_CODE (x))
4889 {
4890 case PLUS:
4891 op[0] = XEXP (x, 0);
4892 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4893 && INTVAL (XEXP (x, 1)) < 0)
4894 {
4895 st[1] = "-";
4896 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4897 }
4898 else
4899 {
4900 st[1] = "+";
4901 op[1] = XEXP (x, 1);
4902 }
4903 break;
4904 case LO_SUM:
4905 op[0] = XEXP (x, 0);
4906 st[1] = "+low(";
4907 op[1] = XEXP (x, 1);
4908 st[2] = ")";
4909 break;
4910 case MINUS:
4911 op[0] = XEXP (x, 0);
4912 st[1] = "-";
4913 op[1] = XEXP (x, 1);
4914 break;
4915 case COMPARE:
4916 fun = "cmp";
4917 op[0] = XEXP (x, 0);
4918 op[1] = XEXP (x, 1);
4919 break;
4920 case NEG:
4921 st[0] = "-";
4922 op[0] = XEXP (x, 0);
4923 break;
4924 case MULT:
4925 op[0] = XEXP (x, 0);
4926 st[1] = "*";
4927 op[1] = XEXP (x, 1);
4928 break;
4929 case DIV:
4930 op[0] = XEXP (x, 0);
4931 st[1] = "/";
4932 op[1] = XEXP (x, 1);
4933 break;
4934 case UDIV:
4935 fun = "udiv";
4936 op[0] = XEXP (x, 0);
4937 op[1] = XEXP (x, 1);
4938 break;
4939 case MOD:
4940 op[0] = XEXP (x, 0);
4941 st[1] = "%";
4942 op[1] = XEXP (x, 1);
4943 break;
4944 case UMOD:
4945 fun = "umod";
4946 op[0] = XEXP (x, 0);
4947 op[1] = XEXP (x, 1);
4948 break;
4949 case SMIN:
4950 fun = "smin";
4951 op[0] = XEXP (x, 0);
4952 op[1] = XEXP (x, 1);
4953 break;
4954 case SMAX:
4955 fun = "smax";
4956 op[0] = XEXP (x, 0);
4957 op[1] = XEXP (x, 1);
4958 break;
4959 case UMIN:
4960 fun = "umin";
4961 op[0] = XEXP (x, 0);
4962 op[1] = XEXP (x, 1);
4963 break;
4964 case UMAX:
4965 fun = "umax";
4966 op[0] = XEXP (x, 0);
4967 op[1] = XEXP (x, 1);
4968 break;
4969 case NOT:
4970 st[0] = "!";
4971 op[0] = XEXP (x, 0);
4972 break;
4973 case AND:
4974 op[0] = XEXP (x, 0);
4975 st[1] = "&";
4976 op[1] = XEXP (x, 1);
4977 break;
4978 case IOR:
4979 op[0] = XEXP (x, 0);
4980 st[1] = "|";
4981 op[1] = XEXP (x, 1);
4982 break;
4983 case XOR:
4984 op[0] = XEXP (x, 0);
4985 st[1] = "^";
4986 op[1] = XEXP (x, 1);
4987 break;
4988 case ASHIFT:
4989 op[0] = XEXP (x, 0);
4990 st[1] = "<<";
4991 op[1] = XEXP (x, 1);
4992 break;
4993 case LSHIFTRT:
4994 op[0] = XEXP (x, 0);
4995 st[1] = " 0>>";
4996 op[1] = XEXP (x, 1);
4997 break;
4998 case ASHIFTRT:
4999 op[0] = XEXP (x, 0);
5000 st[1] = ">>";
5001 op[1] = XEXP (x, 1);
5002 break;
5003 case ROTATE:
5004 op[0] = XEXP (x, 0);
5005 st[1] = "<-<";
5006 op[1] = XEXP (x, 1);
5007 break;
5008 case ROTATERT:
5009 op[0] = XEXP (x, 0);
5010 st[1] = ">->";
5011 op[1] = XEXP (x, 1);
5012 break;
5013 case ABS:
5014 fun = "abs";
5015 op[0] = XEXP (x, 0);
5016 break;
5017 case SQRT:
5018 fun = "sqrt";
5019 op[0] = XEXP (x, 0);
5020 break;
5021 case FFS:
5022 fun = "ffs";
5023 op[0] = XEXP (x, 0);
5024 break;
5025 case EQ:
5026 op[0] = XEXP (x, 0);
5027 st[1] = "==";
5028 op[1] = XEXP (x, 1);
5029 break;
5030 case NE:
5031 op[0] = XEXP (x, 0);
5032 st[1] = "!=";
5033 op[1] = XEXP (x, 1);
5034 break;
5035 case GT:
5036 op[0] = XEXP (x, 0);
5037 st[1] = ">";
5038 op[1] = XEXP (x, 1);
5039 break;
5040 case GTU:
5041 fun = "gtu";
5042 op[0] = XEXP (x, 0);
5043 op[1] = XEXP (x, 1);
5044 break;
5045 case LT:
5046 op[0] = XEXP (x, 0);
5047 st[1] = "<";
5048 op[1] = XEXP (x, 1);
5049 break;
5050 case LTU:
5051 fun = "ltu";
5052 op[0] = XEXP (x, 0);
5053 op[1] = XEXP (x, 1);
5054 break;
5055 case GE:
5056 op[0] = XEXP (x, 0);
5057 st[1] = ">=";
5058 op[1] = XEXP (x, 1);
5059 break;
5060 case GEU:
5061 fun = "geu";
5062 op[0] = XEXP (x, 0);
5063 op[1] = XEXP (x, 1);
5064 break;
5065 case LE:
5066 op[0] = XEXP (x, 0);
5067 st[1] = "<=";
5068 op[1] = XEXP (x, 1);
5069 break;
5070 case LEU:
5071 fun = "leu";
5072 op[0] = XEXP (x, 0);
5073 op[1] = XEXP (x, 1);
5074 break;
5075 case SIGN_EXTRACT:
5076 fun = (verbose) ? "sign_extract" : "sxt";
5077 op[0] = XEXP (x, 0);
5078 op[1] = XEXP (x, 1);
5079 op[2] = XEXP (x, 2);
5080 break;
5081 case ZERO_EXTRACT:
5082 fun = (verbose) ? "zero_extract" : "zxt";
5083 op[0] = XEXP (x, 0);
5084 op[1] = XEXP (x, 1);
5085 op[2] = XEXP (x, 2);
5086 break;
5087 case SIGN_EXTEND:
5088 fun = (verbose) ? "sign_extend" : "sxn";
5089 op[0] = XEXP (x, 0);
5090 break;
5091 case ZERO_EXTEND:
5092 fun = (verbose) ? "zero_extend" : "zxn";
5093 op[0] = XEXP (x, 0);
5094 break;
5095 case FLOAT_EXTEND:
5096 fun = (verbose) ? "float_extend" : "fxn";
5097 op[0] = XEXP (x, 0);
5098 break;
5099 case TRUNCATE:
5100 fun = (verbose) ? "trunc" : "trn";
5101 op[0] = XEXP (x, 0);
5102 break;
5103 case FLOAT_TRUNCATE:
5104 fun = (verbose) ? "float_trunc" : "ftr";
5105 op[0] = XEXP (x, 0);
5106 break;
5107 case FLOAT:
5108 fun = (verbose) ? "float" : "flt";
5109 op[0] = XEXP (x, 0);
5110 break;
5111 case UNSIGNED_FLOAT:
5112 fun = (verbose) ? "uns_float" : "ufl";
5113 op[0] = XEXP (x, 0);
5114 break;
5115 case FIX:
5116 fun = "fix";
5117 op[0] = XEXP (x, 0);
5118 break;
5119 case UNSIGNED_FIX:
5120 fun = (verbose) ? "uns_fix" : "ufx";
5121 op[0] = XEXP (x, 0);
5122 break;
5123 case PRE_DEC:
5124 st[0] = "--";
5125 op[0] = XEXP (x, 0);
5126 break;
5127 case PRE_INC:
5128 st[0] = "++";
5129 op[0] = XEXP (x, 0);
5130 break;
5131 case POST_DEC:
5132 op[0] = XEXP (x, 0);
5133 st[1] = "--";
5134 break;
5135 case POST_INC:
5136 op[0] = XEXP (x, 0);
5137 st[1] = "++";
5138 break;
5139 case CALL:
5140 st[0] = "call ";
5141 op[0] = XEXP (x, 0);
5142 if (verbose)
5143 {
5144 st[1] = " argc:";
5145 op[1] = XEXP (x, 1);
5146 }
5147 break;
5148 case IF_THEN_ELSE:
5149 st[0] = "{(";
5150 op[0] = XEXP (x, 0);
5151 st[1] = ")?";
5152 op[1] = XEXP (x, 1);
5153 st[2] = ":";
5154 op[2] = XEXP (x, 2);
5155 st[3] = "}";
5156 break;
5157 case TRAP_IF:
5158 fun = "trap_if";
5159 op[0] = TRAP_CONDITION (x);
5160 break;
5161 case UNSPEC:
5162 case UNSPEC_VOLATILE:
5163 {
5164 cur = safe_concat (buf, cur, "unspec");
5165 if (GET_CODE (x) == UNSPEC_VOLATILE)
5166 cur = safe_concat (buf, cur, "/v");
5167 cur = safe_concat (buf, cur, "[");
5168 sep = "";
5169 for (i = 0; i < XVECLEN (x, 0); i++)
5170 {
5171 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5172 cur = safe_concat (buf, cur, sep);
5173 cur = safe_concat (buf, cur, tmp);
5174 sep = ",";
5175 }
5176 cur = safe_concat (buf, cur, "] ");
5177 sprintf (tmp, "%d", XINT (x, 1));
5178 cur = safe_concat (buf, cur, tmp);
5179 }
5180 break;
5181 default:
5182 /* If (verbose) debug_rtx (x); */
5183 st[0] = GET_RTX_NAME (GET_CODE (x));
5184 break;
5185 }
5186
5187 /* Print this as a function? */
5188 if (fun)
5189 {
5190 cur = safe_concat (buf, cur, fun);
5191 cur = safe_concat (buf, cur, "(");
5192 }
5193
5194 for (i = 0; i < 4; i++)
5195 {
5196 if (st[i])
5197 cur = safe_concat (buf, cur, st[i]);
5198
5199 if (op[i])
5200 {
5201 if (fun && i != 0)
5202 cur = safe_concat (buf, cur, ",");
5203
5204 print_value (tmp, op[i], verbose);
5205 cur = safe_concat (buf, cur, tmp);
5206 }
5207 }
5208
5209 if (fun)
5210 cur = safe_concat (buf, cur, ")");
5211 } /* print_exp */
5212
5213 /* Print rtxes classified as values: constants, registers,
5214 labels, symbols and memory accesses. */
5215
5216 static void
5217 print_value (buf, x, verbose)
5218 char *buf;
5219 rtx x;
5220 int verbose;
5221 {
5222 char t[BUF_LEN];
5223 char *cur = buf;
5224
5225 switch (GET_CODE (x))
5226 {
5227 case CONST_INT:
5228 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5229 cur = safe_concat (buf, cur, t);
5230 break;
5231 case CONST_DOUBLE:
5232 sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
5233 cur = safe_concat (buf, cur, t);
5234 break;
5235 case CONST_STRING:
5236 cur = safe_concat (buf, cur, "\"");
5237 cur = safe_concat (buf, cur, XSTR (x, 0));
5238 cur = safe_concat (buf, cur, "\"");
5239 break;
5240 case SYMBOL_REF:
5241 cur = safe_concat (buf, cur, "`");
5242 cur = safe_concat (buf, cur, XSTR (x, 0));
5243 cur = safe_concat (buf, cur, "'");
5244 break;
5245 case LABEL_REF:
5246 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5247 cur = safe_concat (buf, cur, t);
5248 break;
5249 case CONST:
5250 print_value (t, XEXP (x, 0), verbose);
5251 cur = safe_concat (buf, cur, "const(");
5252 cur = safe_concat (buf, cur, t);
5253 cur = safe_concat (buf, cur, ")");
5254 break;
5255 case HIGH:
5256 print_value (t, XEXP (x, 0), verbose);
5257 cur = safe_concat (buf, cur, "high(");
5258 cur = safe_concat (buf, cur, t);
5259 cur = safe_concat (buf, cur, ")");
5260 break;
5261 case REG:
5262 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5263 {
5264 int c = reg_names[ REGNO (x) ][0];
5265 if (c >= '0' && c <= '9')
5266 cur = safe_concat (buf, cur, "%");
5267
5268 cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
5269 }
5270 else
5271 {
5272 sprintf (t, "r%d", REGNO (x));
5273 cur = safe_concat (buf, cur, t);
5274 }
5275 break;
5276 case SUBREG:
5277 print_value (t, SUBREG_REG (x), verbose);
5278 cur = safe_concat (buf, cur, t);
5279 sprintf (t, "#%d", SUBREG_WORD (x));
5280 cur = safe_concat (buf, cur, t);
5281 break;
5282 case SCRATCH:
5283 cur = safe_concat (buf, cur, "scratch");
5284 break;
5285 case CC0:
5286 cur = safe_concat (buf, cur, "cc0");
5287 break;
5288 case PC:
5289 cur = safe_concat (buf, cur, "pc");
5290 break;
5291 case MEM:
5292 print_value (t, XEXP (x, 0), verbose);
5293 cur = safe_concat (buf, cur, "[");
5294 cur = safe_concat (buf, cur, t);
5295 cur = safe_concat (buf, cur, "]");
5296 break;
5297 default:
5298 print_exp (t, x, verbose);
5299 cur = safe_concat (buf, cur, t);
5300 break;
5301 }
5302 } /* print_value */
5303
5304 /* The next step in insn detail: recognizing and printing the insn's pattern. */
5305
5306 static void
5307 print_pattern (buf, x, verbose)
5308 char *buf;
5309 rtx x;
5310 int verbose;
5311 {
5312 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5313
5314 switch (GET_CODE (x))
5315 {
5316 case SET:
5317 print_value (t1, SET_DEST (x), verbose);
5318 print_value (t2, SET_SRC (x), verbose);
5319 sprintf (buf, "%s=%s", t1, t2);
5320 break;
5321 case RETURN:
5322 sprintf (buf, "return");
5323 break;
5324 case CALL:
5325 print_exp (buf, x, verbose);
5326 break;
5327 case CLOBBER:
5328 print_value (t1, XEXP (x, 0), verbose);
5329 sprintf (buf, "clobber %s", t1);
5330 break;
5331 case USE:
5332 print_value (t1, XEXP (x, 0), verbose);
5333 sprintf (buf, "use %s", t1);
5334 break;
5335 case COND_EXEC:
5336 print_value (t1, COND_EXEC_CODE (x), verbose);
5337 print_value (t2, COND_EXEC_TEST (x), verbose);
5338 sprintf (buf, "cond_exec %s %s", t1, t2);
5339 break;
5340 case PARALLEL:
5341 {
5342 int i;
5343
5344 sprintf (t1, "{");
5345 for (i = 0; i < XVECLEN (x, 0); i++)
5346 {
5347 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5348 sprintf (t3, "%s%s;", t1, t2);
5349 strcpy (t1, t3);
5350 }
5351 sprintf (buf, "%s}", t1);
5352 }
5353 break;
5354 case SEQUENCE:
5355 {
5356 int i;
5357
5358 sprintf (t1, "%%{");
5359 for (i = 0; i < XVECLEN (x, 0); i++)
5360 {
5361 print_insn (t2, XVECEXP (x, 0, i), verbose);
5362 sprintf (t3, "%s%s;", t1, t2);
5363 strcpy (t1, t3);
5364 }
5365 sprintf (buf, "%s%%}", t1);
5366 }
5367 break;
5368 case ASM_INPUT:
5369 sprintf (buf, "asm {%s}", XSTR (x, 0));
5370 break;
5371 case ADDR_VEC:
5372 break;
5373 case ADDR_DIFF_VEC:
5374 print_value (buf, XEXP (x, 0), verbose);
5375 break;
5376 case TRAP_IF:
5377 print_value (t1, TRAP_CONDITION (x), verbose);
5378 sprintf (buf, "trap_if %s", t1);
5379 break;
5380 case UNSPEC:
5381 {
5382 int i;
5383
5384 sprintf (t1, "unspec{");
5385 for (i = 0; i < XVECLEN (x, 0); i++)
5386 {
5387 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5388 sprintf (t3, "%s%s;", t1, t2);
5389 strcpy (t1, t3);
5390 }
5391 sprintf (buf, "%s}", t1);
5392 }
5393 break;
5394 case UNSPEC_VOLATILE:
5395 {
5396 int i;
5397
5398 sprintf (t1, "unspec/v{");
5399 for (i = 0; i < XVECLEN (x, 0); i++)
5400 {
5401 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5402 sprintf (t3, "%s%s;", t1, t2);
5403 strcpy (t1, t3);
5404 }
5405 sprintf (buf, "%s}", t1);
5406 }
5407 break;
5408 default:
5409 print_value (buf, x, verbose);
5410 }
5411 } /* print_pattern */
5412
5413 /* This is the main function of the rtl visualization mechanism. It
5414 accepts an rtx and tries to recognize it as an insn, then prints it
5415 properly in human-readable form, resembling assembler mnemonics.
5416 For every insn it prints its UID and the BB the insn belongs to.
5417 (Probably the last "option" should be extended somehow, since it
5418 now depends on sched.c internal variables ...) */
5419
5420 static void
5421 print_insn (buf, x, verbose)
5422 char *buf;
5423 rtx x;
5424 int verbose;
5425 {
5426 char t[BUF_LEN];
5427 rtx insn = x;
5428
5429 switch (GET_CODE (x))
5430 {
5431 case INSN:
5432 print_pattern (t, PATTERN (x), verbose);
5433 if (verbose)
5434 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5435 INSN_UID (x), t);
5436 else
5437 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5438 break;
5439 case JUMP_INSN:
5440 print_pattern (t, PATTERN (x), verbose);
5441 if (verbose)
5442 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5443 INSN_UID (x), t);
5444 else
5445 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5446 break;
5447 case CALL_INSN:
5448 x = PATTERN (insn);
5449 if (GET_CODE (x) == PARALLEL)
5450 {
5451 x = XVECEXP (x, 0, 0);
5452 print_pattern (t, x, verbose);
5453 }
5454 else
5455 strcpy (t, "call <...>");
5456 if (verbose)
5457 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5458 INSN_UID (insn), t);
5459 else
5460 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5461 break;
5462 case CODE_LABEL:
5463 sprintf (buf, "L%d:", INSN_UID (x));
5464 break;
5465 case BARRIER:
5466 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5467 break;
5468 case NOTE:
5469 if (NOTE_LINE_NUMBER (x) > 0)
5470 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5471 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5472 else
5473 sprintf (buf, "%4d %s", INSN_UID (x),
5474 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5475 break;
5476 default:
5477 if (verbose)
5478 {
5479 sprintf (buf, "Not an INSN at all\n");
5480 debug_rtx (x);
5481 }
5482 else
5483 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5484 }
5485 } /* print_insn */
5486
5487 /* Print visualization debugging info. */
5488
5489 static void
5490 print_block_visualization (b, s)
5491 int b;
5492 const char *s;
5493 {
5494 int unit, i;
5495
5496 /* Print header. */
5497 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5498
5499 /* Print names of units. */
5500 fprintf (dump, ";; %-8s", "clock");
5501 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5502 if (function_units[unit].bitmask & target_units)
5503 for (i = 0; i < function_units[unit].multiplicity; i++)
5504 fprintf (dump, " %-33s", function_units[unit].name);
5505 fprintf (dump, " %-8s\n", "no-unit");
5506
5507 fprintf (dump, ";; %-8s", "=====");
5508 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5509 if (function_units[unit].bitmask & target_units)
5510 for (i = 0; i < function_units[unit].multiplicity; i++)
5511 fprintf (dump, " %-33s", "==============================");
5512 fprintf (dump, " %-8s\n", "=======");
5513
5514 /* Print insns in each cycle. */
5515 fprintf (dump, "%s\n", visual_tbl);
5516 }
5517
5518 /* Print insns in the 'no_unit' column of visualization. */
5519
5520 static void
5521 visualize_no_unit (insn)
5522 rtx insn;
5523 {
5524 vis_no_unit[n_vis_no_unit] = insn;
5525 n_vis_no_unit++;
5526 }
5527
5528 /* Print insns scheduled in clock, for visualization. */
5529
5530 static void
5531 visualize_scheduled_insns (b, clock)
5532 int b, clock;
5533 {
5534 int i, unit;
5535
5536 /* If no more room, split table into two. */
5537 if (n_visual_lines >= MAX_VISUAL_LINES)
5538 {
5539 print_block_visualization (b, "(incomplete)");
5540 init_block_visualization ();
5541 }
5542
5543 n_visual_lines++;
5544
5545 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5546 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5547 if (function_units[unit].bitmask & target_units)
5548 for (i = 0; i < function_units[unit].multiplicity; i++)
5549 {
5550 int instance = unit + i * FUNCTION_UNITS_SIZE;
5551 rtx insn = unit_last_insn[instance];
5552
5553 /* Print insns that still keep the unit busy. */
5554 if (insn &&
5555 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5556 {
5557 char str[BUF_LEN];
5558 print_insn (str, insn, 0);
5559 str[INSN_LEN] = '\0';
5560 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5561 }
5562 else
5563 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5564 }
5565
5566 /* Print insns that are not assigned to any unit. */
5567 for (i = 0; i < n_vis_no_unit; i++)
5568 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5569 INSN_UID (vis_no_unit[i]));
5570 n_vis_no_unit = 0;
5571
5572 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5573 }
5574
5575 /* Print stalled cycles. */
5576
5577 static void
5578 visualize_stall_cycles (b, stalls)
5579 int b, stalls;
5580 {
5581 int i;
5582
5583 /* If no more room, split table into two. */
5584 if (n_visual_lines >= MAX_VISUAL_LINES)
5585 {
5586 print_block_visualization (b, "(incomplete)");
5587 init_block_visualization ();
5588 }
5589
5590 n_visual_lines++;
5591
5592 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5593 for (i = 0; i < stalls; i++)
5594 sprintf (visual_tbl + strlen (visual_tbl), ".");
5595 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5596 }
5597
5598 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5599
5600 static rtx
5601 move_insn1 (insn, last)
5602 rtx insn, last;
5603 {
5604 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5605 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5606
5607 NEXT_INSN (insn) = NEXT_INSN (last);
5608 PREV_INSN (NEXT_INSN (last)) = insn;
5609
5610 NEXT_INSN (last) = insn;
5611 PREV_INSN (insn) = last;
5612
5613 return insn;
5614 }
5615
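/* In other words, given a chain  ... A <-> INSN <-> B ... LAST <-> C ...,
   move_insn1 unlinks INSN from between A and B and splices it back in
   between LAST and C, leaving  ... A <-> B ... LAST <-> INSN <-> C.  */
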
5616 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5617 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5618 NOTEs. The REG_SAVE_NOTE note following the first one contains the
5619 saved value for NOTE_BLOCK_NUMBER which is useful for
5620 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5621 output by the instruction scheduler. Return the new value of LAST. */
5622
5623 static rtx
5624 reemit_notes (insn, last)
5625 rtx insn;
5626 rtx last;
5627 {
5628 rtx note, retval;
5629
5630 retval = last;
5631 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5632 {
5633 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5634 {
5635 enum insn_note note_type = INTVAL (XEXP (note, 0));
5636
5637 if (note_type == NOTE_INSN_SETJMP)
5638 {
5639 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5640 CONST_CALL_P (retval) = CONST_CALL_P (note);
5641 remove_note (insn, note);
5642 note = XEXP (note, 1);
5643 }
5644 else if (note_type == NOTE_INSN_RANGE_BEG
5645 || note_type == NOTE_INSN_RANGE_END)
5646 {
5647 last = emit_note_before (note_type, last);
5648 remove_note (insn, note);
5649 note = XEXP (note, 1);
5650 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5651 }
5652 else
5653 {
5654 last = emit_note_before (note_type, last);
5655 remove_note (insn, note);
5656 note = XEXP (note, 1);
5657 if (note_type == NOTE_INSN_EH_REGION_BEG
5658 || note_type == NOTE_INSN_EH_REGION_END)
5659 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5660 }
5661 remove_note (insn, note);
5662 }
5663 }
5664 return retval;
5665 }
5666
5667 /* Move INSN, and all insns which should be issued before it,
5668 due to SCHED_GROUP_P flag. Reemit notes if needed.
5669
5670 Return the last insn emitted by the scheduler, which is the
5671 return value from the first call to reemit_notes. */
5672
5673 static rtx
5674 move_insn (insn, last)
5675 rtx insn, last;
5676 {
5677 rtx retval = NULL;
5678
5679 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5680 insns with SCHED_GROUP_P set first. */
5681 while (SCHED_GROUP_P (insn))
5682 {
5683 rtx prev = PREV_INSN (insn);
5684
5685 /* Move a SCHED_GROUP_P insn. */
5686 move_insn1 (insn, last);
5687 /* If this is the first call to reemit_notes, then record
5688 its return value. */
5689 if (retval == NULL_RTX)
5690 retval = reemit_notes (insn, insn);
5691 else
5692 reemit_notes (insn, insn);
5693 insn = prev;
5694 }
5695
5696 /* Now move the first non SCHED_GROUP_P insn. */
5697 move_insn1 (insn, last);
5698
5699 /* If this is the first call to reemit_notes, then record
5700 its return value. */
5701 if (retval == NULL_RTX)
5702 retval = reemit_notes (insn, insn);
5703 else
5704 reemit_notes (insn, insn);
5705
5706 return retval;
5707 }
5708
5709 /* Return the insn which represents a SCHED_GROUP, namely
5710 the last insn in the group. */
5711
5712 static rtx
5713 group_leader (insn)
5714 rtx insn;
5715 {
5716 rtx prev;
5717
5718 do
5719 {
5720 prev = insn;
5721 insn = next_nonnote_insn (insn);
5722 }
5723 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5724
5725 return prev;
5726 }
5727
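/* For example, on a cc0 target the conditional jump that reads cc0 has
   SCHED_GROUP_P set so that it stays glued to the compare that sets cc0;
   given the compare, group_leader walks forward over the group and
   returns the jump, which then stands for the whole group.  */
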
5728 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5729 possibly bringing insns from subsequent blocks in the same region.
5730 Return number of insns scheduled. */
5731
5732 static int
5733 schedule_block (bb, rgn_n_insns)
5734 int bb;
5735 int rgn_n_insns;
5736 {
5737 /* Local variables. */
5738 rtx insn, last;
5739 rtx *ready;
5740 int n_ready = 0;
5741 int can_issue_more;
5742
5743 /* Flow block of this bb. */
5744 int b = BB_TO_BLOCK (bb);
5745
5746 /* target_n_insns == number of insns in b before scheduling starts.
5747 sched_target_n_insns == how many of b's insns were scheduled.
5748 sched_n_insns == how many insns were scheduled in b. */
5749 int target_n_insns = 0;
5750 int sched_target_n_insns = 0;
5751 int sched_n_insns = 0;
5752
5753 #define NEED_NOTHING 0
5754 #define NEED_HEAD 1
5755 #define NEED_TAIL 2
5756 int new_needs;
5757
5758 /* Head/tail info for this block. */
5759 rtx prev_head;
5760 rtx next_tail;
5761 rtx head;
5762 rtx tail;
5763 int bb_src;
5764
5765 /* We used to have code to avoid getting parameters moved from hard
5766 argument registers into pseudos.
5767
5768 However, it was removed when it proved to be of marginal benefit
5769 and caused problems because schedule_block and compute_forward_dependences
5770 had different notions of what the "head" insn was. */
5771 get_bb_head_tail (bb, &head, &tail);
5772
5773 /* rm_other_notes only removes notes which are _inside_ the
5774 block---that is, it won't remove notes before the first real insn
5775 or after the last real insn of the block. So if the first insn
5776 has a REG_SAVE_NOTE which would otherwise be emitted before the
5777 insn, it is redundant with the note before the start of the
5778 block, and so we have to take it out.
5779
5780 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
5781 referencing NOTE_INSN_SETJMP at the end of the block. */
5782 if (GET_RTX_CLASS (GET_CODE (head)) == 'i')
5783 {
5784 rtx note;
5785
5786 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5787 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5788 {
5789 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
5790 {
5791 remove_note (head, note);
5792 note = XEXP (note, 1);
5793 remove_note (head, note);
5794 }
5795 else
5796 note = XEXP (note, 1);
5797 }
5798 }
5799
5800 next_tail = NEXT_INSN (tail);
5801 prev_head = PREV_INSN (head);
5802
5803 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5804 to schedule this block. */
5805 if (head == tail
5806 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5807 return (sched_n_insns);
5808
5809 /* Debug info. */
5810 if (sched_verbose)
5811 {
5812 fprintf (dump, ";; ======================================================\n");
5813 fprintf (dump,
5814 ";; -- basic block %d from %d to %d -- %s reload\n",
5815 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5816 (reload_completed ? "after" : "before"));
5817 fprintf (dump, ";; ======================================================\n");
5818 fprintf (dump, "\n");
5819
5820 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5821 init_block_visualization ();
5822 }
5823
5824 /* Remove remaining note insns from the block, save them in
5825 note_list. These notes are restored at the end of
5826 schedule_block (). */
5827 note_list = 0;
5828 rm_other_notes (head, tail);
5829
5830 target_bb = bb;
5831
5832 /* Prepare current target block info. */
5833 if (current_nr_blocks > 1)
5834 {
5835 candidate_table = (candidate *) xmalloc (current_nr_blocks
5836 * sizeof (candidate));
5837
5838 bblst_last = 0;
5839 /* ??? It is not clear why bblst_size is computed this way. The original
5840 number was clearly too small as it resulted in compiler failures.
5841 Multiplying the original number by 2 (to account for update_bbs
5842 members) seems to be a reasonable solution. */
5843 /* ??? Or perhaps there is a bug somewhere else in this file? */
5844 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5845 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
5846
5847 bitlst_table_last = 0;
5848 bitlst_table_size = rgn_nr_edges;
5849 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
5850
5851 compute_trg_info (bb);
5852 }
5853
5854 clear_units ();
5855
5856 /* Allocate the ready list. */
5857 ready = (rtx *) xmalloc ((rgn_n_insns + 1) * sizeof (rtx));
5858
5859 /* Print debugging information. */
5860 if (sched_verbose >= 5)
5861 debug_dependencies ();
5862
5863
5864 /* Initialize ready list with all 'ready' insns in target block.
5865 Count number of insns in the target block being scheduled. */
5866 n_ready = 0;
5867 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5868 {
5869 rtx next;
5870
5871 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5872 continue;
5873 next = NEXT_INSN (insn);
5874
5875 if (INSN_DEP_COUNT (insn) == 0
5876 && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5877 ready[n_ready++] = insn;
5878 if (!(SCHED_GROUP_P (insn)))
5879 target_n_insns++;
5880 }
5881
5882 /* Add to ready list all 'ready' insns in valid source blocks.
5883 For speculative insns, check-live, exception-free, and
5884 issue-delay. */
5885 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5886 if (IS_VALID (bb_src))
5887 {
5888 rtx src_head;
5889 rtx src_next_tail;
5890 rtx tail, head;
5891
5892 get_bb_head_tail (bb_src, &head, &tail);
5893 src_next_tail = NEXT_INSN (tail);
5894 src_head = head;
5895
5896 if (head == tail
5897 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5898 continue;
5899
5900 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5901 {
5902 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5903 continue;
5904
5905 if (!CANT_MOVE (insn)
5906 && (!IS_SPECULATIVE_INSN (insn)
5907 || (insn_issue_delay (insn) <= 3
5908 && check_live (insn, bb_src)
5909 && is_exception_free (insn, bb_src, target_bb))))
5910 {
5911 rtx next;
5912
5913 /* Note that we haven't squirreled away the notes for
5914 blocks other than the current one. So if this is a
5915 speculative insn, NEXT might otherwise be a note. */
5916 next = next_nonnote_insn (insn);
5917 if (INSN_DEP_COUNT (insn) == 0
5918 && (! next
5919 || SCHED_GROUP_P (next) == 0
5920 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5921 ready[n_ready++] = insn;
5922 }
5923 }
5924 }
5925
5926 #ifdef MD_SCHED_INIT
5927 MD_SCHED_INIT (dump, sched_verbose);
5928 #endif
5929
5930 /* No insns scheduled in this block yet. */
5931 last_scheduled_insn = 0;
5932
5933 /* Q_SIZE is the total number of insns in the queue. */
5934 q_ptr = 0;
5935 q_size = 0;
5936 last_clock_var = 0;
5937 bzero ((char *) insn_queue, sizeof (insn_queue));
5938
5939 /* Start just before the beginning of time. */
5940 clock_var = -1;
5941
5942 /* We start inserting insns after PREV_HEAD. */
5943 last = prev_head;
5944
5945 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5946 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
5947 ? NEED_HEAD : NEED_NOTHING);
5948 if (PREV_INSN (next_tail) == BLOCK_END (b))
5949 new_needs |= NEED_TAIL;
5950
5951 /* Loop until all the insns in BB are scheduled. */
5952 while (sched_target_n_insns < target_n_insns)
5953 {
5954 clock_var++;
5955
5956 /* Add to the ready list all pending insns that can be issued now.
5957 If there are no ready insns, increment clock until one
5958 is ready and add all pending insns at that point to the ready
5959 list. */
5960 n_ready = queue_to_ready (ready, n_ready);
5961
5962 if (n_ready == 0)
5963 abort ();
5964
5965 if (sched_verbose >= 2)
5966 {
5967 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
5968 debug_ready_list (ready, n_ready);
5969 }
5970
5971 /* Sort the ready list based on priority. */
5972 SCHED_SORT (ready, n_ready);
5973
5974 /* Allow the target to reorder the list, typically for
5975 better instruction bundling. */
5976 #ifdef MD_SCHED_REORDER
5977 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
5978 can_issue_more);
5979 #else
5980 can_issue_more = issue_rate;
5981 #endif
5982
5983 if (sched_verbose)
5984 {
5985 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
5986 debug_ready_list (ready, n_ready);
5987 }
5988
5989 /* Issue insns from ready list. */
5990 while (n_ready != 0 && can_issue_more)
5991 {
5992 /* Select and remove the insn from the ready list. */
5993 rtx insn = ready[--n_ready];
5994 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
5995
5996 if (cost >= 1)
5997 {
5998 queue_insn (insn, cost);
5999 continue;
6000 }
6001
6002 /* An interblock motion? */
6003 if (INSN_BB (insn) != target_bb)
6004 {
6005 rtx temp;
6006 basic_block b1;
6007
6008 if (IS_SPECULATIVE_INSN (insn))
6009 {
6010 if (!check_live (insn, INSN_BB (insn)))
6011 continue;
6012 update_live (insn, INSN_BB (insn));
6013
6014 /* For speculative load, mark insns fed by it. */
6015 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
6016 set_spec_fed (insn);
6017
6018 nr_spec++;
6019 }
6020 nr_inter++;
6021
6022 /* Find the beginning of the scheduling group. */
6023 /* ??? Ought to update basic block here, but later bits of
6024 schedule_block assume the original insn block is
6025 still intact. */
6026
6027 temp = insn;
6028 while (SCHED_GROUP_P (temp))
6029 temp = PREV_INSN (temp);
6030
6031 /* Update source block boundaries. */
6032 b1 = BLOCK_FOR_INSN (temp);
6033 if (temp == b1->head && insn == b1->end)
6034 {
6035 /* We moved all the insns in the basic block.
6036 Emit a note after the last insn and update the
6037 begin/end boundaries to point to the note. */
6038 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
6039 b1->head = note;
6040 b1->end = note;
6041 }
6042 else if (insn == b1->end)
6043 {
6044 /* We took insns from the end of the basic block,
6045 so update the end of block boundary so that it
6046 points to the first insn we did not move. */
6047 b1->end = PREV_INSN (temp);
6048 }
6049 else if (temp == b1->head)
6050 {
6051 /* We took insns from the start of the basic block,
6052 so update the start of block boundary so that
6053 it points to the first insn we did not move. */
6054 b1->head = NEXT_INSN (insn);
6055 }
6056 }
6057 else
6058 {
6059 /* In-block motion. */
6060 sched_target_n_insns++;
6061 }
6062
6063 last_scheduled_insn = insn;
6064 last = move_insn (insn, last);
6065 sched_n_insns++;
6066
6067 #ifdef MD_SCHED_VARIABLE_ISSUE
6068 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6069 can_issue_more);
6070 #else
6071 can_issue_more--;
6072 #endif
6073
6074 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6075
6076 /* Close this block after scheduling its jump. */
6077 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6078 break;
6079 }
6080
6081 /* Debug info. */
6082 if (sched_verbose)
6083 visualize_scheduled_insns (b, clock_var);
6084 }
6085
6086 /* Debug info. */
6087 if (sched_verbose)
6088 {
6089 fprintf (dump, ";;\tReady list (final): ");
6090 debug_ready_list (ready, n_ready);
6091 print_block_visualization (b, "");
6092 }
6093
6094 /* Sanity check -- queue must be empty now. Meaningless if region has
6095 multiple bbs. */
6096 if (current_nr_blocks > 1)
6097 if (!flag_schedule_interblock && q_size != 0)
6098 abort ();
6099
6100 /* Update head/tail boundaries. */
6101 head = NEXT_INSN (prev_head);
6102 tail = last;
6103
6104 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6105 previously found among the insns. Insert them at the beginning
6106 of the insns. */
6107 if (note_list != 0)
6108 {
6109 rtx note_head = note_list;
6110
6111 while (PREV_INSN (note_head))
6112 {
6113 note_head = PREV_INSN (note_head);
6114 }
6115
6116 PREV_INSN (note_head) = PREV_INSN (head);
6117 NEXT_INSN (PREV_INSN (head)) = note_head;
6118 PREV_INSN (head) = note_list;
6119 NEXT_INSN (note_list) = head;
6120 head = note_head;
6121 }
6122
6123 /* Update target block boundaries. */
6124 if (new_needs & NEED_HEAD)
6125 BLOCK_HEAD (b) = head;
6126
6127 if (new_needs & NEED_TAIL)
6128 BLOCK_END (b) = tail;
6129
6130 /* Debugging. */
6131 if (sched_verbose)
6132 {
6133 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6134 clock_var, INSN_UID (BLOCK_HEAD (b)));
6135 fprintf (dump, ";; new basic block end = %d\n\n",
6136 INSN_UID (BLOCK_END (b)));
6137 }
6138
6139 /* Clean up. */
6140 if (current_nr_blocks > 1)
6141 {
6142 free (candidate_table);
6143 free (bblst_table);
6144 free (bitlst_table);
6145 }
6146 free (ready);
6147
6148 return (sched_n_insns);
6149 } /* schedule_block () */
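
/* A compressed sketch of the scheduling loop above (illustrative only,
   hence the #if 0): interblock motion, speculative checks, visualization,
   and the MD_SCHED_* target hooks are omitted.  */
#if 0
  while (sched_target_n_insns < target_n_insns)
    {
      clock_var++;

      /* Move insns whose stall has expired from the queue to READY.  */
      n_ready = queue_to_ready (ready, n_ready);

      /* Sort so that the best candidate ends up at ready[n_ready - 1].  */
      SCHED_SORT (ready, n_ready);

      can_issue_more = issue_rate;
      while (n_ready != 0 && can_issue_more)
        {
          rtx insn = ready[--n_ready];
          int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);

          /* A function unit hazard postpones the insn by COST cycles.  */
          if (cost >= 1)
            {
              queue_insn (insn, cost);
              continue;
            }

          /* Issue it: splice it in after the last scheduled insn and let
             schedule_insn release its dependents into the ready list.  */
          last = move_insn (insn, last);
          sched_target_n_insns++;
          sched_n_insns++;
          can_issue_more--;
          n_ready = schedule_insn (insn, ready, n_ready, clock_var);
        }
    }
#endif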
6150 \f
6151
6152 /* Print the bit-set of registers, S, callable from debugger. */
6153
6154 extern void
6155 debug_reg_vector (s)
6156 regset s;
6157 {
6158 int regno;
6159
6160 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6161 {
6162 fprintf (dump, " %d", regno);
6163 });
6164
6165 fprintf (dump, "\n");
6166 }
6167
6168 /* Use the backward dependences from LOG_LINKS to build
6169 forward dependences in INSN_DEPEND. */
6170
6171 static void
6172 compute_block_forward_dependences (bb)
6173 int bb;
6174 {
6175 rtx insn, link;
6176 rtx tail, head;
6177 rtx next_tail;
6178 enum reg_note dep_type;
6179
6180 get_bb_head_tail (bb, &head, &tail);
6181 next_tail = NEXT_INSN (tail);
6182 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6183 {
6184 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6185 continue;
6186
6187 insn = group_leader (insn);
6188
6189 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6190 {
6191 rtx x = group_leader (XEXP (link, 0));
6192 rtx new_link;
6193
6194 if (x != XEXP (link, 0))
6195 continue;
6196
6197 #ifdef ENABLE_CHECKING
6198 /* If add_dependence is working properly there should never
6199 be notes, deleted insns or duplicates in the backward
6200 links. Thus we need not check for them here.
6201
6202 However, if we have enabled checking we might as well go
6203 ahead and verify that add_dependence worked properly. */
6204 if (GET_CODE (x) == NOTE
6205 || INSN_DELETED_P (x)
6206 || find_insn_list (insn, INSN_DEPEND (x)))
6207 abort ();
6208 #endif
6209
6210 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6211
6212 dep_type = REG_NOTE_KIND (link);
6213 PUT_REG_NOTE_KIND (new_link, dep_type);
6214
6215 INSN_DEPEND (x) = new_link;
6216 INSN_DEP_COUNT (insn) += 1;
6217 }
6218 }
6219 }
6220
6221 /* Initialize variables for region data dependence analysis.
6222 n_bbs is the number of region blocks. */
6223
6224 static void
6225 init_deps (deps)
6226 struct deps *deps;
6227 {
6228 int maxreg = max_reg_num ();
6229 deps->reg_last_uses = (rtx *) xcalloc (maxreg, sizeof (rtx));
6230 deps->reg_last_sets = (rtx *) xcalloc (maxreg, sizeof (rtx));
6231 deps->reg_last_clobbers = (rtx *) xcalloc (maxreg, sizeof (rtx));
6232
6233 deps->pending_read_insns = 0;
6234 deps->pending_read_mems = 0;
6235 deps->pending_write_insns = 0;
6236 deps->pending_write_mems = 0;
6237 deps->pending_lists_length = 0;
6238 deps->last_pending_memory_flush = 0;
6239 deps->last_function_call = 0;
6240
6241 deps->sched_before_next_call
6242 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6243 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6244 LOG_LINKS (deps->sched_before_next_call) = 0;
6245 }
6246
6247 /* Add dependences so that branches are scheduled to run last in their
6248 block. */
6249
6250 static void
6251 add_branch_dependences (head, tail)
6252 rtx head, tail;
6253 {
6254 rtx insn, last;
6255
6256 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6257 to remain in order at the end of the block by adding dependencies and
6258 giving the last a high priority. There may be notes present, and
6259 prev_head may also be a note.
6260
6261 Branches must obviously remain at the end. Calls should remain at the
6262 end since moving them results in worse register allocation. Uses remain
6263 at the end to ensure proper register allocation. cc0 setters remain
6264 at the end because they can't be moved away from their cc0 user. */
6265 insn = tail;
6266 last = 0;
6267 while (GET_CODE (insn) == CALL_INSN
6268 || GET_CODE (insn) == JUMP_INSN
6269 || (GET_CODE (insn) == INSN
6270 && (GET_CODE (PATTERN (insn)) == USE
6271 || GET_CODE (PATTERN (insn)) == CLOBBER
6272 #ifdef HAVE_cc0
6273 || sets_cc0_p (PATTERN (insn))
6274 #endif
6275 ))
6276 || GET_CODE (insn) == NOTE)
6277 {
6278 if (GET_CODE (insn) != NOTE)
6279 {
6280 if (last != 0
6281 && !find_insn_list (insn, LOG_LINKS (last)))
6282 {
6283 add_dependence (last, insn, REG_DEP_ANTI);
6284 INSN_REF_COUNT (insn)++;
6285 }
6286
6287 CANT_MOVE (insn) = 1;
6288
6289 last = insn;
6290 /* Skip over insns that are part of a group.
6291 Make each insn explicitly depend on the previous insn.
6292 This ensures that only the group header will ever enter
6293 the ready queue (and, when scheduled, will automatically
6294 schedule the SCHED_GROUP_P block). */
6295 while (SCHED_GROUP_P (insn))
6296 {
6297 rtx temp = prev_nonnote_insn (insn);
6298 add_dependence (insn, temp, REG_DEP_ANTI);
6299 insn = temp;
6300 }
6301 }
6302
6303 /* Don't overrun the bounds of the basic block. */
6304 if (insn == head)
6305 break;
6306
6307 insn = PREV_INSN (insn);
6308 }
6309
6310 /* Make sure these insns are scheduled last in their block. */
6311 insn = last;
6312 if (insn != 0)
6313 while (insn != head)
6314 {
6315 insn = prev_nonnote_insn (insn);
6316
6317 if (INSN_REF_COUNT (insn) != 0)
6318 continue;
6319
6320 add_dependence (last, insn, REG_DEP_ANTI);
6321 INSN_REF_COUNT (insn) = 1;
6322
6323 /* Skip over insns that are part of a group. */
6324 while (SCHED_GROUP_P (insn))
6325 insn = prev_nonnote_insn (insn);
6326 }
6327 }
6328
6329 /* After computing the dependencies for block BB, propagate the dependencies
6330 found in TMP_DEPS to the successors of the block. MAX_REG is the number
6331 of registers. */
6332 static void
6333 propagate_deps (bb, tmp_deps, max_reg)
6334 int bb;
6335 struct deps *tmp_deps;
6336 int max_reg;
6337 {
6338 int b = BB_TO_BLOCK (bb);
6339 int e, first_edge;
6340 int reg;
6341 rtx link_insn, link_mem;
6342 rtx u;
6343
6344 /* These lists should point to the right place, for correct
6345 freeing later. */
6346 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
6347 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
6348 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
6349 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
6350
6351 /* bb's structures are inherited by its successors. */
6352 first_edge = e = OUT_EDGES (b);
6353 if (e <= 0)
6354 return;
6355
6356 do
6357 {
6358 rtx x;
6359 int b_succ = TO_BLOCK (e);
6360 int bb_succ = BLOCK_TO_BB (b_succ);
6361 struct deps *succ_deps = bb_deps + bb_succ;
6362
6363 /* Only bbs "below" bb, in the same region, are interesting. */
6364 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6365 || bb_succ <= bb)
6366 {
6367 e = NEXT_OUT (e);
6368 continue;
6369 }
6370
6371 for (reg = 0; reg < max_reg; reg++)
6372 {
6373 /* reg-last-uses lists are inherited by bb_succ. */
6374 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
6375 {
6376 if (find_insn_list (XEXP (u, 0),
6377 succ_deps->reg_last_uses[reg]))
6378 continue;
6379
6380 succ_deps->reg_last_uses[reg]
6381 = alloc_INSN_LIST (XEXP (u, 0),
6382 succ_deps->reg_last_uses[reg]);
6383 }
6384
6385 /* reg-last-defs lists are inherited by bb_succ. */
6386 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
6387 {
6388 if (find_insn_list (XEXP (u, 0),
6389 succ_deps->reg_last_sets[reg]))
6390 continue;
6391
6392 succ_deps->reg_last_sets[reg]
6393 = alloc_INSN_LIST (XEXP (u, 0),
6394 succ_deps->reg_last_sets[reg]);
6395 }
6396
6397 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6398 {
6399 if (find_insn_list (XEXP (u, 0),
6400 succ_deps->reg_last_clobbers[reg]))
6401 continue;
6402
6403 succ_deps->reg_last_clobbers[reg]
6404 = alloc_INSN_LIST (XEXP (u, 0),
6405 succ_deps->reg_last_clobbers[reg]);
6406 }
6407 }
6408
6409 /* Mem read/write lists are inherited by bb_succ. */
6410 link_insn = tmp_deps->pending_read_insns;
6411 link_mem = tmp_deps->pending_read_mems;
6412 while (link_insn)
6413 {
6414 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6415 XEXP (link_mem, 0),
6416 succ_deps->pending_read_insns,
6417 succ_deps->pending_read_mems)))
6418 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
6419 &succ_deps->pending_read_mems,
6420 XEXP (link_insn, 0), XEXP (link_mem, 0));
6421 link_insn = XEXP (link_insn, 1);
6422 link_mem = XEXP (link_mem, 1);
6423 }
6424
6425 link_insn = tmp_deps->pending_write_insns;
6426 link_mem = tmp_deps->pending_write_mems;
6427 while (link_insn)
6428 {
6429 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6430 XEXP (link_mem, 0),
6431 succ_deps->pending_write_insns,
6432 succ_deps->pending_write_mems)))
6433 add_insn_mem_dependence (succ_deps,
6434 &succ_deps->pending_write_insns,
6435 &succ_deps->pending_write_mems,
6436 XEXP (link_insn, 0), XEXP (link_mem, 0));
6437
6438 link_insn = XEXP (link_insn, 1);
6439 link_mem = XEXP (link_mem, 1);
6440 }
6441
6442 /* last_function_call is inherited by bb_succ. */
6443 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
6444 {
6445 if (find_insn_list (XEXP (u, 0),
6446 succ_deps->last_function_call))
6447 continue;
6448
6449 succ_deps->last_function_call
6450 = alloc_INSN_LIST (XEXP (u, 0),
6451 succ_deps->last_function_call);
6452 }
6453
6454 /* last_pending_memory_flush is inherited by bb_succ. */
6455 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
6456 {
6457 if (find_insn_list (XEXP (u, 0),
6458 succ_deps->last_pending_memory_flush))
6459 continue;
6460
6461 succ_deps->last_pending_memory_flush
6462 = alloc_INSN_LIST (XEXP (u, 0),
6463 succ_deps->last_pending_memory_flush);
6464 }
6465
6466 /* sched_before_next_call is inherited by bb_succ. */
6467 x = LOG_LINKS (tmp_deps->sched_before_next_call);
6468 for (; x; x = XEXP (x, 1))
6469 add_dependence (succ_deps->sched_before_next_call,
6470 XEXP (x, 0), REG_DEP_ANTI);
6471
6472 e = NEXT_OUT (e);
6473 }
6474 while (e != first_edge);
6475 }
6476
6477 /* Compute backward dependences inside bb. In a multiple blocks region:
6478 (1) a bb is analyzed after its predecessors, and (2) the lists in
6479 effect at the end of bb (after analyzing for bb) are inherited by
6480 bb's successors.
6481
6482 Specifically for reg-reg data dependences, the block insns are
6483 scanned by sched_analyze () top-to-bottom. Two lists are
6484 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6485 and reg_last_uses[] for register USEs.
6486
6487 When analysis is completed for bb, we update for its successors:
6488 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6489 ; - USES[succ] = Union (USES [succ], USES [bb])
6490
6491 The mechanism for computing mem-mem data dependence is very
6492 similar, and the result is interblock dependences in the region. */
6493
6494 static void
6495 compute_block_backward_dependences (bb)
6496 int bb;
6497 {
6498 int i;
6499 rtx head, tail;
6500 int max_reg = max_reg_num ();
6501 struct deps tmp_deps;
6502
6503 tmp_deps = bb_deps[bb];
6504
6505 /* Do the analysis for this block. */
6506 get_bb_head_tail (bb, &head, &tail);
6507 sched_analyze (&tmp_deps, head, tail);
6508 add_branch_dependences (head, tail);
6509
6510 if (current_nr_blocks > 1)
6511 propagate_deps (bb, &tmp_deps, max_reg);
6512
6513 /* Free up the INSN_LISTs.
6514
6515 Note this loop is executed max_reg * nr_regions times. Its first
6516 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6517 The list was empty for the vast majority of those calls. On the PA, not
6518 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6519 3-5% on average. */
6520 for (i = 0; i < max_reg; ++i)
6521 {
6522 if (tmp_deps.reg_last_clobbers[i])
6523 free_INSN_LIST_list (&tmp_deps.reg_last_clobbers[i]);
6524 if (tmp_deps.reg_last_sets[i])
6525 free_INSN_LIST_list (&tmp_deps.reg_last_sets[i]);
6526 if (tmp_deps.reg_last_uses[i])
6527 free_INSN_LIST_list (&tmp_deps.reg_last_uses[i]);
6528 }
6529
6530 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6531 free (bb_deps[bb].reg_last_uses);
6532 free (bb_deps[bb].reg_last_sets);
6533 free (bb_deps[bb].reg_last_clobbers);
6534 bb_deps[bb].reg_last_uses = 0;
6535 bb_deps[bb].reg_last_sets = 0;
6536 bb_deps[bb].reg_last_clobbers = 0;
6537 }
6538
6539 /* Print dependences for debugging, callable from debugger. */
6540
6541 void
6542 debug_dependencies ()
6543 {
6544 int bb;
6545
6546 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6547 for (bb = 0; bb < current_nr_blocks; bb++)
6548 {
6549 if (1)
6550 {
6551 rtx head, tail;
6552 rtx next_tail;
6553 rtx insn;
6554
6555 get_bb_head_tail (bb, &head, &tail);
6556 next_tail = NEXT_INSN (tail);
6557 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6558 BB_TO_BLOCK (bb), bb);
6559
6560 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6561 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6562 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6563 "----", "----", "--", "---", "----", "----", "--------", "-----");
6564 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6565 {
6566 rtx link;
6567 int unit, range;
6568
6569 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6570 {
6571 int n;
6572 fprintf (dump, ";; %6d ", INSN_UID (insn));
6573 if (GET_CODE (insn) == NOTE)
6574 {
6575 n = NOTE_LINE_NUMBER (insn);
6576 if (n < 0)
6577 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6578 else
6579 fprintf (dump, "line %d, file %s\n", n,
6580 NOTE_SOURCE_FILE (insn));
6581 }
6582 else
6583 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6584 continue;
6585 }
6586
6587 unit = insn_unit (insn);
6588 range = (unit < 0
6589 || function_units[unit].blockage_range_function == 0) ? 0 :
6590 function_units[unit].blockage_range_function (insn);
6591 fprintf (dump,
6592 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6593 (SCHED_GROUP_P (insn) ? "+" : " "),
6594 INSN_UID (insn),
6595 INSN_CODE (insn),
6596 INSN_BB (insn),
6597 INSN_DEP_COUNT (insn),
6598 INSN_PRIORITY (insn),
6599 insn_cost (insn, 0, 0),
6600 (int) MIN_BLOCKAGE_COST (range),
6601 (int) MAX_BLOCKAGE_COST (range));
6602 insn_print_units (insn);
6603 fprintf (dump, "\t: ");
6604 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6605 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6606 fprintf (dump, "\n");
6607 }
6608 }
6609 }
6610 fprintf (dump, "\n");
6611 }
6612
6613 /* Set_priorities: compute priority of each insn in the block. */
6614
6615 static int
6616 set_priorities (bb)
6617 int bb;
6618 {
6619 rtx insn;
6620 int n_insn;
6621
6622 rtx tail;
6623 rtx prev_head;
6624 rtx head;
6625
6626 get_bb_head_tail (bb, &head, &tail);
6627 prev_head = PREV_INSN (head);
6628
6629 if (head == tail
6630 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
6631 return 0;
6632
6633 n_insn = 0;
6634 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6635 {
6636
6637 if (GET_CODE (insn) == NOTE)
6638 continue;
6639
6640 if (!(SCHED_GROUP_P (insn)))
6641 n_insn++;
6642 (void) priority (insn);
6643 }
6644
6645 return n_insn;
6646 }
6647
6648 /* Schedule a region. A region is either an inner loop, a loop-free
6649 subroutine, or a single basic block. Each bb in the region is
6650 scheduled after its flow predecessors. */
6651
6652 static void
6653 schedule_region (rgn)
6654 int rgn;
6655 {
6656 int bb;
6657 int rgn_n_insns = 0;
6658 int sched_rgn_n_insns = 0;
6659 regset_head reg_pending_sets_head;
6660 regset_head reg_pending_clobbers_head;
6661
6662 /* Set variables for the current region. */
6663 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6664 current_blocks = RGN_BLOCKS (rgn);
6665
6666 reg_pending_sets = INITIALIZE_REG_SET (reg_pending_sets_head);
6667 reg_pending_clobbers = INITIALIZE_REG_SET (reg_pending_clobbers_head);
6668 reg_pending_sets_all = 0;
6669
6670 /* Initializations for region data dependence analysis. */
6671 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
6672 for (bb = 0; bb < current_nr_blocks; bb++)
6673 init_deps (bb_deps + bb);
6674
6675 /* Compute LOG_LINKS. */
6676 for (bb = 0; bb < current_nr_blocks; bb++)
6677 compute_block_backward_dependences (bb);
6678
6679 /* Compute INSN_DEPEND. */
6680 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6681 compute_block_forward_dependences (bb);
6682
6683 /* Delete line notes and set priorities. */
6684 for (bb = 0; bb < current_nr_blocks; bb++)
6685 {
6686 if (write_symbols != NO_DEBUG)
6687 {
6688 save_line_notes (bb);
6689 rm_line_notes (bb);
6690 }
6691
6692 rgn_n_insns += set_priorities (bb);
6693 }
6694
6695 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6696 if (current_nr_blocks > 1)
6697 {
6698 int i;
6699
6700 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
6701
6702 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6703 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
6704 for (i = 0; i < current_nr_blocks; i++)
6705 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
6706
6707 /* Edge to bit. */
6708 rgn_nr_edges = 0;
6709 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
6710 for (i = 1; i < nr_edges; i++)
6711 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6712 EDGE_TO_BIT (i) = rgn_nr_edges++;
6713 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
6714
6715 rgn_nr_edges = 0;
6716 for (i = 1; i < nr_edges; i++)
6717 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6718 rgn_edges[rgn_nr_edges++] = i;
6719
6720 /* Split edges. */
6721 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6722 edgeset_bitsize = rgn_nr_edges;
6723 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6724 ancestor_edges
6725 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6726 for (i = 0; i < current_nr_blocks; i++)
6727 {
6728 pot_split[i] =
6729 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6730 ancestor_edges[i] =
6731 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6732 }
6733
6734 /* Compute probabilities, dominators, split_edges. */
6735 for (bb = 0; bb < current_nr_blocks; bb++)
6736 compute_dom_prob_ps (bb);
6737 }
6738
6739 /* Now we can schedule all blocks. */
6740 for (bb = 0; bb < current_nr_blocks; bb++)
6741 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6742
6743 /* Sanity check: verify that all region insns were scheduled. */
6744 if (sched_rgn_n_insns != rgn_n_insns)
6745 abort ();
6746
6747 /* Restore line notes. */
6748 if (write_symbols != NO_DEBUG)
6749 {
6750 for (bb = 0; bb < current_nr_blocks; bb++)
6751 restore_line_notes (bb);
6752 }
6753
6754 /* Done with this region. */
6755 free_pending_lists ();
6756
6757 FREE_REG_SET (reg_pending_sets);
6758 FREE_REG_SET (reg_pending_clobbers);
6759
6760 free (bb_deps);
6761
6762 if (current_nr_blocks > 1)
6763 {
6764 int i;
6765
6766 free (prob);
6767 for (i = 0; i < current_nr_blocks; ++i)
6768 {
6769 free (dom[i]);
6770 free (pot_split[i]);
6771 free (ancestor_edges[i]);
6772 }
6773 free (dom);
6774 free (edge_to_bit);
6775 free (rgn_edges);
6776 free (pot_split);
6777 free (ancestor_edges);
6778 }
6779 }
6780
6781 /* The one entry point in this file. DUMP_FILE is the dump file for
6782 this pass. */
6783
6784 void
6785 schedule_insns (dump_file)
6786 FILE *dump_file;
6787 {
6788 int *deaths_in_region;
6789 sbitmap blocks, large_region_blocks;
6790 int max_uid;
6791 int b;
6792 rtx insn;
6793 int rgn;
6794 int luid;
6795 int any_large_regions;
6796
6797 /* Disable speculative loads in their presence if cc0 defined. */
6798 #ifdef HAVE_cc0
6799 flag_schedule_speculative_load = 0;
6800 #endif
6801
6802 /* Taking care of this degenerate case makes the rest of
6803 this code simpler. */
6804 if (n_basic_blocks == 0)
6805 return;
6806
6807 /* Set dump and sched_verbose for the desired debugging output. If no
6808 dump-file was specified, but -fsched-verbose=N (any N), print to stderr.
6809 For -fsched-verbose=N, N>=10, print everything to stderr. */
6810 sched_verbose = sched_verbose_param;
6811 if (sched_verbose_param == 0 && dump_file)
6812 sched_verbose = 1;
6813 dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);
6814
6815 nr_inter = 0;
6816 nr_spec = 0;
6817
6818 /* Initialize issue_rate. */
6819 issue_rate = ISSUE_RATE;
6820
6821 split_all_insns (1);
6822
6823 /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
6824 pseudos which do not cross calls. */
6825 max_uid = get_max_uid () + 1;
6826
6827 h_i_d = (struct haifa_insn_data *) xcalloc (max_uid, sizeof (*h_i_d));
6828
6829 h_i_d[0].luid = 0;
6830 luid = 1;
6831 for (b = 0; b < n_basic_blocks; b++)
6832 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
6833 {
6834 INSN_LUID (insn) = luid;
6835
6836 /* Increment the next luid, unless this is a note. We don't
6837 really need separate IDs for notes and we don't want to
6838 schedule differently depending on whether or not there are
6839 line-number notes, i.e., depending on whether or not we're
6840 generating debugging information. */
6841 if (GET_CODE (insn) != NOTE)
6842 ++luid;
6843
6844 if (insn == BLOCK_END (b))
6845 break;
6846 }
6847
6848 /* ?!? We could save some memory by computing a per-region luid mapping
6849 which could reduce both the number of vectors in the cache and the size
6850 of each vector. Instead we just avoid the cache entirely unless the
6851 average number of instructions in a basic block is very high. See
6852 the comment before the declaration of true_dependency_cache for
6853 what we consider "very high". */
6854 if (luid / n_basic_blocks > 100 * 5)
6855 {
6856 true_dependency_cache = sbitmap_vector_alloc (luid, luid);
6857 sbitmap_vector_zero (true_dependency_cache, luid);
6858 }
6859
6860 nr_regions = 0;
6861 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
6862 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6863 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6864 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6865
6866 blocks = sbitmap_alloc (n_basic_blocks);
6867 large_region_blocks = sbitmap_alloc (n_basic_blocks);
6868
6869 compute_bb_for_insn (max_uid);
6870
6871 /* Compute regions for scheduling. */
6872 if (reload_completed
6873 || n_basic_blocks == 1
6874 || !flag_schedule_interblock)
6875 {
6876 find_single_block_region ();
6877 }
6878 else
6879 {
6880 /* Verify that a 'good' control flow graph can be built. */
6881 if (is_cfg_nonregular ())
6882 {
6883 find_single_block_region ();
6884 }
6885 else
6886 {
6887 sbitmap *dom;
6888 struct edge_list *edge_list;
6889
6890 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6891
6892 /* The scheduler runs after flow; therefore, we can't blindly call
6893 back into find_basic_blocks since doing so could invalidate the
6894 info in global_live_at_start.
6895
6896 Consider a block consisting entirely of dead stores; after life
6897 analysis it would be a block of NOTE_INSN_DELETED notes. If
6898 we call find_basic_blocks again, then the block would be removed
6899 entirely and invalidate our register live information.
6900
6901 We could (should?) recompute register live information. Doing
6902 so may even be beneficial. */
6903 edge_list = create_edge_list ();
6904
6905 /* Compute the dominators and post dominators. We don't
6906 currently use post dominators, but we should for
6907 speculative motion analysis. */
6908 compute_flow_dominators (dom, NULL);
6909
6910 /* build_control_flow will return nonzero if it detects unreachable
6911 blocks or any other irregularity with the cfg which prevents
6912 cross block scheduling. */
6913 if (build_control_flow (edge_list) != 0)
6914 find_single_block_region ();
6915 else
6916 find_rgns (edge_list, dom);
6917
6918 if (sched_verbose >= 3)
6919 debug_regions ();
6920
6921 /* We are done with flow's edge list. */
6922 free_edge_list (edge_list);
6923
6924 /* For now. This will move as more and more of haifa is converted
6925 to using the cfg code in flow.c. */
6926 free (dom);
6927 }
6928 }
6929
6930 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
6931
6932 init_alias_analysis ();
6933
6934 if (write_symbols != NO_DEBUG)
6935 {
6936 rtx line;
6937
6938 line_note_head = (rtx *) xcalloc (n_basic_blocks, sizeof (rtx));
6939
6940 /* Save-line-note-head:
6941 Determine the line-number at the start of each basic block.
6942 This must be computed and saved now, because after a basic block's
6943 predecessor has been scheduled, it is impossible to accurately
6944 determine the correct line number for the first insn of the block. */
6945
6946 for (b = 0; b < n_basic_blocks; b++)
6947 for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
6948 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
6949 {
6950 line_note_head[b] = line;
6951 break;
6952 }
6953 }
6954
6955 /* Find units used in this function, for visualization. */
6956 if (sched_verbose)
6957 init_target_units ();
6958
6959 /* ??? Add a NOTE after the last insn of the last basic block. It is not
6960 known why this is done. */
6961
6962 insn = BLOCK_END (n_basic_blocks - 1);
6963 if (NEXT_INSN (insn) == 0
6964 || (GET_CODE (insn) != NOTE
6965 && GET_CODE (insn) != CODE_LABEL
6966 /* Don't emit a NOTE if it would end up between an unconditional
6967 jump and a BARRIER. */
6968 && !(GET_CODE (insn) == JUMP_INSN
6969 && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
6970 emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));
6971
6972 /* Compute INSN_REG_WEIGHT for all blocks. We must do this before
6973 removing death notes. */
6974 for (b = n_basic_blocks - 1; b >= 0; b--)
6975 find_insn_reg_weight (b);
6976
6977 /* Remove all death notes from the subroutine. */
6978 for (rgn = 0; rgn < nr_regions; rgn++)
6979 {
6980 sbitmap_zero (blocks);
6981 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
6982 SET_BIT (blocks, rgn_bb_table [RGN_BLOCKS (rgn) + b]);
6983
6984 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
6985 }
6986
6987 /* Schedule every region in the subroutine. */
6988 for (rgn = 0; rgn < nr_regions; rgn++)
6989 schedule_region (rgn);
6990
6991 /* Update life analysis for the subroutine. Do single block regions
6992 first so that we can verify that live_at_start didn't change. Then
6993 do all other blocks. */
6994 /* ??? There is an outside possibility that update_life_info, or more
6995 to the point propagate_block, could get called with non-zero flags
6996 more than once for one basic block. This would be kinda bad if it
6997 were to happen, since REG_INFO would be accumulated twice for the
6998 block, and we'd have twice the REG_DEAD notes.
6999
7000 I'm fairly certain that this _shouldn't_ happen, since I don't think
7001 that live_at_start should change at region heads. Not sure what the
7002 best way to test for this kind of thing is... */
7003
7004 allocate_reg_life_data ();
7005 compute_bb_for_insn (max_uid);
7006
7007 any_large_regions = 0;
7008 sbitmap_ones (large_region_blocks);
7009
7010 for (rgn = 0; rgn < nr_regions; rgn++)
7011 if (RGN_NR_BLOCKS (rgn) > 1)
7012 any_large_regions = 1;
7013 else
7014 {
7015 sbitmap_zero (blocks);
7016 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
7017 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
7018
7019 /* Don't update reg info after reload, since that affects
7020 regs_ever_live, which should not change after reload. */
7021 update_life_info (blocks, UPDATE_LIFE_LOCAL,
7022 (reload_completed ? PROP_DEATH_NOTES
7023 : PROP_DEATH_NOTES | PROP_REG_INFO));
7024
7025 #ifndef HAVE_conditional_execution
7026 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
7027 a count of the conditional plus unconditional deaths for this to
7028 work out. */
7029 /* In the single block case, the count of registers that died should
7030 not have changed during the schedule. */
7031 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
7032 abort ();
7033 #endif
7034 }
7035
7036 if (any_large_regions)
7037 {
7038 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
7039 PROP_DEATH_NOTES | PROP_REG_INFO);
7040 }
7041
7042 /* Reposition the prologue and epilogue notes in case we moved the
7043 prologue/epilogue insns. */
7044 if (reload_completed)
7045 reposition_prologue_and_epilogue_notes (get_insns ());
7046
7047 /* Delete redundant line notes. */
7048 if (write_symbols != NO_DEBUG)
7049 rm_redundant_line_notes ();
7050
7051 if (sched_verbose)
7052 {
7053 if (reload_completed == 0 && flag_schedule_interblock)
7054 {
7055 fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n",
7056 nr_inter, nr_spec);
7057 }
7058 else
7059 {
7060 if (nr_inter > 0)
7061 abort ();
7062 }
7063 fprintf (dump, "\n\n");
7064 }
7065
7066 /* Clean up. */
7067 end_alias_analysis ();
7068
7069 if (true_dependency_cache)
7070 {
7071 free (true_dependency_cache);
7072 true_dependency_cache = NULL;
7073 }
7074 free (rgn_table);
7075 free (rgn_bb_table);
7076 free (block_to_bb);
7077 free (containing_rgn);
7078
7079 free (h_i_d);
7080
7081 if (write_symbols != NO_DEBUG)
7082 free (line_note_head);
7083
7084 if (edge_table)
7085 {
7086 free (edge_table);
7087 edge_table = NULL;
7088 }
7089
7090 if (in_edges)
7091 {
7092 free (in_edges);
7093 in_edges = NULL;
7094 }
7095 if (out_edges)
7096 {
7097 free (out_edges);
7098 out_edges = NULL;
7099 }
7100
7101 sbitmap_free (blocks);
7102 sbitmap_free (large_region_blocks);
7103
7104 free (deaths_in_region);
7105 }
7106
7107 #endif /* INSN_SCHEDULING */