1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3
4 This file is part of GNU CC.
5
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 /* These routines are meant to be used by various optimization
22 passes which can be modeled as lazy code motion problems,
23 including, but not limited to:
24
25 * Traditional partial redundancy elimination.
26
27 * Placement of caller/callee register save/restores.
28
29 * Load/store motion.
30
31 * Copy motion.
32
33 * Conversion of flat register files to a stacked register
34 model.
35
36 * Dead load/store elimination.
37
38 These routines accept as input:
39
40 * Basic block information (number of blocks, lists of
41 predecessors and successors). Note the granularity
42 does not need to be basic blocks; the nodes could be
43 statements or functions.
44
45 * Bitmaps of local properties (computed, transparent and
46 anticipatable expressions).
47
48 The output of these routines is a bitmap of redundant computations
49 and a bitmap of optimal placement points. */
50
51
52 #include "config.h"
53 #include "system.h"
54 #include "rtl.h"
55 #include "regs.h"
56 #include "hard-reg-set.h"
57 #include "flags.h"
58 #include "real.h"
59 #include "insn-config.h"
60 #include "recog.h"
61 #include "basic-block.h"
62 #include "tm_p.h"
63
64 /* We want target macros for the mode switching code to be able to refer
65 to instruction attribute values. */
66 #include "insn-attr.h"
67
68 /* Edge based LCM routines. */
69 static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *,
70 sbitmap *, sbitmap *));
71 static void compute_earliest PARAMS ((struct edge_list *, int,
72 sbitmap *, sbitmap *,
73 sbitmap *, sbitmap *,
74 sbitmap *));
75 static void compute_laterin PARAMS ((struct edge_list *, sbitmap *,
76 sbitmap *, sbitmap *,
77 sbitmap *));
78 static void compute_insert_delete PARAMS ((struct edge_list *edge_list,
79 sbitmap *, sbitmap *,
80 sbitmap *, sbitmap *,
81 sbitmap *));
82
83 /* Edge based LCM routines on a reverse flowgraph. */
84 static void compute_farthest PARAMS ((struct edge_list *, int,
85 sbitmap *, sbitmap *,
86 sbitmap*, sbitmap *,
87 sbitmap *));
88 static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *,
89 sbitmap *, sbitmap *,
90 sbitmap *));
91 static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list,
92 sbitmap *, sbitmap *,
93 sbitmap *, sbitmap *,
94 sbitmap *));
95 \f
96 /* Edge based lcm routines. */
97
98 /* Compute expression anticipatability at entrance and exit of each block.
99 This is done based on the flow graph, and not on the pred-succ lists.
100 Other than that, it's pretty much identical to compute_antinout. */
101
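/* As an informal sketch (the sbitmap calls below are authoritative),
   the equations solved here are

     ANTOUT[bb] = intersection of ANTIN over all successors of bb,
                  or the empty set if bb is a predecessor of EXIT;
     ANTIN[bb]  = ANTLOC[bb] | (TRANSP[bb] & ANTOUT[bb]);

   iterated to a fixed point from an all-ones ANTIN (the maximal
   solution).  */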
102 static void
103 compute_antinout_edge (antloc, transp, antin, antout)
104 sbitmap *antloc;
105 sbitmap *transp;
106 sbitmap *antin;
107 sbitmap *antout;
108 {
109 int bb;
110 edge e;
111 basic_block *worklist, *tos;
112
113 /* Allocate a worklist array/queue. Entries are only added to the
114 list if they were not already on the list. So the size is
115 bounded by the number of basic blocks. */
116 tos = worklist
117 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
118
119 /* We want a maximal solution, so make an optimistic initialization of
120 ANTIN. */
121 sbitmap_vector_ones (antin, n_basic_blocks);
122
123 /* Put every block on the worklist; this is necessary because of the
124 optimistic initialization of ANTIN above. */
125 for (bb = 0; bb < n_basic_blocks; bb++)
126 {
127 *tos++ = BASIC_BLOCK (bb);
128 BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
129 }
130
131 /* Mark blocks which are predecessors of the exit block so that we
132 can easily identify them below. */
133 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
134 e->src->aux = EXIT_BLOCK_PTR;
135
136 /* Iterate until the worklist is empty. */
137 while (tos != worklist)
138 {
139 /* Take the first entry off the worklist. */
140 basic_block b = *--tos;
141 bb = b->index;
142
143 if (b->aux == EXIT_BLOCK_PTR)
144 /* Do not clear the aux field for blocks which are predecessors of
145 the EXIT block. That way we never add them to the worklist
146 again. */
147 sbitmap_zero (antout[bb]);
148 else
149 {
150 /* Clear the aux field of this block so that it can be added to
151 the worklist again if necessary. */
152 b->aux = NULL;
153 sbitmap_intersection_of_succs (antout[bb], antin, bb);
154 }
155
156 if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], transp[bb], antout[bb]))
157 /* If the in state of this block changed, then we need
158 to add the predecessors of this block to the worklist
159 if they are not already on the worklist. */
160 for (e = b->pred; e; e = e->pred_next)
161 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
162 {
163 *tos++ = e->src;
164 e->src->aux = e;
165 }
166 }
167
168 free (tos);
169 }
170
171 /* Compute the earliest vector for edge based lcm. */
172
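/* Informally, for an edge (pred,succ) the loop below computes

     EARLIEST = (ANTIN[succ] & ~AVOUT[pred])
                & (KILL[pred] | ~ANTOUT[pred]);

   with EARLIEST = ANTIN[succ] when pred is the ENTRY block and
   EARLIEST = 0 when succ is the EXIT block.  */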
173 static void
174 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
175 struct edge_list *edge_list;
176 int n_exprs;
177 sbitmap *antin, *antout, *avout, *kill, *earliest;
178 {
179 sbitmap difference, temp_bitmap;
180 int x, num_edges;
181 basic_block pred, succ;
182
183 num_edges = NUM_EDGES (edge_list);
184
185 difference = sbitmap_alloc (n_exprs);
186 temp_bitmap = sbitmap_alloc (n_exprs);
187
188 for (x = 0; x < num_edges; x++)
189 {
190 pred = INDEX_EDGE_PRED_BB (edge_list, x);
191 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
192 if (pred == ENTRY_BLOCK_PTR)
193 sbitmap_copy (earliest[x], antin[succ->index]);
194 else
195 {
196 if (succ == EXIT_BLOCK_PTR)
197 sbitmap_zero (earliest[x]);
198 else
199 {
200 sbitmap_difference (difference, antin[succ->index],
201 avout[pred->index]);
202 sbitmap_not (temp_bitmap, antout[pred->index]);
203 sbitmap_a_and_b_or_c (earliest[x], difference,
204 kill[pred->index], temp_bitmap);
205 }
206 }
207 }
208
209 free (temp_bitmap);
210 free (difference);
211 }
212
213 /* later(p,s) is dependent on the calculation of laterin(p).
214 laterin(p) is dependent on the calculation of later(p2,p).
215
216 laterin(ENTRY) is defined as all 0's
217 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
218 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
219
220 If we progress in this manner, starting with all basic blocks
221 in the work list, anytime we change later(bb), we need to add
222 succs(bb) to the worklist if they are not already on the worklist.
223
224 Boundary conditions:
225
226 We prime the worklist with all the normal basic blocks. The ENTRY block can
227 never be added to the worklist since it is never the successor of any
228 block. We explicitly prevent the EXIT block from being added to the
229 worklist.
230
231 We optimistically initialize LATER. That is the only time this routine
232 will compute LATER for an edge out of the entry block since the entry
233 block is never on the worklist. Thus, LATERIN is neither used nor
234 computed for the ENTRY block.
235
236 Since the EXIT block is never added to the worklist, we will neither
237 use nor compute LATERIN for the exit block. Edges which reach the
238 EXIT block are handled in the normal fashion inside the loop. However,
239 the insertion/deletion computation needs LATERIN(EXIT), so we have
240 to compute it. */
241
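/* Informally, the equations solved below are

     LATERIN[b]  = intersection of LATER over all edges entering b;
     LATER(p,s)  = EARLIEST(p,s) | (LATERIN[p] & ~ANTLOC[p]);

   again iterated to a fixed point from an optimistic (all-ones)
   initialization of LATER.  */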
242 static void
243 compute_laterin (edge_list, earliest, antloc, later, laterin)
244 struct edge_list *edge_list;
245 sbitmap *earliest, *antloc, *later, *laterin;
246 {
247 int bb, num_edges, i;
248 edge e;
249 basic_block *worklist, *tos;
250
251 num_edges = NUM_EDGES (edge_list);
252
253 /* Allocate a worklist array/queue. Entries are only added to the
254 list if they were not already on the list. So the size is
255 bounded by the number of basic blocks. */
256 tos = worklist
257 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
258
259 /* Initialize a mapping from each edge to its index. */
260 for (i = 0; i < num_edges; i++)
261 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
262
263 /* We want a maximal solution, so initially consider LATER true for
264 all edges. This allows propagation through a loop since the incoming
265 loop edge will have LATER set, so if all the other incoming edges
266 to the loop are set, then LATERIN will be set for the head of the
267 loop.
268
269 If the optimistic setting of LATER on that edge was incorrect (for
270 example the expression is ANTLOC in a block within the loop) then
271 this algorithm will detect it when we process the block at the head
272 of the optimistic edge. That will requeue the affected blocks. */
273 sbitmap_vector_ones (later, num_edges);
274
275 /* Note that even though we want an optimistic setting of LATER, we
276 do not want to be overly optimistic. Consider an outgoing edge from
277 the entry block. That edge should always have a LATER value the
278 same as EARLIEST for that edge. */
279 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
280 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
281
282 /* Add all the blocks to the worklist. This prevents an early exit from
283 the loop given our optimistic initialization of LATER above. */
284 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
285 {
286 basic_block b = BASIC_BLOCK (bb);
287 *tos++ = b;
288 b->aux = b;
289 }
290
291 /* Iterate until the worklist is empty. */
292 while (tos != worklist)
293 {
294 /* Take the first entry off the worklist. */
295 basic_block b = *--tos;
296 b->aux = NULL;
297
298 /* Compute the intersection of LATERIN for each incoming edge to B. */
299 bb = b->index;
300 sbitmap_ones (laterin[bb]);
301 for (e = b->pred; e != NULL; e = e->pred_next)
302 sbitmap_a_and_b (laterin[bb], laterin[bb], later[(size_t)e->aux]);
303
304 /* Calculate LATER for all outgoing edges. */
305 for (e = b->succ; e != NULL; e = e->succ_next)
306 if (sbitmap_union_of_diff (later[(size_t) e->aux],
307 earliest[(size_t) e->aux],
308 laterin[e->src->index],
309 antloc[e->src->index])
310 /* If LATER for an outgoing edge was changed, then we need
311 to add the target of the outgoing edge to the worklist. */
312 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
313 {
314 *tos++ = e->dest;
315 e->dest->aux = e;
316 }
317 }
318
319 /* Computation of insertion and deletion points requires computing LATERIN
320 for the EXIT block. We allocated an extra entry in the LATERIN array
321 for just this purpose. */
322 sbitmap_ones (laterin[n_basic_blocks]);
323 for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
324 sbitmap_a_and_b (laterin[n_basic_blocks],
325 laterin[n_basic_blocks],
326 later[(size_t) e->aux]);
327
328 free (tos);
329 }
330
331 /* Compute the insertion and deletion points for edge based LCM. */
332
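/* Informally, the two loops below compute

     DELETE[b]   = ANTLOC[b] & ~LATERIN[b];
     INSERT(p,s) = LATER(p,s) & ~LATERIN[s];

   where LATERIN for the EXIT block lives in the extra slot
   laterin[n_basic_blocks].  */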
333 static void
334 compute_insert_delete (edge_list, antloc, later, laterin,
335 insert, delete)
336 struct edge_list *edge_list;
337 sbitmap *antloc, *later, *laterin, *insert, *delete;
338 {
339 int x;
340
341 for (x = 0; x < n_basic_blocks; x++)
342 sbitmap_difference (delete[x], antloc[x], laterin[x]);
343
344 for (x = 0; x < NUM_EDGES (edge_list); x++)
345 {
346 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
347
348 if (b == EXIT_BLOCK_PTR)
349 sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]);
350 else
351 sbitmap_difference (insert[x], later[x], laterin[b->index]);
352 }
353 }
354
355 /* Given local properties TRANSP, ANTLOC, AVLOC, KILL return the insert and
356 delete vectors for edge based LCM. Returns an edgelist which is used to
357 map the insert vector to what edge an expression should be inserted on. */
358
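/* A minimal sketch of a caller, assuming the local property vectors
   have already been computed; process_insertion is a hypothetical
   client routine, not part of this file:

     sbitmap *insert, *delete;
     struct edge_list *edge_list
       = pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill,
                       &insert, &delete);
     int e, expr;

     for (e = 0; e < NUM_EDGES (edge_list); e++)
       for (expr = 0; expr < n_exprs; expr++)
         if (TEST_BIT (insert[e], expr))
           process_insertion (INDEX_EDGE (edge_list, e), expr);

   INSERT is indexed by edge number, DELETE by block index; the
   caller owns both vectors and the returned edge list.  */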
359 struct edge_list *
360 pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
361 FILE *file ATTRIBUTE_UNUSED;
362 int n_exprs;
363 sbitmap *transp;
364 sbitmap *avloc;
365 sbitmap *antloc;
366 sbitmap *kill;
367 sbitmap **insert;
368 sbitmap **delete;
369 {
370 sbitmap *antin, *antout, *earliest;
371 sbitmap *avin, *avout;
372 sbitmap *later, *laterin;
373 struct edge_list *edge_list;
374 int num_edges;
375
376 edge_list = create_edge_list ();
377 num_edges = NUM_EDGES (edge_list);
378
379 #ifdef LCM_DEBUG_INFO
380 if (file)
381 {
382 fprintf (file, "Edge List:\n");
383 verify_edge_list (file, edge_list);
384 print_edge_list (file, edge_list);
385 dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
386 dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks);
387 dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks);
388 dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks);
389 }
390 #endif
391
392 /* Compute global availability. */
393 avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
394 avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
395 compute_available (avloc, kill, avout, avin);
396 free (avin);
397
398 /* Compute global anticipatability. */
399 antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
400 antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
401 compute_antinout_edge (antloc, transp, antin, antout);
402
403 #ifdef LCM_DEBUG_INFO
404 if (file)
405 {
406 dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks);
407 dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks);
408 }
409 #endif
410
411 /* Compute earliestness. */
412 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
413 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
414
415 #ifdef LCM_DEBUG_INFO
416 if (file)
417 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
418 #endif
419
420 free (antout);
421 free (antin);
422 free (avout);
423
424 later = sbitmap_vector_alloc (num_edges, n_exprs);
425
426 /* Allocate an extra element for the exit block in the laterin vector. */
427 laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
428 compute_laterin (edge_list, earliest, antloc, later, laterin);
429
430 #ifdef LCM_DEBUG_INFO
431 if (file)
432 {
433 dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1);
434 dump_sbitmap_vector (file, "later", "", later, num_edges);
435 }
436 #endif
437
438 free (earliest);
439
440 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
441 *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
442 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
443
444 free (laterin);
445 free (later);
446
447 #ifdef LCM_DEBUG_INFO
448 if (file)
449 {
450 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
451 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
452 n_basic_blocks);
453 }
454 #endif
455
456 return edge_list;
457 }
458
459 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
460 Iterate over the worklist until a fixed point is reached. */
461
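/* Informally, the iteration below solves

     AVIN[bb]  = intersection of AVOUT over all predecessors of bb,
                 or the empty set if bb is a successor of ENTRY;
     AVOUT[bb] = AVLOC[bb] | (AVIN[bb] & ~KILL[bb]);

   starting from an all-ones AVOUT, which yields the maximal
   solution.  */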
462 void
463 compute_available (avloc, kill, avout, avin)
464 sbitmap *avloc, *kill, *avout, *avin;
465 {
466 int bb;
467 edge e;
468 basic_block *worklist, *tos;
469
470 /* Allocate a worklist array/queue. Entries are only added to the
471 list if they were not already on the list. So the size is
472 bounded by the number of basic blocks. */
473 tos = worklist
474 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
475
476 /* We want a maximal solution. */
477 sbitmap_vector_ones (avout, n_basic_blocks);
478
479 /* Put every block on the worklist; this is necessary because of the
480 optimistic initialization of AVOUT above. */
481 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
482 {
483 *tos++ = BASIC_BLOCK (bb);
484 BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
485 }
486
487 /* Mark blocks which are successors of the entry block so that we
488 can easily identify them below. */
489 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
490 e->dest->aux = ENTRY_BLOCK_PTR;
491
492 /* Iterate until the worklist is empty. */
493 while (tos != worklist)
494 {
495 /* Take the first entry off the worklist. */
496 basic_block b = *--tos;
497 bb = b->index;
498
499 /* If one of the predecessor blocks is the ENTRY block, then the
500 intersection of avouts is the null set. We can identify such blocks
501 by the special value in the AUX field in the block structure. */
502 if (b->aux == ENTRY_BLOCK_PTR)
503 /* Do not clear the aux field for blocks which are successors of the
504 ENTRY block. That way we never add them to the worklist again. */
505 sbitmap_zero (avin[bb]);
506 else
507 {
508 /* Clear the aux field of this block so that it can be added to
509 the worklist again if necessary. */
510 b->aux = NULL;
511 sbitmap_intersection_of_preds (avin[bb], avout, bb);
512 }
513
514 if (sbitmap_union_of_diff (avout[bb], avloc[bb], avin[bb], kill[bb]))
515 /* If the out state of this block changed, then we need
516 to add the successors of this block to the worklist
517 if they are not already on the worklist. */
518 for (e = b->succ; e; e = e->succ_next)
519 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
520 {
521 *tos++ = e->dest;
522 e->dest->aux = e;
523 }
524 }
525
526 free (tos);
527 }
528
529 /* Compute the farthest vector for edge based lcm. */
530
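/* Informally, this mirrors compute_earliest on the reverse graph;
   for an edge (pred,succ) the loop below computes

     FARTHEST = (ST_AVOUT[pred] & ~ST_ANTIN[succ])
                & (KILL[succ] | ~ST_AVIN[succ]);

   with FARTHEST = ST_AVOUT[pred] when succ is the EXIT block and
   FARTHEST = 0 when pred is the ENTRY block.  */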
531 static void
532 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
533 kill, farthest)
534 struct edge_list *edge_list;
535 int n_exprs;
536 sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
537 {
538 sbitmap difference, temp_bitmap;
539 int x, num_edges;
540 basic_block pred, succ;
541
542 num_edges = NUM_EDGES (edge_list);
543
544 difference = sbitmap_alloc (n_exprs);
545 temp_bitmap = sbitmap_alloc (n_exprs);
546
547 for (x = 0; x < num_edges; x++)
548 {
549 pred = INDEX_EDGE_PRED_BB (edge_list, x);
550 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
551 if (succ == EXIT_BLOCK_PTR)
552 sbitmap_copy (farthest[x], st_avout[pred->index]);
553 else
554 {
555 if (pred == ENTRY_BLOCK_PTR)
556 sbitmap_zero (farthest[x]);
557 else
558 {
559 sbitmap_difference (difference, st_avout[pred->index],
560 st_antin[succ->index]);
561 sbitmap_not (temp_bitmap, st_avin[succ->index]);
562 sbitmap_a_and_b_or_c (farthest[x], difference,
563 kill[succ->index], temp_bitmap);
564 }
565 }
566 }
567
568 free (temp_bitmap);
569 free (difference);
570 }
571
572 /* Compute nearer and nearerout vectors for edge based lcm.
573
574 This is the mirror of compute_laterin, additional comments on the
575 implementation can be found before compute_laterin. */
576
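/* Informally, the mirrored equations are

     NEAREROUT[b] = intersection of NEARER over all edges leaving b;
     NEARER(p,s)  = FARTHEST(p,s) | (NEAREROUT[s] & ~ST_AVLOC[s]);

   iterated to a fixed point from an all-ones NEARER.  */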
577 static void
578 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
579 struct edge_list *edge_list;
580 sbitmap *farthest, *st_avloc, *nearer, *nearerout;
581 {
582 int bb, num_edges, i;
583 edge e;
584 basic_block *worklist, *tos;
585
586 num_edges = NUM_EDGES (edge_list);
587
588 /* Allocate a worklist array/queue. Entries are only added to the
589 list if they were not already on the list. So the size is
590 bounded by the number of basic blocks. */
591 tos = worklist
592 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
593
594 /* Build a mapping from each edge to its index; NEARER itself is
595 initialized just below. */
596 for (i = 0; i < num_edges; i++)
597 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
598
599 /* We want a maximal solution. */
600 sbitmap_vector_ones (nearer, num_edges);
601
602 /* Note that even though we want an optimistic setting of NEARER, we
603 do not want to be overly optimistic. Consider an incoming edge to
604 the exit block. That edge should always have a NEARER value the
605 same as FARTHEST for that edge. */
606 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
607 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
608
609 /* Add all the blocks to the worklist. This prevents an early exit
610 from the loop given our optimistic initialization of NEARER. */
611 for (bb = 0; bb < n_basic_blocks; bb++)
612 {
613 basic_block b = BASIC_BLOCK (bb);
614 *tos++ = b;
615 b->aux = b;
616 }
617
618 /* Iterate until the worklist is empty. */
619 while (tos != worklist)
620 {
621 /* Take the first entry off the worklist. */
622 basic_block b = *--tos;
623 b->aux = NULL;
624
625 /* Compute the intersection of NEARER for each outgoing edge from B. */
626 bb = b->index;
627 sbitmap_ones (nearerout[bb]);
628 for (e = b->succ; e != NULL; e = e->succ_next)
629 sbitmap_a_and_b (nearerout[bb], nearerout[bb],
630 nearer[(size_t) e->aux]);
631
632 /* Calculate NEARER for all incoming edges. */
633 for (e = b->pred; e != NULL; e = e->pred_next)
634 if (sbitmap_union_of_diff (nearer[(size_t) e->aux],
635 farthest[(size_t) e->aux],
636 nearerout[e->dest->index],
637 st_avloc[e->dest->index])
638 /* If NEARER for an incoming edge was changed, then we need
639 to add the source of the incoming edge to the worklist. */
640 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
641 {
642 *tos++ = e->src;
643 e->src->aux = e;
644 }
645 }
646
647 /* Computation of insertion and deletion points requires computing NEAREROUT
648 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
649 for just this purpose. */
650 sbitmap_ones (nearerout[n_basic_blocks]);
651 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
652 sbitmap_a_and_b (nearerout[n_basic_blocks],
653 nearerout[n_basic_blocks],
654 nearer[(size_t) e->aux]);
655
656 free (tos);
657 }
658
659 /* Compute the insertion and deletion points for edge based LCM. */
660
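/* Informally, mirroring compute_insert_delete:

     DELETE[b]   = ST_AVLOC[b] & ~NEAREROUT[b];
     INSERT(p,s) = NEARER(p,s) & ~NEAREROUT[p];

   where NEAREROUT for the ENTRY block lives in the extra slot
   nearerout[n_basic_blocks].  */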
661 static void
662 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
663 insert, delete)
664 struct edge_list *edge_list;
665 sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
666 {
667 int x;
668
669 for (x = 0; x < n_basic_blocks; x++)
670 sbitmap_difference (delete[x], st_avloc[x], nearerout[x]);
671
672 for (x = 0; x < NUM_EDGES (edge_list); x++)
673 {
674 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
675 if (b == ENTRY_BLOCK_PTR)
676 sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]);
677 else
678 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
679 }
680 }
681
682 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
683 insert and delete vectors for edge based reverse LCM. Returns an
684 edgelist which is used to map the insert vector to what edge
685 an expression should be inserted on. */
686
687 struct edge_list *
688 pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
689 insert, delete)
690 FILE *file ATTRIBUTE_UNUSED;
691 int n_exprs;
692 sbitmap *transp;
693 sbitmap *st_avloc;
694 sbitmap *st_antloc;
695 sbitmap *kill;
696 sbitmap **insert;
697 sbitmap **delete;
698 {
699 sbitmap *st_antin, *st_antout;
700 sbitmap *st_avout, *st_avin, *farthest;
701 sbitmap *nearer, *nearerout;
702 struct edge_list *edge_list;
703 int num_edges;
704
705 edge_list = create_edge_list ();
706 num_edges = NUM_EDGES (edge_list);
707
708 st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
709 st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
710 sbitmap_vector_zero (st_antin, n_basic_blocks);
711 sbitmap_vector_zero (st_antout, n_basic_blocks);
712 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
713
714 /* Compute global availability. */
715 st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
716 st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
717 compute_available (st_avloc, kill, st_avout, st_avin);
718
719 #ifdef LCM_DEBUG_INFO
720 if (file)
721 {
722 fprintf (file, "Edge List:\n");
723 verify_edge_list (file, edge_list);
724 print_edge_list (file, edge_list);
725 dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
726 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks);
727 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks);
728 dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks);
729 dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks);
730 dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks);
731 }
732 #endif
733
734 #ifdef LCM_DEBUG_INFO
735 if (file)
736 {
737 dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks);
738 dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks);
739 }
740 #endif
741
742 /* Compute farthestness. */
743 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
744 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
745 kill, farthest);
746
747 #ifdef LCM_DEBUG_INFO
748 if (file)
749 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
750 #endif
751
752 free (st_avin);
753 free (st_avout);
754
755 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
756
757 /* Allocate an extra element for the entry block. */
758 nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
759 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
760
761 #ifdef LCM_DEBUG_INFO
762 if (file)
763 {
764 dump_sbitmap_vector (file, "nearerout", "", nearerout,
765 n_basic_blocks + 1);
766 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
767 }
768 #endif
769
770 free (farthest);
771
772 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
773 *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
774 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
775 *insert, *delete);
776
777 free (nearerout);
778 free (nearer);
779
780 #ifdef LCM_DEBUG_INFO
781 if (file)
782 {
783 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
784 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
785 n_basic_blocks);
786 }
787 #endif
788
789 return edge_list;
790 }
791
792 /* Mode switching:
793
794 The algorithm for setting the modes consists of scanning the insn list
795 and finding all the insns which require a specific mode. Each insn gets
796 a unique struct seginfo element. These structures are inserted into a list
797 for each basic block. For each entity, there is an array of bb_info over
798 the flow graph basic blocks (local var 'bb_info'); each element contains a
799 list of all insns within that basic block, in the order they are encountered.
800
801 For each entity, any basic block WITHOUT any insns requiring a specific
802 mode is given a single entry without a mode. (Each basic block
803 in the flow graph must have at least one entry in the segment table.)
804
805 The LCM algorithm is then run over the flow graph to determine where to
806 place the sets to the highest-priority mode with respect to the first
807 insn in any one block. Any adjustments required to the transparency
808 vectors are made, then the next iteration starts for the next-lower
809 priority mode, until for each entity all modes are exhausted.
810
811 More details are located in the code for optimize_mode_switching(). */
812
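/* As an illustrative sketch (not taken from any particular port),
   a target with one entity and two modes might enable this pass
   with definitions along these lines; my_mode_needed and
   my_emit_mode_set are hypothetical target routines:

     #define OPTIMIZE_MODE_SWITCHING(ENTITY) 1
     #define NUM_MODES_FOR_MODE_SWITCHING { 2 }
     #define MODE_NEEDED(ENTITY, INSN) my_mode_needed (INSN)
     #define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
     #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
       my_emit_mode_set (MODE)

   Here mode 2 serves as the "no mode" value, since num_modes[e]
   is used as no_mode below.  */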
813 /* This structure contains the information for each insn which requires
814 a specific mode to be set.
815 MODE is the mode this insn must be executed in.
816 INSN_PTR is the insn to be executed.
817 BBNUM is the flow graph basic block this insn occurs in.
818 NEXT is the next insn in the same basic block. */
819 struct seginfo
820 {
821 int mode;
822 rtx insn_ptr;
823 int bbnum;
824 struct seginfo *next;
825 HARD_REG_SET regs_live;
826 };
827
828 struct bb_info
829 {
830 struct seginfo *seginfo;
831 int computing;
832 };
833
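/* For example (purely illustrative): a block whose insns need modes
   M1, M1, M2 in that order gets a seginfo chain of two entries, one
   at the first M1 insn and one at the M2 insn, and its bb_info
   records M2 in COMPUTING as the mode live on exit.  */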
834 /* These bitmaps are used for the LCM algorithm. */
835
836 #ifdef OPTIMIZE_MODE_SWITCHING
837 static sbitmap *antic;
838 static sbitmap *transp;
839 static sbitmap *comp;
840 static sbitmap *delete;
841 static sbitmap *insert;
842
843 static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
844 static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
845 static void reg_dies PARAMS ((rtx, HARD_REG_SET));
846 static void reg_becomes_live PARAMS ((rtx, rtx, void *));
847 static void make_preds_opaque PARAMS ((basic_block, int));
848 #endif
849 \f
850 #ifdef OPTIMIZE_MODE_SWITCHING
851
852 /* Allocate a new SEGINFO structure, initialized with the MODE, INSN,
853 basic block number BB and REGS_LIVE parameters. */
854
855 static struct seginfo *
856 new_seginfo (mode, insn, bb, regs_live)
857 int mode;
858 rtx insn;
859 int bb;
860 HARD_REG_SET regs_live;
861 {
862 struct seginfo *ptr;
863 ptr = xmalloc (sizeof (struct seginfo));
864 ptr->mode = mode;
865 ptr->insn_ptr = insn;
866 ptr->bbnum = bb;
867 ptr->next = NULL;
868 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
869 return ptr;
870 }
871
872 /* Add a seginfo element to the end of a list.
873 HEAD is a pointer to the list beginning.
874 INFO is the structure to be linked in. */
875
876 static void
877 add_seginfo (head, info)
878 struct bb_info *head;
879 struct seginfo *info;
880 {
881 struct seginfo *ptr;
882
883 if (head->seginfo == NULL)
884 head->seginfo = info;
885 else
886 {
887 ptr = head->seginfo;
888 while (ptr->next != NULL)
889 ptr = ptr->next;
890 ptr->next = info;
891 }
892 }
893
894 /* Make all predecessors of basic block B opaque, recursively, till we hit
895 some that are already non-transparent, or an edge where aux is set; that
896 denotes that a mode set is to be done on that edge.
897 J is the bit number in the bitmaps that corresponds to the entity that
898 we are currently handling mode-switching for. */
899
900 static void
901 make_preds_opaque (b, j)
902 basic_block b;
903 int j;
904 {
905 edge e;
906
907 for (e = b->pred; e; e = e->pred_next)
908 {
909 basic_block pb = e->src;
910
911 if (e->aux || ! TEST_BIT (transp[pb->index], j))
912 continue;
913
914 RESET_BIT (transp[pb->index], j);
915 make_preds_opaque (pb, j);
916 }
917 }
918
919 /* Record in LIVE that register REG died. */
920
921 static void
922 reg_dies (reg, live)
923 rtx reg;
924 HARD_REG_SET live;
925 {
926 int regno, nregs;
927
928 if (GET_CODE (reg) != REG)
929 return;
930
931 regno = REGNO (reg);
932 if (regno < FIRST_PSEUDO_REGISTER)
933 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
934 nregs--)
935 CLEAR_HARD_REG_BIT (live, regno + nregs);
936 }
937
938 /* Record in LIVE that register REG became live.
939 This is called via note_stores. */
940
941 static void
942 reg_becomes_live (reg, setter, live)
943 rtx reg;
944 rtx setter ATTRIBUTE_UNUSED;
945 void *live;
946 {
947 int regno, nregs;
948
949 if (GET_CODE (reg) == SUBREG)
950 reg = SUBREG_REG (reg);
951
952 if (GET_CODE (reg) != REG)
953 return;
954
955 regno = REGNO (reg);
956 if (regno < FIRST_PSEUDO_REGISTER)
957 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
958 nregs--)
959 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
960 }
961
962 /* Find all insns that need a particular mode setting, and insert the
963 necessary mode switches. Return true if we did work. */
964
965 int
966 optimize_mode_switching (file)
967 FILE *file;
968 {
969 rtx insn;
970 int bb, e;
971 edge eg;
972 int need_commit = 0;
973 sbitmap *kill;
974 struct edge_list *edge_list;
975 static int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
976 #define N_ENTITIES (sizeof num_modes / sizeof (int))
977 int entity_map[N_ENTITIES];
978 struct bb_info *bb_info[N_ENTITIES];
979 int i, j;
980 int n_entities;
981 int max_num_modes = 0;
982
983 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
984 if (OPTIMIZE_MODE_SWITCHING (e))
985 {
986 /* Create the list of segments within each basic block. */
987 bb_info[n_entities]
988 = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info);
989 entity_map[n_entities++] = e;
990 if (num_modes[e] > max_num_modes)
991 max_num_modes = num_modes[e];
992 }
993
994 if (! n_entities)
995 return 0;
996
997 #ifdef MODE_USES_IN_EXIT_BLOCK
998 /* For some ABIs a particular mode setting is required at function exit. */
999
1000 for (eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1001 {
1002 int bb = eg->src->index;
1003 rtx insn = BLOCK_END (bb);
1004 rtx use = MODE_USES_IN_EXIT_BLOCK;
1005
1006 /* If the block ends with the use of the return value
1007 and / or a return, insert the new use(s) in front of them. */
1008 while ((GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == USE)
1009 || GET_CODE (insn) == JUMP_INSN)
1010 insn = PREV_INSN (insn);
1011
1012 use = emit_insn_after (use, insn);
1013 if (insn == BLOCK_END (bb))
1014 BLOCK_END (bb) = use;
1015 else if (NEXT_INSN (use) == BLOCK_HEAD (bb))
1016 BLOCK_HEAD (bb) = NEXT_INSN (insn);
1017 }
1018 #endif /* MODE_USES_IN_EXIT_BLOCK */
1019
1020 /* Create the bitmap vectors. */
1021
1022 antic = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1023 transp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1024 comp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1025
1026 sbitmap_vector_ones (transp, n_basic_blocks);
1027
1028 for (j = n_entities - 1; j >= 0; j--)
1029 {
1030 int e = entity_map[j];
1031 int no_mode = num_modes[e];
1032 struct bb_info *info = bb_info[j];
1033
1034 /* Determine what mode (if any) the first use of entity E needs.
1035 This will be the mode that is anticipatable for this block.
1036 Also compute the initial transparency settings. */
1037 for (bb = 0 ; bb < n_basic_blocks; bb++)
1038 {
1039 struct seginfo *ptr;
1040 int last_mode = no_mode;
1041 HARD_REG_SET live_now;
1042
1043 REG_SET_TO_HARD_REG_SET (live_now,
1044 BASIC_BLOCK (bb)->global_live_at_start);
1045 for (insn = BLOCK_HEAD (bb);
1046 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
1047 insn = NEXT_INSN (insn))
1048 {
1049 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1050 {
1051 int mode = MODE_NEEDED (e, insn);
1052 rtx link;
1053
1054 if (mode != no_mode && mode != last_mode)
1055 {
1056 last_mode = mode;
1057 ptr = new_seginfo (mode, insn, bb, live_now);
1058 add_seginfo (info + bb, ptr);
1059 RESET_BIT (transp[bb], j);
1060 }
1061
1062 /* Update LIVE_NOW. */
1063 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1064 if (REG_NOTE_KIND (link) == REG_DEAD)
1065 reg_dies (XEXP (link, 0), live_now);
1066
1067 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1068 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1069 if (REG_NOTE_KIND (link) == REG_UNUSED)
1070 reg_dies (XEXP (link, 0), live_now);
1071 }
1072 }
1073
1074 info[bb].computing = last_mode;
1075 /* Check for blocks without ANY mode requirements. */
1076 if (last_mode == no_mode)
1077 {
1078 ptr = new_seginfo (no_mode, insn, bb, live_now);
1079 add_seginfo (info + bb, ptr);
1080 }
1081 }
1082 #ifdef MODE_AT_ENTRY
1083 {
1084 int mode = MODE_AT_ENTRY (e);
1085
1086 if (mode != no_mode)
1087 {
1088 for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next)
1089 {
1090 bb = eg->dest->index;
1091
1092 /* By always making this nontransparent, we save
1093 an extra check in make_preds_opaque. We also
1094 need this to avoid confusing pre_edge_lcm when
1095 antic is cleared but transp and comp are set. */
1096 RESET_BIT (transp[bb], j);
1097
1098 /* If the block already has MODE, pretend it
1099 has none (because we don't need to set it),
1100 but retain whatever mode it computes. */
1101 if (info[bb].seginfo->mode == mode)
1102 info[bb].seginfo->mode = no_mode;
1103
1104 /* Insert a fake computing definition of MODE into entry
1105 blocks which compute no mode. This represents the mode on
1106 entry. */
1107 else if (info[bb].computing == no_mode)
1108 {
1109 info[bb].computing = mode;
1110 info[bb].seginfo->mode = no_mode;
1111 }
1112 }
1113 }
1114 }
1115 #endif /* MODE_AT_ENTRY */
1116 }
1117
1118 kill = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1119 for (i = 0; i < max_num_modes; i++)
1120 {
1121 int current_mode[N_ENTITIES];
1122
1123 /* Set the anticipatable and computing arrays. */
1124 sbitmap_vector_zero (antic, n_basic_blocks);
1125 sbitmap_vector_zero (comp, n_basic_blocks);
1126 for (j = n_entities - 1; j >= 0; j--)
1127 {
1128 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1129 struct bb_info *info = bb_info[j];
1130
1131 for (bb = 0 ; bb < n_basic_blocks; bb++)
1132 {
1133 if (info[bb].seginfo->mode == m)
1134 SET_BIT (antic[bb], j);
1135
1136 if (info[bb].computing == m)
1137 SET_BIT (comp[bb], j);
1138 }
1139 }
1140
1141 /* Calculate the optimal locations for the placement of
1142 mode switches to modes with priority I. */
1143
1144 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1145 sbitmap_not (kill[bb], transp[bb]);
1146 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1147 kill, &insert, &delete);
1148
1149 for (j = n_entities - 1; j >= 0; j--)
1150 {
1151 /* Insert all mode sets whose placement was determined by LCM. */
1152 int no_mode = num_modes[entity_map[j]];
1153
1154 /* Wherever we have moved a mode setting upwards in the flow graph,
1155 the blocks between the new setting site and the now redundant
1156 computation cease to be transparent for any lower-priority
1157 mode of the same entity. First mark each insertion site edge
1158 via its aux field, then propagate the new non-transparency
1159 from the redundant computation upwards until we hit an
1160 insertion site or an already non-transparent block. */
1161 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1162 {
1163 edge eg = INDEX_EDGE (edge_list, e);
1164 int mode;
1165 basic_block src_bb;
1166 HARD_REG_SET live_at_edge;
1167 rtx mode_set;
1168
1169 eg->aux = 0;
1170
1171 if (! TEST_BIT (insert[e], j))
1172 continue;
1173
1174 eg->aux = (void *)1;
1175
1176 mode = current_mode[j];
1177 src_bb = eg->src;
1178
1179 REG_SET_TO_HARD_REG_SET (live_at_edge,
1180 src_bb->global_live_at_end);
1181
1182 start_sequence ();
1183 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1184 mode_set = gen_sequence ();
1185 end_sequence ();
1186
1187 /* If this is an abnormal edge, we'll insert at the end of the
1188 previous block. */
1189 if (eg->flags & EDGE_ABNORMAL)
1190 {
1191 src_bb->end = emit_insn_after (mode_set, src_bb->end);
1192 bb_info[j][src_bb->index].computing = mode;
1193 RESET_BIT (transp[src_bb->index], j);
1194 }
1195 else
1196 {
1197 need_commit = 1;
1198 insert_insn_on_edge (mode_set, eg);
1199 }
1200 }
1201
1202 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1203 if (TEST_BIT (delete[bb], j))
1204 {
1205 make_preds_opaque (BASIC_BLOCK (bb), j);
1206 /* Cancel the 'deleted' mode set. */
1207 bb_info[j][bb].seginfo->mode = no_mode;
1208 }
1209 }
1210
1211 free_edge_list (edge_list);
1212 }
1213
1214 /* Now output the remaining mode sets in all the segments. */
1215 for (j = n_entities - 1; j >= 0; j--)
1216 {
1217 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1218 {
1219 struct seginfo *ptr, *next;
1220 for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next)
1221 {
1222 next = ptr->next;
1223 if (ptr->mode != FP_MODE_NONE)
1224 {
1225 rtx mode_set;
1226
1227 start_sequence ();
1228 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1229 mode_set = gen_sequence ();
1230 end_sequence ();
1231
1232 emit_block_insn_before (mode_set, ptr->insn_ptr,
1233 BASIC_BLOCK (ptr->bbnum));
1234 }
1235
1236 free (ptr);
1237 }
1238 }
1239
1240 free (bb_info[j]);
1241 }
1242
1243 /* Finished. Free up all the things we've allocated. */
1244
1245 sbitmap_vector_free (kill);
1246 sbitmap_vector_free (antic);
1247 sbitmap_vector_free (transp);
1248 sbitmap_vector_free (comp);
1249 sbitmap_vector_free (delete);
1250 sbitmap_vector_free (insert);
1251
1252 if (need_commit)
1253 commit_edge_insertions ();
1254
1255 /* Ideally we'd figure out what blocks were affected and start from
1256 there, but this is enormously complicated by commit_edge_insertions,
1257 which would screw up any indices we'd collected, and also need to
1258 be involved in the update. Bail and recompute global life info for
1259 everything. */
1260
1261 allocate_reg_life_data ();
1262 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
1263 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1264 | PROP_SCAN_DEAD_CODE | PROP_REG_INFO));
1265
1266 return 1;
1267 }
1268 #endif /* OPTIMIZE_MODE_SWITCHING */