jump.c: Convert prototypes to ISO C90.
[gcc.git] / gcc / lcm.c
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* These routines are meant to be used by various optimization
23 passes which can be modeled as lazy code motion problems.
24 Including, but not limited to:
25
26 * Traditional partial redundancy elimination.
27
28 * Placement of caller/caller register save/restores.
29
30 * Load/store motion.
31
32 * Copy motion.
33
34 * Conversion of flat register files to a stacked register
35 model.
36
37 * Dead load/store elimination.
38
39 These routines accept as input:
40
41 * Basic block information (number of blocks, lists of
42 predecessors and successors). Note the granularity
43 does not need to be basic block, they could be statements
44 or functions.
45
46 * Bitmaps of local properties (computed, transparent and
47 anticipatable expressions).
48
49 The output of these routines is bitmap of redundant computations
50 and a bitmap of optimal placement points. */
51
52
53 #include "config.h"
54 #include "system.h"
55 #include "coretypes.h"
56 #include "tm.h"
57 #include "rtl.h"
58 #include "regs.h"
59 #include "hard-reg-set.h"
60 #include "flags.h"
61 #include "real.h"
62 #include "insn-config.h"
63 #include "recog.h"
64 #include "basic-block.h"
65 #include "output.h"
66 #include "tm_p.h"
67 #include "function.h"
68
69 /* We want target macros for the mode switching code to be able to refer
70 to instruction attribute values. */
71 #include "insn-attr.h"
72
73 /* Edge based LCM routines. */
74 static void compute_antinout_edge (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
75 static void compute_earliest (struct edge_list *, int, sbitmap *, sbitmap *,
76 sbitmap *, sbitmap *, sbitmap *);
77 static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
78 sbitmap *, sbitmap *);
79 static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
80 sbitmap *, sbitmap *, sbitmap *, sbitmap *);
81
82 /* Edge based LCM routines on a reverse flowgraph. */
83 static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
84 sbitmap*, sbitmap *, sbitmap *);
85 static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
86 sbitmap *, sbitmap *);
87 static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
88 sbitmap *, sbitmap *, sbitmap *,
89 sbitmap *);
90 \f
91 /* Edge based lcm routines. */
92
93 /* Compute expression anticipatability at entrance and exit of each block.
94 This is done based on the flow graph, and not on the pred-succ lists.
95 Other than that, its pretty much identical to compute_antinout. */
96
97 static void
98 compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
99 sbitmap *antout)
100 {
101 basic_block bb;
102 edge e;
103 basic_block *worklist, *qin, *qout, *qend;
104 unsigned int qlen;
105
106 /* Allocate a worklist array/queue. Entries are only added to the
107 list if they were not already on the list. So the size is
108 bounded by the number of basic blocks. */
109 qin = qout = worklist
110 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
111
112 /* We want a maximal solution, so make an optimistic initialization of
113 ANTIN. */
114 sbitmap_vector_ones (antin, last_basic_block);
115
116 /* Put every block on the worklist; this is necessary because of the
117 optimistic initialization of ANTIN above. */
118 FOR_EACH_BB_REVERSE (bb)
119 {
120 *qin++ = bb;
121 bb->aux = bb;
122 }
123
124 qin = worklist;
125 qend = &worklist[n_basic_blocks];
126 qlen = n_basic_blocks;
127
128 /* Mark blocks which are predecessors of the exit block so that we
129 can easily identify them below. */
130 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
131 e->src->aux = EXIT_BLOCK_PTR;
132
133 /* Iterate until the worklist is empty. */
134 while (qlen)
135 {
136 /* Take the first entry off the worklist. */
137 bb = *qout++;
138 qlen--;
139
140 if (qout >= qend)
141 qout = worklist;
142
143 if (bb->aux == EXIT_BLOCK_PTR)
144 /* Do not clear the aux field for blocks which are predecessors of
145 the EXIT block. That way we never add then to the worklist
146 again. */
147 sbitmap_zero (antout[bb->index]);
148 else
149 {
150 /* Clear the aux field of this block so that it can be added to
151 the worklist again if necessary. */
152 bb->aux = NULL;
153 sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
154 }
155
156 if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
157 transp[bb->index], antout[bb->index]))
158 /* If the in state of this block changed, then we need
159 to add the predecessors of this block to the worklist
160 if they are not already on the worklist. */
161 for (e = bb->pred; e; e = e->pred_next)
162 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
163 {
164 *qin++ = e->src;
165 e->src->aux = e;
166 qlen++;
167 if (qin >= qend)
168 qin = worklist;
169 }
170 }
171
172 clear_aux_for_edges ();
173 clear_aux_for_blocks ();
174 free (worklist);
175 }
176
177 /* Compute the earliest vector for edge based lcm. */
178
179 static void
180 compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
181 sbitmap *antout, sbitmap *avout, sbitmap *kill,
182 sbitmap *earliest)
183 {
184 sbitmap difference, temp_bitmap;
185 int x, num_edges;
186 basic_block pred, succ;
187
188 num_edges = NUM_EDGES (edge_list);
189
190 difference = sbitmap_alloc (n_exprs);
191 temp_bitmap = sbitmap_alloc (n_exprs);
192
193 for (x = 0; x < num_edges; x++)
194 {
195 pred = INDEX_EDGE_PRED_BB (edge_list, x);
196 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
197 if (pred == ENTRY_BLOCK_PTR)
198 sbitmap_copy (earliest[x], antin[succ->index]);
199 else
200 {
201 if (succ == EXIT_BLOCK_PTR)
202 sbitmap_zero (earliest[x]);
203 else
204 {
205 sbitmap_difference (difference, antin[succ->index],
206 avout[pred->index]);
207 sbitmap_not (temp_bitmap, antout[pred->index]);
208 sbitmap_a_and_b_or_c (earliest[x], difference,
209 kill[pred->index], temp_bitmap);
210 }
211 }
212 }
213
214 sbitmap_free (temp_bitmap);
215 sbitmap_free (difference);
216 }
217
218 /* later(p,s) is dependent on the calculation of laterin(p).
219 laterin(p) is dependent on the calculation of later(p2,p).
220
221 laterin(ENTRY) is defined as all 0's
222 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
223 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
224
225 If we progress in this manner, starting with all basic blocks
226 in the work list, anytime we change later(bb), we need to add
227 succs(bb) to the worklist if they are not already on the worklist.
228
229 Boundary conditions:
230
231 We prime the worklist all the normal basic blocks. The ENTRY block can
232 never be added to the worklist since it is never the successor of any
233 block. We explicitly prevent the EXIT block from being added to the
234 worklist.
235
236 We optimistically initialize LATER. That is the only time this routine
237 will compute LATER for an edge out of the entry block since the entry
238 block is never on the worklist. Thus, LATERIN is neither used nor
239 computed for the ENTRY block.
240
241 Since the EXIT block is never added to the worklist, we will neither
242 use nor compute LATERIN for the exit block. Edges which reach the
243 EXIT block are handled in the normal fashion inside the loop. However,
244 the insertion/deletion computation needs LATERIN(EXIT), so we have
245 to compute it. */
246
247 static void
248 compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
249 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
250 {
251 int num_edges, i;
252 edge e;
253 basic_block *worklist, *qin, *qout, *qend, bb;
254 unsigned int qlen;
255
256 num_edges = NUM_EDGES (edge_list);
257
258 /* Allocate a worklist array/queue. Entries are only added to the
259 list if they were not already on the list. So the size is
260 bounded by the number of basic blocks. */
261 qin = qout = worklist
262 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
263
264 /* Initialize a mapping from each edge to its index. */
265 for (i = 0; i < num_edges; i++)
266 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
267
268 /* We want a maximal solution, so initially consider LATER true for
269 all edges. This allows propagation through a loop since the incoming
270 loop edge will have LATER set, so if all the other incoming edges
271 to the loop are set, then LATERIN will be set for the head of the
272 loop.
273
274 If the optimistic setting of LATER on that edge was incorrect (for
275 example the expression is ANTLOC in a block within the loop) then
276 this algorithm will detect it when we process the block at the head
277 of the optimistic edge. That will requeue the affected blocks. */
278 sbitmap_vector_ones (later, num_edges);
279
280 /* Note that even though we want an optimistic setting of LATER, we
281 do not want to be overly optimistic. Consider an outgoing edge from
282 the entry block. That edge should always have a LATER value the
283 same as EARLIEST for that edge. */
284 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
285 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
286
287 /* Add all the blocks to the worklist. This prevents an early exit from
288 the loop given our optimistic initialization of LATER above. */
289 FOR_EACH_BB (bb)
290 {
291 *qin++ = bb;
292 bb->aux = bb;
293 }
294 qin = worklist;
295 /* Note that we do not use the last allocated element for our queue,
296 as EXIT_BLOCK is never inserted into it. In fact the above allocation
297 of n_basic_blocks + 1 elements is not necessary. */
298 qend = &worklist[n_basic_blocks];
299 qlen = n_basic_blocks;
300
301 /* Iterate until the worklist is empty. */
302 while (qlen)
303 {
304 /* Take the first entry off the worklist. */
305 bb = *qout++;
306 bb->aux = NULL;
307 qlen--;
308 if (qout >= qend)
309 qout = worklist;
310
311 /* Compute the intersection of LATERIN for each incoming edge to B. */
312 sbitmap_ones (laterin[bb->index]);
313 for (e = bb->pred; e != NULL; e = e->pred_next)
314 sbitmap_a_and_b (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]);
315
316 /* Calculate LATER for all outgoing edges. */
317 for (e = bb->succ; e != NULL; e = e->succ_next)
318 if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
319 earliest[(size_t) e->aux],
320 laterin[e->src->index],
321 antloc[e->src->index])
322 /* If LATER for an outgoing edge was changed, then we need
323 to add the target of the outgoing edge to the worklist. */
324 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
325 {
326 *qin++ = e->dest;
327 e->dest->aux = e;
328 qlen++;
329 if (qin >= qend)
330 qin = worklist;
331 }
332 }
333
334 /* Computation of insertion and deletion points requires computing LATERIN
335 for the EXIT block. We allocated an extra entry in the LATERIN array
336 for just this purpose. */
337 sbitmap_ones (laterin[last_basic_block]);
338 for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
339 sbitmap_a_and_b (laterin[last_basic_block],
340 laterin[last_basic_block],
341 later[(size_t) e->aux]);
342
343 clear_aux_for_edges ();
344 free (worklist);
345 }
346
347 /* Compute the insertion and deletion points for edge based LCM. */
348
349 static void
350 compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
351 sbitmap *later, sbitmap *laterin, sbitmap *insert,
352 sbitmap *delete)
353 {
354 int x;
355 basic_block bb;
356
357 FOR_EACH_BB (bb)
358 sbitmap_difference (delete[bb->index], antloc[bb->index], laterin[bb->index]);
359
360 for (x = 0; x < NUM_EDGES (edge_list); x++)
361 {
362 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
363
364 if (b == EXIT_BLOCK_PTR)
365 sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
366 else
367 sbitmap_difference (insert[x], later[x], laterin[b->index]);
368 }
369 }
370
371 /* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and
372 delete vectors for edge based LCM. Returns an edgelist which is used to
373 map the insert vector to what edge an expression should be inserted on. */
374
375 struct edge_list *
376 pre_edge_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
377 sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
378 sbitmap **insert, sbitmap **delete)
379 {
380 sbitmap *antin, *antout, *earliest;
381 sbitmap *avin, *avout;
382 sbitmap *later, *laterin;
383 struct edge_list *edge_list;
384 int num_edges;
385
386 edge_list = create_edge_list ();
387 num_edges = NUM_EDGES (edge_list);
388
389 #ifdef LCM_DEBUG_INFO
390 if (file)
391 {
392 fprintf (file, "Edge List:\n");
393 verify_edge_list (file, edge_list);
394 print_edge_list (file, edge_list);
395 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
396 dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
397 dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
398 dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
399 }
400 #endif
401
402 /* Compute global availability. */
403 avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
404 avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
405 compute_available (avloc, kill, avout, avin);
406 sbitmap_vector_free (avin);
407
408 /* Compute global anticipatability. */
409 antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
410 antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
411 compute_antinout_edge (antloc, transp, antin, antout);
412
413 #ifdef LCM_DEBUG_INFO
414 if (file)
415 {
416 dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
417 dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
418 }
419 #endif
420
421 /* Compute earliestness. */
422 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
423 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
424
425 #ifdef LCM_DEBUG_INFO
426 if (file)
427 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
428 #endif
429
430 sbitmap_vector_free (antout);
431 sbitmap_vector_free (antin);
432 sbitmap_vector_free (avout);
433
434 later = sbitmap_vector_alloc (num_edges, n_exprs);
435
436 /* Allocate an extra element for the exit block in the laterin vector. */
437 laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
438 compute_laterin (edge_list, earliest, antloc, later, laterin);
439
440 #ifdef LCM_DEBUG_INFO
441 if (file)
442 {
443 dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
444 dump_sbitmap_vector (file, "later", "", later, num_edges);
445 }
446 #endif
447
448 sbitmap_vector_free (earliest);
449
450 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
451 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
452 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
453
454 sbitmap_vector_free (laterin);
455 sbitmap_vector_free (later);
456
457 #ifdef LCM_DEBUG_INFO
458 if (file)
459 {
460 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
461 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
462 last_basic_block);
463 }
464 #endif
465
466 return edge_list;
467 }
468
469 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
470 Return the number of passes we performed to iterate to a solution. */
471
472 void
473 compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
474 sbitmap *avin)
475 {
476 edge e;
477 basic_block *worklist, *qin, *qout, *qend, bb;
478 unsigned int qlen;
479
480 /* Allocate a worklist array/queue. Entries are only added to the
481 list if they were not already on the list. So the size is
482 bounded by the number of basic blocks. */
483 qin = qout = worklist
484 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
485
486 /* We want a maximal solution. */
487 sbitmap_vector_ones (avout, last_basic_block);
488
489 /* Put every block on the worklist; this is necessary because of the
490 optimistic initialization of AVOUT above. */
491 FOR_EACH_BB (bb)
492 {
493 *qin++ = bb;
494 bb->aux = bb;
495 }
496
497 qin = worklist;
498 qend = &worklist[n_basic_blocks];
499 qlen = n_basic_blocks;
500
501 /* Mark blocks which are successors of the entry block so that we
502 can easily identify them below. */
503 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
504 e->dest->aux = ENTRY_BLOCK_PTR;
505
506 /* Iterate until the worklist is empty. */
507 while (qlen)
508 {
509 /* Take the first entry off the worklist. */
510 bb = *qout++;
511 qlen--;
512
513 if (qout >= qend)
514 qout = worklist;
515
516 /* If one of the predecessor blocks is the ENTRY block, then the
517 intersection of avouts is the null set. We can identify such blocks
518 by the special value in the AUX field in the block structure. */
519 if (bb->aux == ENTRY_BLOCK_PTR)
520 /* Do not clear the aux field for blocks which are successors of the
521 ENTRY block. That way we never add then to the worklist again. */
522 sbitmap_zero (avin[bb->index]);
523 else
524 {
525 /* Clear the aux field of this block so that it can be added to
526 the worklist again if necessary. */
527 bb->aux = NULL;
528 sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
529 }
530
531 if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index], avin[bb->index], kill[bb->index]))
532 /* If the out state of this block changed, then we need
533 to add the successors of this block to the worklist
534 if they are not already on the worklist. */
535 for (e = bb->succ; e; e = e->succ_next)
536 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
537 {
538 *qin++ = e->dest;
539 e->dest->aux = e;
540 qlen++;
541
542 if (qin >= qend)
543 qin = worklist;
544 }
545 }
546
547 clear_aux_for_edges ();
548 clear_aux_for_blocks ();
549 free (worklist);
550 }
551
552 /* Compute the farthest vector for edge based lcm. */
553
554 static void
555 compute_farthest (struct edge_list *edge_list, int n_exprs,
556 sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
557 sbitmap *kill, sbitmap *farthest)
558 {
559 sbitmap difference, temp_bitmap;
560 int x, num_edges;
561 basic_block pred, succ;
562
563 num_edges = NUM_EDGES (edge_list);
564
565 difference = sbitmap_alloc (n_exprs);
566 temp_bitmap = sbitmap_alloc (n_exprs);
567
568 for (x = 0; x < num_edges; x++)
569 {
570 pred = INDEX_EDGE_PRED_BB (edge_list, x);
571 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
572 if (succ == EXIT_BLOCK_PTR)
573 sbitmap_copy (farthest[x], st_avout[pred->index]);
574 else
575 {
576 if (pred == ENTRY_BLOCK_PTR)
577 sbitmap_zero (farthest[x]);
578 else
579 {
580 sbitmap_difference (difference, st_avout[pred->index],
581 st_antin[succ->index]);
582 sbitmap_not (temp_bitmap, st_avin[succ->index]);
583 sbitmap_a_and_b_or_c (farthest[x], difference,
584 kill[succ->index], temp_bitmap);
585 }
586 }
587 }
588
589 sbitmap_free (temp_bitmap);
590 sbitmap_free (difference);
591 }
592
593 /* Compute nearer and nearerout vectors for edge based lcm.
594
595 This is the mirror of compute_laterin, additional comments on the
596 implementation can be found before compute_laterin. */
597
598 static void
599 compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
600 sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
601 {
602 int num_edges, i;
603 edge e;
604 basic_block *worklist, *tos, bb;
605
606 num_edges = NUM_EDGES (edge_list);
607
608 /* Allocate a worklist array/queue. Entries are only added to the
609 list if they were not already on the list. So the size is
610 bounded by the number of basic blocks. */
611 tos = worklist
612 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
613
614 /* Initialize NEARER for each edge and build a mapping from an edge to
615 its index. */
616 for (i = 0; i < num_edges; i++)
617 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
618
619 /* We want a maximal solution. */
620 sbitmap_vector_ones (nearer, num_edges);
621
622 /* Note that even though we want an optimistic setting of NEARER, we
623 do not want to be overly optimistic. Consider an incoming edge to
624 the exit block. That edge should always have a NEARER value the
625 same as FARTHEST for that edge. */
626 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
627 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
628
629 /* Add all the blocks to the worklist. This prevents an early exit
630 from the loop given our optimistic initialization of NEARER. */
631 FOR_EACH_BB (bb)
632 {
633 *tos++ = bb;
634 bb->aux = bb;
635 }
636
637 /* Iterate until the worklist is empty. */
638 while (tos != worklist)
639 {
640 /* Take the first entry off the worklist. */
641 bb = *--tos;
642 bb->aux = NULL;
643
644 /* Compute the intersection of NEARER for each outgoing edge from B. */
645 sbitmap_ones (nearerout[bb->index]);
646 for (e = bb->succ; e != NULL; e = e->succ_next)
647 sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
648 nearer[(size_t) e->aux]);
649
650 /* Calculate NEARER for all incoming edges. */
651 for (e = bb->pred; e != NULL; e = e->pred_next)
652 if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
653 farthest[(size_t) e->aux],
654 nearerout[e->dest->index],
655 st_avloc[e->dest->index])
656 /* If NEARER for an incoming edge was changed, then we need
657 to add the source of the incoming edge to the worklist. */
658 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
659 {
660 *tos++ = e->src;
661 e->src->aux = e;
662 }
663 }
664
665 /* Computation of insertion and deletion points requires computing NEAREROUT
666 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
667 for just this purpose. */
668 sbitmap_ones (nearerout[last_basic_block]);
669 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
670 sbitmap_a_and_b (nearerout[last_basic_block],
671 nearerout[last_basic_block],
672 nearer[(size_t) e->aux]);
673
674 clear_aux_for_edges ();
675 free (tos);
676 }
677
678 /* Compute the insertion and deletion points for edge based LCM. */
679
680 static void
681 compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
682 sbitmap *nearer, sbitmap *nearerout,
683 sbitmap *insert, sbitmap *delete)
684 {
685 int x;
686 basic_block bb;
687
688 FOR_EACH_BB (bb)
689 sbitmap_difference (delete[bb->index], st_avloc[bb->index], nearerout[bb->index]);
690
691 for (x = 0; x < NUM_EDGES (edge_list); x++)
692 {
693 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
694 if (b == ENTRY_BLOCK_PTR)
695 sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
696 else
697 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
698 }
699 }
700
701 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
702 insert and delete vectors for edge based reverse LCM. Returns an
703 edgelist which is used to map the insert vector to what edge
704 an expression should be inserted on. */
705
706 struct edge_list *
707 pre_edge_rev_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
708 sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
709 sbitmap **insert, sbitmap **delete)
710 {
711 sbitmap *st_antin, *st_antout;
712 sbitmap *st_avout, *st_avin, *farthest;
713 sbitmap *nearer, *nearerout;
714 struct edge_list *edge_list;
715 int num_edges;
716
717 edge_list = create_edge_list ();
718 num_edges = NUM_EDGES (edge_list);
719
720 st_antin = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
721 st_antout = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
722 sbitmap_vector_zero (st_antin, last_basic_block);
723 sbitmap_vector_zero (st_antout, last_basic_block);
724 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
725
726 /* Compute global anticipatability. */
727 st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
728 st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
729 compute_available (st_avloc, kill, st_avout, st_avin);
730
731 #ifdef LCM_DEBUG_INFO
732 if (file)
733 {
734 fprintf (file, "Edge List:\n");
735 verify_edge_list (file, edge_list);
736 print_edge_list (file, edge_list);
737 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
738 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
739 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
740 dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
741 dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
742 dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
743 }
744 #endif
745
746 #ifdef LCM_DEBUG_INFO
747 if (file)
748 {
749 dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
750 dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
751 }
752 #endif
753
754 /* Compute farthestness. */
755 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
756 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
757 kill, farthest);
758
759 #ifdef LCM_DEBUG_INFO
760 if (file)
761 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
762 #endif
763
764 sbitmap_vector_free (st_antin);
765 sbitmap_vector_free (st_antout);
766
767 sbitmap_vector_free (st_avin);
768 sbitmap_vector_free (st_avout);
769
770 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
771
772 /* Allocate an extra element for the entry block. */
773 nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
774 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
775
776 #ifdef LCM_DEBUG_INFO
777 if (file)
778 {
779 dump_sbitmap_vector (file, "nearerout", "", nearerout,
780 last_basic_block + 1);
781 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
782 }
783 #endif
784
785 sbitmap_vector_free (farthest);
786
787 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
788 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
789 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
790 *insert, *delete);
791
792 sbitmap_vector_free (nearerout);
793 sbitmap_vector_free (nearer);
794
795 #ifdef LCM_DEBUG_INFO
796 if (file)
797 {
798 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
799 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
800 last_basic_block);
801 }
802 #endif
803 return edge_list;
804 }
805
806 /* Mode switching:
807
808 The algorithm for setting the modes consists of scanning the insn list
809 and finding all the insns which require a specific mode. Each insn gets
810 a unique struct seginfo element. These structures are inserted into a list
811 for each basic block. For each entity, there is an array of bb_info over
812 the flow graph basic blocks (local var 'bb_info'), and contains a list
813 of all insns within that basic block, in the order they are encountered.
814
815 For each entity, any basic block WITHOUT any insns requiring a specific
816 mode are given a single entry, without a mode. (Each basic block
817 in the flow graph must have at least one entry in the segment table.)
818
819 The LCM algorithm is then run over the flow graph to determine where to
820 place the sets to the highest-priority value in respect of first the first
821 insn in any one block. Any adjustments required to the transparency
822 vectors are made, then the next iteration starts for the next-lower
823 priority mode, till for each entity all modes are exhausted.
824
825 More details are located in the code for optimize_mode_switching(). */
826
827 /* This structure contains the information for each insn which requires
828 either single or double mode to be set.
829 MODE is the mode this insn must be executed in.
830 INSN_PTR is the insn to be executed (may be the note that marks the
831 beginning of a basic block).
832 BBNUM is the flow graph basic block this insn occurs in.
833 NEXT is the next insn in the same basic block. */
834 struct seginfo
835 {
836 int mode;
837 rtx insn_ptr;
838 int bbnum;
839 struct seginfo *next;
840 HARD_REG_SET regs_live;
841 };
842
843 struct bb_info
844 {
845 struct seginfo *seginfo;
846 int computing;
847 };
848
849 /* These bitmaps are used for the LCM algorithm. */
850
851 #ifdef OPTIMIZE_MODE_SWITCHING
852 static sbitmap *antic;
853 static sbitmap *transp;
854 static sbitmap *comp;
855 static sbitmap *delete;
856 static sbitmap *insert;
857
858 static struct seginfo * new_seginfo (int, rtx, int, HARD_REG_SET);
859 static void add_seginfo (struct bb_info *, struct seginfo *);
860 static void reg_dies (rtx, HARD_REG_SET);
861 static void reg_becomes_live (rtx, rtx, void *);
862 static void make_preds_opaque (basic_block, int);
863 #endif
864 \f
865 #ifdef OPTIMIZE_MODE_SWITCHING
866
867 /* This function will allocate a new BBINFO structure, initialized
868 with the MODE, INSN, and basic block BB parameters. */
869
870 static struct seginfo *
871 new_seginfo (int mode, rtx insn, int bb, HARD_REG_SET regs_live)
872 {
873 struct seginfo *ptr;
874 ptr = xmalloc (sizeof (struct seginfo));
875 ptr->mode = mode;
876 ptr->insn_ptr = insn;
877 ptr->bbnum = bb;
878 ptr->next = NULL;
879 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
880 return ptr;
881 }
882
883 /* Add a seginfo element to the end of a list.
884 HEAD is a pointer to the list beginning.
885 INFO is the structure to be linked in. */
886
887 static void
888 add_seginfo (struct bb_info *head, struct seginfo *info)
889 {
890 struct seginfo *ptr;
891
892 if (head->seginfo == NULL)
893 head->seginfo = info;
894 else
895 {
896 ptr = head->seginfo;
897 while (ptr->next != NULL)
898 ptr = ptr->next;
899 ptr->next = info;
900 }
901 }
902
903 /* Make all predecessors of basic block B opaque, recursively, till we hit
904 some that are already non-transparent, or an edge where aux is set; that
905 denotes that a mode set is to be done on that edge.
906 J is the bit number in the bitmaps that corresponds to the entity that
907 we are currently handling mode-switching for. */
908
909 static void
910 make_preds_opaque (basic_block b, int j)
911 {
912 edge e;
913
914 for (e = b->pred; e; e = e->pred_next)
915 {
916 basic_block pb = e->src;
917
918 if (e->aux || ! TEST_BIT (transp[pb->index], j))
919 continue;
920
921 RESET_BIT (transp[pb->index], j);
922 make_preds_opaque (pb, j);
923 }
924 }
925
926 /* Record in LIVE that register REG died. */
927
928 static void
929 reg_dies (rtx reg, HARD_REG_SET live)
930 {
931 int regno, nregs;
932
933 if (GET_CODE (reg) != REG)
934 return;
935
936 regno = REGNO (reg);
937 if (regno < FIRST_PSEUDO_REGISTER)
938 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
939 nregs--)
940 CLEAR_HARD_REG_BIT (live, regno + nregs);
941 }
942
943 /* Record in LIVE that register REG became live.
944 This is called via note_stores. */
945
946 static void
947 reg_becomes_live (rtx reg, rtx setter ATTRIBUTE_UNUSED, void *live)
948 {
949 int regno, nregs;
950
951 if (GET_CODE (reg) == SUBREG)
952 reg = SUBREG_REG (reg);
953
954 if (GET_CODE (reg) != REG)
955 return;
956
957 regno = REGNO (reg);
958 if (regno < FIRST_PSEUDO_REGISTER)
959 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
960 nregs--)
961 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
962 }
963
964 /* Find all insns that need a particular mode setting, and insert the
965 necessary mode switches. Return true if we did work. */
966
967 int
968 optimize_mode_switching (FILE *file)
969 {
970 rtx insn;
971 int e;
972 basic_block bb;
973 int need_commit = 0;
974 sbitmap *kill;
975 struct edge_list *edge_list;
976 static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
977 #define N_ENTITIES ARRAY_SIZE (num_modes)
978 int entity_map[N_ENTITIES];
979 struct bb_info *bb_info[N_ENTITIES];
980 int i, j;
981 int n_entities;
982 int max_num_modes = 0;
983 bool emited = false;
984 basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;
985
986 clear_bb_flags ();
987
988 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
989 if (OPTIMIZE_MODE_SWITCHING (e))
990 {
991 int entry_exit_extra = 0;
992
993 /* Create the list of segments within each basic block.
994 If NORMAL_MODE is defined, allow for two extra
995 blocks split from the entry and exit block. */
996 #ifdef NORMAL_MODE
997 entry_exit_extra = 2;
998 #endif
999 bb_info[n_entities]
1000 = (struct bb_info *) xcalloc (last_basic_block + entry_exit_extra,
1001 sizeof **bb_info);
1002 entity_map[n_entities++] = e;
1003 if (num_modes[e] > max_num_modes)
1004 max_num_modes = num_modes[e];
1005 }
1006
1007 if (! n_entities)
1008 return 0;
1009
1010 #ifdef NORMAL_MODE
1011 {
1012 /* Split the edge from the entry block and the fallthrough edge to the
1013 exit block, so that we can note that there NORMAL_MODE is supplied /
1014 required. */
1015 edge eg;
1016 post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
1017 /* The only non-call predecessor at this stage is a block with a
1018 fallthrough edge; there can be at most one, but there could be
1019 none at all, e.g. when exit is called. */
1020 for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1021 if (eg->flags & EDGE_FALLTHRU)
1022 {
1023 regset live_at_end = eg->src->global_live_at_end;
1024
1025 if (pre_exit)
1026 abort ();
1027 pre_exit = split_edge (eg);
1028 COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
1029 COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
1030 }
1031 }
1032 #endif
1033
1034 /* Create the bitmap vectors. */
1035
1036 antic = sbitmap_vector_alloc (last_basic_block, n_entities);
1037 transp = sbitmap_vector_alloc (last_basic_block, n_entities);
1038 comp = sbitmap_vector_alloc (last_basic_block, n_entities);
1039
1040 sbitmap_vector_ones (transp, last_basic_block);
1041
1042 for (j = n_entities - 1; j >= 0; j--)
1043 {
1044 int e = entity_map[j];
1045 int no_mode = num_modes[e];
1046 struct bb_info *info = bb_info[j];
1047
1048 /* Determine what the first use (if any) need for a mode of entity E is.
1049 This will be the mode that is anticipatable for this block.
1050 Also compute the initial transparency settings. */
1051 FOR_EACH_BB (bb)
1052 {
1053 struct seginfo *ptr;
1054 int last_mode = no_mode;
1055 HARD_REG_SET live_now;
1056
1057 REG_SET_TO_HARD_REG_SET (live_now,
1058 bb->global_live_at_start);
1059 for (insn = bb->head;
1060 insn != NULL && insn != NEXT_INSN (bb->end);
1061 insn = NEXT_INSN (insn))
1062 {
1063 if (INSN_P (insn))
1064 {
1065 int mode = MODE_NEEDED (e, insn);
1066 rtx link;
1067
1068 if (mode != no_mode && mode != last_mode)
1069 {
1070 last_mode = mode;
1071 ptr = new_seginfo (mode, insn, bb->index, live_now);
1072 add_seginfo (info + bb->index, ptr);
1073 RESET_BIT (transp[bb->index], j);
1074 }
1075
1076 /* Update LIVE_NOW. */
1077 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1078 if (REG_NOTE_KIND (link) == REG_DEAD)
1079 reg_dies (XEXP (link, 0), live_now);
1080
1081 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1082 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1083 if (REG_NOTE_KIND (link) == REG_UNUSED)
1084 reg_dies (XEXP (link, 0), live_now);
1085 }
1086 }
1087
1088 info[bb->index].computing = last_mode;
1089 /* Check for blocks without ANY mode requirements. */
1090 if (last_mode == no_mode)
1091 {
1092 ptr = new_seginfo (no_mode, bb->end, bb->index, live_now);
1093 add_seginfo (info + bb->index, ptr);
1094 }
1095 }
1096 #ifdef NORMAL_MODE
1097 {
1098 int mode = NORMAL_MODE (e);
1099
1100 if (mode != no_mode)
1101 {
1102 bb = post_entry;
1103
1104 /* By always making this nontransparent, we save
1105 an extra check in make_preds_opaque. We also
1106 need this to avoid confusing pre_edge_lcm when
1107 antic is cleared but transp and comp are set. */
1108 RESET_BIT (transp[bb->index], j);
1109
1110 /* Insert a fake computing definition of MODE into entry
1111 blocks which compute no mode. This represents the mode on
1112 entry. */
1113 info[bb->index].computing = mode;
1114
1115 if (pre_exit)
1116 info[pre_exit->index].seginfo->mode = mode;
1117 }
1118 }
1119 #endif /* NORMAL_MODE */
1120 }
1121
1122 kill = sbitmap_vector_alloc (last_basic_block, n_entities);
1123 for (i = 0; i < max_num_modes; i++)
1124 {
1125 int current_mode[N_ENTITIES];
1126
1127 /* Set the anticipatable and computing arrays. */
1128 sbitmap_vector_zero (antic, last_basic_block);
1129 sbitmap_vector_zero (comp, last_basic_block);
1130 for (j = n_entities - 1; j >= 0; j--)
1131 {
1132 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1133 struct bb_info *info = bb_info[j];
1134
1135 FOR_EACH_BB (bb)
1136 {
1137 if (info[bb->index].seginfo->mode == m)
1138 SET_BIT (antic[bb->index], j);
1139
1140 if (info[bb->index].computing == m)
1141 SET_BIT (comp[bb->index], j);
1142 }
1143 }
1144
1145 /* Calculate the optimal locations for the
1146 placement mode switches to modes with priority I. */
1147
1148 FOR_EACH_BB (bb)
1149 sbitmap_not (kill[bb->index], transp[bb->index]);
1150 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1151 kill, &insert, &delete);
1152
1153 for (j = n_entities - 1; j >= 0; j--)
1154 {
1155 /* Insert all mode sets that have been inserted by lcm. */
1156 int no_mode = num_modes[entity_map[j]];
1157
1158 /* Wherever we have moved a mode setting upwards in the flow graph,
1159 the blocks between the new setting site and the now redundant
1160 computation ceases to be transparent for any lower-priority
1161 mode of the same entity. First set the aux field of each
1162 insertion site edge non-transparent, then propagate the new
1163 non-transparency from the redundant computation upwards till
1164 we hit an insertion site or an already non-transparent block. */
1165 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1166 {
1167 edge eg = INDEX_EDGE (edge_list, e);
1168 int mode;
1169 basic_block src_bb;
1170 HARD_REG_SET live_at_edge;
1171 rtx mode_set;
1172
1173 eg->aux = 0;
1174
1175 if (! TEST_BIT (insert[e], j))
1176 continue;
1177
1178 eg->aux = (void *)1;
1179
1180 mode = current_mode[j];
1181 src_bb = eg->src;
1182
1183 REG_SET_TO_HARD_REG_SET (live_at_edge,
1184 src_bb->global_live_at_end);
1185
1186 start_sequence ();
1187 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1188 mode_set = get_insns ();
1189 end_sequence ();
1190
1191 /* Do not bother to insert empty sequence. */
1192 if (mode_set == NULL_RTX)
1193 continue;
1194
1195 /* If this is an abnormal edge, we'll insert at the end
1196 of the previous block. */
1197 if (eg->flags & EDGE_ABNORMAL)
1198 {
1199 emited = true;
1200 if (GET_CODE (src_bb->end) == JUMP_INSN)
1201 emit_insn_before (mode_set, src_bb->end);
1202 /* It doesn't make sense to switch to normal mode
1203 after a CALL_INSN, so we're going to abort if we
1204 find one. The cases in which a CALL_INSN may
1205 have an abnormal edge are sibcalls and EH edges.
1206 In the case of sibcalls, the dest basic-block is
1207 the EXIT_BLOCK, that runs in normal mode; it is
1208 assumed that a sibcall insn requires normal mode
1209 itself, so no mode switch would be required after
1210 the call (it wouldn't make sense, anyway). In
1211 the case of EH edges, EH entry points also start
1212 in normal mode, so a similar reasoning applies. */
1213 else if (GET_CODE (src_bb->end) == INSN)
1214 emit_insn_after (mode_set, src_bb->end);
1215 else
1216 abort ();
1217 bb_info[j][src_bb->index].computing = mode;
1218 RESET_BIT (transp[src_bb->index], j);
1219 }
1220 else
1221 {
1222 need_commit = 1;
1223 insert_insn_on_edge (mode_set, eg);
1224 }
1225 }
1226
1227 FOR_EACH_BB_REVERSE (bb)
1228 if (TEST_BIT (delete[bb->index], j))
1229 {
1230 make_preds_opaque (bb, j);
1231 /* Cancel the 'deleted' mode set. */
1232 bb_info[j][bb->index].seginfo->mode = no_mode;
1233 }
1234 }
1235
1236 clear_aux_for_edges ();
1237 free_edge_list (edge_list);
1238 }
1239
1240 /* Now output the remaining mode sets in all the segments. */
1241 for (j = n_entities - 1; j >= 0; j--)
1242 {
1243 int no_mode = num_modes[entity_map[j]];
1244
1245 FOR_EACH_BB_REVERSE (bb)
1246 {
1247 struct seginfo *ptr, *next;
1248 for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
1249 {
1250 next = ptr->next;
1251 if (ptr->mode != no_mode)
1252 {
1253 rtx mode_set;
1254
1255 start_sequence ();
1256 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1257 mode_set = get_insns ();
1258 end_sequence ();
1259
1260 /* Do not bother to insert empty sequence. */
1261 if (mode_set == NULL_RTX)
1262 continue;
1263
1264 emited = true;
1265 if (GET_CODE (ptr->insn_ptr) == NOTE
1266 && (NOTE_LINE_NUMBER (ptr->insn_ptr)
1267 == NOTE_INSN_BASIC_BLOCK))
1268 emit_insn_after (mode_set, ptr->insn_ptr);
1269 else
1270 emit_insn_before (mode_set, ptr->insn_ptr);
1271 }
1272
1273 free (ptr);
1274 }
1275 }
1276
1277 free (bb_info[j]);
1278 }
1279
1280 /* Finished. Free up all the things we've allocated. */
1281
1282 sbitmap_vector_free (kill);
1283 sbitmap_vector_free (antic);
1284 sbitmap_vector_free (transp);
1285 sbitmap_vector_free (comp);
1286 sbitmap_vector_free (delete);
1287 sbitmap_vector_free (insert);
1288
1289 if (need_commit)
1290 commit_edge_insertions ();
1291
1292 #ifdef NORMAL_MODE
1293 cleanup_cfg (CLEANUP_NO_INSN_DEL);
1294 #else
1295 if (!need_commit && !emited)
1296 return 0;
1297 #endif
1298
1299 max_regno = max_reg_num ();
1300 allocate_reg_info (max_regno, FALSE, FALSE);
1301 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
1302 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1303 | PROP_SCAN_DEAD_CODE));
1304
1305 return 1;
1306 }
1307 #endif /* OPTIMIZE_MODE_SWITCHING */