/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl-error.h"
#include "tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "stringpool.h"
#include "flags.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "libfuncs.h"
#include "regs.h"
#include "insn-config.h"
#include "recog.h"
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
#include "target.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "gimplify.h"
#include "tree-pass.h"
#include "predict.h"
#include "df.h"
#include "params.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"

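/* Shrink-wrapping means emitting the prologue and epilogue only around
   the parts of a function that actually need them, rather than around
   the whole function.  The helpers below sink register copies out of
   the entry block (prepare_shrink_wrap), decide which basic blocks
   need the prologue and pick the edge on which to emit it
   (try_shrink_wrapping), and emit simple_return insns on the paths
   that never reach the prologue (get_unconverted_simple_return and
   convert_to_simple_return).  */
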
#ifdef HAVE_simple_return

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
                        HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
                           REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
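  /* Any call-saved register that INSN writes and that is live somewhere
     in the function will have to be saved and restored by the prologue
     and epilogue, so such a write requires a frame.  */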
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
        && df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
                           REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}

/* See whether BB has a single live successor edge whose destination
   uses [REGNO, END_REGNO).  Return that edge if its destination block
   has one or two predecessors; otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
        if (REGNO_REG_SET_P (live, i))
          {
            if (live_edge && live_edge != e)
              return NULL;
            live_edge = e;
          }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the sets of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has been split.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
                           const HARD_REG_SET uses,
                           const HARD_REG_SET defs,
                           bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
        return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
        {
          rtx x = *iter;
          switch (GET_RTX_CLASS (GET_CODE (x)))
            {
            case RTX_CONST_OBJ:
            case RTX_COMPARE:
            case RTX_COMM_COMPARE:
            case RTX_BIN_ARITH:
            case RTX_COMM_ARITH:
            case RTX_UNARY:
            case RTX_TERNARY:
              /* Constant or expression.  Continue.  */
              break;

            case RTX_OBJ:
            case RTX_EXTRA:
              switch (GET_CODE (x))
                {
                case UNSPEC:
                case SUBREG:
                case STRICT_LOW_PART:
                case PC:
                  /* Ok.  Continue.  */
                  break;

                case REG:
                  /* Fail if we see a second inner register.  */
                  if (src_inner != NULL)
                    return false;
                  src_inner = x;
                  break;

                default:
                  return false;
                }
              break;

            default:
              return false;
            }
        }

      if (src_inner != NULL)
        src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
        return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
        return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
        return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
         all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
                  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once per function.  */
      if (*split_p)
        return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
         INSN to BB if so.  Either way, DEST is no longer live on entry,
         except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
        {
          for (i = dregno; i < end_dregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_uses, i)
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
             Either way, SRC is now live on entry.  */
          for (i = sregno; i < end_sregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }
      else
        {
          /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
             DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
             at -O1, just give up searching NEXT_BLOCK.  */
          next_block = NULL;
          for (i = dregno; i < end_dregno; i++)
            {
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          for (i = sregno; i < end_sregno; i++)
            {
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }

      /* If we don't need to add the move to BB, look for a single
         successor block.  */
      if (next_block)
        {
          live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
          if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
            break;
          next_block = live_edge->dest;
        }
    }
  while (next_block);

  /* A newly created basic block has no dataflow info at all, so skip
     the dataflow update and check below.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
         (next loop).  */
      for (i = dregno; i < end_dregno; i++)
        {
          CLEAR_REGNO_REG_SET (bb_uses, i);
          SET_REGNO_REG_SET (bb_defs, i);
        }

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
        SET_REGNO_REG_SET (bb_uses, i);
    }

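  /* Re-emit INSN's pattern at the start of the destination block and
     remove the original insn from the block it came from.  */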
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap
         tries to sink copies from parameter registers to callee-saved
         registers out of the entry block.  Call
         copyprop_hardreg_forward_bb_without_debug_insn first to remove
         some of the dependences that would otherwise block those moves.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
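
  /* Walk the entry block backwards.  An insn can only be sunk if nothing
     later in the block reads or writes its destination and nothing later
     writes its source, so accumulate in USES and DEFS the registers used
     and defined by the insns we fail to move.  As a purely illustrative
     example (register numbers hypothetical), a copy such as

       (set (reg:DI 3 bx) (reg:DI 5 di))

     in the entry block can be sunk toward the one successor path that
     actually needs BX, so the entry block itself no longer requires
     the prologue.  */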
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
        && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
                                       &split_p))
      {
        /* Add all defined registers to DEFs.  */
        FOR_EACH_INSN_DEF (def, insn)
          {
            x = DF_REF_REG (def);
            if (REG_P (x) && HARD_REGISTER_P (x))
              SET_HARD_REG_BIT (defs, REGNO (x));
          }

        /* Add all used registers to USES.  */
        FOR_EACH_INSN_USE (use, insn)
          {
            x = DF_REF_REG (use);
            if (REG_P (x) && HARD_REGISTER_P (x))
              SET_HARD_REG_BIT (uses, REGNO (x));
          }
      }
}

/* Create a copy of BB's instructions and insert it at BEFORE.  Redirect
   those predecessors of BB that do not appear in NEED_PROLOGUE to COPY_BB.  */
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
                        bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        if (active_insn_p (insn))
          ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
               bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
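        /* Move this edge's execution count and frequency from the
           original block to the copy, clamping the remainder at zero.  */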
        int freq = EDGE_FREQUENCY (e);
        copy_bb->count += e->count;
        copy_bb->frequency += EDGE_FREQUENCY (e);
        e->dest->count -= e->count;
        if (e->dest->count < 0)
          e->dest->count = 0;
        e->dest->frequency -= freq;
        if (e->dest->frequency < 0)
          e->dest->frequency = 0;
        redirect_edge_and_branch_force (e, copy_bb);
        continue;
      }
    else
      ei_next (&ei);
}


/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
                     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
        nonempty_prologue = true;
        break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
        fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
        {
          HARD_REG_SET this_used;
          if (!NONDEBUG_INSN_P (p_insn))
            continue;

          CLEAR_HARD_REG_SET (this_used);
          note_uses (&PATTERN (p_insn), record_hard_reg_uses,
                     &this_used);
          AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
          IOR_HARD_REG_SET (prologue_used, this_used);
          note_stores (PATTERN (p_insn), record_hard_reg_sets,
                       &prologue_clobbered);
        }

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
         and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

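      /* Collect the registers that the prologue is expected to set up:
         the stack and argument pointers, the hard frame pointer when a
         frame is needed, the PIC and DRAP registers when they are used,
         plus anything the target itself adds.  An insn that needs any of
         these forces a prologue on its block.  */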
      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                           STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
        add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                             HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
          && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
        add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                             PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
        add_to_hard_reg_set (&set_up_by_prologue.set,
                             GET_MODE (crtl->drap_reg),
                             REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
        targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
         optimize_bb_for_speed_p because increasing shrink-wrapping
         opportunities by duplicating tail blocks can actually result
         in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
        {
          rtx_insn *insn;
          unsigned size = 0;

          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn))
              {
                if (requires_stack_frame_p (insn, prologue_used,
                                            set_up_by_prologue.set))
                  {
                    if (bb == (*entry_edge)->dest)
                      goto fail_shrinkwrap;
                    bitmap_set_bit (bb_flags, bb->index);
                    vec.quick_push (bb);
                    break;
                  }
                else if (size <= max_grow_size)
                  {
                    size += get_attr_min_length (insn);
                    if (size > max_grow_size)
                      bitmap_set_bit (&bb_on_list, bb->index);
                  }
              }
        }

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
         reachable from it, so as to ensure they are also seen as
         requiring a prologue.  */
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();

          FOR_EACH_EDGE (e, ei, tmp_bb->succs)
            if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
                && bitmap_set_bit (bb_flags, e->dest->index))
              vec.quick_push (e->dest);
        }

      /* Find the set of basic blocks that need no prologue, have a
         single successor, can be duplicated, meet a max size
         requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();

          FOR_EACH_EDGE (e, ei, tmp_bb->preds)
            if (single_succ_p (e->src)
                && !bitmap_bit_p (&bb_on_list, e->src->index)
                && can_duplicate_block_p (e->src))
              {
                edge pe;
                edge_iterator pei;

                /* If e->src has a predecessor that needs no prologue and
                   that reaches it via a complex edge, we might not be
                   able to redirect that branch to a copy of e->src.  */
                FOR_EACH_EDGE (pe, pei, e->src->preds)
                  if ((pe->flags & EDGE_COMPLEX) != 0
                      && !bitmap_bit_p (bb_flags, pe->src->index))
                    break;
                if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
                  vec.quick_push (e->src);
              }
        }

      /* Now walk backwards from every block that is marked as needing
         a prologue to compute the bb_antic_flags bitmap.  Exclude
         tail blocks; they can be duplicated to be used on paths not
         needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
        {
          if (!bitmap_bit_p (&bb_antic_flags, bb->index))
            continue;
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                && bitmap_set_bit (&bb_on_list, e->src->index))
              vec.quick_push (e->src);
        }
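      /* Iterate to a fixed point: a block becomes ANTIC once all of its
         successors are ANTIC, meaning the prologue is wanted on every
         path leaving it; whenever a block is added, reconsider its
         predecessors.  */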
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();
          bool all_set = true;

          bitmap_clear_bit (&bb_on_list, tmp_bb->index);
          FOR_EACH_EDGE (e, ei, tmp_bb->succs)
            if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
              {
                all_set = false;
                break;
              }

          if (all_set)
            {
              bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
              FOR_EACH_EDGE (e, ei, tmp_bb->preds)
                if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                    && bitmap_set_bit (&bb_on_list, e->src->index))
                  vec.quick_push (e->src);
            }
        }
      /* Find exactly one edge that leads to a block in ANTIC from
         a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
        FOR_EACH_BB_FN (bb, cfun)
          {
            if (!bitmap_bit_p (&bb_antic_flags, bb->index))
              continue;
            FOR_EACH_EDGE (e, ei, bb->preds)
              if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
                {
                  if (*entry_edge != orig_entry_edge)
                    {
                      *entry_edge = orig_entry_edge;
                      if (dump_file)
                        fprintf (dump_file, "More than one candidate edge.\n");
                      goto fail_shrinkwrap;
                    }
                  if (dump_file)
                    fprintf (dump_file, "Found candidate edge for "
                             "shrink-wrapping, %d->%d.\n", e->src->index,
                             e->dest->index);
                  *entry_edge = e;
                }
          }

      if (*entry_edge != orig_entry_edge)
        {
          /* Test whether the prologue is known to clobber any register
             (other than FP or SP) that is live on the edge.  */
          CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
          if (frame_pointer_needed)
            CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
          REG_SET_TO_HARD_REG_SET (live_on_edge,
                                   df_get_live_in ((*entry_edge)->dest));
          if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
            {
              *entry_edge = orig_entry_edge;
              if (dump_file)
                fprintf (dump_file,
                         "Shrink-wrapping aborted due to clobber.\n");
            }
        }
      if (*entry_edge != orig_entry_edge)
        {
          crtl->shrink_wrapped = true;
          if (dump_file)
            fprintf (dump_file, "Performing shrink-wrapping.\n");

          /* Find tail blocks reachable from both blocks needing a
             prologue and blocks not needing a prologue.  */
          if (!bitmap_empty_p (&bb_tail))
            FOR_EACH_BB_FN (bb, cfun)
              {
                bool some_pro, some_no_pro;
                if (!bitmap_bit_p (&bb_tail, bb->index))
                  continue;
                some_pro = some_no_pro = false;
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    if (bitmap_bit_p (bb_flags, e->src->index))
                      some_pro = true;
                    else
                      some_no_pro = true;
                  }
                if (some_pro && some_no_pro)
                  vec.quick_push (bb);
                else
                  bitmap_clear_bit (&bb_tail, bb->index);
              }
          /* Find the head of each tail.  */
          while (!vec.is_empty ())
            {
              basic_block tbb = vec.pop ();

              if (!bitmap_bit_p (&bb_tail, tbb->index))
                continue;

              while (single_succ_p (tbb))
                {
                  tbb = single_succ (tbb);
                  bitmap_clear_bit (&bb_tail, tbb->index);
                }
            }
          /* Now duplicate the tails.  */
          if (!bitmap_empty_p (&bb_tail))
            FOR_EACH_BB_REVERSE_FN (bb, cfun)
              {
                basic_block copy_bb, tbb;
                rtx_insn *insert_point;
                int eflags;

                if (!bitmap_clear_bit (&bb_tail, bb->index))
                  continue;

                /* Create a copy of BB, instructions and all, for
                   use on paths that don't need a prologue.
                   Ideal placement of the copy is on a fall-thru edge
                   or after a block that would jump to the copy.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  if (!bitmap_bit_p (bb_flags, e->src->index)
                      && single_succ_p (e->src))
                    break;
                if (e)
                  {
                    /* Make sure we insert after any barriers.  */
                    rtx_insn *end = get_last_bb_insn (e->src);
                    copy_bb = create_basic_block (NEXT_INSN (end),
                                                  NULL_RTX, e->src);
                    BB_COPY_PARTITION (copy_bb, e->src);
                  }
                else
                  {
                    /* Otherwise put the copy at the end of the function.  */
                    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
                                                  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
                    BB_COPY_PARTITION (copy_bb, bb);
                  }

                insert_point = emit_note_after (NOTE_INSN_DELETED,
                                                BB_END (copy_bb));
                emit_barrier_after (BB_END (copy_bb));

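                /* Duplicate the whole chain of tail blocks that starts
                   at BB, splitting COPY_BB after each duplicated block
                   so every block in the chain gets a copy of its own.  */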
                tbb = bb;
                while (1)
                  {
                    dup_block_and_redirect (tbb, copy_bb, insert_point,
                                            bb_flags);
                    tbb = single_succ (tbb);
                    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
                      break;
                    e = split_block (copy_bb, PREV_INSN (insert_point));
                    copy_bb = e->dest;
                  }

                /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
                   We have yet to add a simple_return to the tails,
                   as we'd like to first convert_jumps_to_returns in
                   case the block is no longer used after that.  */
                eflags = EDGE_FAKE;
                if (CALL_P (PREV_INSN (insert_point))
                    && SIBLING_CALL_P (PREV_INSN (insert_point)))
                  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
                make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
                                       eflags);

                /* verify_flow_info doesn't like a note after a
                   sibling call.  */
                delete_insn (insert_point);
                if (bitmap_empty_p (&bb_tail))
                  break;
              }
        }

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}

/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
                               vec<edge> *unconverted_simple_returns,
                               rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
         (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
        {
          edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
          if (LABEL_P (BB_HEAD (e->src))
              && !bitmap_bit_p (&bb_flags, e->src->index)
              && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
            *unconverted_simple_returns
              = convert_jumps_to_returns (e->src, true,
                                          *unconverted_simple_returns);
        }
    }

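  /* If the block that falls through to the exit needs no prologue,
     emit a return at its end via emit_return_for_exit, record the new
     jump in *RETURNJUMP, and clear EXIT_FALLTHRU_EDGE.  */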
  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}

/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
                          bitmap_head bb_flags, rtx_insn *returnjump,
                          vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
         epilogue.  */
      if (returnjump != NULL_RTX
          && JUMP_LABEL (returnjump) == simple_return_rtx)
        {
          e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
          if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
            simple_return_block_hot = e->dest;
          else
            simple_return_block_cold = e->dest;
        }

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if (EDGE_COUNT (e->src->preds) != 0
            && (e->flags & EDGE_FAKE) != 0
            && !bitmap_bit_p (&bb_flags, e->src->index))
          {
            if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
              pending_edge_hot = e;
            else
              pending_edge_cold = e;
          }

      /* Save a pointer to the exit's predecessor BB for use in
         inserting new BBs at the end of the function.  Do this
         after the call to split_block above which may split
         the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
        {
          basic_block *pdest_bb;
          edge pending;

          if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
            {
              pdest_bb = &simple_return_block_hot;
              pending = pending_edge_hot;
            }
          else
            {
              pdest_bb = &simple_return_block_cold;
              pending = pending_edge_cold;
            }

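          /* Pick or create the simple_return block for this partition:
             reuse a pending empty tail block if there is one, otherwise
             create a fresh block holding a simple_return at the end of
             the function, then redirect the unconverted edge to it.  */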
          if (*pdest_bb == NULL && pending != NULL)
            {
              emit_return_into_block (true, pending->src);
              pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
              *pdest_bb = pending->src;
            }
          else if (*pdest_bb == NULL)
            {
              basic_block bb;
              rtx_insn *start;

              bb = create_basic_block (NULL, NULL, exit_pred);
              BB_COPY_PARTITION (bb, e->src);
              start = emit_jump_insn_after (gen_simple_return (),
                                            BB_END (bb));
              JUMP_LABEL (start) = simple_return_rtx;
              emit_barrier_after (start);

              *pdest_bb = bb;
              make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
            }
          redirect_edge_and_branch_force (e, *pdest_bb);
        }
      unconverted_simple_returns.release ();
    }

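  /* When shrink-wrapping actually moved the prologue, any tail block
     that still reaches the exit through a fake edge and needs no
     prologue gets a simple return emitted at its end here.  */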
  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if (EDGE_COUNT (e->src->preds) != 0
            && (e->flags & EDGE_FAKE) != 0
            && !bitmap_bit_p (&bb_flags, e->src->index))
          {
            emit_return_into_block (true, e->src);
            e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
          }
    }
}

#endif