gcc/shrink-wrap.c
1 /* Shrink-wrapping related optimizations.
2 Copyright (C) 1987-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file handles shrink-wrapping related optimizations. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl-error.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "inchash.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "varasm.h"
37 #include "stringpool.h"
38 #include "flags.h"
39 #include "except.h"
40 #include "hard-reg-set.h"
41 #include "function.h"
42 #include "hashtab.h"
43 #include "rtl.h"
44 #include "statistics.h"
45 #include "insn-config.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "calls.h"
50 #include "emit-rtl.h"
51 #include "stmt.h"
52 #include "expr.h"
53 #include "insn-codes.h"
54 #include "optabs.h"
55 #include "libfuncs.h"
56 #include "regs.h"
57 #include "recog.h"
58 #include "output.h"
59 #include "tm_p.h"
60 #include "langhooks.h"
61 #include "target.h"
62 #include "common/common-target.h"
63 #include "gimple-expr.h"
64 #include "gimplify.h"
65 #include "tree-pass.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "basic-block.h"
71 #include "df.h"
72 #include "params.h"
73 #include "bb-reorder.h"
74 #include "shrink-wrap.h"
75 #include "regcprop.h"
76 #include "rtl-iter.h"
77
78
79 /* Return true if INSN requires the stack frame to be set up.
80 PROLOGUE_USED contains the hard registers used in the function
81 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
82 prologue to set up for the function. */
83 bool
84 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
85 HARD_REG_SET set_up_by_prologue)
86 {
87 df_ref def, use;
88 HARD_REG_SET hardregs;
89 unsigned regno;
90
91 if (CALL_P (insn))
92 return !SIBLING_CALL_P (insn);
93
94 /* We need a frame to get the unique CFA expected by the unwinder. */
95 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
96 return true;
97
98 CLEAR_HARD_REG_SET (hardregs);
99 FOR_EACH_INSN_DEF (def, insn)
100 {
101 rtx dreg = DF_REF_REG (def);
102
103 if (!REG_P (dreg))
104 continue;
105
106 add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
107 REGNO (dreg));
108 }
109 if (hard_reg_set_intersect_p (hardregs, prologue_used))
110 return true;
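/* Defining a call-saved register that is used anywhere in the function
also requires the frame: saving such registers is the prologue's job. */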
111 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
112 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
113 if (TEST_HARD_REG_BIT (hardregs, regno)
114 && df_regs_ever_live_p (regno))
115 return true;
116
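/* Likewise, a use of any register the prologue sets up (stack, frame or
PIC pointer, DRAP, ...) only has its expected value once the prologue
has run. */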
117 FOR_EACH_INSN_USE (use, insn)
118 {
119 rtx reg = DF_REF_REG (use);
120
121 if (!REG_P (reg))
122 continue;
123
124 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
125 REGNO (reg));
126 }
127 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
128 return true;
129
130 return false;
131 }
132
133 /* See whether there is a single live edge from BB whose destination
134 uses [REGNO, END_REGNO). Return the live edge if its destination bb
135 has one or two predecessors. Otherwise return NULL. */
136
137 static edge
138 live_edge_for_reg (basic_block bb, int regno, int end_regno)
139 {
140 edge e, live_edge;
141 edge_iterator ei;
142 bitmap live;
143 int i;
144
145 live_edge = NULL;
146 FOR_EACH_EDGE (e, ei, bb->succs)
147 {
148 live = df_get_live_in (e->dest);
149 for (i = regno; i < end_regno; i++)
150 if (REGNO_REG_SET_P (live, i))
151 {
152 if (live_edge && live_edge != e)
153 return NULL;
154 live_edge = e;
155 }
156 }
157
158 /* We can sometimes encounter dead code. Don't try to move it
159 into the exit block. */
160 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
161 return NULL;
162
163 /* Reject targets of abnormal edges. This is needed for correctness
164 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
165 exception edges even though it is generally treated as call-saved
166 for the majority of the compilation. Moving across abnormal edges
167 isn't going to be interesting for shrink-wrap usage anyway. */
168 if (live_edge->flags & EDGE_ABNORMAL)
169 return NULL;
170
171 /* When the destination of LIVE_EDGE has exactly two predecessors, we
172 can split the edge and create a new block on it to meet the requirement. */
173 if (EDGE_COUNT (live_edge->dest->preds) > 2)
174 return NULL;
175
176 return live_edge;
177 }
178
179 /* Try to move INSN from BB to a successor. Return true on success.
180 USES and DEFS are the sets of registers that are used and defined
181 after INSN in BB. SPLIT_P indicates whether a live edge from BB
182 has been split. */
183
184 static bool
185 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
186 const HARD_REG_SET uses,
187 const HARD_REG_SET defs,
188 bool *split_p)
189 {
190 rtx set, src, dest;
191 bitmap live_out, live_in, bb_uses, bb_defs;
192 unsigned int i, dregno, end_dregno;
193 unsigned int sregno = FIRST_PSEUDO_REGISTER;
194 unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
195 basic_block next_block;
196 edge live_edge;
197
198 /* Look for a simple register assignment. We don't use single_set here
199 because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
200 destinations. */
201 if (!INSN_P (insn))
202 return false;
203 set = PATTERN (insn);
204 if (GET_CODE (set) != SET)
205 return false;
206 src = SET_SRC (set);
207 dest = SET_DEST (set);
208
209 /* For the destination, we want only a register. Also disallow STACK
210 or FRAME related adjustments. They are likely part of the prologue,
211 so keep them in the entry block. */
212 if (!REG_P (dest)
213 || dest == stack_pointer_rtx
214 || dest == frame_pointer_rtx
215 || dest == hard_frame_pointer_rtx)
216 return false;
217
218 /* For the source, we want one of:
219 (1) A (non-overlapping) register,
220 (2) A constant, or
221 (3) An expression involving no more than one register.
222
223 That last point comes from the code following, which was originally
224 written to handle only register move operations, and still only handles
225 a single source register when checking for overlaps. Happily, the
226 same checks can be applied to expressions like (plus reg const). */
227
228 if (CONSTANT_P (src))
229 ;
230 else if (!REG_P (src))
231 {
232 rtx src_inner = NULL_RTX;
233
234 if (can_throw_internal (insn))
235 return false;
236
237 subrtx_var_iterator::array_type array;
238 FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
239 {
240 rtx x = *iter;
241 switch (GET_RTX_CLASS (GET_CODE (x)))
242 {
243 case RTX_CONST_OBJ:
244 case RTX_COMPARE:
245 case RTX_COMM_COMPARE:
246 case RTX_BIN_ARITH:
247 case RTX_COMM_ARITH:
248 case RTX_UNARY:
249 case RTX_TERNARY:
250 /* Constant or expression. Continue. */
251 break;
252
253 case RTX_OBJ:
254 case RTX_EXTRA:
255 switch (GET_CODE (x))
256 {
257 case UNSPEC:
258 case SUBREG:
259 case STRICT_LOW_PART:
260 case PC:
261 case LO_SUM:
262 /* Ok. Continue. */
263 break;
264
265 case REG:
266 /* Fail if we see a second inner register. */
267 if (src_inner != NULL)
268 return false;
269 src_inner = x;
270 break;
271
272 default:
273 return false;
274 }
275 break;
276
277 default:
278 return false;
279 }
280 }
281
282 if (src_inner != NULL)
283 src = src_inner;
284 }
285
286 /* Make sure that the source register isn't defined later in BB. */
287 if (REG_P (src))
288 {
289 sregno = REGNO (src);
290 end_sregno = END_REGNO (src);
291 if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
292 return false;
293 }
294
295 /* Make sure that the destination register isn't referenced later in BB. */
296 dregno = REGNO (dest);
297 end_dregno = END_REGNO (dest);
298 if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
299 || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
300 return false;
301
302 /* See whether there is a successor block to which we could move INSN. */
303 live_edge = live_edge_for_reg (bb, dregno, end_dregno);
304 if (!live_edge)
305 return false;
306
307 next_block = live_edge->dest;
308 /* Create a new basic block on the edge. */
309 if (EDGE_COUNT (next_block->preds) == 2)
310 {
311 /* split_edge for a block with only one successor is meaningless. */
312 if (EDGE_COUNT (bb->succs) == 1)
313 return false;
314
315 /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
316 if (!df_live)
317 return false;
318
319 basic_block old_dest = live_edge->dest;
320 next_block = split_edge (live_edge);
321
322 /* We create a new basic block. Call df_grow_bb_info to make sure
323 all data structures are allocated. */
324 df_grow_bb_info (df_live);
325
326 bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
327 df_get_live_in (old_dest));
328 df_set_bb_dirty (next_block);
329
330 /* We should not split more than once per function. */
331 if (*split_p)
332 return false;
333
334 *split_p = true;
335 }
336
337 /* At this point we are committed to moving INSN, but let's try to
338 move it as far as we can. */
339 do
340 {
341 live_out = df_get_live_out (bb);
342 live_in = df_get_live_in (next_block);
343 bb = next_block;
344
345 /* Check whether BB uses DEST or clobbers DEST. We need to add
346 INSN to BB if so. Either way, DEST is no longer live on entry,
347 except for any part that overlaps SRC (next loop). */
348 bb_uses = &DF_LR_BB_INFO (bb)->use;
349 bb_defs = &DF_LR_BB_INFO (bb)->def;
350 if (df_live)
351 {
352 for (i = dregno; i < end_dregno; i++)
353 {
354 if (*split_p
355 || REGNO_REG_SET_P (bb_uses, i)
356 || REGNO_REG_SET_P (bb_defs, i)
357 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
358 next_block = NULL;
359 CLEAR_REGNO_REG_SET (live_out, i);
360 CLEAR_REGNO_REG_SET (live_in, i);
361 }
362
363 /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
364 Either way, SRC is now live on entry. */
365 for (i = sregno; i < end_sregno; i++)
366 {
367 if (*split_p
368 || REGNO_REG_SET_P (bb_defs, i)
369 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
370 next_block = NULL;
371 SET_REGNO_REG_SET (live_out, i);
372 SET_REGNO_REG_SET (live_in, i);
373 }
374 }
375 else
376 {
377 /* DF_LR_BB_INFO (bb)->def does not include the DF_REF_PARTIAL and
378 DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
379 at -O1, just give up searching NEXT_BLOCK. */
380 next_block = NULL;
381 for (i = dregno; i < end_dregno; i++)
382 {
383 CLEAR_REGNO_REG_SET (live_out, i);
384 CLEAR_REGNO_REG_SET (live_in, i);
385 }
386
387 for (i = sregno; i < end_sregno; i++)
388 {
389 SET_REGNO_REG_SET (live_out, i);
390 SET_REGNO_REG_SET (live_in, i);
391 }
392 }
393
394 /* If we don't need to add the move to BB, look for a single
395 successor block. */
396 if (next_block)
397 {
398 live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
399 if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
400 break;
401 next_block = live_edge->dest;
402 }
403 }
404 while (next_block);
405
406 /* For the newly created basic block, there is no dataflow info at all,
407 so skip the following dataflow update and check. */
408 if (!(*split_p))
409 {
410 /* BB now defines DEST. It only uses the parts of DEST that overlap SRC
411 (next loop). */
412 for (i = dregno; i < end_dregno; i++)
413 {
414 CLEAR_REGNO_REG_SET (bb_uses, i);
415 SET_REGNO_REG_SET (bb_defs, i);
416 }
417
418 /* BB now uses SRC. */
419 for (i = sregno; i < end_sregno; i++)
420 SET_REGNO_REG_SET (bb_uses, i);
421 }
422
423 emit_insn_after (PATTERN (insn), bb_note (bb));
424 delete_insn (insn);
425 return true;
426 }
427
428 /* Look for register copies in the first block of the function, and move
429 them down into successor blocks if the register is used only on one
430 path. This exposes more opportunities for shrink-wrapping. These
431 kinds of sets often occur when incoming argument registers are moved
432 to call-saved registers because their values are live across one or
433 more calls during the function. */
434
435 void
436 prepare_shrink_wrap (basic_block entry_block)
437 {
438 rtx_insn *insn, *curr;
439 rtx x;
440 HARD_REG_SET uses, defs;
441 df_ref def, use;
442 bool split_p = false;
443
444 if (JUMP_P (BB_END (entry_block)))
445 {
446 /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap tries
447 to sink copies from parameter registers to callee-saved registers out of
448 the entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
449 first to remove dependences that would otherwise block that sinking. */
450 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
451 }
452
453 CLEAR_HARD_REG_SET (uses);
454 CLEAR_HARD_REG_SET (defs);
455 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
456 if (NONDEBUG_INSN_P (insn)
457 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
458 &split_p))
459 {
460 /* Add all defined registers to DEFS. */
461 FOR_EACH_INSN_DEF (def, insn)
462 {
463 x = DF_REF_REG (def);
464 if (REG_P (x) && HARD_REGISTER_P (x))
465 SET_HARD_REG_BIT (defs, REGNO (x));
466 }
467
468 /* Add all used registers to USES. */
469 FOR_EACH_INSN_USE (use, insn)
470 {
471 x = DF_REF_REG (use);
472 if (REG_P (x) && HARD_REGISTER_P (x))
473 SET_HARD_REG_BIT (uses, REGNO (x));
474 }
475 }
476 }
477
478 /* Create a copy of BB's instructions and insert them at BEFORE. Redirect
479 preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */
480 void
481 dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
482 bitmap_head *need_prologue)
483 {
484 edge_iterator ei;
485 edge e;
486 rtx_insn *insn = BB_END (bb);
487
488 /* We know BB has a single successor, so there is no need to copy a
489 simple jump at the end of BB. */
490 if (simplejump_p (insn))
491 insn = PREV_INSN (insn);
492
493 start_sequence ();
494 duplicate_insn_chain (BB_HEAD (bb), insn);
495 if (dump_file)
496 {
497 unsigned count = 0;
498 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
499 if (active_insn_p (insn))
500 ++count;
501 fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
502 bb->index, copy_bb->index, count);
503 }
504 insn = get_insns ();
505 end_sequence ();
506 emit_insn_before (insn, before);
507
508 /* Redirect all the paths that need no prologue into copy_bb. */
509 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
510 if (!bitmap_bit_p (need_prologue, e->src->index))
511 {
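/* Move this edge's execution count and frequency from the original
destination block over to the copy. */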
512 int freq = EDGE_FREQUENCY (e);
513 copy_bb->count += e->count;
514 copy_bb->frequency += EDGE_FREQUENCY (e);
515 e->dest->count -= e->count;
516 if (e->dest->count < 0)
517 e->dest->count = 0;
518 e->dest->frequency -= freq;
519 if (e->dest->frequency < 0)
520 e->dest->frequency = 0;
521 redirect_edge_and_branch_force (e, copy_bb);
522 continue;
523 }
524 else
525 ei_next (&ei);
526 }
527
528
529 /* Try to perform a kind of shrink-wrapping, making sure the
530 prologue/epilogue is emitted only around those parts of the
531 function that require it. */
532
533 void
534 try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
535 bitmap_head *bb_flags, rtx_insn *prologue_seq)
536 {
537 edge e;
538 edge_iterator ei;
539 bool nonempty_prologue = false;
540 unsigned max_grow_size;
541 rtx_insn *seq;
542
543 for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
544 if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
545 {
546 nonempty_prologue = true;
547 break;
548 }
549
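/* Shrink-wrapping needs a non-empty prologue to move, a target
simple_return pattern, no __builtin_eh_return, and any profiling code
emitted before the prologue. */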
550 if (flag_shrink_wrap && HAVE_simple_return
551 && (targetm.profile_before_prologue () || !crtl->profile)
552 && nonempty_prologue && !crtl->calls_eh_return)
553 {
554 HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
555 struct hard_reg_set_container set_up_by_prologue;
556 rtx_insn *p_insn;
557 vec<basic_block> vec;
558 basic_block bb;
559 bitmap_head bb_antic_flags;
560 bitmap_head bb_on_list;
561 bitmap_head bb_tail;
562
563 if (dump_file)
564 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
565
566 /* Compute the registers set and used in the prologue. */
567 CLEAR_HARD_REG_SET (prologue_clobbered);
568 CLEAR_HARD_REG_SET (prologue_used);
569 for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
570 {
571 HARD_REG_SET this_used;
572 if (!NONDEBUG_INSN_P (p_insn))
573 continue;
574
575 CLEAR_HARD_REG_SET (this_used);
576 note_uses (&PATTERN (p_insn), record_hard_reg_uses,
577 &this_used);
578 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
579 IOR_HARD_REG_SET (prologue_used, this_used);
580 note_stores (PATTERN (p_insn), record_hard_reg_sets,
581 &prologue_clobbered);
582 }
583
584 prepare_shrink_wrap ((*entry_edge)->dest);
585
586 bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
587 bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
588 bitmap_initialize (&bb_tail, &bitmap_default_obstack);
589
590 /* Find the set of basic blocks that require a stack frame,
591 and blocks that are too big to be duplicated. */
592
593 vec.create (n_basic_blocks_for_fn (cfun));
594
595 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
596 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
597 STACK_POINTER_REGNUM);
598 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
599 if (frame_pointer_needed)
600 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
601 HARD_FRAME_POINTER_REGNUM);
602 if (pic_offset_table_rtx
603 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
604 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
605 PIC_OFFSET_TABLE_REGNUM);
606 if (crtl->drap_reg)
607 add_to_hard_reg_set (&set_up_by_prologue.set,
608 GET_MODE (crtl->drap_reg),
609 REGNO (crtl->drap_reg));
610 if (targetm.set_up_by_prologue)
611 targetm.set_up_by_prologue (&set_up_by_prologue);
612
613 /* We don't use a different max size depending on
614 optimize_bb_for_speed_p because increasing shrink-wrapping
615 opportunities by duplicating tail blocks can actually result
616 in an overall decrease in code size. */
617 max_grow_size = get_uncond_jump_length ();
618 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
619
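/* Scan all blocks: a block containing an insn that needs the frame is
recorded in BB_FLAGS (and pushed onto VEC for the reachability walk
below); a block whose instructions exceed MAX_GROW_SIZE is recorded in
BB_ON_LIST as too big to duplicate. */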
620 FOR_EACH_BB_FN (bb, cfun)
621 {
622 rtx_insn *insn;
623 unsigned size = 0;
624
625 FOR_BB_INSNS (bb, insn)
626 if (NONDEBUG_INSN_P (insn))
627 {
628 if (requires_stack_frame_p (insn, prologue_used,
629 set_up_by_prologue.set))
630 {
631 if (bb == (*entry_edge)->dest)
632 goto fail_shrinkwrap;
633 bitmap_set_bit (bb_flags, bb->index);
634 vec.quick_push (bb);
635 break;
636 }
637 else if (size <= max_grow_size)
638 {
639 size += get_attr_min_length (insn);
640 if (size > max_grow_size)
641 bitmap_set_bit (&bb_on_list, bb->index);
642 }
643 }
644 }
645
646 /* Blocks that really need a prologue, or are too big for tails. */
647 bitmap_ior_into (&bb_on_list, bb_flags);
648
649 /* For every basic block that needs a prologue, mark all blocks
650 reachable from it, so as to ensure they are also seen as
651 requiring a prologue. */
652 while (!vec.is_empty ())
653 {
654 basic_block tmp_bb = vec.pop ();
655
656 FOR_EACH_EDGE (e, ei, tmp_bb->succs)
657 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
658 && bitmap_set_bit (bb_flags, e->dest->index))
659 vec.quick_push (e->dest);
660 }
661
662 /* Find the set of basic blocks that need no prologue, have a
663 single successor, can be duplicated, meet a max size
664 requirement, and go to the exit via like blocks. */
665 vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
666 while (!vec.is_empty ())
667 {
668 basic_block tmp_bb = vec.pop ();
669
670 FOR_EACH_EDGE (e, ei, tmp_bb->preds)
671 if (single_succ_p (e->src)
672 && !bitmap_bit_p (&bb_on_list, e->src->index)
673 && can_duplicate_block_p (e->src))
674 {
675 edge pe;
676 edge_iterator pei;
677
678 /* If there is a predecessor of e->src which doesn't
679 need a prologue and the edge is complex,
680 we might not be able to redirect the branch
681 to a copy of e->src. */
682 FOR_EACH_EDGE (pe, pei, e->src->preds)
683 if ((pe->flags & EDGE_COMPLEX) != 0
684 && !bitmap_bit_p (bb_flags, pe->src->index))
685 break;
686 if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
687 vec.quick_push (e->src);
688 }
689 }
690
691 /* Now walk backwards from every block that is marked as needing
692 a prologue to compute the bb_antic_flags bitmap. Exclude
693 tail blocks; they can be duplicated to be used on paths not
694 needing a prologue. */
695 bitmap_clear (&bb_on_list);
696 bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
697 FOR_EACH_BB_FN (bb, cfun)
698 {
699 if (!bitmap_bit_p (&bb_antic_flags, bb->index))
700 continue;
701 FOR_EACH_EDGE (e, ei, bb->preds)
702 if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
703 && bitmap_set_bit (&bb_on_list, e->src->index))
704 vec.quick_push (e->src);
705 }
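/* Propagate the ANTIC property backwards: a block becomes ANTIC once all
of its successors are ANTIC, and its predecessors are then queued for
another look. */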
706 while (!vec.is_empty ())
707 {
708 basic_block tmp_bb = vec.pop ();
709 bool all_set = true;
710
711 bitmap_clear_bit (&bb_on_list, tmp_bb->index);
712 FOR_EACH_EDGE (e, ei, tmp_bb->succs)
713 if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
714 {
715 all_set = false;
716 break;
717 }
718
719 if (all_set)
720 {
721 bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
722 FOR_EACH_EDGE (e, ei, tmp_bb->preds)
723 if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
724 && bitmap_set_bit (&bb_on_list, e->src->index))
725 vec.quick_push (e->src);
726 }
727 }
728 /* Find exactly one edge that leads to a block in ANTIC from
729 a block that isn't. */
730 if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
731 FOR_EACH_BB_FN (bb, cfun)
732 {
733 if (!bitmap_bit_p (&bb_antic_flags, bb->index))
734 continue;
735 FOR_EACH_EDGE (e, ei, bb->preds)
736 if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
737 {
738 if (*entry_edge != orig_entry_edge)
739 {
740 *entry_edge = orig_entry_edge;
741 if (dump_file)
742 fprintf (dump_file, "More than one candidate edge.\n");
743 goto fail_shrinkwrap;
744 }
745 if (dump_file)
746 fprintf (dump_file, "Found candidate edge for "
747 "shrink-wrapping, %d->%d.\n", e->src->index,
748 e->dest->index);
749 *entry_edge = e;
750 }
751 }
752
753 if (*entry_edge != orig_entry_edge)
754 {
755 /* Test whether the prologue is known to clobber any register
756 (other than FP or SP) which is live on the edge. */
757 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
758 if (frame_pointer_needed)
759 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
760 REG_SET_TO_HARD_REG_SET (live_on_edge,
761 df_get_live_in ((*entry_edge)->dest));
762 if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
763 {
764 *entry_edge = orig_entry_edge;
765 if (dump_file)
766 fprintf (dump_file,
767 "Shrink-wrapping aborted due to clobber.\n");
768 }
769 }
770 if (*entry_edge != orig_entry_edge)
771 {
772 crtl->shrink_wrapped = true;
773 if (dump_file)
774 fprintf (dump_file, "Performing shrink-wrapping.\n");
775
776 /* Find tail blocks reachable from both blocks needing a
777 prologue and blocks not needing a prologue. */
778 if (!bitmap_empty_p (&bb_tail))
779 FOR_EACH_BB_FN (bb, cfun)
780 {
781 bool some_pro, some_no_pro;
782 if (!bitmap_bit_p (&bb_tail, bb->index))
783 continue;
784 some_pro = some_no_pro = false;
785 FOR_EACH_EDGE (e, ei, bb->preds)
786 {
787 if (bitmap_bit_p (bb_flags, e->src->index))
788 some_pro = true;
789 else
790 some_no_pro = true;
791 }
792 if (some_pro && some_no_pro)
793 vec.quick_push (bb);
794 else
795 bitmap_clear_bit (&bb_tail, bb->index);
796 }
797 /* Find the head of each tail. */
798 while (!vec.is_empty ())
799 {
800 basic_block tbb = vec.pop ();
801
802 if (!bitmap_bit_p (&bb_tail, tbb->index))
803 continue;
804
805 while (single_succ_p (tbb))
806 {
807 tbb = single_succ (tbb);
808 bitmap_clear_bit (&bb_tail, tbb->index);
809 }
810 }
811 /* Now duplicate the tails. */
812 if (!bitmap_empty_p (&bb_tail))
813 FOR_EACH_BB_REVERSE_FN (bb, cfun)
814 {
815 basic_block copy_bb, tbb;
816 rtx_insn *insert_point;
817 int eflags;
818
819 if (!bitmap_clear_bit (&bb_tail, bb->index))
820 continue;
821
822 /* Create a copy of BB, instructions and all, for
823 use on paths that don't need a prologue.
824 Ideal placement of the copy is on a fall-thru edge
825 or after a block that would jump to the copy. */
826 FOR_EACH_EDGE (e, ei, bb->preds)
827 if (!bitmap_bit_p (bb_flags, e->src->index)
828 && single_succ_p (e->src))
829 break;
830 if (e)
831 {
832 /* Make sure we insert after any barriers. */
833 rtx_insn *end = get_last_bb_insn (e->src);
834 copy_bb = create_basic_block (NEXT_INSN (end),
835 NULL_RTX, e->src);
836 BB_COPY_PARTITION (copy_bb, e->src);
837 }
838 else
839 {
840 /* Otherwise put the copy at the end of the function. */
841 copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
842 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
843 BB_COPY_PARTITION (copy_bb, bb);
844 }
845
846 insert_point = emit_note_after (NOTE_INSN_DELETED,
847 BB_END (copy_bb));
848 emit_barrier_after (BB_END (copy_bb));
849
850 tbb = bb;
851 while (1)
852 {
853 dup_block_and_redirect (tbb, copy_bb, insert_point,
854 bb_flags);
855 tbb = single_succ (tbb);
856 if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
857 break;
858 e = split_block (copy_bb, PREV_INSN (insert_point));
859 copy_bb = e->dest;
860 }
861
862 /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
863 We have yet to add a simple_return to the tails,
864 as we'd like to first convert_jumps_to_returns in
865 case the block is no longer used after that. */
866 eflags = EDGE_FAKE;
867 if (CALL_P (PREV_INSN (insert_point))
868 && SIBLING_CALL_P (PREV_INSN (insert_point)))
869 eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
870 make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
871 eflags);
872
873 /* verify_flow_info doesn't like a note after a
874 sibling call. */
875 delete_insn (insert_point);
876 if (bitmap_empty_p (&bb_tail))
877 break;
878 }
879 }
880
881 fail_shrinkwrap:
882 bitmap_clear (&bb_tail);
883 bitmap_clear (&bb_antic_flags);
884 bitmap_clear (&bb_on_list);
885 vec.release ();
886 }
887 }
888
889 /* If we're allowed to generate a simple return instruction, then by
890 definition we don't need a full epilogue. If the last basic
891 block before the exit block does not contain active instructions,
892 examine its predecessors and try to emit (conditional) return
893 instructions. */
894
895 edge
896 get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
897 vec<edge> *unconverted_simple_returns,
898 rtx_insn **returnjump)
899 {
900 if (optimize)
901 {
902 unsigned i, last;
903
904 /* convert_jumps_to_returns may add to preds of the exit block
905 (but won't remove). Stop at end of current preds. */
906 last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
907 for (i = 0; i < last; i++)
908 {
909 edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
910 if (LABEL_P (BB_HEAD (e->src))
911 && !bitmap_bit_p (&bb_flags, e->src->index)
912 && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
913 *unconverted_simple_returns
914 = convert_jumps_to_returns (e->src, true,
915 *unconverted_simple_returns);
916 }
917 }
918
919 if (exit_fallthru_edge != NULL
920 && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
921 && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
922 {
923 basic_block last_bb;
924
925 last_bb = emit_return_for_exit (exit_fallthru_edge, true);
926 *returnjump = BB_END (last_bb);
927 exit_fallthru_edge = NULL;
928 }
929 return exit_fallthru_edge;
930 }
931
932 /* If there were branches to an empty LAST_BB which we tried to
933 convert to conditional simple_returns, but couldn't for some
934 reason, create a block to hold a simple_return insn and redirect
935 those remaining edges. */
936
937 void
938 convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
939 bitmap_head bb_flags, rtx_insn *returnjump,
940 vec<edge> unconverted_simple_returns)
941 {
942 edge e;
943 edge_iterator ei;
944
945 if (!unconverted_simple_returns.is_empty ())
946 {
947 basic_block simple_return_block_hot = NULL;
948 basic_block simple_return_block_cold = NULL;
949 edge pending_edge_hot = NULL;
950 edge pending_edge_cold = NULL;
951 basic_block exit_pred;
952 int i;
953
954 gcc_assert (entry_edge != orig_entry_edge);
955
956 /* See if we can reuse the last insn that was emitted for the
957 epilogue. */
958 if (returnjump != NULL_RTX
959 && JUMP_LABEL (returnjump) == simple_return_rtx)
960 {
961 e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
962 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
963 simple_return_block_hot = e->dest;
964 else
965 simple_return_block_cold = e->dest;
966 }
967
968 /* Also check returns we might need to add to tail blocks. */
969 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
970 if (EDGE_COUNT (e->src->preds) != 0
971 && (e->flags & EDGE_FAKE) != 0
972 && !bitmap_bit_p (&bb_flags, e->src->index))
973 {
974 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
975 pending_edge_hot = e;
976 else
977 pending_edge_cold = e;
978 }
979
980 /* Save a pointer to the exit's predecessor BB for use in
981 inserting new BBs at the end of the function. Do this
982 after the call to split_block above which may split
983 the original exit pred. */
984 exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
985
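/* Redirect each remaining unconverted edge to a block that does nothing
but a simple_return, creating at most one such block per hot/cold
partition. */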
986 FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
987 {
988 basic_block *pdest_bb;
989 edge pending;
990
991 if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
992 {
993 pdest_bb = &simple_return_block_hot;
994 pending = pending_edge_hot;
995 }
996 else
997 {
998 pdest_bb = &simple_return_block_cold;
999 pending = pending_edge_cold;
1000 }
1001
1002 if (*pdest_bb == NULL && pending != NULL)
1003 {
1004 emit_return_into_block (true, pending->src);
1005 pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
1006 *pdest_bb = pending->src;
1007 }
1008 else if (*pdest_bb == NULL)
1009 {
1010 basic_block bb;
1011 rtx_insn *start;
1012
1013 bb = create_basic_block (NULL, NULL, exit_pred);
1014 BB_COPY_PARTITION (bb, e->src);
1015 start = emit_jump_insn_after (gen_simple_return (),
1016 BB_END (bb));
1017 JUMP_LABEL (start) = simple_return_rtx;
1018 emit_barrier_after (start);
1019
1020 *pdest_bb = bb;
1021 make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1022 }
1023 redirect_edge_and_branch_force (e, *pdest_bb);
1024 }
1025 unconverted_simple_returns.release ();
1026 }
1027
1028 if (entry_edge != orig_entry_edge)
1029 {
1030 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1031 if (EDGE_COUNT (e->src->preds) != 0
1032 && (e->flags & EDGE_FAKE) != 0
1033 && !bitmap_bit_p (&bb_flags, e->src->index))
1034 {
1035 emit_return_into_block (true, e->src);
1036 e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
1037 }
1038 }
1039 }