/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl-error.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "stringpool.h"
#include "flags.h"
#include "except.h"
#include "hard-reg-set.h"
#include "function.h"
#include "rtl.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "libfuncs.h"
#include "regs.h"
#include "recog.h"
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
#include "target.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "gimplify.h"
#include "tree-pass.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "df.h"
#include "params.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
			   REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
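
/* For instance (a hypothetical RTL sketch, not taken from any dump), an
   insn such as

     (set (reg r0) (mem (plus (reg sp) (const_int 8))))

   uses the stack pointer, which try_shrink_wrapping below puts into
   SET_UP_BY_PROLOGUE, so the function above reports that it requires a
   frame; a plain copy between call-clobbered registers that the
   prologue neither uses nor sets up does not.  */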

/* See whether BB has a single live edge, i.e. a unique successor whose
   live-in set uses some register in [REGNO, END_REGNO).  Return the
   live edge if its destination block has one or two predecessors.
   Otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest has exactly two predecessors, we can create a
     new block on the edge to make it meet the single-predecessor
     requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}
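
/* A hypothetical sketch of the CFG shapes involved: if BB has
   successors B1 and B2 and only B1's live-in set mentions the
   register, the unique live edge is BB->B1; if both successors mention
   it, no single edge qualifies and NULL is returned.  The
   at-most-two-predecessors restriction on the destination is what lets
   move_insn_for_shrink_wrap below split the edge when needed.  */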

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the sets of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register,
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

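  /* For example (hypothetical RTXes), (plus:SI (reg:SI 1) (const_int 8))
     has a single inner register and is accepted by the walk below,
     whereas (plus:SI (reg:SI 1) (reg:SI 2)) is rejected, because the
     overlap checks can track only one source register.  */
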
  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once per function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not include the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all,
     so skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
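
/* A hypothetical example of a sinkable insn: in the entry block,

     (set (reg bx) (reg di))

   copies an incoming argument register to a call-saved register.  If
   neither register is referenced again in the entry block and the
   destination is live into only one successor, the copy can be moved
   down into that successor, off the paths that never need it.  */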

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
	 to sink copies from parameter registers to callee-saved registers out
	 of the entry block.  copyprop_hardreg_forward_bb_without_debug_insn
	 is called first to remove some dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p))
      {
	/* Add all defined registers to DEFS.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (defs, REGNO (x));
	  }

	/* Add all used registers to USES.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (uses, REGNO (x));
	  }
      }
}
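
/* As a source-level illustration (hypothetical C, not from this file):

     long
     f (long x, long y)
     {
       if (x == 0)
	 return y;
       return heavy_work (x, y);
     }

   A copy of the incoming X into a call-saved register, emitted at the
   top of the function, can be sunk into the branch that calls
   heavy_work, leaving the early-return path with no need for the
   prologue at all.  */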

/* Create a copy of BB instructions and insert at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
			bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
	       bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
	int freq = EDGE_FREQUENCY (e);
	copy_bb->count += e->count;
	copy_bb->frequency += EDGE_FREQUENCY (e);
	e->dest->count -= e->count;
	if (e->dest->count < 0)
	  e->dest->count = 0;
	e->dest->frequency -= freq;
	if (e->dest->frequency < 0)
	  e->dest->frequency = 0;
	redirect_edge_and_branch_force (e, copy_bb);
	continue;
      }
    else
      ei_next (&ei);
}


/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is a predecessor of e->src that doesn't
		   need a prologue and the edge is complex, we might
		   not be able to redirect the branch to a copy of
		   e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; they can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) that is live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED,
							  BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
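
/* To sketch the overall effect (hypothetical CFG): given

     entry -> A -> B -> exit
	       \-> C -> exit

   where only B requires the prologue, the candidate edge A->B is
   chosen and the prologue will be emitted there, while shared tail
   blocks are duplicated so the path through C reaches the exit without
   ever executing the prologue; such paths later receive simple_return
   insns via the two functions below.  */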

/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
	 (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
	      = convert_jumps_to_returns (e->src, true,
					  *unconverted_simple_returns);
	}
    }

  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}
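
/* For example (hypothetical), a block holding nothing but a label and
   a jump to the exit can have its incoming conditional jumps rewritten
   into conditional simple_returns by convert_jumps_to_returns; any
   jump that cannot be rewritten (say, because redirecting the branch
   fails on the target) is collected in UNCONVERTED_SIMPLE_RETURNS and
   handled by convert_to_simple_return below.  */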

/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      basic_block bb;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      rtx_jump_insn *start = emit_jump_insn_after (gen_simple_return (),
							   BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}
1032 }