gcc/shrink-wrap.c
/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl-error.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "stringpool.h"
#include "flags.h"
#include "except.h"
#include "hard-reg-set.h"
#include "function.h"
#include "rtl.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "libfuncs.h"
#include "regs.h"
#include "recog.h"
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
#include "target.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "gimplify.h"
#include "tree-pass.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "df.h"
#include "params.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
                        HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
                           REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
        && df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
                           REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
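
/* For illustration (this sketch is not part of the original source,
   and the register names are hypothetical): a non-sibling call always
   needs the frame, while an insn like (set (reg:SI bx) (reg:SI ax))
   needs it only if BX was used by the prologue, if BX is a call-saved
   register that is live somewhere in the function, or if AX is one of
   the registers the prologue sets up (stack pointer, frame pointer,
   and so on).  */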

/* See whether BB has a single live edge out of it whose destination
   uses [REGNO, END_REGNO).  Return the live edge if its destination
   bb has one or two predecessors.  Otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
        if (REGNO_REG_SET_P (live, i))
          {
            if (live_edge && live_edge != e)
              return NULL;
            live_edge = e;
          }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}
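
/* For example (a sketch, not from the original source): if BB ends in
   a conditional jump to B1 with a fall-through to B2, and [REGNO,
   END_REGNO) is live on entry to B1 only, the edge to B1 is returned.
   A destination with two predecessors is still acceptable because
   move_insn_for_shrink_wrap below can split the edge, creating a
   fresh single-predecessor block to hold the moved instruction.  */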

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
                           const HARD_REG_SET uses,
                           const HARD_REG_SET defs,
                           bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
       (1) A (non-overlapping) register,
       (2) A constant, or
       (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
        return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
        {
          rtx x = *iter;
          switch (GET_RTX_CLASS (GET_CODE (x)))
            {
            case RTX_CONST_OBJ:
            case RTX_COMPARE:
            case RTX_COMM_COMPARE:
            case RTX_BIN_ARITH:
            case RTX_COMM_ARITH:
            case RTX_UNARY:
            case RTX_TERNARY:
              /* Constant or expression.  Continue.  */
              break;

            case RTX_OBJ:
            case RTX_EXTRA:
              switch (GET_CODE (x))
                {
                case UNSPEC:
                case SUBREG:
                case STRICT_LOW_PART:
                case PC:
                case LO_SUM:
                  /* Ok.  Continue.  */
                  break;

                case REG:
                  /* Fail if we see a second inner register.  */
                  if (src_inner != NULL)
                    return false;
                  src_inner = x;
                  break;

                default:
                  return false;
                }
              break;

            default:
              return false;
            }
        }

      if (src_inner != NULL)
        src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
        return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
        return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
        return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
         all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
                  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once per function.  */
      if (*split_p)
        return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
         INSN to BB if so.  Either way, DEST is no longer live on entry,
         except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
        {
          for (i = dregno; i < end_dregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_uses, i)
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
             Either way, SRC is now live on entry.  */
          for (i = sregno; i < end_sregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }
      else
        {
          /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
             DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
             at -O1, just give up searching NEXT_BLOCK.  */
          next_block = NULL;
          for (i = dregno; i < end_dregno; i++)
            {
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          for (i = sregno; i < end_sregno; i++)
            {
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }

      /* If we don't need to add the move to BB, look for a single
         successor block.  */
      if (next_block)
        {
          live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
          if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
            break;
          next_block = live_edge->dest;
        }
    }
  while (next_block);

  /* The newly created basic block has no dataflow info at all, so
     skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
         (next loop).  */
      for (i = dregno; i < end_dregno; i++)
        {
          CLEAR_REGNO_REG_SET (bb_uses, i);
          SET_REGNO_REG_SET (bb_defs, i);
        }

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
        SET_REGNO_REG_SET (bb_uses, i);
    }

  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
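
/* Schematically (a hypothetical example, not from the original
   source), sinking a copy "bx = di" transforms

     entry:  bx = di                     entry:  if (cond) goto bb2
             if (cond) goto bb2    ->    bb1:    bx = di
     bb1:    ... uses bx ...                     ... uses bx ...
     bb2:    ... no use of bx ...        bb2:    ... no use of bx ...

   into the form on the right, after which only the path through bb1
   still forces the prologue to save the call-saved register BX.  */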

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap
         tries to sink copies from parameter registers to callee-saved
         registers out of the entry block.
         copyprop_hardreg_forward_bb_without_debug_insn is called first
         to break some of the dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
        && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
                                       &split_p))
      {
        /* Add all defined registers to DEFS.  */
        FOR_EACH_INSN_DEF (def, insn)
          {
            x = DF_REF_REG (def);
            if (REG_P (x) && HARD_REGISTER_P (x))
              SET_HARD_REG_BIT (defs, REGNO (x));
          }

        /* Add all used registers to USES.  */
        FOR_EACH_INSN_USE (use, insn)
          {
            x = DF_REF_REG (use);
            if (REG_P (x) && HARD_REGISTER_P (x))
              SET_HARD_REG_BIT (uses, REGNO (x));
          }
      }
}
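
/* A typical input (hypothetical, for illustration only): on x86_64,
   GCC often emits a "movq %rdi, %rbx" in the entry block to keep an
   argument live across a call.  Sinking that copy toward the block
   containing the call lets the entry block, and any early-exit path,
   run without the prologue that saving RBX would otherwise require.  */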

/* Create a copy of BB instructions and insert at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
                        bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        if (active_insn_p (insn))
          ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
               bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
        int freq = EDGE_FREQUENCY (e);
        copy_bb->count += e->count;
        copy_bb->frequency += EDGE_FREQUENCY (e);
        e->dest->count -= e->count;
        if (e->dest->count < 0)
          e->dest->count = 0;
        e->dest->frequency -= freq;
        if (e->dest->frequency < 0)
          e->dest->frequency = 0;
        redirect_edge_and_branch_force (e, copy_bb);
        continue;
      }
    else
      ei_next (&ei);
}
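
/* This helper is used by try_shrink_wrapping below when it duplicates
   tail blocks: the copy collects the predecessors that do not need a
   prologue, while the original block keeps those that do, and the
   execution counts and frequencies are transferred accordingly.  */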


/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
                     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
        nonempty_prologue = true;
        break;
      }

  if (SHRINK_WRAPPING_ENABLED
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
        fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
        {
          HARD_REG_SET this_used;
          if (!NONDEBUG_INSN_P (p_insn))
            continue;

          CLEAR_HARD_REG_SET (this_used);
          note_uses (&PATTERN (p_insn), record_hard_reg_uses,
                     &this_used);
          AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
          IOR_HARD_REG_SET (prologue_used, this_used);
          note_stores (PATTERN (p_insn), record_hard_reg_sets,
                       &prologue_clobbered);
        }

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
         and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                           STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
        add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                             HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
          && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
        add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                             PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
        add_to_hard_reg_set (&set_up_by_prologue.set,
                             GET_MODE (crtl->drap_reg),
                             REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
        targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
         optimize_bb_for_speed_p because increasing shrink-wrapping
         opportunities by duplicating tail blocks can actually result
         in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
        {
          rtx_insn *insn;
          unsigned size = 0;

          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn))
              {
                if (requires_stack_frame_p (insn, prologue_used,
                                            set_up_by_prologue.set))
                  {
                    if (bb == (*entry_edge)->dest)
                      goto fail_shrinkwrap;
                    bitmap_set_bit (bb_flags, bb->index);
                    vec.quick_push (bb);
                    break;
                  }
                else if (size <= max_grow_size)
                  {
                    size += get_attr_min_length (insn);
                    if (size > max_grow_size)
                      bitmap_set_bit (&bb_on_list, bb->index);
                  }
              }
        }

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
         reachable from it, so as to ensure they are also seen as
         requiring a prologue.  */
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();

          FOR_EACH_EDGE (e, ei, tmp_bb->succs)
            if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
                && bitmap_set_bit (bb_flags, e->dest->index))
              vec.quick_push (e->dest);
        }

      /* Find the set of basic blocks that need no prologue, have a
         single successor, can be duplicated, meet a max size
         requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();

          FOR_EACH_EDGE (e, ei, tmp_bb->preds)
            if (single_succ_p (e->src)
                && !bitmap_bit_p (&bb_on_list, e->src->index)
                && can_duplicate_block_p (e->src))
              {
                edge pe;
                edge_iterator pei;

                /* If there is a predecessor of e->src which doesn't
                   need a prologue and the edge is complex, we might
                   not be able to redirect the branch to a copy of
                   e->src.  */
                FOR_EACH_EDGE (pe, pei, e->src->preds)
                  if ((pe->flags & EDGE_COMPLEX) != 0
                      && !bitmap_bit_p (bb_flags, pe->src->index))
                    break;
                if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
                  vec.quick_push (e->src);
              }
        }

      /* Now walk backwards from every block that is marked as needing
         a prologue to compute the bb_antic_flags bitmap.  Exclude
         tail blocks; they can be duplicated to be used on paths not
         needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
        {
          if (!bitmap_bit_p (&bb_antic_flags, bb->index))
            continue;
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                && bitmap_set_bit (&bb_on_list, e->src->index))
              vec.quick_push (e->src);
        }
      while (!vec.is_empty ())
        {
          basic_block tmp_bb = vec.pop ();
          bool all_set = true;

          bitmap_clear_bit (&bb_on_list, tmp_bb->index);
          FOR_EACH_EDGE (e, ei, tmp_bb->succs)
            if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
              {
                all_set = false;
                break;
              }

          if (all_set)
            {
              bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
              FOR_EACH_EDGE (e, ei, tmp_bb->preds)
                if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                    && bitmap_set_bit (&bb_on_list, e->src->index))
                  vec.quick_push (e->src);
            }
        }
      /* Find exactly one edge that leads to a block in ANTIC from
         a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
        FOR_EACH_BB_FN (bb, cfun)
          {
            if (!bitmap_bit_p (&bb_antic_flags, bb->index))
              continue;
            FOR_EACH_EDGE (e, ei, bb->preds)
              if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
                {
                  if (*entry_edge != orig_entry_edge)
                    {
                      *entry_edge = orig_entry_edge;
                      if (dump_file)
                        fprintf (dump_file, "More than one candidate edge.\n");
                      goto fail_shrinkwrap;
                    }
                  if (dump_file)
                    fprintf (dump_file, "Found candidate edge for "
                             "shrink-wrapping, %d->%d.\n", e->src->index,
                             e->dest->index);
                  *entry_edge = e;
                }
          }

      if (*entry_edge != orig_entry_edge)
        {
          /* Test whether the prologue is known to clobber any register
             (other than FP or SP) which is live on the edge.  */
          CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
          if (frame_pointer_needed)
            CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
          REG_SET_TO_HARD_REG_SET (live_on_edge,
                                   df_get_live_in ((*entry_edge)->dest));
          if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
            {
              *entry_edge = orig_entry_edge;
              if (dump_file)
                fprintf (dump_file,
                         "Shrink-wrapping aborted due to clobber.\n");
            }
        }
      if (*entry_edge != orig_entry_edge)
        {
          crtl->shrink_wrapped = true;
          if (dump_file)
            fprintf (dump_file, "Performing shrink-wrapping.\n");

          /* Find tail blocks reachable from both blocks needing a
             prologue and blocks not needing a prologue.  */
          if (!bitmap_empty_p (&bb_tail))
            FOR_EACH_BB_FN (bb, cfun)
              {
                bool some_pro, some_no_pro;
                if (!bitmap_bit_p (&bb_tail, bb->index))
                  continue;
                some_pro = some_no_pro = false;
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    if (bitmap_bit_p (bb_flags, e->src->index))
                      some_pro = true;
                    else
                      some_no_pro = true;
                  }
                if (some_pro && some_no_pro)
                  vec.quick_push (bb);
                else
                  bitmap_clear_bit (&bb_tail, bb->index);
              }
          /* Find the head of each tail.  */
          while (!vec.is_empty ())
            {
              basic_block tbb = vec.pop ();

              if (!bitmap_bit_p (&bb_tail, tbb->index))
                continue;

              while (single_succ_p (tbb))
                {
                  tbb = single_succ (tbb);
                  bitmap_clear_bit (&bb_tail, tbb->index);
                }
            }
          /* Now duplicate the tails.  */
          if (!bitmap_empty_p (&bb_tail))
            FOR_EACH_BB_REVERSE_FN (bb, cfun)
              {
                basic_block copy_bb, tbb;
                int eflags;

                if (!bitmap_clear_bit (&bb_tail, bb->index))
                  continue;

                /* Create a copy of BB, instructions and all, for
                   use on paths that don't need a prologue.
                   Ideal placement of the copy is on a fall-thru edge
                   or after a block that would jump to the copy.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  if (!bitmap_bit_p (bb_flags, e->src->index)
                      && single_succ_p (e->src))
                    break;
                if (e)
                  {
                    /* Make sure we insert after any barriers.  */
                    rtx_insn *end = get_last_bb_insn (e->src);
                    copy_bb = create_basic_block (NEXT_INSN (end),
                                                  NULL_RTX, e->src);
                    BB_COPY_PARTITION (copy_bb, e->src);
                  }
                else
                  {
                    /* Otherwise put the copy at the end of the function.  */
                    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
                                                  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
                    BB_COPY_PARTITION (copy_bb, bb);
                  }

                rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED,
                                                          BB_END (copy_bb));
                emit_barrier_after (BB_END (copy_bb));

                tbb = bb;
                while (1)
                  {
                    dup_block_and_redirect (tbb, copy_bb, insert_point,
                                            bb_flags);
                    tbb = single_succ (tbb);
                    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
                      break;
                    e = split_block (copy_bb, PREV_INSN (insert_point));
                    copy_bb = e->dest;
                  }

                /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
                   We have yet to add a simple_return to the tails,
                   as we'd like to first convert_jumps_to_returns in
                   case the block is no longer used after that.  */
                eflags = EDGE_FAKE;
                if (CALL_P (PREV_INSN (insert_point))
                    && SIBLING_CALL_P (PREV_INSN (insert_point)))
                  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
                make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
                                       eflags);

                /* verify_flow_info doesn't like a note after a
                   sibling call.  */
                delete_insn (insert_point);
                if (bitmap_empty_p (&bb_tail))
                  break;
              }
        }

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
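
/* In outline (a reading of the code above, not an original comment),
   try_shrink_wrapping works in five steps: (1) record the registers
   the prologue uses and clobbers; (2) flag every block containing an
   insn that needs a stack frame, plus everything reachable from such
   a block; (3) find small "tail" blocks that could be duplicated onto
   prologue-free paths; (4) compute the region where a prologue is
   anticipated on every outgoing path (bb_antic_flags) and require a
   unique entry edge into it; (5) if such an edge exists and the
   prologue clobbers nothing live on it, commit by setting
   crtl->shrink_wrapped and duplicating the tails.  */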

/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
                               vec<edge> *unconverted_simple_returns,
                               rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
         (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
        {
          edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
          if (LABEL_P (BB_HEAD (e->src))
              && !bitmap_bit_p (&bb_flags, e->src->index)
              && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
            *unconverted_simple_returns
              = convert_jumps_to_returns (e->src, true,
                                          *unconverted_simple_returns);
        }
    }

  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}
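
/* Note the contract here: a NULL return value means a simple return
   has already been emitted for the exit fall-through edge (with
   *RETURNJUMP pointing at the new jump), so the caller is expected to
   skip full epilogue insertion on that edge.  */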

/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
                          bitmap_head bb_flags, rtx_insn *returnjump,
                          vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
         epilogue.  */
      if (returnjump != NULL_RTX
          && JUMP_LABEL (returnjump) == simple_return_rtx)
        {
          e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
          if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
            simple_return_block_hot = e->dest;
          else
            simple_return_block_cold = e->dest;
        }

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if (EDGE_COUNT (e->src->preds) != 0
            && (e->flags & EDGE_FAKE) != 0
            && !bitmap_bit_p (&bb_flags, e->src->index))
          {
            if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
              pending_edge_hot = e;
            else
              pending_edge_cold = e;
          }

      /* Save a pointer to the exit's predecessor BB for use in
         inserting new BBs at the end of the function.  Do this
         after the call to split_block above which may split
         the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
        {
          basic_block *pdest_bb;
          edge pending;

          if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
            {
              pdest_bb = &simple_return_block_hot;
              pending = pending_edge_hot;
            }
          else
            {
              pdest_bb = &simple_return_block_cold;
              pending = pending_edge_cold;
            }

          if (*pdest_bb == NULL && pending != NULL)
            {
              emit_return_into_block (true, pending->src);
              pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
              *pdest_bb = pending->src;
            }
          else if (*pdest_bb == NULL)
            {
              basic_block bb;

              bb = create_basic_block (NULL, NULL, exit_pred);
              BB_COPY_PARTITION (bb, e->src);
              rtx_insn *ret = targetm.gen_simple_return ();
              rtx_jump_insn *start = emit_jump_insn_after (ret, BB_END (bb));
              JUMP_LABEL (start) = simple_return_rtx;
              emit_barrier_after (start);

              *pdest_bb = bb;
              make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
            }
          redirect_edge_and_branch_force (e, *pdest_bb);
        }
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if (EDGE_COUNT (e->src->preds) != 0
            && (e->flags & EDGE_FAKE) != 0
            && !bitmap_bit_p (&bb_flags, e->src->index))
          {
            emit_return_into_block (true, e->src);
            e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
          }
    }
}
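
/* A sketch of how these entry points fit together in the caller (in
   this GCC version they are driven from
   thread_prologue_and_epilogue_insns in function.c; the sequence
   below is simplified, not copied from that file):

     try_shrink_wrapping (&entry_edge, orig_entry_edge, &bb_flags,
                          prologue_seq);
     ...
     exit_fallthru_edge
       = get_unconverted_simple_return (exit_fallthru_edge, bb_flags,
                                        &unconverted_simple_returns,
                                        &returnjump);
     ...
     convert_to_simple_return (entry_edge, orig_entry_edge, bb_flags,
                               returnjump, unconverted_simple_returns);  */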