From: Segher Boessenkool Date: Tue, 15 Sep 2015 00:38:21 +0000 (+0200) Subject: shrink-wrap: Rewrite X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=23997c53b8f7696d6ba484913835b19287f96f82;p=gcc.git shrink-wrap: Rewrite This patch rewrites the shrink-wrapping algorithm, allowing non-linear pieces of CFG to be duplicated for use without prologue instead of just linear pieces. * shrink-wrap.c (requires_stack_frame_p): Fix formatting. (dup_block_and_redirect): Delete function. (can_dup_for_shrink_wrapping): New function. (fix_fake_fallthrough_edge): New function. (try_shrink_wrapping): Rewrite function. (convert_to_simple_return): Call fix_fake_fallthrough_edge. From-SVN: r227775 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f14f5b2b5d1..328675a87de 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2015-09-14 Segher Boessenkool + + * shrink-wrap.c (requires_stack_frame_p): Fix formatting. + (dup_block_and_redirect): Delete function. + (can_dup_for_shrink_wrapping): New function. + (fix_fake_fallthrough_edge): New function. + (try_shrink_wrapping): Rewrite function. + (convert_to_simple_return): Call fix_fake_fallthrough_edge. + 2015-09-14 Rich Felker * configure.ac: Change target pattern for sh TLS support diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c index d10795a0b3a..138759432ee 100644 --- a/gcc/shrink-wrap.c +++ b/gcc/shrink-wrap.c @@ -91,8 +91,7 @@ requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, if (!REG_P (dreg)) continue; - add_to_hard_reg_set (&hardregs, GET_MODE (dreg), - REGNO (dreg)); + add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg)); } if (hard_reg_set_intersect_p (hardregs, prologue_used)) return true; @@ -463,414 +462,469 @@ prepare_shrink_wrap (basic_block entry_block) } } -/* Create a copy of BB instructions and insert at BEFORE. Redirect - preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */ -static void -dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before, - bitmap_head *need_prologue) +/* Return whether we can duplicate basic block BB for shrink wrapping. We + cannot if the block cannot be duplicated at all, or if any of its incoming + edges are complex and come from a block that does not require a prologue + (we cannot redirect such edges), or if the block is too big to copy. + PRO is the basic block before which we would put the prologue, MAX_SIZE is + the maximum size block we allow to be copied. */ + +static bool +can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size) { - edge_iterator ei; - edge e; - rtx_insn *insn = BB_END (bb); + if (!can_duplicate_block_p (bb)) + return false; - /* We know BB has a single successor, so there is no need to copy a - simple jump at the end of BB. */ - if (simplejump_p (insn)) - insn = PREV_INSN (insn); + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + if (e->flags & EDGE_COMPLEX + && !dominated_by_p (CDI_DOMINATORS, e->src, pro)) + return false; - start_sequence (); - duplicate_insn_chain (BB_HEAD (bb), insn); - if (dump_file) - { - unsigned count = 0; - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (active_insn_p (insn)) - ++count; - fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n", - bb->index, copy_bb->index, count); - } - insn = get_insns (); - end_sequence (); - emit_insn_before (insn, before); + unsigned size = 0; - /* Redirect all the paths that need no prologue into copy_bb. */ - for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) - if (!bitmap_bit_p (need_prologue, e->src->index)) + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) { - int freq = EDGE_FREQUENCY (e); - copy_bb->count += e->count; - copy_bb->frequency += EDGE_FREQUENCY (e); - e->dest->count -= e->count; - if (e->dest->count < 0) - e->dest->count = 0; - e->dest->frequency -= freq; - if (e->dest->frequency < 0) - e->dest->frequency = 0; - redirect_edge_and_branch_force (e, copy_bb); - continue; + size += get_attr_min_length (insn); + if (size > max_size) + return false; } - else - ei_next (&ei); + + return true; } +/* If the source of edge E has more than one successor, the verifier for + branch probabilities gets confused by the fake edges we make where + simple_return statements will be inserted later (because those are not + marked as fallthrough edges). Fix this by creating an extra block just + for that fallthrough. */ + +static edge +fix_fake_fallthrough_edge (edge e) +{ + if (EDGE_COUNT (e->src->succs) <= 1) + return e; + + basic_block old_bb = e->src; + rtx_insn *end = BB_END (old_bb); + rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end); + basic_block new_bb = create_basic_block (note, note, old_bb); + BB_COPY_PARTITION (new_bb, old_bb); + BB_END (old_bb) = end; + + redirect_edge_succ (e, new_bb); + e->flags |= EDGE_FALLTHRU; + e->flags &= ~EDGE_FAKE; + + return make_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), EDGE_FAKE); +} /* Try to perform a kind of shrink-wrapping, making sure the prologue/epilogue is emitted only around those parts of the - function that require it. */ + function that require it. + + There will be exactly one prologue, and it will be executed either + zero or one time, on any path. Depending on where the prologue is + placed, some of the basic blocks can be reached via both paths with + and without a prologue. Such blocks will be duplicated here, and the + edges changed to match. + + Paths that go to the exit without going through the prologue will use + a simple_return instead of the epilogue. We maximize the number of + those, making sure to only duplicate blocks that can be duplicated. + If the prologue can then still be placed in multiple locations, we + place it as early as possible. + + An example, where we duplicate blocks with control flow (legend: + _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should + be taken to point down or to the right, to simplify the diagram; here, + block 3 needs a prologue, the rest does not): + + + B B + | | + 2 2 + |\ |\ + | 3 becomes | 3 + |/ | \ + 4 7 4 + |\ |\ |\ + | 5 | 8 | 5 + |/ |/ |/ + 6 9 6 + | | | + R S R + + + (bb 4 is duplicated to 7, and so on; the prologue is inserted on the + edge 2->3). + + Another example, where part of a loop is duplicated (again, bb 3 is + the only block that needs a prologue): + + + B 3<-- B ->3<-- + | | | | | | | + | v | becomes | | v | + 2---4--- 2---5-- 4--- + | | | + R S R + + + (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3). + + ENTRY_EDGE is the edge where the prologue will be placed, possibly + changed by this function. ORIG_ENTRY_EDGE is the edge where it + would be placed without shrink-wrapping. BB_WITH is a bitmap that, + if we do shrink-wrap, will on return contain the interesting blocks + that run with prologue. PROLOGUE_SEQ is the prologue we will insert. */ void try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge, - bitmap_head *bb_flags, rtx_insn *prologue_seq) + bitmap_head *bb_with, rtx_insn *prologue_seq) { - edge e; - edge_iterator ei; - bool nonempty_prologue = false; - unsigned max_grow_size; - rtx_insn *seq; + /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes + no sense to shrink-wrap: then do not shrink-wrap! */ + + if (!SHRINK_WRAPPING_ENABLED) + return; + + if (crtl->profile && !targetm.profile_before_prologue ()) + return; - for (seq = prologue_seq; seq; seq = NEXT_INSN (seq)) - if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END) + if (crtl->calls_eh_return) + return; + + bool empty_prologue = true; + for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) + if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)) { - nonempty_prologue = true; + empty_prologue = false; break; } + if (empty_prologue) + return; + + /* Move some code down to expose more shrink-wrapping opportunities. */ + + basic_block entry = (*entry_edge)->dest; + prepare_shrink_wrap (entry); + + if (dump_file) + fprintf (dump_file, "Attempting shrink-wrapping optimization.\n"); + + /* Compute the registers set and used in the prologue. */ + + HARD_REG_SET prologue_clobbered, prologue_used; + CLEAR_HARD_REG_SET (prologue_clobbered); + CLEAR_HARD_REG_SET (prologue_used); + for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn)) + { + HARD_REG_SET this_used; + CLEAR_HARD_REG_SET (this_used); + note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); + AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); + IOR_HARD_REG_SET (prologue_used, this_used); + note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered); + } - if (SHRINK_WRAPPING_ENABLED - && (targetm.profile_before_prologue () || !crtl->profile) - && nonempty_prologue && !crtl->calls_eh_return) + /* Find out what registers are set up by the prologue; any use of these + cannot happen before the prologue. */ + + struct hard_reg_set_container set_up_by_prologue; + CLEAR_HARD_REG_SET (set_up_by_prologue.set); + add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); + if (frame_pointer_needed) + add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, + HARD_FRAME_POINTER_REGNUM); + if (pic_offset_table_rtx + && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, + PIC_OFFSET_TABLE_REGNUM); + if (crtl->drap_reg) + add_to_hard_reg_set (&set_up_by_prologue.set, + GET_MODE (crtl->drap_reg), + REGNO (crtl->drap_reg)); + if (targetm.set_up_by_prologue) + targetm.set_up_by_prologue (&set_up_by_prologue); + + /* We will insert the prologue before the basic block PRO. PRO should + dominate all basic blocks that need the prologue to be executed + before them. First, make PRO the "tightest wrap" possible. */ + + calculate_dominance_info (CDI_DOMINATORS); + + basic_block pro = 0; + + basic_block bb; + edge e; + edge_iterator ei; + FOR_EACH_BB_FN (bb, cfun) { - HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge; - struct hard_reg_set_container set_up_by_prologue; - rtx_insn *p_insn; - vec vec; - basic_block bb; - bitmap_head bb_antic_flags; - bitmap_head bb_on_list; - bitmap_head bb_tail; + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) + && requires_stack_frame_p (insn, prologue_used, + set_up_by_prologue.set)) + { + if (dump_file) + fprintf (dump_file, "Block %d needs the prologue.\n", bb->index); + pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb); + break; + } + } + /* If nothing needs a prologue, just put it at the start. This really + shouldn't happen, but we cannot fix it here. */ + + if (pro == 0) + { if (dump_file) - fprintf (dump_file, "Attempting shrink-wrapping optimization.\n"); + fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; " + "putting it at the start.\n"); + pro = entry; + } - /* Compute the registers set and used in the prologue. */ - CLEAR_HARD_REG_SET (prologue_clobbered); - CLEAR_HARD_REG_SET (prologue_used); - for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) - { - HARD_REG_SET this_used; - if (!NONDEBUG_INSN_P (p_insn)) - continue; - - CLEAR_HARD_REG_SET (this_used); - note_uses (&PATTERN (p_insn), record_hard_reg_uses, - &this_used); - AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); - IOR_HARD_REG_SET (prologue_used, this_used); - note_stores (PATTERN (p_insn), record_hard_reg_sets, - &prologue_clobbered); - } + if (dump_file) + fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n", + pro->index); - prepare_shrink_wrap ((*entry_edge)->dest); - - bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack); - bitmap_initialize (&bb_on_list, &bitmap_default_obstack); - bitmap_initialize (&bb_tail, &bitmap_default_obstack); - - /* Find the set of basic blocks that require a stack frame, - and blocks that are too big to be duplicated. */ - - vec.create (n_basic_blocks_for_fn (cfun)); - - CLEAR_HARD_REG_SET (set_up_by_prologue.set); - add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, - STACK_POINTER_REGNUM); - add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); - if (frame_pointer_needed) - add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, - HARD_FRAME_POINTER_REGNUM); - if (pic_offset_table_rtx - && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) - add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, - PIC_OFFSET_TABLE_REGNUM); - if (crtl->drap_reg) - add_to_hard_reg_set (&set_up_by_prologue.set, - GET_MODE (crtl->drap_reg), - REGNO (crtl->drap_reg)); - if (targetm.set_up_by_prologue) - targetm.set_up_by_prologue (&set_up_by_prologue); - - /* We don't use a different max size depending on - optimize_bb_for_speed_p because increasing shrink-wrapping - opportunities by duplicating tail blocks can actually result - in an overall decrease in code size. */ - max_grow_size = get_uncond_jump_length (); - max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); - - FOR_EACH_BB_FN (bb, cfun) - { - rtx_insn *insn; - unsigned size = 0; + /* Now see if we can put the prologue at the start of PRO. Putting it + there might require duplicating a block that cannot be duplicated; + if so, try again with the immediate dominator of PRO, and so on. - FOR_BB_INSNS (bb, insn) - if (NONDEBUG_INSN_P (insn)) - { - if (requires_stack_frame_p (insn, prologue_used, - set_up_by_prologue.set)) - { - if (bb == (*entry_edge)->dest) - goto fail_shrinkwrap; - bitmap_set_bit (bb_flags, bb->index); - vec.quick_push (bb); - break; - } - else if (size <= max_grow_size) - { - size += get_attr_min_length (insn); - if (size > max_grow_size) - bitmap_set_bit (&bb_on_list, bb->index); - } - } - } + The blocks that need duplicating are those reachable from PRO but + not dominated by it. We keep in BB_WITH a bitmap of the blocks + reachable from PRO that we already found, and in VEC a stack of + those we still need to consider (to find successors). */ - /* Blocks that really need a prologue, or are too big for tails. */ - bitmap_ior_into (&bb_on_list, bb_flags); + bitmap_set_bit (bb_with, pro->index); - /* For every basic block that needs a prologue, mark all blocks - reachable from it, so as to ensure they are also seen as - requiring a prologue. */ - while (!vec.is_empty ()) - { - basic_block tmp_bb = vec.pop (); + vec vec; + vec.create (n_basic_blocks_for_fn (cfun)); + vec.quick_push (pro); - FOR_EACH_EDGE (e, ei, tmp_bb->succs) - if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) - && bitmap_set_bit (bb_flags, e->dest->index)) - vec.quick_push (e->dest); - } + unsigned max_grow_size = get_uncond_jump_length (); + max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); - /* Find the set of basic blocks that need no prologue, have a - single successor, can be duplicated, meet a max size - requirement, and go to the exit via like blocks. */ - vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun)); - while (!vec.is_empty ()) - { - basic_block tmp_bb = vec.pop (); + while (!vec.is_empty () && pro != entry) + { + basic_block bb = vec.pop (); + if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size)) + while (!dominated_by_p (CDI_DOMINATORS, bb, pro)) + { + gcc_assert (pro != entry); - FOR_EACH_EDGE (e, ei, tmp_bb->preds) - if (single_succ_p (e->src) - && !bitmap_bit_p (&bb_on_list, e->src->index) - && can_duplicate_block_p (e->src)) - { - edge pe; - edge_iterator pei; - - /* If there is predecessor of e->src which doesn't - need prologue and the edge is complex, - we might not be able to redirect the branch - to a copy of e->src. */ - FOR_EACH_EDGE (pe, pei, e->src->preds) - if ((pe->flags & EDGE_COMPLEX) != 0 - && !bitmap_bit_p (bb_flags, pe->src->index)) - break; - if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index)) - vec.quick_push (e->src); - } - } + pro = get_immediate_dominator (CDI_DOMINATORS, pro); + + bitmap_set_bit (bb_with, pro->index); + vec.quick_push (pro); + } + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) + && bitmap_set_bit (bb_with, e->dest->index)) + vec.quick_push (e->dest); + } + + vec.release (); + + if (dump_file) + fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n", + pro->index); + + /* If we can move PRO back without having to duplicate more blocks, do so. + We can move back to a block PRE if every path from PRE will eventually + need a prologue, that is, PRO is a post-dominator of PRE. */ + + if (pro != entry) + { + calculate_dominance_info (CDI_POST_DOMINATORS); - /* Now walk backwards from every block that is marked as needing - a prologue to compute the bb_antic_flags bitmap. Exclude - tail blocks; They can be duplicated to be used on paths not - needing a prologue. */ - bitmap_clear (&bb_on_list); - bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail); - FOR_EACH_BB_FN (bb, cfun) + while (pro != entry) { - if (!bitmap_bit_p (&bb_antic_flags, bb->index)) - continue; - FOR_EACH_EDGE (e, ei, bb->preds) - if (!bitmap_bit_p (&bb_antic_flags, e->src->index) - && bitmap_set_bit (&bb_on_list, e->src->index)) - vec.quick_push (e->src); + basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro); + if (dominated_by_p (CDI_POST_DOMINATORS, pre, pro)) + pro = pre; + else + break; } - while (!vec.is_empty ()) - { - basic_block tmp_bb = vec.pop (); - bool all_set = true; - bitmap_clear_bit (&bb_on_list, tmp_bb->index); - FOR_EACH_EDGE (e, ei, tmp_bb->succs) - if (!bitmap_bit_p (&bb_antic_flags, e->dest->index)) - { - all_set = false; - break; - } + free_dominance_info (CDI_POST_DOMINATORS); + } - if (all_set) - { - bitmap_set_bit (&bb_antic_flags, tmp_bb->index); - FOR_EACH_EDGE (e, ei, tmp_bb->preds) - if (!bitmap_bit_p (&bb_antic_flags, e->src->index) - && bitmap_set_bit (&bb_on_list, e->src->index)) - vec.quick_push (e->src); - } - } - /* Find exactly one edge that leads to a block in ANTIC from - a block that isn't. */ - if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index)) - FOR_EACH_BB_FN (bb, cfun) + if (dump_file) + fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n", + pro->index); + + /* If there is more than one predecessor of PRO not dominated by PRO, fail. + Also find that single edge that leads to PRO. */ + + bool multi = false; + edge the_edge = 0; + FOR_EACH_EDGE (e, ei, pro->preds) + if (!dominated_by_p (CDI_DOMINATORS, e->src, pro)) + { + if (the_edge) + multi = true; + else + the_edge = e; + } + + if (multi) + { + the_edge = orig_entry_edge; + + if (dump_file) + fprintf (dump_file, "More than one candidate edge.\n"); + } + + if (dump_file) + fprintf (dump_file, "Found candidate edge for shrink-wrapping, %d->%d.\n", + the_edge->src->index, the_edge->dest->index); + + *entry_edge = the_edge; + + /* Compute what fraction of the frequency and count of the blocks that run + both with and without prologue are for running with prologue. This gives + the correct answer for reducible flow graphs; for irreducible flow graphs + our profile is messed up beyond repair anyway. */ + + int num = (*entry_edge)->probability; + int den = REG_BR_PROB_BASE; + + if (*entry_edge == orig_entry_edge) + goto out; + + /* Test whether the prologue is known to clobber any register + (other than FP or SP) which are live on the edge. */ + + HARD_REG_SET live_on_edge; + CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); + if (frame_pointer_needed) + CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); + REG_SET_TO_HARD_REG_SET (live_on_edge, + df_get_live_in ((*entry_edge)->dest)); + if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered)) + { + *entry_edge = orig_entry_edge; + if (dump_file) + fprintf (dump_file, + "Shrink-wrapping aborted due to clobber.\n"); + goto out; + } + + /* All is okay, so do it. */ + + crtl->shrink_wrapped = true; + if (dump_file) + fprintf (dump_file, "Performing shrink-wrapping.\n"); + + /* Copy the blocks that can run both with and without prologue. The + originals run with prologue, the copies without. Store a pointer to + the copy in the ->aux field of the original. */ + + FOR_EACH_BB_FN (bb, cfun) + if (bitmap_bit_p (bb_with, bb->index) + && !dominated_by_p (CDI_DOMINATORS, bb, pro)) + { + basic_block dup = duplicate_block (bb, 0, 0); + + bb->aux = dup; + + if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup))) + emit_barrier_after_bb (dup); + + if (EDGE_COUNT (dup->succs) == 0) + emit_barrier_after_bb (dup); + + if (dump_file) + fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index); + + bb->frequency = RDIV (num * bb->frequency, den); + dup->frequency -= bb->frequency; + bb->count = RDIV (num * bb->count, den); + dup->count -= bb->count; + } + + /* Change ENTRY_EDGE, if its src is duplicated. Do this first, before + the redirects have had a chance to create new blocks on the edge we + want to use for the prologue, which makes us not find it. */ + + gcc_assert (!dominated_by_p (CDI_DOMINATORS, (*entry_edge)->src, pro)); + + if (bitmap_bit_p (bb_with, (*entry_edge)->src->index)) + { + basic_block src = (basic_block) (*entry_edge)->src->aux; + FOR_EACH_EDGE (e, ei, src->succs) + if (e->dest == pro) + *entry_edge = e; + } + + /* Now change the edges to point to the copies, where appropriate. */ + + FOR_EACH_BB_FN (bb, cfun) + if (!dominated_by_p (CDI_DOMINATORS, bb, pro)) + { + basic_block src = bb; + if (bitmap_bit_p (bb_with, bb->index)) + src = (basic_block) bb->aux; + + FOR_EACH_EDGE (e, ei, src->succs) { - if (!bitmap_bit_p (&bb_antic_flags, bb->index)) + if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) continue; - FOR_EACH_EDGE (e, ei, bb->preds) - if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) - { - if (*entry_edge != orig_entry_edge) - { - *entry_edge = orig_entry_edge; - if (dump_file) - fprintf (dump_file, "More than one candidate edge.\n"); - goto fail_shrinkwrap; - } - if (dump_file) - fprintf (dump_file, "Found candidate edge for " - "shrink-wrapping, %d->%d.\n", e->src->index, - e->dest->index); - *entry_edge = e; - } - } - if (*entry_edge != orig_entry_edge) - { - /* Test whether the prologue is known to clobber any register - (other than FP or SP) which are live on the edge. */ - CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); - if (frame_pointer_needed) - CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); - REG_SET_TO_HARD_REG_SET (live_on_edge, - df_get_live_in ((*entry_edge)->dest)); - if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered)) - { - *entry_edge = orig_entry_edge; - if (dump_file) - fprintf (dump_file, - "Shrink-wrapping aborted due to clobber.\n"); - } - } - if (*entry_edge != orig_entry_edge) - { - crtl->shrink_wrapped = true; - if (dump_file) - fprintf (dump_file, "Performing shrink-wrapping.\n"); - - /* Find tail blocks reachable from both blocks needing a - prologue and blocks not needing a prologue. */ - if (!bitmap_empty_p (&bb_tail)) - FOR_EACH_BB_FN (bb, cfun) + if (bitmap_bit_p (bb_with, e->dest->index) + && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) { - bool some_pro, some_no_pro; - if (!bitmap_bit_p (&bb_tail, bb->index)) - continue; - some_pro = some_no_pro = false; - FOR_EACH_EDGE (e, ei, bb->preds) - { - if (bitmap_bit_p (bb_flags, e->src->index)) - some_pro = true; - else - some_no_pro = true; - } - if (some_pro && some_no_pro) - vec.quick_push (bb); - else - bitmap_clear_bit (&bb_tail, bb->index); + if (dump_file) + fprintf (dump_file, "Redirecting edge %d->%d to %d\n", + e->src->index, e->dest->index, + ((basic_block) e->dest->aux)->index); + redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); } - /* Find the head of each tail. */ - while (!vec.is_empty ()) - { - basic_block tbb = vec.pop (); + else if (e->flags & EDGE_FALLTHRU + && bitmap_bit_p (bb_with, bb->index)) + force_nonfallthru (e); + } + } - if (!bitmap_bit_p (&bb_tail, tbb->index)) - continue; + /* Also redirect the function entry edge if necessary. */ - while (single_succ_p (tbb)) - { - tbb = single_succ (tbb); - bitmap_clear_bit (&bb_tail, tbb->index); - } - } - /* Now duplicate the tails. */ - if (!bitmap_empty_p (&bb_tail)) - FOR_EACH_BB_REVERSE_FN (bb, cfun) - { - basic_block copy_bb, tbb; - int eflags; - - if (!bitmap_clear_bit (&bb_tail, bb->index)) - continue; - - /* Create a copy of BB, instructions and all, for - use on paths that don't need a prologue. - Ideal placement of the copy is on a fall-thru edge - or after a block that would jump to the copy. */ - FOR_EACH_EDGE (e, ei, bb->preds) - if (!bitmap_bit_p (bb_flags, e->src->index) - && single_succ_p (e->src)) - break; - if (e) - { - /* Make sure we insert after any barriers. */ - rtx_insn *end = get_last_bb_insn (e->src); - copy_bb = create_basic_block (NEXT_INSN (end), - NULL_RTX, e->src); - BB_COPY_PARTITION (copy_bb, e->src); - } - else - { - /* Otherwise put the copy at the end of the function. */ - copy_bb = create_basic_block (NULL_RTX, NULL_RTX, - EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); - BB_COPY_PARTITION (copy_bb, bb); - } - - rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED, - BB_END (copy_bb)); - emit_barrier_after (BB_END (copy_bb)); - - tbb = bb; - while (1) - { - dup_block_and_redirect (tbb, copy_bb, insert_point, - bb_flags); - tbb = single_succ (tbb); - if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) - break; - e = split_block (copy_bb, PREV_INSN (insert_point)); - copy_bb = e->dest; - } - - /* Quiet verify_flow_info by (ab)using EDGE_FAKE. - We have yet to add a simple_return to the tails, - as we'd like to first convert_jumps_to_returns in - case the block is no longer used after that. */ - eflags = EDGE_FAKE; - if (CALL_P (PREV_INSN (insert_point)) - && SIBLING_CALL_P (PREV_INSN (insert_point))) - eflags = EDGE_SIBCALL | EDGE_ABNORMAL; - make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), - eflags); - - /* verify_flow_info doesn't like a note after a - sibling call. */ - delete_insn (insert_point); - if (bitmap_empty_p (&bb_tail)) - break; - } - } + FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) + if (bitmap_bit_p (bb_with, e->dest->index) + && !dominated_by_p (CDI_DOMINATORS, e->dest, pro)) + { + basic_block split_bb = split_edge (e); + e = single_succ_edge (split_bb); + redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); + } - fail_shrinkwrap: - bitmap_clear (&bb_tail); - bitmap_clear (&bb_antic_flags); - bitmap_clear (&bb_on_list); - vec.release (); - } + /* Change all the exits that should get a simple_return to FAKE. + They will be converted later. */ + + FOR_EACH_BB_FN (bb, cfun) + if (!bitmap_bit_p (bb_with, bb->index)) + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) + { + e = fix_fake_fallthrough_edge (e); + + e->flags &= ~EDGE_FALLTHRU; + if (!(e->flags & EDGE_SIBCALL)) + e->flags |= EDGE_FAKE; + + emit_barrier_after_bb (e->src); + } + +out: + free_dominance_info (CDI_DOMINATORS); } /* If we're allowed to generate a simple return instruction, then by @@ -1018,6 +1072,8 @@ convert_to_simple_return (edge entry_edge, edge orig_entry_edge, && (e->flags & EDGE_FAKE) != 0 && !bitmap_bit_p (&bb_flags, e->src->index)) { + e = fix_fake_fallthrough_edge (e); + emit_return_into_block (true, e->src); e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE); }