From: Zdenek Dvorak Date: Thu, 17 May 2007 08:10:24 +0000 (+0200) Subject: tree-vrp.c (finalize_jump_threads): Do not care about dominance info. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b02b9b53ec39cfd36e27424c4c07be5880ec393a;p=gcc.git tree-vrp.c (finalize_jump_threads): Do not care about dominance info. * tree-vrp.c (finalize_jump_threads): Do not care about dominance info. (execute_vrp): Preserve loops through jump threading. * tree-ssa-threadupdate.c (thread_single_edge, dbds_continue_enumeration_p, determine_bb_domination_status, thread_through_loop_header): New functions. (create_edge_and_update_destination_phis, create_edge_and_update_destination_phis): Set loops for the new blocks. (prune_undesirable_thread_requests): Removed. (redirect_edges): Do not pretend that redirect_edge_and_branch can create new blocks. (thread_block): Do not call prune_undesirable_thread_requests. Update loops. (mark_threaded_blocks): Select edges to thread here. (thread_through_all_blocks): Take may_peel_loop_headers argument. Thread edges through loop headers independently. * cfgloopmanip.c (create_preheader, mfb_keep_just): Export. * tree-pass.h (TODO_mark_first_instance): New. (first_pass_instance): Declare. * cfghooks.c (duplicate_block): Put the block to the original loop if copy is not specified. * tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through jump threading. Pass may_peel_loop_headers to thread_through_all_blocks according to first_pass_instance. * cfgloop.h (create_preheader): Declare. * tree-flow.h (thread_through_all_blocks): Declaration changed. * basic-block.h (mfb_keep_just, mfb_kj_edge): Declare. * passes.c (first_pass_instance): New variable. (next_pass_1): Set TODO_mark_first_instance. (execute_todo): Set first_pass_instance. * gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test. * gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c, gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing. 
* gcc.dg/tree-ssa/pr21559.c: Update outcome. From-SVN: r124786 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f50bdc5ff9a..247a42d9b24 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2007-05-17 Zdenek Dvorak + + * tree-vrp.c (finalize_jump_threads): Do not care about dominance info. + (execute_vrp): Preserve loops through jump threading. + * tree-ssa-threadupdate.c (thread_single_edge, + dbds_continue_enumeration_p, determine_bb_domination_status, + thread_through_loop_header): New functions. + (create_edge_and_update_destination_phis, + create_edge_and_update_destination_phis): Set loops for the new blocks. + (prune_undesirable_thread_requests): Removed. + (redirect_edges): Do not pretend that redirect_edge_and_branch can + create new blocks. + (thread_block): Do not call prune_undesirable_thread_requests. + Update loops. + (mark_threaded_blocks): Select edges to thread here. + (thread_through_all_blocks): Take may_peel_loop_headers argument. + Thread edges through loop headers independently. + * cfgloopmanip.c (create_preheader, mfb_keep_just): Export. + * tree-pass.h (TODO_mark_first_instance): New. + (first_pass_instance): Declare. + * cfghooks.c (duplicate_block): Put the block to the original loop + if copy is not specified. + * tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through + jump threading. Pass may_peel_loop_headers to + thread_through_all_blocks according to first_pass_instance. + * cfgloop.h (create_preheader): Declare. + * tree-flow.h (thread_through_all_blocks): Declaration changed. + * basic-block.h (mfb_keep_just, mfb_kj_edge): Declare. + * passes.c (first_pass_instance): New variable. + (next_pass_1): Set TODO_mark_first_instance. + (execute_todo): Set first_pass_instance. 
+ 2007-05-17 Uros Bizjak PR tree-optimization/24659 diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 4684369e4e0..a3d4dc84b69 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -1173,4 +1173,8 @@ bb_has_eh_pred (basic_block bb) return false; } +/* In cfgloopmanip.c. */ +extern edge mfb_kj_edge; +bool mfb_keep_just (edge); + #endif /* GCC_BASIC_BLOCK_H */ diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c index f1103c15685..7f544c3ee3c 100644 --- a/gcc/cfghooks.c +++ b/gcc/cfghooks.c @@ -923,9 +923,15 @@ duplicate_block (basic_block bb, edge e, basic_block after) set_bb_original (new_bb, bb); set_bb_copy (bb, new_bb); - /* Add the new block to the prescribed loop. */ + /* Add the new block to the copy of the loop of BB, or directly to the loop + of BB if the loop is not being copied. */ if (current_loops != NULL) - add_bb_to_loop (new_bb, bb->loop_father->copy); + { + struct loop *cloop = bb->loop_father; + if (cloop->copy) + cloop = cloop->copy; + add_bb_to_loop (new_bb, cloop); + } return new_bb; } diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 47892ea13da..37869860688 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -252,6 +252,7 @@ enum CP_SIMPLE_PREHEADERS = 1 }; +basic_block create_preheader (struct loop *, int); extern void create_preheaders (int); extern void force_single_succ_latches (void); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 4c658df876b..1fadbb4356d 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -41,7 +41,6 @@ static int find_path (edge, basic_block **); static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *); -static basic_block create_preheader (struct loop *, int); static void unloop (struct loop *, bool *); #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) @@ -1085,8 +1084,8 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, MFB_KJ_EDGE to the entry part. 
E is the edge for that we should decide whether to redirect it. */ -static edge mfb_kj_edge; -static bool +edge mfb_kj_edge; +bool mfb_keep_just (edge e) { return e != mfb_kj_edge; @@ -1097,7 +1096,7 @@ mfb_keep_just (edge e) entry; otherwise we also force preheader block to have only one successor. The function also updates dominators. */ -static basic_block +basic_block create_preheader (struct loop *loop, int flags) { edge e, fallthru; diff --git a/gcc/passes.c b/gcc/passes.c index bea89b6f714..78ccb47049e 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -105,6 +105,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA /* Global variables used to communicate with passes. */ int dump_flags; bool in_gimple_form; +bool first_pass_instance; /* This is called from various places for FUNCTION_DECL, VAR_DECL, @@ -392,6 +393,8 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass) memcpy (new, pass, sizeof (*new)); new->next = NULL; + new->todo_flags_start &= ~TODO_mark_first_instance; + /* Indicate to register_dump_files that this pass has duplicates, and so it should rename the dump file. The first instance will be -1, and be number of duplicates = -static_pass_number - 1. @@ -406,6 +409,7 @@ next_pass_1 (struct tree_opt_pass **list, struct tree_opt_pass *pass) } else { + pass->todo_flags_start |= TODO_mark_first_instance; pass->static_pass_number = -1; *list = pass; } @@ -932,6 +936,9 @@ execute_todo (unsigned int flags) gcc_assert (flags & TODO_update_ssa_any); #endif + /* Inform the pass whether it is the first time it is run. 
*/ + first_pass_instance = (flags & TODO_mark_first_instance) != 0; + do_per_function (execute_function_todo, (void *)(size_t) flags); /* Always remove functions just as before inlining: IPA passes might be diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2c020300436..e829a0f9c72 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2007-05-17 Zdenek Dvorak + + * gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test. + * gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c, + gcc.dg/vect/vect-104.c: Use more complex construction to prevent vectorizing. + * gcc.dg/tree-ssa/pr21559.c: Update outcome. + 2007-05-17 Uros Bizjak PR tree-optimization/24659 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c b/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c index 6ffcfa1973a..402c102d259 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21559.c @@ -35,11 +35,9 @@ void foo (void) /* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */ /* Second, we should thread the edge out of the loop via the break - statement. */ -/* { dg-final { scan-tree-dump-times "Threaded jump" 1 "vrp1" } } */ - -/* Now if we were really good, we'd realize that the final bytes == 0 - test is totally useless. That's not likely to happen anytime soon. */ + statement. We also realize that the final bytes == 0 test is useless, + and thread over it. 
*/ +/* { dg-final { scan-tree-dump-times "Threaded jump" 2 "vrp1" } } */ /* { dg-final { cleanup-tree-dump "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c new file mode 100644 index 00000000000..6aaea8ecb38 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c @@ -0,0 +1,119 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom1-stats" } */ + +void foo(); +void bla(); +void bar(); + +/* In the following two cases, we should be able to thread edge through + the loop header. */ + +void thread_entry_through_header (void) +{ + int i; + + for (i = 0; i < 170; i++) + bla (); +} + +void thread_latch_through_header (void) +{ + int i = 0; + int first = 1; + + do + { + if (first) + foo (); + + first = 0; + bla (); + } while (i++ < 100); +} + +/* This is a TODO -- it is correct to thread both entry and latch edge through + the header, but we do not handle this case yet. */ + +void dont_thread_1 (void) +{ + int i = 0; + int first = 1; + + do + { + if (first) + foo (); + else + bar (); + + first = 0; + bla (); + } while (i++ < 100); +} + +/* Avoid threading in the following two cases, to prevent creating subloops. */ + +void dont_thread_2 (int first) +{ + int i = 0; + + do + { + if (first) + foo (); + else + bar (); + + first = 0; + bla (); + } while (i++ < 100); +} + +void dont_thread_3 (int nfirst) +{ + int i = 0; + int first = 0; + + do + { + if (first) + foo (); + else + bar (); + + first = nfirst; + bla (); + } while (i++ < 100); +} + +/* Avoid threading in this case, in order to avoid creating loop with + multiple entries. 
*/ + +void dont_thread_4 (int a, int nfirst) +{ + int i = 0; + int first; + + if (a) + first = 0; + else + first = 1; + + do + { + if (first) + foo (); + else + bar (); + + first = nfirst; + bla (); + } while (i++ < 100); +} + +/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "vrp1"} } */ +/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 0 "vrp1"} } */ +/* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 0 "dom1"} } */ +/* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 1 "dom1"} } */ +/* { dg-final { cleanup-tree-dump "dom1" } } */ +/* { dg-final { cleanup-tree-dump "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-102.c b/gcc/testsuite/gcc.dg/vect/vect-102.c index af3261d01f8..49df4f9bc90 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-102.c +++ b/gcc/testsuite/gcc.dg/vect/vect-102.c @@ -14,6 +14,7 @@ struct extraction static int a[N] = {1,2,3,4,5,6,7,8,9}; static int b[N] = {2,3,4,5,6,7,8,9,9}; +volatile int foo; int main1 (int x, int y) { int i; @@ -23,7 +24,7 @@ int main1 (int x, int y) { for (i = 0; i < N; i++) { p->a[i] = a[i]; - if (x == 135) + if (foo == 135) abort (); /* to avoid vectorization */ } @@ -46,6 +47,7 @@ int main (void) { check_vect (); + foo = 0; return main1 (0, N); } diff --git a/gcc/testsuite/gcc.dg/vect/vect-103.c b/gcc/testsuite/gcc.dg/vect/vect-103.c index effa97e15ed..da1b69e5626 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-103.c +++ b/gcc/testsuite/gcc.dg/vect/vect-103.c @@ -15,6 +15,7 @@ struct extraction static int a[N] = {1,2,3,4,5,6,7,8,9}; static int b[N] = {17,24,7,0,2,3,4,31,82}; static int c[N] = {9,17,24,7,0,2,3,4,31}; +volatile int foo; int main1 (int x, int y) { int i; @@ -25,7 +26,7 @@ int main1 (int x, int y) { { p->a[i] = a[i]; p->b[i] = b[i]; - if (x == 135) + if (foo == 135) abort (); /* to avoid vectorization */ } @@ -48,6 +49,7 @@ int main (void) { check_vect (); + foo = 0; return main1 (0, N); } diff --git a/gcc/testsuite/gcc.dg/vect/vect-104.c b/gcc/testsuite/gcc.dg/vect/vect-104.c 
index 6d16da6b32e..6ab0f23acf8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-104.c +++ b/gcc/testsuite/gcc.dg/vect/vect-104.c @@ -15,6 +15,7 @@ struct extraction static int a[N][N] = {{1,2,3},{4,5,6},{7,8,9}}; static int b[N][N] = {{17,24,7},{0,2,3},{4,31,82}}; static int c[N][N] = {{1,2,3},{4,6,8},{8,9,9}}; +volatile int foo; int main1 (int x) { int i,j; @@ -27,7 +28,7 @@ int main1 (int x) { { p->a[i][j] = a[i][j]; p->b[i][j] = b[i][j]; - if (x == 135) + if (foo == 135) abort (); /* to avoid vectorization */ } } @@ -57,6 +58,7 @@ int main (void) { check_vect (); + foo = 0; return main1 (N); } diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 1997abd8779..156ae13b43b 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -1113,7 +1113,7 @@ bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode); unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode); /* In tree-ssa-threadupdate.c. */ -extern bool thread_through_all_blocks (void); +extern bool thread_through_all_blocks (bool); extern void register_jump_thread (edge, edge); /* In gimplify.c */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index b77243582b6..299255cea75 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -218,6 +218,9 @@ struct dump_file_info for the passes that are handed to register_dump_files. */ #define TODO_set_props (1 << 15) +/* Internally used for the first instance of a pass. */ +#define TODO_mark_first_instance (1 << 16) + #define TODO_update_ssa_any \ (TODO_update_ssa \ | TODO_update_ssa_no_phi \ @@ -417,4 +420,13 @@ extern struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes; extern void execute_pass_list (struct tree_opt_pass *); extern void execute_ipa_pass_list (struct tree_opt_pass *); +/* Set to true if the pass is called the first time during compilation of the + current function. Note that using this information in the optimization + passes is considered not to be clean, and it should be avoided if possible. 
+ This flag is currently used to prevent loops from being peeled repeatedly + in jump threading; it will be removed once we preserve loop structures + throughout the compilation -- we will be able to mark the affected loops + directly in jump threading, and avoid peeling them next time. */ +extern bool first_pass_instance; + #endif /* GCC_TREE_PASS_H */ diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index 7a146157f99..87270fc8e08 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -277,25 +277,17 @@ tree_ssa_dominator_optimize (void) calculate_dominance_info (CDI_DOMINATORS); cfg_altered = false; - /* We need to know which edges exit loops so that we can - aggressively thread through loop headers to an exit - edge. */ - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - if (current_loops) - { - mark_loop_exit_edges (); - loop_optimizer_finalize (); - } - - /* Clean up the CFG so that any forwarder blocks created by loop - canonicalization are removed. */ - cleanup_tree_cfg (); - calculate_dominance_info (CDI_DOMINATORS); + /* We need to know loop structures in order to avoid destroying them + in jump threading. Note that we still can e.g. thread through loop + headers to an exit edge, or through loop header to the loop body, assuming + that we update the loop info. */ + loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES); /* We need accurate information regarding back edges in the CFG - for jump threading. */ + for jump threading; this may include back edges that are not part of + a single loop. */ mark_dfs_back_edges (); - + /* Recursively walk the dominator tree optimizing statements. */ walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); @@ -319,7 +311,7 @@ tree_ssa_dominator_optimize (void) free_all_edge_infos (); /* Thread jumps, creating duplicate blocks as needed. 
*/ - cfg_altered |= thread_through_all_blocks (); + cfg_altered |= thread_through_all_blocks (first_pass_instance); if (cfg_altered) free_dominance_info (CDI_DOMINATORS); @@ -353,6 +345,8 @@ tree_ssa_dominator_optimize (void) if (dump_file && (dump_flags & TDF_STATS)) dump_dominator_optimization_stats (dump_file); + loop_optimizer_finalize (); + /* Delete our main hashtable. */ htab_delete (avail_exprs); diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index c6b52095716..22266bd355c 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -315,6 +315,7 @@ create_edge_and_update_destination_phis (struct redirection_data *rd) e->probability = REG_BR_PROB_BASE; e->count = rd->dup_block->count; + e->aux = rd->outgoing_edge->aux; /* If there are any PHI nodes at the destination of the outgoing edge from the duplicate block, then we will need to add a new argument @@ -385,199 +386,6 @@ fixup_template_block (void **slot, void *data) return 1; } -/* Not all jump threading requests are useful. In particular some - jump threading requests can create irreducible regions which are - undesirable. - - This routine will examine the BB's incoming edges for jump threading - requests which, if acted upon, would create irreducible regions. Any - such jump threading requests found will be pruned away. */ - -static void -prune_undesirable_thread_requests (basic_block bb) -{ - edge e; - edge_iterator ei; - bool may_create_irreducible_region = false; - unsigned int num_outgoing_edges_into_loop = 0; - - /* For the heuristics below, we need to know if BB has more than - one outgoing edge into a loop. 
*/ - FOR_EACH_EDGE (e, ei, bb->succs) - num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0); - - if (num_outgoing_edges_into_loop > 1) - { - edge backedge = NULL; - - /* Consider the effect of threading the edge (0, 1) to 2 on the left - CFG to produce the right CFG: - - - 0 0 - | | - 1<--+ 2<--------+ - / \ | | | - 2 3 | 4<----+ | - \ / | / \ | | - 4---+ E 1-- | --+ - | | | - E 3---+ - - - Threading the (0, 1) edge to 2 effectively creates two loops - (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested. - This is not good. - - However, we do need to be able to thread (0, 1) to 2 or 3 - in the left CFG below (which creates the middle and right - CFGs with nested loops). - - 0 0 0 - | | | - 1<--+ 2<----+ 3<-+<-+ - /| | | | | | | - 2 | | 3<-+ | 1--+ | - \| | | | | | | - 3---+ 1--+--+ 2-----+ - - - A safe heuristic appears to be to only allow threading if BB - has a single incoming backedge from one of its direct successors. */ - - FOR_EACH_EDGE (e, ei, bb->preds) - { - if (e->flags & EDGE_DFS_BACK) - { - if (backedge) - { - backedge = NULL; - break; - } - else - { - backedge = e; - } - } - } - - if (backedge && find_edge (bb, backedge->src)) - ; - else - may_create_irreducible_region = true; - } - else - { - edge dest = NULL; - - /* If we thread across the loop entry block (BB) into the - loop and BB is still reached from outside the loop, then - we would create an irreducible CFG. Consider the effect - of threading the edge (1, 4) to 5 on the left CFG to produce - the right CFG - - 0 0 - / \ / \ - 1 2 1 2 - \ / | | - 4<----+ 5<->4 - / \ | | - E 5---+ E - - - Threading the (1, 4) edge to 5 creates two entry points - into the loop (4, 5) (one from block 1, the other from - block 2). A classic irreducible region. - - So look at all of BB's incoming edges which are not - backedges and which are not threaded to the loop exit. 
- If that subset of incoming edges do not all thread - to the same block, then threading any of them will create - an irreducible region. */ - - FOR_EACH_EDGE (e, ei, bb->preds) - { - edge e2; - - /* We ignore back edges for now. This may need refinement - as threading a backedge creates an inner loop which - we would need to verify has a single entry point. - - If all backedges thread to new locations, then this - block will no longer have incoming backedges and we - need not worry about creating irreducible regions - by threading through BB. I don't think this happens - enough in practice to worry about it. */ - if (e->flags & EDGE_DFS_BACK) - continue; - - /* If the incoming edge threads to the loop exit, then it - is clearly safe. */ - e2 = e->aux; - if (e2 && (e2->flags & EDGE_LOOP_EXIT)) - continue; - - /* E enters the loop header and is not threaded. We can - not allow any other incoming edges to thread into - the loop as that would create an irreducible region. */ - if (!e2) - { - may_create_irreducible_region = true; - break; - } - - /* We know that this incoming edge threads to a block inside - the loop. This edge must thread to the same target in - the loop as any previously seen threaded edges. Otherwise - we will create an irreducible region. */ - if (!dest) - dest = e2; - else if (e2 != dest) - { - may_create_irreducible_region = true; - break; - } - } - } - - /* If we might create an irreducible region, then cancel any of - the jump threading requests for incoming edges which are - not backedges and which do not thread to the exit block. */ - if (may_create_irreducible_region) - { - FOR_EACH_EDGE (e, ei, bb->preds) - { - edge e2; - - /* Ignore back edges. */ - if (e->flags & EDGE_DFS_BACK) - continue; - - e2 = e->aux; - - /* If this incoming edge was not threaded, then there is - nothing to do. */ - if (!e2) - continue; - - /* If this incoming edge threaded to the loop exit, - then it can be ignored as it is safe. 
*/ - if (e2->flags & EDGE_LOOP_EXIT) - continue; - - if (e2) - { - /* This edge threaded into the loop and the jump thread - request must be cancelled. */ - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Not threading jump %d --> %d to %d\n", - e->src->index, e->dest->index, e2->dest->index); - e->aux = NULL; - } - } - } -} - /* Hash table traversal callback to redirect each incoming edge associated with this hash table element to its new destination. */ @@ -620,11 +428,8 @@ redirect_edges (void **slot, void *data) /* Redirect the incoming edge to the appropriate duplicate block. */ e2 = redirect_edge_and_branch (e, rd->dup_block); + gcc_assert (e == e2); flush_pending_stmts (e2); - - if ((dump_file && (dump_flags & TDF_DETAILS)) - && e->src != e2->src) - fprintf (dump_file, " basic block %d created\n", e2->src->index); } else { @@ -696,46 +501,23 @@ redirection_block_p (basic_block bb) successor of BB. We then revector the incoming edges into BB to the appropriate duplicate of BB. - BB and its duplicates will have assignments to the same set of - SSA_NAMEs. Right now, we just call into update_ssa to update the - SSA graph for those names. - - We are also going to experiment with a true incremental update - scheme for the duplicated resources. One of the interesting - properties we can exploit here is that all the resources set - in BB will have the same IDFS, so we have one IDFS computation - per block with incoming threaded edges, which can lower the - cost of the true incremental update algorithm. */ + If NOLOOP_ONLY is true, we only perform the threading as long as it + does not affect the structure of the loops in a nontrivial way. */ static bool -thread_block (basic_block bb) +thread_block (basic_block bb, bool noloop_only) { /* E is an incoming edge into BB that we may or may not want to redirect to a duplicate of BB. 
*/ - edge e; + edge e, e2; edge_iterator ei; struct local_info local_info; - - /* FOUND_BACKEDGE indicates that we found an incoming backedge - into BB, in which case we may ignore certain jump threads - to avoid creating irreducible regions. */ - bool found_backedge = false; + struct loop *loop = bb->loop_father; /* ALL indicates whether or not all incoming edges into BB should be threaded to a duplicate of BB. */ bool all = true; - /* If optimizing for size, only thread this block if we don't have - to duplicate it or it's an otherwise empty redirection block. */ - if (optimize_size - && EDGE_COUNT (bb->preds) > 1 - && !redirection_block_p (bb)) - { - FOR_EACH_EDGE (e, ei, bb->preds) - e->aux = NULL; - return false; - } - /* To avoid scanning a linear array for the element we need we instead use a hash table. For normal code there should be no noticeable difference. However, if we have a block with a large number of @@ -745,35 +527,45 @@ thread_block (basic_block bb) redirection_data_eq, free); - FOR_EACH_EDGE (e, ei, bb->preds) - found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0); + /* If we thread the latch of the loop to its exit, the loop ceases to + exist. Make sure we do not restrict ourselves in order to preserve + this loop. */ + if (current_loops && loop->header == bb) + { + e = loop_latch_edge (loop); + e2 = e->aux; - /* If BB has incoming backedges, then threading across BB might - introduce an irreducible region, which would be undesirable - as that inhibits various optimizations later. Prune away - any jump threading requests which we know will result in - an irreducible region. */ - if (found_backedge) - prune_undesirable_thread_requests (bb); + if (e2 && loop_exit_edge_p (loop, e2)) + { + loop->header = NULL; + loop->latch = NULL; + } + } /* Record each unique threaded destination into a hash table for efficient lookups. 
*/ FOR_EACH_EDGE (e, ei, bb->preds) { - if (!e->aux) + e2 = e->aux; + + if (!e2 + /* If NOLOOP_ONLY is true, we only allow threading through the + header of a loop to exit edges. */ + || (noloop_only + && current_loops + && bb == bb->loop_father->header + && !loop_exit_edge_p (bb->loop_father, e2))) { all = false; + continue; } - else - { - edge e2 = e->aux; - update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), - e->count, e->aux); - /* Insert the outgoing edge into the hash table if it is not - already in the hash table. */ - lookup_redirection_data (e2, e, INSERT); - } + update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), + e->count, e->aux); + + /* Insert the outgoing edge into the hash table if it is not + already in the hash table. */ + lookup_redirection_data (e2, e, INSERT); } /* If we are going to thread all incoming edges to an outgoing edge, then @@ -821,6 +613,339 @@ thread_block (basic_block bb) return local_info.jumps_threaded; } +/* Threads edge E through E->dest to the edge E->aux. Returns the copy + of E->dest created during threading, or E->dest if it was not necessary + to copy it (E is its single predecessor). */ + +static basic_block +thread_single_edge (edge e) +{ + basic_block bb = e->dest; + edge eto = e->aux; + struct redirection_data rd; + struct local_info local_info; + + e->aux = NULL; + + thread_stats.num_threaded_edges++; + + if (single_pred_p (bb)) + { + /* If BB has just a single predecessor, we should only remove the + control statements at its end, and successors except for ETO. */ + remove_ctrl_stmt_and_useless_edges (bb, eto->dest); + return bb; + } + + /* Otherwise, we need to create a copy. 
 */ + update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto); + + local_info.bb = bb; + rd.outgoing_edge = eto; + + create_block_for_threading (bb, &rd); + create_edge_and_update_destination_phis (&rd); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Threaded jump %d --> %d to %d\n", + e->src->index, e->dest->index, rd.dup_block->index); + + rd.dup_block->count = e->count; + rd.dup_block->frequency = EDGE_FREQUENCY (e); + single_succ_edge (rd.dup_block)->count = e->count; + redirect_edge_and_branch (e, rd.dup_block); + flush_pending_stmts (e); + + return rd.dup_block; +} + +/* Callback for dfs_enumerate_from. Returns true if BB is different + from STOP and DBDS_CE_STOP. */ + +static basic_block dbds_ce_stop; +static bool +dbds_continue_enumeration_p (basic_block bb, void *stop) +{ + return (bb != (basic_block) stop + && bb != dbds_ce_stop); +} + +/* Evaluates the dominance relationship of latch of the LOOP and BB, and + returns the state. */ + +enum bb_dom_status +{ + /* BB does not dominate latch of the LOOP. */ + DOMST_NONDOMINATING, + /* The LOOP is broken (there is no path from the header to its latch). */ + DOMST_LOOP_BROKEN, + /* BB dominates the latch of the LOOP. */ + DOMST_DOMINATING +}; + +static enum bb_dom_status +determine_bb_domination_status (struct loop *loop, basic_block bb) +{ + basic_block *bblocks; + unsigned nblocks, i; + bool bb_reachable = false; + edge_iterator ei; + edge e; + +#ifdef ENABLE_CHECKING + /* This function assumes BB is a successor of LOOP->header. */ + { + bool ok = false; + + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (e->src == loop->header) + { + ok = true; + break; + } + } + + gcc_assert (ok); + } +#endif + + if (bb == loop->latch) + return DOMST_DOMINATING; + + /* Check that BB dominates LOOP->latch, and that it is back-reachable + from it. 
*/ + + bblocks = XCNEWVEC (basic_block, loop->num_nodes); + dbds_ce_stop = loop->header; + nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p, + bblocks, loop->num_nodes, bb); + for (i = 0; i < nblocks; i++) + FOR_EACH_EDGE (e, ei, bblocks[i]->preds) + { + if (e->src == loop->header) + { + free (bblocks); + return DOMST_NONDOMINATING; + } + if (e->src == bb) + bb_reachable = true; + } + + free (bblocks); + return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN); +} + +/* Thread jumps through the header of LOOP. Returns true if cfg changes. + If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges + to the inside of the loop. */ + +static bool +thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers) +{ + basic_block header = loop->header; + edge e, tgt_edge, latch = loop_latch_edge (loop); + edge_iterator ei; + basic_block tgt_bb, atgt_bb; + enum bb_dom_status domst; + + /* We have already threaded through headers to exits, so all the threading + requests now are to the inside of the loop. We need to avoid creating + irreducible regions (i.e., loops with more than one entry block), and + also loop with several latch edges, or new subloops of the loop (although + there are cases where it might be appropriate, it is difficult to decide, + and doing it wrongly may confuse other optimizers). + + We could handle more general cases here. However, the intention is to + preserve some information about the loop, which is impossible if its + structure changes significantly, in a way that is not well understood. + Thus we only handle few important special cases, in which also updating + of the loop-carried information should be feasible: + + 1) Propagation of latch edge to a block that dominates the latch block + of a loop. 
This aims to handle the following idiom: + + first = 1; + while (1) + { + if (first) + initialize; + first = 0; + body; + } + + After threading the latch edge, this becomes + + first = 1; + if (first) + initialize; + while (1) + { + first = 0; + body; + } + + The original header of the loop is moved out of it, and we may thread + the remaining edges through it without further constraints. + + 2) All entry edges are propagated to a single basic block that dominates + the latch block of the loop. This aims to handle the following idiom + (normally created for "for" loops): + + i = 0; + while (1) + { + if (i >= 100) + break; + body; + i++; + } + + This becomes + + i = 0; + while (1) + { + body; + i++; + if (i >= 100) + break; + } + */ + + /* Threading through the header won't improve the code if the header has just + one successor. */ + if (single_succ_p (header)) + goto fail; + + if (latch->aux) + { + tgt_edge = latch->aux; + tgt_bb = tgt_edge->dest; + } + else if (!may_peel_loop_headers + && !redirection_block_p (loop->header)) + goto fail; + else + { + tgt_bb = NULL; + tgt_edge = NULL; + FOR_EACH_EDGE (e, ei, header->preds) + { + if (!e->aux) + { + if (e == latch) + continue; + + /* If latch is not threaded, and there is a header + edge that is not threaded, we would create loop + with multiple entries. */ + goto fail; + } + + tgt_edge = e->aux; + atgt_bb = tgt_edge->dest; + if (!tgt_bb) + tgt_bb = atgt_bb; + /* Two targets of threading would make us create loop + with multiple entries. */ + else if (tgt_bb != atgt_bb) + goto fail; + } + + if (!tgt_bb) + { + /* There are no threading requests. */ + return false; + } + + /* Redirecting to empty loop latch is useless. */ + if (tgt_bb == loop->latch + && empty_block_p (loop->latch)) + goto fail; + } + + /* The target block must dominate the loop latch, otherwise we would be + creating a subloop. 
*/ + domst = determine_bb_domination_status (loop, tgt_bb); + if (domst == DOMST_NONDOMINATING) + goto fail; + if (domst == DOMST_LOOP_BROKEN) + { + /* If the loop ceased to exist, mark it as such, and thread through its + original header. */ + loop->header = NULL; + loop->latch = NULL; + return thread_block (header, false); + } + + if (tgt_bb->loop_father->header == tgt_bb) + { + /* If the target of the threading is a header of a subloop, we need + to create a preheader for it, so that the headers of the two loops + do not merge. */ + if (EDGE_COUNT (tgt_bb->preds) > 2) + { + tgt_bb = create_preheader (tgt_bb->loop_father, 0); + gcc_assert (tgt_bb != NULL); + } + else + tgt_bb = split_edge (tgt_edge); + } + + if (latch->aux) + { + /* First handle the case latch edge is redirected. */ + loop->latch = thread_single_edge (latch); + gcc_assert (single_succ (loop->latch) == tgt_bb); + loop->header = tgt_bb; + + /* Thread the remaining edges through the former header. */ + thread_block (header, false); + } + else + { + basic_block new_preheader; + + /* Now consider the case entry edges are redirected to the new entry + block. Remember one entry edge, so that we can find the new + preheader (its destination after threading). */ + FOR_EACH_EDGE (e, ei, header->preds) + { + if (e->aux) + break; + } + + /* The duplicate of the header is the new preheader of the loop. Ensure + that it is placed correctly in the loop hierarchy. */ + loop->copy = loop_outer (loop); + + thread_block (header, false); + loop->copy = NULL; + new_preheader = e->dest; + + /* Create the new latch block. This is always necessary, as the latch + must have only a single successor, but the original header had at + least two successors. 
*/ + loop->latch = NULL; + mfb_kj_edge = single_succ_edge (new_preheader); + loop->header = mfb_kj_edge->dest; + latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL); + loop->header = latch->dest; + loop->latch = latch->src; + } + + return true; + +fail: + /* We failed to thread anything. Cancel the requests. */ + FOR_EACH_EDGE (e, ei, header->preds) + { + e->aux = NULL; + } + return false; +} + /* Walk through the registered jump threads and convert them into a form convenient for this pass. @@ -838,6 +963,11 @@ static void mark_threaded_blocks (bitmap threaded_blocks) { unsigned int i; + bitmap_iterator bi; + bitmap tmp = BITMAP_ALLOC (NULL); + basic_block bb; + edge e; + edge_iterator ei; for (i = 0; i < VEC_length (edge, threaded_edges); i += 2) { @@ -845,8 +975,30 @@ mark_threaded_blocks (bitmap threaded_blocks) edge e2 = VEC_index (edge, threaded_edges, i + 1); e->aux = e2; - bitmap_set_bit (threaded_blocks, e->dest->index); + bitmap_set_bit (tmp, e->dest->index); + } + + /* If optimizing for size, only thread through block if we don't have + to duplicate it or it's an otherwise empty redirection block. */ + if (optimize_size) + { + EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) + { + bb = BASIC_BLOCK (i); + if (EDGE_COUNT (bb->preds) > 1 + && !redirection_block_p (bb)) + { + FOR_EACH_EDGE (e, ei, bb->preds) + e->aux = NULL; + } + else + bitmap_set_bit (threaded_blocks, i); + } } + else + bitmap_copy (threaded_blocks, tmp); + + BITMAP_FREE(tmp); } @@ -856,15 +1008,20 @@ mark_threaded_blocks (bitmap threaded_blocks) It is the caller's responsibility to fix the dominance information and rewrite duplicated SSA_NAMEs back into SSA form. + If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through + loop headers if it does not simplify the loop. + Returns true if one or more edges were threaded, false otherwise. 
*/ bool -thread_through_all_blocks (void) +thread_through_all_blocks (bool may_peel_loop_headers) { bool retval = false; unsigned int i; bitmap_iterator bi; bitmap threaded_blocks; + struct loop *loop; + loop_iterator li; if (threaded_edges == NULL) return false; @@ -874,14 +1031,38 @@ thread_through_all_blocks (void) mark_threaded_blocks (threaded_blocks); + if (current_loops) + FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + loop->copy = NULL; + + /* First perform the threading requests that do not affect + loop structure. */ EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi) { basic_block bb = BASIC_BLOCK (i); if (EDGE_COUNT (bb->preds) > 0) - retval |= thread_block (bb); + retval |= thread_block (bb, true); + } + + /* Then perform the threading through loop headers. We start with the + innermost loop, so that the changes in cfg we perform won't affect + further threading. */ + if (current_loops) + { + FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + { + if (!loop->header + || !bitmap_bit_p (threaded_blocks, loop->header->index)) + continue; + + retval |= thread_through_loop_header (loop, may_peel_loop_headers); + } } + if (retval) + free_dominance_info (CDI_DOMINATORS); + if (dump_file && (dump_flags & TDF_STATS)) fprintf (dump_file, "\nJumps threaded: %lu\n", thread_stats.num_threaded_edges); @@ -890,6 +1071,7 @@ thread_through_all_blocks (void) threaded_blocks = NULL; VEC_free (edge, heap, threaded_edges); threaded_edges = NULL; + return retval; } diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index a9141d76ed2..5f42ff6d1a5 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -5865,13 +5865,7 @@ identify_jump_threads (void) static void finalize_jump_threads (void) { - bool cfg_altered = false; - cfg_altered = thread_through_all_blocks (); - - /* If we threaded jumps, then we need to recompute the dominance - information. 
*/ - if (cfg_altered) - free_dominance_info (CDI_DOMINATORS); + thread_through_all_blocks (false); VEC_free (tree, heap, stack); } @@ -5990,22 +5984,19 @@ vrp_finalize (void) static unsigned int execute_vrp (void) { - insert_range_assertions (); - - loop_optimizer_init (LOOPS_NORMAL); + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); if (current_loops) - scev_initialize (); + { + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + scev_initialize (); + } + + insert_range_assertions (); vrp_initialize (); ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node); vrp_finalize (); - if (current_loops) - { - scev_finalize (); - loop_optimizer_finalize (); - } - /* ASSERT_EXPRs must be removed before finalizing jump threads as finalizing jump threads calls the CFG cleanup code which does not properly handle ASSERT_EXPRs. */ @@ -6019,6 +6010,12 @@ execute_vrp (void) update_ssa (TODO_update_ssa); finalize_jump_threads (); + if (current_loops) + { + scev_finalize (); + loop_optimizer_finalize (); + } + return 0; }