From d4193b85ae9e6ddb2c4e563fcf622c81ba5b05c0 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 17 Jan 2017 13:49:41 +0100 Subject: [PATCH] re PR tree-optimization/77445 (Performance drop after r239219 on coremark test) PR middle-end/77445 * tree-ssa-threadupdate.c (remove_ctrl_stmt_and_useless_edges): correctly set frequency of oudgoing edge. (duplicate_thread_path): Fix profile updating. * gcc.dg/tree-ssa/pr77445-2.c: New testcase. * gcc.dg/tree-ssa/pr77445.c: New testcase. From-SVN: r244528 --- gcc/ChangeLog | 7 ++ gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c | 123 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr77445.c | 29 +++++ gcc/tree-ssa-threadupdate.c | 105 ++++++++++-------- 5 files changed, 227 insertions(+), 43 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr77445.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bef211cbd3e..5754ee21f81 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-01-17 Jan Hubicka + + PR middle-end/77445 + * tree-ssa-threadupdate.c (remove_ctrl_stmt_and_useless_edges): + correctly set frequency of oudgoing edge. + (duplicate_thread_path): Fix profile updating. + 2017-01-17 Jakub Jelinek PR other/79046 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9f61689d194..c34cac3d7ff 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-01-17 Jan Hubicka + + PR middle-end/77445 + * gcc.dg/tree-ssa/pr77445-2.c: New testcase. + * gcc.dg/tree-ssa/pr77445.c: New testcase. + 2017-01-17 Jakub Jelinek * g++.dg/tree-ssa/ssa-dse-2.C (size_t): Typedef to __SIZE_TYPE__ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c new file mode 100644 index 00000000000..47402ec5c4f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c @@ -0,0 +1,123 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats" } */ +typedef enum STATES { + START=0, + INVALID, + S1, + S2, + INT, + FLOAT , + EXPONENT, + SCIENTIFIC, + NUM_STATES +} state_e ; + +typedef unsigned char u8; +typedef unsigned int u32; + +static u8 is_digit(u8 c) { + return (u8)((c>='0') & (c<='9')) ? 1 : 0; +} + +enum STATES FMS( u8 **in , u32 *transitions) { + u8 *str = *in; + u8 NEXT_SYMBOL; + enum STATES state=START; + for( ; *str && state != INVALID; str++ ) { + NEXT_SYMBOL = *str; + if (NEXT_SYMBOL==',') /* end of this input */ { + transitions[state]++; + str++; + break; + } + switch(state) { + case START: + if(is_digit(NEXT_SYMBOL)) { + state = INT; + } + else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = S1; + } + else if( NEXT_SYMBOL == '.' ) { + state = FLOAT ; + } + else { + state = INVALID; + } + transitions[START]++; + break; + case S1: + if(is_digit(NEXT_SYMBOL)) { + state = INT; + transitions[S1]++; + } + else if( NEXT_SYMBOL == '.' ) { + state = FLOAT ; + transitions[S1]++; + } + else { + state = INVALID; + transitions[S1]++; + } + break; + case INT: + if( NEXT_SYMBOL == '.' ) { + state = FLOAT ; + transitions[INT]++; + } + else if(!is_digit(NEXT_SYMBOL)) { + state = INVALID; + transitions[INT]++; + } + break; + case FLOAT : + if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) { + state = S2; + transitions[FLOAT ]++; + } + else if(!is_digit(NEXT_SYMBOL)) { + state = INVALID; + transitions[FLOAT ]++; + } + break; + case S2: + if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = EXPONENT; + transitions[S2]++; + } + else { + state = INVALID; + transitions[S2]++; + } + break; + case EXPONENT: + if(is_digit(NEXT_SYMBOL)) { + state = SCIENTIFIC; + transitions[EXPONENT]++; + } + else { + state = INVALID; + transitions[EXPONENT]++; + } + break; + case SCIENTIFIC: + if(!is_digit(NEXT_SYMBOL)) { + state = INVALID; + } + break; + default: + break; + } + } + if (state==INVALID) + transitions[INVALID]++; + + *in = str; + return state; +} + +/* The profile is not updated perfectly because it is inconsitent from + profile estimation stage. But the number of inconsistencies should not + increase much. */ +/* { dg-final { scan-tree-dump "Jumps threaded: 16" "thread1" } } */ +/* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445.c new file mode 100644 index 00000000000..98eb0f2cec6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-thread3-details-blocks -fno-early-inlining -fno-tree-vrp -fno-tree-dominator-opts" } */ + +static int a; +static int b; +void test2 (); +void +test () +{ + b = 7; +} + +void +main (int argc) +{ + if (argc) + { + a = 7; + test (); + } + else + a = 0; + if (a) + test2 (); + if (b) + test2 (); +} +/* { dg-final { scan-tree-dump-times "Registering FSM jump thread" 2 "thread3" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "thread3" } } */ diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index 2da93a803d4..f85fed314f0 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -301,7 +301,11 @@ remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) remove_edge (e); } else - ei_next (&ei); + { + e->probability = REG_BR_PROB_BASE; + e->count = bb->count; + ei_next (&ei); + } } /* If the remaining edge is a loop exit, there must have @@ -2212,8 +2216,8 @@ duplicate_thread_path (edge entry, edge exit, struct loop *loop = entry->dest->loop_father; edge exit_copy; edge redirected; - int total_freq = 0, entry_freq = 0; - gcov_type total_count = 0, entry_count = 0; + int curr_freq; + gcov_type curr_count; if (!can_copy_bbs_p (region, n_region)) return false; @@ -2240,27 +2244,6 @@ duplicate_thread_path (edge entry, edge exit, free_region_copy = true; } - if (entry->dest->count) - { - total_count = entry->dest->count; - entry_count = entry->count; - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (entry_count > total_count) - entry_count = total_count; - } - else - { - total_freq = entry->dest->frequency; - entry_freq = EDGE_FREQUENCY (entry); - /* Fix up corner cases, to avoid division by zero or creation of negative - frequencies. */ - if (total_freq == 0) - total_freq = 1; - else if (entry_freq > total_freq) - entry_freq = total_freq; - } - copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, split_edge_bb_loc (entry), false); @@ -2270,17 +2253,61 @@ duplicate_thread_path (edge entry, edge exit, invalidating the property that is propagated by executing all the blocks of the jump-thread path in order. */ + curr_count = entry->count; + curr_freq = EDGE_FREQUENCY (entry); + for (i = 0; i < n_region; i++) { edge e; edge_iterator ei; basic_block bb = region_copy[i]; + /* Watch inconsistent profile. */ + if (curr_count > region[i]->count) + curr_count = region[i]->count; + if (curr_freq > region[i]->frequency) + curr_freq = region[i]->frequency; + /* Scale current BB. */ + if (region[i]->count) + { + /* In the middle of the path we only scale the frequencies. + In last BB we need to update probabilities of outgoing edges + because we know which one is taken at the threaded path. */ + if (i + 1 != n_region) + scale_bbs_frequencies_gcov_type (region + i, 1, + region[i]->count - curr_count, + region[i]->count); + else + update_bb_profile_for_threading (region[i], + curr_freq, curr_count, + exit); + scale_bbs_frequencies_gcov_type (region_copy + i, 1, curr_count, + region_copy[i]->count); + } + else if (region[i]->frequency) + { + if (i + 1 != n_region) + scale_bbs_frequencies_int (region + i, 1, + region[i]->frequency - curr_freq, + region[i]->frequency); + else + update_bb_profile_for_threading (region[i], + curr_freq, curr_count, + exit); + scale_bbs_frequencies_int (region_copy + i, 1, curr_freq, + region_copy[i]->frequency); + } + if (single_succ_p (bb)) { /* Make sure the successor is the next node in the path. */ gcc_assert (i + 1 == n_region || region_copy[i + 1] == single_succ_edge (bb)->dest); + if (i + 1 != n_region) + { + curr_freq = EDGE_FREQUENCY (single_succ_edge (bb)); + curr_count = single_succ_edge (bb)->count; + } continue; } @@ -2307,22 +2334,13 @@ duplicate_thread_path (edge entry, edge exit, if (orig) redirect_edge_and_branch_force (e, orig); } + else + { + curr_freq = EDGE_FREQUENCY (e); + curr_count = e->count; + } } - if (total_count) - { - scale_bbs_frequencies_gcov_type (region, n_region, - total_count - entry_count, - total_count); - scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count, - total_count); - } - else - { - scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, - total_freq); - scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); - } if (flag_checking) verify_jump_thread (region_copy, n_region); @@ -2337,11 +2355,12 @@ duplicate_thread_path (edge entry, edge exit, edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU); - if (e) { - rescan_loop_exit (e, true, false); - e->probability = REG_BR_PROB_BASE; - e->count = region_copy[n_region - 1]->count; - } + if (e) + { + rescan_loop_exit (e, true, false); + e->probability = REG_BR_PROB_BASE; + e->count = region_copy[n_region - 1]->count; + } /* Redirect the entry and add the phi node arguments. */ if (entry->dest == loop->header) -- 2.30.2