From fcae5121154d1c3382b056bcc2c563cedac28e74 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Tue, 6 Oct 2020 12:53:09 -0400 Subject: [PATCH] Hybrid EVRP and testcases Provide a hybrid EVRP pass which uses legacy EVRP and adds additonal enhancements from the new ranger infrastructure. A New option is also provided, -fevrp-mode= And adjust testcases gcc/ChangeLog: 2020-10-06 Andrew MacLeod * flag-types.h (enum evrp_mode): New enumerated type EVRP_MODE_*. * common.opt (fevrp-mode): New undocumented flag. * gimple-ssa-evrp.c: Include gimple-range.h (class rvrp_folder): EVRP folding using ranger exclusively. (rvrp_folder::rvrp_folder): New. (rvrp_folder::~rvrp_folder): New. (rvrp_folder::value_of_expr): New. Use rangers value_of_expr. (rvrp_folder::value_on_edge): New. Use rangers value_on_edge. (rvrp_folder::value_of_Stmt): New. Use rangers value_of_stmt. (rvrp_folder::fold_stmt): New. Call the simplifier. (class hybrid_folder): EVRP folding using both engines. (hybrid_folder::hybrid_folder): New. (hybrid_folder::~hybrid_folder): New. (hybrid_folder::fold_stmt): New. Simplify with one engne, then the other. (hybrid_folder::value_of_expr): New. Use both value routines. (hybrid_folder::value_on_edge): New. Use both value routines. (hybrid_folder::value_of_stmt): New. Use both value routines. (hybrid_folder::choose_value): New. Choose between range_analzyer and rangers values. (execute_early_vrp): Choose a folder based on flag_evrp_mode. * vr-values.c (simplify_using_ranges::fold_cond): Try range_of_stmt first to see if it returns a value. (simplify_using_ranges::simplify_switch_using_ranges): Return true if any changes were made to the switch. gcc/testsuite/ChangeLog: 2020-10-06 Andrew MacLeod * gcc.dg/pr81192.c: Disable EVRP pass. * gcc.dg/tree-ssa/pr77445-2.c: Ditto. * gcc.dg/tree-ssa/ssa-dom-thread-6.c: Adjust. * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Ditto. --- gcc/common.opt | 31 +++ gcc/flag-types.h | 13 ++ gcc/gimple-ssa-evrp.c | 221 +++++++++++++++++- gcc/testsuite/gcc.dg/pr81192.c | 18 +- gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c | 2 +- .../gcc.dg/tree-ssa/ssa-dom-thread-6.c | 38 ++- .../gcc.dg/tree-ssa/ssa-dom-thread-7.c | 27 ++- gcc/vr-values.c | 31 ++- 8 files changed, 365 insertions(+), 16 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 7e789d1c47f..e2bd90c450d 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2870,6 +2870,37 @@ ftree-vrp Common Report Var(flag_tree_vrp) Init(0) Optimization Perform Value Range Propagation on trees. +fevrp-mode= +Common Undocumented Joined RejectNegative Enum(evrp_mode) Var(flag_evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Optimization +-fevrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in. + +Enum +Name(evrp_mode) Type(enum evrp_mode) UnknownError(unknown evrp mode %qs) + +EnumValue +Enum(evrp_mode) String(legacy) Value(EVRP_MODE_EVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(ranger) Value(EVRP_MODE_RVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(legacy-first) Value(EVRP_MODE_EVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-first) Value(EVRP_MODE_RVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-trace) Value(EVRP_MODE_RVRP_TRACE) + +EnumValue +Enum(evrp_mode) String(ranger-debug) Value(EVRP_MODE_RVRP_DEBUG) + +EnumValue +Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE) + +EnumValue +Enum(evrp_mode) String(debug) Value(EVRP_MODE_DEBUG) + fsplit-paths Common Report Var(flag_split_paths) Init(0) Optimization Split paths leading to loop backedges. diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 852ea76eaa2..cd0f7f8f959 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -382,4 +382,17 @@ enum parloops_schedule_type PARLOOPS_SCHEDULE_RUNTIME }; +/* EVRP mode. */ +enum evrp_mode +{ + EVRP_MODE_EVRP_FIRST = 0, + EVRP_MODE_EVRP_ONLY = 1, + EVRP_MODE_RVRP_ONLY = 2, + EVRP_MODE_RVRP_FIRST = 3, + EVRP_MODE_TRACE = 4, + EVRP_MODE_DEBUG = 8 | EVRP_MODE_TRACE, + EVRP_MODE_RVRP_TRACE = EVRP_MODE_RVRP_ONLY | EVRP_MODE_TRACE, + EVRP_MODE_RVRP_DEBUG = EVRP_MODE_RVRP_ONLY | EVRP_MODE_DEBUG +}; + #endif /* ! GCC_FLAG_TYPES_H */ diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c index 60bf82a6805..6be32d7a3f6 100644 --- a/gcc/gimple-ssa-evrp.c +++ b/gcc/gimple-ssa-evrp.c @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3. If not see #include "tree-cfgcleanup.h" #include "vr-values.h" #include "gimple-ssa-evrp-analyze.h" +#include "gimple-range.h" + +// This is the classic EVRP folder which uses a dominator walk and pushes +// ranges into the next block if it is a single predecessor block. class evrp_folder : public substitute_and_fold_engine { @@ -98,12 +102,195 @@ public: m_range_analyzer.set_defs_to_varying (stmt); } -private: +protected: DISABLE_COPY_AND_ASSIGN (evrp_folder); evrp_range_analyzer m_range_analyzer; simplify_using_ranges simplifier; }; +// This is a ranger based folder which continues to use the dominator +// walk to access the substitute and fold machinery. Ranges are calculated +// on demand. + +class rvrp_folder : public substitute_and_fold_engine +{ +public: + + rvrp_folder () : substitute_and_fold_engine (), m_simplifier () + { + if (flag_evrp_mode & EVRP_MODE_TRACE) + m_ranger = new trace_ranger (); + else + m_ranger = new gimple_ranger (); + m_simplifier.set_range_query (m_ranger); + } + + ~rvrp_folder () + { + if (dump_file && (dump_flags & TDF_DETAILS)) + m_ranger->dump (dump_file); + delete m_ranger; + } + + tree value_of_expr (tree name, gimple *s = NULL) OVERRIDE + { + return m_ranger->value_of_expr (name, s); + } + + tree value_on_edge (edge e, tree name) OVERRIDE + { + return m_ranger->value_on_edge (e, name); + } + + tree value_of_stmt (gimple *s, tree name = NULL) OVERRIDE + { + return m_ranger->value_of_stmt (s, name); + } + + bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE + { + return m_simplifier.simplify (gsi); + } + +private: + DISABLE_COPY_AND_ASSIGN (rvrp_folder); + gimple_ranger *m_ranger; + simplify_using_ranges m_simplifier; +}; + +// In a hybrid folder, start with an EVRP folder, and add the required +// fold_stmt bits to either try the ranger first or second. +// +// The 3 value_* routines will always query both EVRP and the ranger for +// a result, and ensure they return the same value. If either returns a value +// when the other doesn't, it is flagged in the listing, and the discoverd +// value is returned. +// +// The simplifier is unable to process 2 different sources, thus we try to +// use one engine, and if it fails to simplify, try using the other engine. +// It is reported when the first attempt fails and the second succeeds. + +class hybrid_folder : public evrp_folder +{ +public: + hybrid_folder (bool evrp_first) + { + if (flag_evrp_mode & EVRP_MODE_TRACE) + m_ranger = new trace_ranger (); + else + m_ranger = new gimple_ranger (); + + if (evrp_first) + { + first = &m_range_analyzer; + second = m_ranger; + } + else + { + first = m_ranger; + second = &m_range_analyzer; + } + } + + ~hybrid_folder () + { + if (dump_file && (dump_flags & TDF_DETAILS)) + m_ranger->dump (dump_file); + delete m_ranger; + } + + bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE + { + simplifier.set_range_query (first); + if (simplifier.simplify (gsi)) + return true; + + simplifier.set_range_query (second); + if (simplifier.simplify (gsi)) + { + if (dump_file) + fprintf (dump_file, "EVRP:hybrid: Second query simplifed stmt\n"); + return true; + } + return false; + } + + tree value_of_expr (tree name, gimple *) OVERRIDE; + tree value_on_edge (edge, tree name) OVERRIDE; + tree value_of_stmt (gimple *, tree name) OVERRIDE; + +private: + DISABLE_COPY_AND_ASSIGN (hybrid_folder); + gimple_ranger *m_ranger; + range_query *first; + range_query *second; + tree choose_value (tree evrp_val, tree ranger_val); +}; + + +tree +hybrid_folder::value_of_expr (tree op, gimple *stmt) +{ + tree evrp_ret = evrp_folder::value_of_expr (op, stmt); + tree ranger_ret = m_ranger->value_of_expr (op, stmt); + return choose_value (evrp_ret, ranger_ret); +} + +tree +hybrid_folder::value_on_edge (edge e, tree op) +{ + tree evrp_ret = evrp_folder::value_on_edge (e, op); + tree ranger_ret = m_ranger->value_on_edge (e, op); + return choose_value (evrp_ret, ranger_ret); +} + +tree +hybrid_folder::value_of_stmt (gimple *stmt, tree op) +{ + tree evrp_ret = evrp_folder::value_of_stmt (stmt, op); + tree ranger_ret = m_ranger->value_of_stmt (stmt, op); + return choose_value (evrp_ret, ranger_ret); +} + +// Given trees returned by EVRP and Ranger, choose/report the value to use +// by the folder. + +tree +hybrid_folder::choose_value (tree evrp_val, tree ranger_val) +{ + if (!ranger_val) + { + // If neither returned a value, return NULL_TREE. + if (!evrp_val) + return NULL_TREE; + + // Otherwise EVRP found something. + if (dump_file) + { + fprintf (dump_file, "EVRP:hybrid: EVRP found singleton "); + print_generic_expr (dump_file, evrp_val); + fprintf (dump_file, "\n"); + } + return evrp_val; + } + + // Otherwise ranger found a value, if they match we're good. + if (evrp_val && !compare_values (evrp_val, ranger_val)) + return evrp_val; + + // We should never get different singletons. + gcc_checking_assert (!evrp_val); + + // Now ranger has found a value, but EVRP did not. + if (dump_file) + { + fprintf (dump_file, "EVRP:hybrid: RVRP found singleton "); + print_generic_expr (dump_file, ranger_val); + fprintf (dump_file, "\n"); + } + return ranger_val; +} + /* Main entry point for the early vrp pass which is a simplified non-iterative version of vrp where basic blocks are visited in dominance order. Value ranges discovered in early vrp will also be used by ipa-vrp. */ @@ -120,8 +307,36 @@ execute_early_vrp () scev_initialize (); calculate_dominance_info (CDI_DOMINATORS); - evrp_folder folder; - folder.substitute_and_fold (); + // only the last 2 bits matter for choosing the folder. + switch (flag_evrp_mode & EVRP_MODE_RVRP_FIRST) + { + case EVRP_MODE_EVRP_ONLY: + { + evrp_folder folder; + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_RVRP_ONLY: + { + rvrp_folder folder; + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_EVRP_FIRST: + { + hybrid_folder folder (true); + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_RVRP_FIRST: + { + hybrid_folder folder (false); + folder.substitute_and_fold (); + break; + } + default: + gcc_unreachable (); + } scev_finalize (); loop_optimizer_finalize (); diff --git a/gcc/testsuite/gcc.dg/pr81192.c b/gcc/testsuite/gcc.dg/pr81192.c index 0049f371b3d..71bbc13a0e9 100644 --- a/gcc/testsuite/gcc.dg/pr81192.c +++ b/gcc/testsuite/gcc.dg/pr81192.c @@ -1,4 +1,20 @@ -/* { dg-options "-Os -fdump-tree-pre-details" } */ +/* { dg-options "-Os -fdump-tree-pre-details -fdisable-tree-evrp" } */ + +/* Disable tree-evrp because the new version of evrp sees + : + if (j_8(D) != 2147483647) + goto ; [50.00%] + else + goto ; [50.00%] + : + iftmp.2_11 = j_8(D) + 1; + : + # iftmp.2_12 = PHI + +EVRP now recognizes a constant can be propagated into the 3->5 edge and +produces + # iftmp.2_12 = PHI <2147483647(3), iftmp.2_11(4)> +which causes the situation being tested to dissapear before we get to PRE. */ #if __SIZEOF_INT__ == 2 #define unsigned __UINT32_TYPE__ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c index 9c22c538da8..cf74e156109 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-thread-details-blocks-stats" } */ +/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-thread-details-blocks-stats" } */ typedef enum STATES { START=0, INVALID, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c index 551fbac3dad..16a9ef4e28a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c @@ -1,7 +1,41 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread2-details" } */ -/* { dg-final { scan-tree-dump-times "FSM" 3 "thread1" } } */ -/* { dg-final { scan-tree-dump-times "FSM" 5 "thread2" } } */ + +/* All the threads in the thread1 dump start on a X->BB12 edge, as can + be seen in the dump: + + Registering FSM jump thread: (x, 12) incoming edge; ... + etc + etc + + Before the new evrp, we were threading paths that started at the + following edges: + + Registering FSM jump thread: (10, 12) incoming edge + Registering FSM jump thread: (6, 12) incoming edge + Registering FSM jump thread: (9, 12) incoming edge + + This was because the PHI at BB12 had constant values coming in from + BB10, BB6, and BB9: + + # state_10 = PHI + + Now with the new evrp, we get: + + # state_10 = PHI <0(7), 0(10), state_11(5), 1(6), 0(8), 2(9), 1(11)> + + Thus, we have 3 more paths that are known to be constant and can be + threaded. Which means that by the second threading pass, we can + only find one profitable path. + + For the record, all these extra constants are better paths coming + out of switches. For example: + + SWITCH_BB -> BBx -> BBy -> BBz -> PHI + + We now know the value of the switch index at PHI. */ +/* { dg-final { scan-tree-dump-times "FSM" 6 "thread1" } } */ +/* { dg-final { scan-tree-dump-times "FSM" 1 "thread2" } } */ int sum0, sum1, sum2, sum3; int foo (char *s, char **ret) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c index a3395578f78..bad5bc1d003 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c @@ -1,20 +1,31 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 16" "thread1" } } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 9" "thread2" } } */ + +/* Here we have the same issue as was commented in ssa-dom-thread-6.c. + The PHI coming into the threader has a lot more constants, so the + threader can thread more paths. + +$ diff clean/a.c.105t.mergephi2 a.c.105t.mergephi2 +252c252 +< # s_50 = PHI +--- +> # s_50 = PHI +272a273 + + I spot checked a few and they all have the same pattern. We are + basically tracking the switch index better through multiple + paths. */ + +/* { dg-final { scan-tree-dump "Jumps threaded: 19" "thread1" } } */ +/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread2" } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2" } } */ + /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough to change decisions in switch expansion which in turn can expose new jump threading opportunities. Skip the later tests on aarch64. */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" { target { ! aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp2" { target { ! aarch64*-*-* } } } } */ -/* Most architectures get 3 threadable paths here, whereas aarch64 and - possibly others get 5. We really should rewrite threading tests to - test a specific IL sequence, not gobs of code whose IL can vary - from architecture to architecture. */ -/* { dg-final { scan-tree-dump "Jumps threaded: \[35\]" "thread3" } } */ - enum STATE { S0=0, SI, diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 4d7dfd0b4bf..88aa672466c 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -3606,6 +3606,35 @@ simplify_using_ranges::fold_cond (gcond *cond) some point we should merge all variants of this code. */ edge taken_edge; vrp_visit_cond_stmt (cond, &taken_edge); + + int_range_max r; + if (query->range_of_stmt (r, cond) && r.singleton_p ()) + { + // COND has already been folded if arguments are constant. + if (TREE_CODE (gimple_cond_lhs (cond)) != SSA_NAME + && TREE_CODE (gimple_cond_rhs (cond)) != SSA_NAME) + return false; + + if (r.zero_p ()) + { + gcc_checking_assert (!taken_edge + || taken_edge->flags & EDGE_FALSE_VALUE); + if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + fprintf (dump_file, "\nPredicate evaluates to: 0\n"); + gimple_cond_make_false (cond); + } + else + { + gcc_checking_assert (!taken_edge + || taken_edge->flags & EDGE_TRUE_VALUE); + if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + fprintf (dump_file, "\nPredicate evaluates to: 1\n"); + gimple_cond_make_true (cond); + } + update_stmt (cond); + return true; + } + if (taken_edge) { if (taken_edge->flags & EDGE_TRUE_VALUE) @@ -3947,7 +3976,7 @@ simplify_using_ranges::simplify_switch_using_ranges (gswitch *stmt) su.stmt = stmt; su.vec = vec2; to_update_switch_stmts.safe_push (su); - return false; + return true; } void -- 2.30.2