From c51b04ec33fea61e89363fd5c94e38b4df8b12c1 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 3 Apr 2019 12:30:16 +0000
Subject: [PATCH] re PR rtl-optimization/84101 (-O3 and -ftree-vectorize trying too hard for function returning trivial pair-of-uint64_t-structure)

2019-04-03  Richard Biener

	PR tree-optimization/84101
	* tree-vect-stmts.c: Include explow.h for hard_function_value,
	regs.h for hard_regno_nregs.
	(cfun_returns): New helper.
	(vect_model_store_cost): When vectorizing a store to a decl
	we return and the function ABI returns in a multi-reg location
	account for the possible spilling that will happen.

	* gcc.target/i386/pr84101.c: New testcase.

From-SVN: r270123
---
 gcc/ChangeLog                           | 10 ++++
 gcc/testsuite/ChangeLog                 |  5 ++
 gcc/testsuite/gcc.target/i386/pr84101.c | 21 ++++++++
 gcc/tree-vect-stmts.c                   | 64 +++++++++++++++++++++++++
 4 files changed, 100 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr84101.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a280b6c2cc6..9149d9c9a9e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-04-03  Richard Biener
+
+	PR tree-optimization/84101
+	* tree-vect-stmts.c: Include explow.h for hard_function_value,
+	regs.h for hard_regno_nregs.
+	(cfun_returns): New helper.
+	(vect_model_store_cost): When vectorizing a store to a decl
+	we return and the function ABI returns in a multi-reg location
+	account for the possible spilling that will happen.
+
 2019-04-03  Andreas Krebbel
 
 	* config/s390/s390.c (s390_legitimate_address_p): Reject long
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0344e24ba1b..94729eed46c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-04-03  Richard Biener
+
+	PR tree-optimization/84101
+	* gcc.target/i386/pr84101.c: New testcase.
+
 2019-04-02  Jeff Law
 
 	* gcc.target/visium/bit_shift.c: xfail.
diff --git a/gcc/testsuite/gcc.target/i386/pr84101.c b/gcc/testsuite/gcc.target/i386/pr84101.c
new file mode 100644
index 00000000000..006e6a455d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr84101.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slp2-details" } */
+
+typedef struct uint64_pair uint64_pair_t ;
+struct uint64_pair
+{
+  unsigned long w0 ;
+  unsigned long w1 ;
+} ;
+
+uint64_pair_t pair(int num)
+{
+  uint64_pair_t p ;
+
+  p.w0 = num << 1 ;
+  p.w1 = num >> 1 ;
+
+  return p ;
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 6c631db9039..2388c93af34 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "tree-ssa-loop-manip.h"
 #include "cfgloop.h"
+#include "explow.h"
 #include "tree-ssa-loop.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
@@ -52,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "vec-perm-indices.h"
 #include "tree-ssa-loop-niter.h"
 #include "gimple-fold.h"
+#include "regs.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -948,6 +950,37 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
 }
 
+/* Returns true if the current function returns DECL.  */
+
+static bool
+cfun_returns (tree decl)
+{
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+    {
+      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
+      if (!ret)
+	continue;
+      if (gimple_return_retval (ret) == decl)
+	return true;
+      /* We often end up with an aggregate copy to the result decl,
+	 handle that case as well.  First skip intermediate clobbers
+	 though.  */
+      gimple *def = ret;
+      do
+	{
+	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
+	}
+      while (gimple_clobber_p (def));
+      if (is_a <gassign *> (def)
+	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
+	  && gimple_assign_rhs1 (def) == decl)
+	return true;
+    }
+  return false;
+}
+
 /* Function vect_model_store_cost
 
    Models cost for stores.  In the case of grouped accesses, one access
@@ -1032,6 +1065,37 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
 					   vec_to_scalar, stmt_info, 0, vect_body);
     }
 
+  /* When vectorizing a store into the function result assign
+     a penalty if the function returns in a multi-register location.
+     In this case we assume we'll end up with having to spill the
+     vector result and do piecewise loads as a conservative estimate.  */
+  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
+  if (base
+      && (TREE_CODE (base) == RESULT_DECL
+	  || (DECL_P (base) && cfun_returns (base)))
+      && !aggregate_value_p (base, cfun->decl))
+    {
+      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
+      /* ??? Handle PARALLEL in some way.  */
+      if (REG_P (reg))
+	{
+	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
+	  /* Assume that a single reg-reg move is possible and cheap,
+	     do not account for vector to gp register move cost.  */
+	  if (nregs > 1)
+	    {
+	      /* Spill.  */
+	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
+						 vector_store,
+						 stmt_info, 0, vect_epilogue);
+	      /* Loads.  */
+	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
+						 scalar_load,
+						 stmt_info, 0, vect_epilogue);
+	    }
+	}
+    }
+
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "vect_model_store_cost: inside_cost = %d, "
-- 
2.30.2