From d03431d0f59f5aa5c2a8e2e5ea94ea1660b240f8 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 15 Nov 2019 12:48:34 +0000 Subject: [PATCH] re PR tree-optimization/92324 (ICE in expand_direct_optab_fn, at internal-fn.c:2890) 2019-11-15 Richard Biener PR tree-optimization/92324 * tree-vect-loop.c (vect_create_epilog_for_reduction): Fix signedness of SLP reduction epilogue operations. Also reduce the vector width for SLP reductions before doing elementwise operations if possible. * gcc.dg/vect/pr92324-4.c: New testcase. From-SVN: r278289 --- gcc/ChangeLog | 8 ++++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/vect/pr92324-4.c | 30 ++++++++++++ gcc/tree-vect-loop.c | 68 +++++++++++++++------------ 4 files changed, 81 insertions(+), 30 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr92324-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2f9325289a2..5eb21e7e4a8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-11-15 Richard Biener + + PR tree-optimization/92324 + * tree-vect-loop.c (vect_create_epilog_for_reduction): Fix + signedness of SLP reduction epilogue operations. Also reduce + the vector width for SLP reductions before doing elementwise + operations if possible. + 2019-11-15 Matthew Malcomson * passes.c (skip_pass): Set epilogue_completed if skipping the diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6b123832d5d..4377c4c38c1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-11-15 Richard Biener + + PR tree-optimization/92324 + * gcc.dg/vect/pr92324-4.c: New testcase. 
+ 2019-11-15 Paul Thomas PR fortran/69654 diff --git a/gcc/testsuite/gcc.dg/vect/pr92324-4.c b/gcc/testsuite/gcc.dg/vect/pr92324-4.c new file mode 100644 index 00000000000..83479852233 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr92324-4.c @@ -0,0 +1,30 @@ +#include "tree-vect.h" + +unsigned a[1024]; +int gres1, gres2; + +int __attribute__((noipa)) +foo (int n) +{ + int res1 = 0; + int res2 = 0; + for (int i = 0; i < n; ++i) + { + res1 = res1 > a[2*i] ? res1 : a[2*i]; + res2 = res2 > a[2*i+1] ? res2 : a[2*i+1]; + } + gres1 = res1; + gres2 = res2; +} + +int main () +{ + check_vect (); + a[30] = (unsigned)__INT_MAX__ + 1; + a[31] = (unsigned)__INT_MAX__ + 1; + foo (16); + if (gres1 != -__INT_MAX__ - 1 + || gres2 != -__INT_MAX__ - 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 567fcc75e2b..389ef170607 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4930,6 +4930,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, bool reduce_with_shift; tree vec_temp; + gcc_assert (slp_reduc || new_phis.length () == 1); + /* See if the target wants to do the final (shift) reduction in a vector mode of smaller size and first reduce upper/lower halves against each other. */ @@ -4937,6 +4939,21 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, tree stype = TREE_TYPE (vectype); unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); unsigned nunits1 = nunits; + if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode + && new_phis.length () == 1) + { + nunits1 = GET_MODE_NUNITS (mode1).to_constant (); + /* For SLP reductions we have to make sure lanes match up, but + since we're doing individual element final reduction reducing + vector width here is even more important. + ??? We can also separate lanes with permutes, for the common + case of power-of-two group-size odd/even extracts would work. 
*/ + if (slp_reduc && nunits != nunits1) + { + nunits1 = least_common_multiple (nunits1, group_size); + gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits); + } + } if (!slp_reduc && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) nunits1 = GET_MODE_NUNITS (mode1).to_constant (); @@ -4958,7 +4975,6 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, new_temp = new_phi_result; while (nunits > nunits1) { - gcc_assert (!slp_reduc); nunits /= 2; vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), stype, nunits); @@ -5113,6 +5129,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); int element_bitsize = tree_to_uhwi (bitsize); + tree compute_type = TREE_TYPE (vectype); + gimple_seq stmts = NULL; FOR_EACH_VEC_ELT (new_phis, i, new_phi) { int bit_offset; @@ -5120,12 +5138,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, vec_temp = PHI_RESULT (new_phi); else vec_temp = gimple_assign_lhs (new_phi); - tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, - bitsize_zero_node); - epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); - new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); - gimple_assign_set_lhs (epilog_stmt, new_temp); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type, + vec_temp, bitsize, bitsize_zero_node); /* In SLP we don't need to apply reduction operation, so we just collect s' values in SCALAR_RESULTS. 
*/ @@ -5137,14 +5151,9 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, bit_offset += element_bitsize) { tree bitpos = bitsize_int (bit_offset); - tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, - bitsize, bitpos); - - epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); - new_name = make_ssa_name (new_scalar_dest, epilog_stmt); - gimple_assign_set_lhs (epilog_stmt, new_name); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - + new_name = gimple_build (&stmts, BIT_FIELD_REF, + compute_type, vec_temp, + bitsize, bitpos); if (slp_reduc) { /* In SLP we don't need to apply reduction operation, so @@ -5153,13 +5162,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, scalar_results.safe_push (new_name); } else - { - epilog_stmt = gimple_build_assign (new_scalar_dest, code, - new_name, new_temp); - new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); - gimple_assign_set_lhs (epilog_stmt, new_temp); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - } + new_temp = gimple_build (&stmts, code, compute_type, + new_name, new_temp); } } @@ -5170,24 +5174,28 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, if (slp_reduc) { tree res, first_res, new_res; - gimple *new_stmt; /* Reduce multiple scalar results in case of SLP unrolling. */ for (j = group_size; scalar_results.iterate (j, &res); j++) { first_res = scalar_results[j % group_size]; - new_stmt = gimple_build_assign (new_scalar_dest, code, - first_res, res); - new_res = make_ssa_name (new_scalar_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_res); - gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT); + new_res = gimple_build (&stmts, code, compute_type, + first_res, res); scalar_results[j % group_size] = new_res; } + for (k = 0; k < group_size; k++) + scalar_results[k] = gimple_convert (&stmts, scalar_type, + scalar_results[k]); } else - /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. 
*/ - scalar_results.safe_push (new_temp); + { + /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ + new_temp = gimple_convert (&stmts, scalar_type, new_temp); + scalar_results.safe_push (new_temp); + } + + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); } if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) -- 2.30.2