From: Richard Biener Date: Thu, 20 Jul 2017 11:17:21 +0000 (+0000) Subject: re PR tree-optimization/61171 (vectorization fails for a reduction in presence of... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=891ad31c7b151704de655a1a2b70568830a65086;p=gcc.git re PR tree-optimization/61171 (vectorization fails for a reduction in presence of subtraction) 2017-07-20 Richard Biener PR tree-optimization/61171 * tree-vectorizer.h (slp_instance): Add reduc_phis member. (vect_analyze_stmt): Add slp instance parameter. (vectorizable_reduction): Likewise. * tree-vect-loop.c (vect_analyze_loop_operations): Adjust. (vect_is_simple_reduction): Deal with chains not detected as SLP reduction chain, specifically not properly associated chains containing a mix of plus/minus. (get_reduction_op): Remove. (get_initial_defs_for_reduction): Simplify, pass in whether this is a reduction chain, pass in the SLP node for the PHIs. (vect_create_epilog_for_reduction): Get the SLP instance as arg and adjust. (vectorizable_reduction): Get the SLP instance as arg. During analysis remember the SLP node with the PHIs in the instance. Simplify getting at the vectorized reduction PHIs. * tree-vect-slp.c (vect_slp_analyze_node_operations): Pass through SLP instance. (vect_slp_analyze_operations): Likewise. * tree-vect-stms.c (vect_analyze_stmt): Likewise. (vect_transform_stmt): Likewise. * g++.dg/vect/pr61171.cc: New testcase. * gfortran.dg/vect/pr61171.f: Likewise. * gcc.dg/vect/vect-reduc-11.c: Likewise. From-SVN: r250382 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b6750e84013..5bea9dd1e3e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2017-07-20 Richard Biener + + PR tree-optimization/61171 + * tree-vectorizer.h (slp_instance): Add reduc_phis member. + (vect_analyze_stmt): Add slp instance parameter. + (vectorizable_reduction): Likewise. + * tree-vect-loop.c (vect_analyze_loop_operations): Adjust. + (vect_is_simple_reduction): Deal with chains not detected + as SLP reduction chain, specifically not properly associated + chains containing a mix of plus/minus. + (get_reduction_op): Remove. + (get_initial_defs_for_reduction): Simplify, pass in whether + this is a reduction chain, pass in the SLP node for the PHIs. + (vect_create_epilog_for_reduction): Get the SLP instance as + arg and adjust. + (vectorizable_reduction): Get the SLP instance as arg. + During analysis remember the SLP node with the PHIs in the + instance. Simplify getting at the vectorized reduction PHIs. + * tree-vect-slp.c (vect_slp_analyze_node_operations): Pass + through SLP instance. + (vect_slp_analyze_operations): Likewise. + * tree-vect-stms.c (vect_analyze_stmt): Likewise. + (vect_transform_stmt): Likewise. + 2017-07-20 Tom de Vries PR tree-optimization/81489 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d222849a98f..8ff6c4f200c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2017-07-20 Richard Biener + + PR tree-optimization/61171 + * g++.dg/vect/pr61171.cc: New testcase. + * gfortran.dg/vect/pr61171.f: Likewise. + * gcc.dg/vect/vect-reduc-11.c: Likewise. + 2017-07-20 Richard Biener * gcc.dg/vect/slp-43.c: Increase loop count to enable vectorization diff --git a/gcc/testsuite/g++.dg/vect/pr61171.cc b/gcc/testsuite/g++.dg/vect/pr61171.cc new file mode 100644 index 00000000000..f3e2e3e4c13 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr61171.cc @@ -0,0 +1,23 @@ +// { dg-do compile } +// { dg-require-effective-target vect_float } +// { dg-additional-options "-ffast-math" } + +float px[1024]; +float xx, vv; +unsigned int N=1024; + +void ok() { + for (unsigned j=0U; j > path; + auto_bitmap visited; + tree lookfor = PHI_RESULT (phi); + ssa_op_iter curri; + use_operand_p curr = op_iter_init_phiuse (&curri, as_a (phi), + SSA_OP_USE); + while (USE_FROM_PTR (curr) != loop_arg) + curr = op_iter_next_use (&curri); + curri.i = curri.numops; + do + { + path.safe_push (std::make_pair (curri, curr)); + tree use = USE_FROM_PTR (curr); + if (use == lookfor) + break; + gimple *def = SSA_NAME_DEF_STMT (use); + if (gimple_nop_p (def) + || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) + { +pop: + do + { + std::pair x = path.pop (); + curri = x.first; + curr = x.second; + do + curr = op_iter_next_use (&curri); + /* Skip already visited or non-SSA operands (from iterating + over PHI args). */ + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))); + } + while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); + if (curr == NULL_USE_OPERAND_P) + break; + } + else + { + if (gimple_code (def) == GIMPLE_PHI) + curr = op_iter_init_phiuse (&curri, as_a (def), SSA_OP_USE); + else + curr = op_iter_init_use (&curri, def, SSA_OP_USE); + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))) + curr = op_iter_next_use (&curri); + if (curr == NULL_USE_OPERAND_P) + goto pop; + } + } + while (1); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + dump_printf_loc (MSG_NOTE, vect_location, + "reduction path: "); + unsigned i; + std::pair *x; + FOR_EACH_VEC_ELT (path, i, x) + { + dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); + dump_printf (MSG_NOTE, " "); + } + dump_printf (MSG_NOTE, "\n"); + } + + /* Check whether the reduction path detected is valid. */ + bool fail = false; + bool neg = false; + for (unsigned i = 1; i < path.length (); ++i) + { + gimple *use_stmt = USE_STMT (path[i].second); + tree op = USE_FROM_PTR (path[i].second); + if (! has_single_use (op) + || ! is_gimple_assign (use_stmt)) + { + fail = true; + break; + } + if (gimple_assign_rhs_code (use_stmt) != code) + { + if (code == PLUS_EXPR + && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + { + /* Track whether we negate the reduction value each iteration. */ + if (gimple_assign_rhs2 (use_stmt) == op) + neg = ! neg; + } + else + { + fail = true; + break; + } + } + } + if (! fail && ! neg) + return def_stmt; + if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: unknown pattern: "); - + { + report_vect_op (MSG_MISSED_OPTIMIZATION, + SSA_NAME_DEF_STMT + (USE_FROM_PTR (path[path.length ()-1].second)), + "reduction: unknown pattern: "); + } + return NULL; } @@ -3656,29 +3774,6 @@ have_whole_vector_shift (machine_mode mode) return true; } -/* Return the reduction operand (with index REDUC_INDEX) of STMT. */ - -static tree -get_reduction_op (gimple *stmt, int reduc_index) -{ - switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) - { - case GIMPLE_SINGLE_RHS: - gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) - == ternary_op); - return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index); - case GIMPLE_UNARY_RHS: - return gimple_assign_rhs1 (stmt); - case GIMPLE_BINARY_RHS: - return (reduc_index - ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt)); - case GIMPLE_TERNARY_RHS: - return gimple_op (stmt, reduc_index + 1); - default: - gcc_unreachable (); - } -} - /* TODO: Close dependency between vect_model_*_cost and vectorizable_* functions. Design better to avoid maintenance issues. */ @@ -4043,15 +4138,14 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, return init_def; } -/* Get at the initial defs for OP in the reduction SLP_NODE. - NUMBER_OF_VECTORS is the number of vector defs to create. - REDUC_INDEX is the index of the reduction operand in the statements. */ +/* Get at the initial defs for the reduction PHIs in SLP_NODE. + NUMBER_OF_VECTORS is the number of vector defs to create. */ static void get_initial_defs_for_reduction (slp_tree slp_node, vec *vec_oprnds, unsigned int number_of_vectors, - int reduc_index, enum tree_code code) + enum tree_code code, bool reduc_chain) { vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); gimple *stmt = stmts[0]; @@ -4069,7 +4163,6 @@ get_initial_defs_for_reduction (slp_tree slp_node, voprnds.create (number_of_vectors); bool constant_p; tree neutral_op = NULL; - gimple *def_stmt; struct loop *loop; gimple_seq ctor_seq = NULL; @@ -4077,8 +4170,10 @@ get_initial_defs_for_reduction (slp_tree slp_node, scalar_type = TREE_TYPE (vector_type); nunits = TYPE_VECTOR_SUBPARTS (vector_type); - gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def - && reduc_index != -1); + gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); + + loop = (gimple_bb (stmt))->loop_father; + gcc_assert (loop); /* op is the reduction operand of the first stmt already. */ /* For additional copies (see the explanation of NUMBER_OF_COPIES below) @@ -4109,20 +4204,15 @@ get_initial_defs_for_reduction (slp_tree slp_node, a reduction chain we have to force a neutral element. */ case MAX_EXPR: case MIN_EXPR: - if (!GROUP_FIRST_ELEMENT (stmt_vinfo)) + if (! reduc_chain) neutral_op = NULL; else - { - tree op = get_reduction_op (stmts[0], reduc_index); - def_stmt = SSA_NAME_DEF_STMT (op); - loop = (gimple_bb (stmt))->loop_father; - neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt, - loop_preheader_edge (loop)); - } + neutral_op = PHI_ARG_DEF_FROM_EDGE (stmt, + loop_preheader_edge (loop)); break; default: - gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo)); + gcc_assert (! reduc_chain); neutral_op = NULL; } @@ -4151,21 +4241,15 @@ get_initial_defs_for_reduction (slp_tree slp_node, { for (i = group_size - 1; stmts.iterate (i, &stmt); i--) { - tree op = get_reduction_op (stmt, reduc_index); - loop = (gimple_bb (stmt))->loop_father; - def_stmt = SSA_NAME_DEF_STMT (op); - - gcc_assert (loop); - + tree op; /* Get the def before the loop. In reduction chain we have only one initial value. */ if ((j != (number_of_copies - 1) - || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) - && i != 0)) + || (reduc_chain && i != 0)) && neutral_op) op = neutral_op; else - op = PHI_ARG_DEF_FROM_EDGE (def_stmt, + op = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop)); /* Create 'vect_ = {op0,op1,...,opn}'. */ @@ -4306,8 +4390,9 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, gimple *reduc_def_stmt, int ncopies, enum tree_code reduc_code, vec reduction_phis, - int reduc_index, bool double_reduc, - slp_tree slp_node) + bool double_reduc, + slp_tree slp_node, + slp_instance slp_node_instance) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info prev_phi_info; @@ -4385,8 +4470,9 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, { unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); vec_initial_defs.reserve (vec_num); - get_initial_defs_for_reduction (slp_node, &vec_initial_defs, - vec_num, reduc_index, code); + get_initial_defs_for_reduction (slp_node_instance->reduc_phis, + &vec_initial_defs, vec_num, code, + GROUP_FIRST_ELEMENT (stmt_info)); } else { @@ -5551,7 +5637,8 @@ is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop) bool vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, - gimple **vec_stmt, slp_tree slp_node) + gimple **vec_stmt, slp_tree slp_node, + slp_instance slp_node_instance) { tree vec_dest; tree scalar_dest; @@ -5567,7 +5654,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, tree new_temp = NULL_TREE; gimple *def_stmt; enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; - gphi *new_phi = NULL; tree scalar_type; bool is_simple_use; gimple *orig_stmt; @@ -5625,6 +5711,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, /* Analysis is fully done on the reduction stmt invocation. */ if (! vec_stmt) { + if (slp_node) + slp_node_instance->reduc_phis = slp_node; + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; return true; } @@ -5689,7 +5778,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, { /* Create the reduction-phi that defines the reduction operand. */ - new_phi = create_phi_node (vec_dest, loop->header); + gimple *new_phi = create_phi_node (vec_dest, loop->header); set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo)); @@ -6312,31 +6401,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (!slp_node) vect_defs.quick_push (NULL_TREE); - auto_vec vec_oprnds; + if (slp_node) + phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)); + else + phis.quick_push (STMT_VINFO_VEC_STMT (vinfo_for_stmt (reduc_def_stmt))); + for (j = 0; j < ncopies; j++) { - if (j == 0 || !single_defuse_cycle) - { - for (i = 0; i < vec_num; i++) - { - /* Get the created reduction-phi that defines the reduction - operand. */ - tree reduc_def = gimple_phi_result (reduc_def_stmt); - if (j == 0) - vect_get_vec_defs (reduc_def, NULL, stmt, &vec_oprnds, NULL, - slp_node); - else - { - dt = vect_reduction_def; - vect_get_vec_defs_for_stmt_copy (&dt, - &vec_oprnds, NULL); - } - new_phi = as_a (SSA_NAME_DEF_STMT (vec_oprnds[i])); - if (j == 0 || slp_node) - phis.quick_push (new_phi); - } - } - if (code == COND_EXPR) { gcc_assert (!slp_node); @@ -6450,8 +6521,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt, epilog_copies, - epilog_reduc_code, phis, reduc_index, - double_reduc, slp_node); + epilog_reduc_code, phis, + double_reduc, slp_node, slp_node_instance); return true; } diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 4502146595d..7cfeeb98978 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2439,7 +2439,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo) the subtree. Return TRUE if the operations are supported. */ static bool -vect_slp_analyze_node_operations (slp_tree node) +vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance) { bool dummy; int i, j; @@ -2450,7 +2450,7 @@ vect_slp_analyze_node_operations (slp_tree node) return true; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (!vect_slp_analyze_node_operations (child)) + if (!vect_slp_analyze_node_operations (child, node_instance)) return false; stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -2507,7 +2507,7 @@ vect_slp_analyze_node_operations (slp_tree node) if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) = SLP_TREE_DEF_TYPE (child); - bool res = vect_analyze_stmt (stmt, &dummy, node); + bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance); /* Restore def-types. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) @@ -2535,7 +2535,8 @@ vect_slp_analyze_operations (vec slp_instances, void *data) for (i = 0; slp_instances.iterate (i, &instance); ) { - if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance))) + if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance), + instance)) { dump_printf_loc (MSG_NOTE, vect_location, "removing SLP instance operations starting from: "); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index c50555499d2..a22947d02ac 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -8397,7 +8397,8 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi, /* Make sure the statement is vectorizable. */ bool -vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node) +vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node, + slp_instance node_instance) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); @@ -8476,7 +8477,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node) dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); } - if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) + if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node, + node_instance)) return false; } @@ -8501,7 +8503,7 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node) } if (!vect_analyze_stmt (pattern_def_stmt, - need_to_vectorize, node)) + need_to_vectorize, node, node_instance)) return false; } } @@ -8561,7 +8563,7 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node) || vectorizable_load (stmt, NULL, NULL, node, NULL) || vectorizable_call (stmt, NULL, NULL, node) || vectorizable_store (stmt, NULL, NULL, node) - || vectorizable_reduction (stmt, NULL, NULL, node) + || vectorizable_reduction (stmt, NULL, NULL, node, node_instance) || vectorizable_induction (stmt, NULL, NULL, node) || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node) || vectorizable_comparison (stmt, NULL, NULL, NULL, node)); @@ -8711,7 +8713,8 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi, break; case reduc_vec_info_type: - done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node); + done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node, + slp_node_instance); gcc_assert (done); break; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 8935e78afea..accac72324f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -127,6 +127,9 @@ typedef struct _slp_instance { /* The group of nodes that contain loads of this SLP instance. */ vec loads; + + /* The SLP node containing the reduction PHIs. */ + slp_tree reduc_phis; } *slp_instance; @@ -1107,7 +1110,7 @@ extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *, bool *, slp_tree, slp_instance); extern void vect_remove_stores (gimple *); -extern bool vect_analyze_stmt (gimple *, bool *, slp_tree); +extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance); extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *, gimple **, tree, int, slp_tree); extern void vect_get_load_cost (struct data_reference *, int, bool, @@ -1178,7 +1181,7 @@ extern loop_vec_info vect_analyze_loop_form (struct loop *); extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, slp_tree, int, gimple **); extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *, - gimple **, slp_tree); + gimple **, slp_tree, slp_instance); extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *, gimple **, slp_tree); extern tree get_initial_def_for_reduction (gimple *, tree, tree *);