From c78e36522d21297b555185f5eeedd6e848eda684 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 3 Jul 2017 07:28:28 +0000 Subject: [PATCH] tree-vect-loop.c (vect_analyze_loop_operations): Also analyze reduction PHIs. 2016-07-03 Richard Biener * tree-vect-loop.c (vect_analyze_loop_operations): Also analyze reduction PHIs. (vect_force_simple_reduction): Record reduction def -> phi mapping. (vectorizable_reduction): Perform reduction PHI creation when visiting a reduction PHI and adjust and simplify code generation phase of the reduction op. Cache dts, use fold_binary, not fold_build2. (vect_transform_loop): Visit reduction PHIs. * tree-vect-slp.c (vect_get_and_check_slp_defs): Record reduction defs into the SLP tree. (vect_build_slp_tree): Reduction defs terminate the recursion. * tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Allow lookup of reduction defs. (vect_get_vec_defs_for_stmt_copy): Export. (vect_get_vec_defs): Likewise. * tree-vectorizer.h (struct _stmt_vec_info): Amend reduc_def purpose. (vect_get_vec_defs_for_stmt_copy): Declare. (vect_get_vec_defs): Likewise. From-SVN: r249897 --- gcc/ChangeLog | 21 +++++ gcc/tree-vect-loop.c | 208 ++++++++++++++++++++++++++++-------------- gcc/tree-vect-slp.c | 15 +-- gcc/tree-vect-stmts.c | 14 +-- gcc/tree-vectorizer.h | 8 +- 5 files changed, 179 insertions(+), 87 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 08657e0accf..402f8b74f67 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2016-07-03 Richard Biener + + * tree-vect-loop.c (vect_analyze_loop_operations): Also analyze + reduction PHIs. + (vect_force_simple_reduction): Record reduction def -> phi mapping. + (vectorizable_reduction): Perform reduction PHI creation when + visiting a reduction PHI and adjust and simplify code generation + phase of the reduction op. Cache dts, use fold_binary, not fold_build2. + (vect_transform_loop): Visit reduction PHIs. + * tree-vect-slp.c (vect_get_and_check_slp_defs): Record reduction + defs into the SLP tree. + (vect_build_slp_tree): Reduction defs terminate the recursion. + * tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Allow lookup + of reduction defs. + (vect_get_vec_defs_for_stmt_copy): Export. + (vect_get_vec_defs): Likewise. + * tree-vectorizer.h (struct _stmt_vec_info): Amend reduc_def + purpose. + (vect_get_vec_defs_for_stmt_copy): Declare. + (vect_get_vec_defs): Likewise. + 2017-07-03 Richard Sandiford * tree-data-ref.c (dr_analyze_innermost): Replace the "nest" diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f75b943e1b5..ace3b8c9cc9 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1778,6 +1778,10 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def && ! PURE_SLP_STMT (stmt_info)) ok = vectorizable_induction (phi, NULL, NULL, NULL); + else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + && ! 
PURE_SLP_STMT (stmt_info)) + ok = vectorizable_reduction (phi, NULL, NULL, NULL); } if (ok && STMT_VINFO_LIVE_P (stmt_info)) @@ -3185,6 +3189,8 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi, stmt_vec_info reduc_def_info = vinfo_for_stmt (phi); STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type; STMT_VINFO_REDUC_DEF (reduc_def_info) = def; + reduc_def_info = vinfo_for_stmt (def); + STMT_VINFO_REDUC_DEF (reduc_def_info) = phi; } return def; } @@ -5558,7 +5564,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, { tree vec_dest; tree scalar_dest; - tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); tree vectype_in = NULL_TREE; @@ -5576,7 +5581,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, bool is_simple_use; gimple *orig_stmt; stmt_vec_info orig_stmt_info; - tree expr = NULL_TREE; int i; int ncopies; int epilog_copies; @@ -5586,6 +5590,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, gimple *new_stmt = NULL; int j; tree ops[3]; + enum vect_def_type dts[3]; bool nested_cycle = false, found_nested_cycle_def = false; gimple *reduc_def_stmt = NULL; bool double_reduc = false; @@ -5598,11 +5603,23 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, auto_vec vect_defs; auto_vec phis; int vec_num; - tree def0, def1, tem, op1 = NULL_TREE; + tree def0, tem; bool first_p = true; tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; tree cond_reduc_val = NULL_TREE; + /* Make sure it was already recognized as a reduction computation. */ + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle) + return false; + + if (nested_in_vect_loop_p (loop, stmt)) + { + outer_loop = loop; + loop = loop->inner; + nested_cycle = true; + } + /* In case of reduction chain we switch to the first stmt in the chain, but we don't update STMT_INFO, since only the last stmt is marked as reduction and has reduction properties. */ @@ -5613,11 +5630,82 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, first_p = false; } - if (nested_in_vect_loop_p (loop, stmt)) + if (gimple_code (stmt) == GIMPLE_PHI) { - outer_loop = loop; - loop = loop->inner; - nested_cycle = true; + /* Analysis is fully done on the reduction stmt invocation. */ + if (! 
vec_stmt) + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; + } + + gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info); + if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt))) + reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt)); + if (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt)) <= vect_used_only_live) + single_defuse_cycle = true; + + gcc_assert (is_gimple_assign (reduc_stmt)); + for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) + { + tree op = gimple_op (reduc_stmt, k); + if (op == gimple_phi_result (stmt)) + continue; + if (k == 1 + && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) + continue; + vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); + break; + } + gcc_assert (vectype_in); + + if (slp_node) + ncopies = 1; + else + ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo) + / TYPE_VECTOR_SUBPARTS (vectype_in)); + + /* Create the destination vector */ + scalar_dest = gimple_assign_lhs (reduc_stmt); + vec_dest = vect_create_destination_var (scalar_dest, vectype_out); + + if (slp_node) + /* The size vect_schedule_slp_instance computes is off for us. */ + vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo) + * SLP_TREE_SCALAR_STMTS (slp_node).length ()) + / TYPE_VECTOR_SUBPARTS (vectype_in)); + else + vec_num = 1; + + /* Generate the reduction PHIs upfront. */ + prev_phi_info = NULL; + for (j = 0; j < ncopies; j++) + { + if (j == 0 || !single_defuse_cycle) + { + for (i = 0; i < vec_num; i++) + { + /* Create the reduction-phi that defines the reduction + operand. */ + new_phi = create_phi_node (vec_dest, loop->header); + set_vinfo_for_stmt (new_phi, + new_stmt_vec_info (new_phi, loop_vinfo)); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi); + else + { + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi; + else + STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; + prev_phi_info = vinfo_for_stmt (new_phi); + } + } + } + } + + return true; } /* 1. Is vectorizable reduction? */ @@ -5633,11 +5721,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, && !STMT_VINFO_LIVE_P (stmt_info)) return false; - /* Make sure it was already recognized as a reduction computation. */ - if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def - && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle) - return false; - /* 2. Has this been recognized as a reduction pattern? 
Check if STMT represents a pattern that has been recognized @@ -5718,11 +5801,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, continue; is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, - &def_stmt, &dt, &tem); + &def_stmt, &dts[i], &tem); if (!vectype_in) vectype_in = tem; gcc_assert (is_simple_use); + dt = dts[i]; if (dt != vect_internal_def && dt != vect_external_def && dt != vect_constant_def @@ -5752,7 +5836,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, } is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo, - &def_stmt, &dt, &tem); + &def_stmt, &dts[reduc_index], &tem); if (!vectype_in) vectype_in = tem; gcc_assert (is_simple_use); @@ -5762,6 +5846,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI) return false; + dt = dts[reduc_index]; if (!(dt == vect_reduction_def || dt == vect_nested_cycle || ((dt == vect_internal_def || dt == vect_external_def @@ -5820,7 +5905,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, && types_compatible_p (TREE_TYPE (cond_initial_val), TREE_TYPE (cond_reduc_val))) { - tree e = fold_build2 (LE_EXPR, boolean_type_node, + tree e = fold_binary (LE_EXPR, boolean_type_node, cond_initial_val, cond_reduc_val); if (e && (integer_onep (e) || integer_zerop (e))) { @@ -6190,19 +6275,28 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (!slp_node) vect_defs.quick_push (NULL_TREE); + auto_vec vec_oprnds; for (j = 0; j < ncopies; j++) { if (j == 0 || !single_defuse_cycle) { for (i = 0; i < vec_num; i++) { - /* Create the reduction-phi that defines the reduction + /* Get the created reduction-phi that defines the reduction operand. */ - new_phi = create_phi_node (vec_dest, loop->header); - set_vinfo_for_stmt (new_phi, - new_stmt_vec_info (new_phi, loop_vinfo)); - if (j == 0 || slp_node) - phis.quick_push (new_phi); + tree reduc_def = gimple_phi_result (reduc_def_stmt); + if (j == 0) + vect_get_vec_defs (reduc_def, NULL, stmt, &vec_oprnds, NULL, + slp_node); + else + { + dt = vect_reduction_def; + vect_get_vec_defs_for_stmt_copy (&dt, + &vec_oprnds, NULL); + } + new_phi = as_a (SSA_NAME_DEF_STMT (vec_oprnds[i])); + if (j == 0 || slp_node) + phis.quick_push (new_phi); } } @@ -6243,42 +6337,30 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, } else { - loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], - stmt); - vec_oprnds0.quick_push (loop_vec_def0); + vec_oprnds0.quick_push + (vect_get_vec_def_for_operand (ops[!reduc_index], stmt)); if (op_type == ternary_op) - { - op1 = reduc_index == 0 ? ops[2] : ops[1]; - loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt); - vec_oprnds1.quick_push (loop_vec_def1); - } + vec_oprnds1.quick_push + (vect_get_vec_def_for_operand (reduc_index == 0 + ? ops[2] : ops[1], stmt)); } } else { if (!slp_node) { - enum vect_def_type dt; - gimple *dummy_stmt; - - vect_is_simple_use (ops[!reduc_index], loop_vinfo, - &dummy_stmt, &dt); - loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, - loop_vec_def0); - vec_oprnds0[0] = loop_vec_def0; + vec_oprnds0[0] + = vect_get_vec_def_for_stmt_copy (dts[!reduc_index], + vec_oprnds0[0]); if (op_type == ternary_op) - { - vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt); - loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, - loop_vec_def1); - vec_oprnds1[0] = loop_vec_def1; - } + vec_oprnds1[0] + = vect_get_vec_def_for_stmt_copy (dts[reduc_index == 0 + ? 
2 : 1], + vec_oprnds1[0]); } if (single_defuse_cycle) reduc_def = gimple_assign_lhs (new_stmt); - - STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; } FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) @@ -6291,31 +6373,16 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, reduc_def = PHI_RESULT (new_phi); } - def1 = ((op_type == ternary_op) - ? vec_oprnds1[i] : NULL); - if (op_type == binary_op) - { - if (reduc_index == 0) - expr = build2 (code, vectype_out, reduc_def, def0); - else - expr = build2 (code, vectype_out, def0, reduc_def); - } - else - { - if (reduc_index == 0) - expr = build3 (code, vectype_out, reduc_def, def0, def1); - else - { - if (reduc_index == 1) - expr = build3 (code, vectype_out, def0, reduc_def, def1); - else - expr = build3 (code, vectype_out, def0, def1, reduc_def); - } - } + tree vop[3] = { def0, NULL_TREE, NULL_TREE }; + if (op_type == ternary_op) + vop[1] = vec_oprnds1[i]; + for (int k = 2; k > reduc_index; --k) + vop[k] = vop[k - 1]; + vop[reduc_index] = reduc_def; - new_stmt = gimple_build_assign (vec_dest, expr); new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); + new_stmt = gimple_build_assign (new_temp, code, + vop[0], vop[1], vop[2]); vect_finish_stmt_generation (stmt, new_stmt, gsi); if (slp_node) @@ -6336,7 +6403,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; prev_stmt_info = vinfo_for_stmt (new_stmt); - prev_phi_info = vinfo_for_stmt (new_phi); } /* Finalize the reduction-phi (set its arguments) and create the @@ -7285,7 +7351,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) && dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) && ! PURE_SLP_STMT (stmt_info)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index dd8658e32f0..131f8f3b1a7 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -403,9 +403,9 @@ again: { case vect_constant_def: case vect_external_def: - case vect_reduction_def: break; + case vect_reduction_def: case vect_induction_def: case vect_internal_def: oprnd_info->def_stmts.quick_push (def_stmt); @@ -943,13 +943,15 @@ vect_build_slp_tree (vec_info *vinfo, else return NULL; - /* If the SLP node is a PHI (induction), terminate the recursion. */ + /* If the SLP node is a PHI (induction or reduction), terminate + the recursion. */ if (gimple_code (stmt) == GIMPLE_PHI) { - FOR_EACH_VEC_ELT (stmts, i, stmt) - if (stmt != stmts[0]) - /* Induction from different IVs is not supported. */ - return NULL; + /* Induction from different IVs is not supported. 
*/ + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) == vect_induction_def) + FOR_EACH_VEC_ELT (stmts, i, stmt) + if (stmt != stmts[0]) + return NULL; node = vect_create_new_slp_node (stmts); return node; } @@ -1005,6 +1007,7 @@ vect_build_slp_tree (vec_info *vinfo, unsigned int j; if (oprnd_info->first_dt != vect_internal_def + && oprnd_info->first_dt != vect_reduction_def && oprnd_info->first_dt != vect_induction_def) continue; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 9647c111d65..99c071557d4 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1367,14 +1367,10 @@ vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt) return vec_oprnd; } - /* operand is defined by a loop header phi - reduction */ + /* operand is defined by a loop header phi. */ case vect_reduction_def: case vect_double_reduction_def: case vect_nested_cycle: - /* Code should use get_initial_def_for_reduction. */ - gcc_unreachable (); - - /* operand is defined by loop-header phi - induction. */ case vect_induction_def: { gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); @@ -1535,7 +1531,7 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) /* Get vectorized definitions for the operands to create a copy of an original stmt. See vect_get_vec_def_for_stmt_copy () for details. */ -static void +void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, vec *vec_oprnds0, vec *vec_oprnds1) @@ -1554,11 +1550,9 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, } -/* Get vectorized definitions for OP0 and OP1. - REDUC_INDEX is the index of reduction operand in case of reduction, - and -1 otherwise. */ +/* Get vectorized definitions for OP0 and OP1. */ -static void +void vect_get_vec_defs (tree op0, tree op1, gimple *stmt, vec *vec_oprnds0, vec *vec_oprnds1, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 1ab12319d53..c4a84e8298d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -647,7 +647,9 @@ typedef struct _stmt_vec_info { vect_force_simple_reduction. */ enum vect_reduction_type reduc_type; - /* On a reduction PHI the def returned by vect_force_simple_reduction. */ + /* On a reduction PHI the def returned by vect_force_simple_reduction. + On the def returned by vect_force_simple_reduction the + corresponding PHI. */ gimple *reduc_def; /* The number of scalar stmt references from active SLP instances. */ @@ -1078,6 +1080,10 @@ extern void vect_finish_stmt_generation (gimple *, gimple *, extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type); extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL); +extern void vect_get_vec_defs (tree, tree, gimple *, vec *, + vec *, slp_tree); +extern void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *, + vec *, vec *); extern tree vect_init_vector (gimple *, tree, tree, gimple_stmt_iterator *); extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); -- 2.30.2
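Editor's note (not part of the patch): the rewritten code-generation loop in vectorizable_reduction replaces the old build2/build3 cascade with a small `vop[]` rotation that shifts the non-reduction operands right and drops the reduction operand into slot `reduc_index`. Below is a minimal standalone sketch of that idiom, assuming string stand-ins for tree operands; the helper name `place_reduc_operand` and the demo values are illustrative only and do not exist in GCC.

```c
/* Standalone illustration (not GCC code) of the vop[] rotation used in the
   patched vectorizable_reduction: shift the non-reduction operands one slot
   to the right, then place the reduction operand at reduc_index.  This
   reproduces the operand orders of the removed build2/build3 branches in a
   single code path.  */
#include <stdio.h>

static void
place_reduc_operand (const char *vop[3], const char *reduc_def,
                     int reduc_index)
{
  for (int k = 2; k > reduc_index; --k)
    vop[k] = vop[k - 1];
  vop[reduc_index] = reduc_def;
}

int
main (void)
{
  for (int reduc_index = 0; reduc_index < 3; ++reduc_index)
    {
      /* def0/def1 stand in for the vectorized non-reduction operands of a
         ternary operation; a binary operation would start with vop[1] unset.  */
      const char *vop[3] = { "def0", "def1", NULL };
      place_reduc_operand (vop, "reduc_def", reduc_index);
      printf ("reduc_index=%d: %s, %s, %s\n", reduc_index,
              vop[0], vop[1] ? vop[1] : "-", vop[2] ? vop[2] : "-");
    }
  return 0;
}
```

For reduc_index 0, 1, and 2 this prints the operand orders (reduc_def, def0, def1), (def0, reduc_def, def1), and (def0, def1, reduc_def), matching the cases the old if/else chain built explicitly.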