From 306b0c920fc691999ecd078f4b1efe015923feea Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 28 Jun 2017 07:47:35 +0000 Subject: [PATCH] tree-vectorizer.h (vect_get_vec_defs): Remove. 2017-06-28 Richard Biener * tree-vectorizer.h (vect_get_vec_defs): Remove. (vect_get_slp_defs): Adjust. * tree-vect-loop.c (get_initial_defs_for_reduction): Split out from ... * tree-vect-slp.c (vect_get_constant_vectors): ... here and simplify. * tree-vect-loop.c (vect_create_epilog_for_reduction): Use get_initial_defs_for_reduction instead of vect_get_vec_defs. (vectorizable_reduction): Adjust. * tree-vect-slp.c (vect_get_constant_vectors): Remove reduction handling. (vect_get_slp_defs): Likewise. * tree-vect-stmts.c (vect_get_vec_defs): Make static and adjust. (vectorizable_bswap): Adjust. (vectorizable_call): Likewise. (vectorizable_conversion): Likewise. (vectorizable_assignment): Likewise. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_store): Likewise. (vectorizable_condition): Likewise. (vectorizable_comparison): Likewise. From-SVN: r249722 --- gcc/ChangeLog | 25 +++++ gcc/tree-vect-loop.c | 207 +++++++++++++++++++++++++++++++++++++++++- gcc/tree-vect-slp.c | 92 +------------------ gcc/tree-vect-stmts.c | 41 ++++----- gcc/tree-vectorizer.h | 5 +- 5 files changed, 254 insertions(+), 116 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 79c825ec7eb..9a7e095c08c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2017-06-28 Richard Biener + + * tree-vectorizer.h (vect_get_vec_defs): Remove. + (vect_get_slp_defs): Adjust. + * tree-vect-loop.c (get_initial_defs_for_reduction): Split + out from ... + * tree-vect-slp.c (vect_get_constant_vectors): ... here and + simplify. + * tree-vect-loop.c (vect_create_epilog_for_reduction): Use + get_initial_defs_for_reduction instead of vect_get_vec_defs. + (vectorizable_reduction): Adjust. + * tree-vect-slp.c (vect_get_constant_vectors): Remove reduction + handling. + (vect_get_slp_defs): Likewise. + * tree-vect-stmts.c (vect_get_vec_defs): Make static and adjust. + (vectorizable_bswap): Adjust. + (vectorizable_call): Likewise. + (vectorizable_conversion): Likewise. + (vectorizable_assignment): Likewise. + (vectorizable_shift): Likewise. + (vectorizable_operation): Likewise. + (vectorizable_store): Likewise. + (vectorizable_condition): Likewise. + (vectorizable_comparison): Likewise. + 2017-06-28 Michael Collison PR target/68535 diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a7c3d3d7e29..0e725aedd8f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4045,6 +4045,203 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, return init_def; } +/* Get at the initial defs for OP in the reduction SLP_NODE. + NUMBER_OF_VECTORS is the number of vector defs to create. + REDUC_INDEX is the index of the reduction operand in the statements. */ + +static void +get_initial_defs_for_reduction (slp_tree slp_node, + vec *vec_oprnds, + unsigned int number_of_vectors, + int reduc_index, enum tree_code code) +{ + vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); + gimple *stmt = stmts[0]; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + unsigned nunits; + tree vec_cst; + tree *elts; + unsigned j, number_of_places_left_in_vector; + tree vector_type, scalar_type; + tree vop; + int group_size = stmts.length (); + unsigned int vec_num, i; + unsigned number_of_copies = 1; + vec voprnds; + voprnds.create (number_of_vectors); + bool constant_p; + tree neutral_op = NULL; + gimple *def_stmt; + struct loop *loop; + gimple_seq ctor_seq = NULL; + + vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); + scalar_type = TREE_TYPE (vector_type); + nunits = TYPE_VECTOR_SUBPARTS (vector_type); + + gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def + && reduc_index != -1); + + /* op is the reduction operand of the first stmt already. */ + /* For additional copies (see the explanation of NUMBER_OF_COPIES below) + we need either neutral operands or the original operands. See + get_initial_def_for_reduction() for details. */ + switch (code) + { + case WIDEN_SUM_EXPR: + case DOT_PROD_EXPR: + case SAD_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + neutral_op = build_zero_cst (scalar_type); + break; + + case MULT_EXPR: + neutral_op = build_one_cst (scalar_type); + break; + + case BIT_AND_EXPR: + neutral_op = build_all_ones_cst (scalar_type); + break; + + /* For MIN/MAX we don't have an easy neutral operand but + the initial values can be used fine here. Only for + a reduction chain we have to force a neutral element. */ + case MAX_EXPR: + case MIN_EXPR: + if (!GROUP_FIRST_ELEMENT (stmt_vinfo)) + neutral_op = NULL; + else + { + tree op = get_reduction_op (stmts[0], reduc_index); + def_stmt = SSA_NAME_DEF_STMT (op); + loop = (gimple_bb (stmt))->loop_father; + neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt, + loop_preheader_edge (loop)); + } + break; + + default: + gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo)); + neutral_op = NULL; + } + + /* NUMBER_OF_COPIES is the number of times we need to use the same values in + created vectors. It is greater than 1 if unrolling is performed. + + For example, we have two scalar operands, s1 and s2 (e.g., group of + strided accesses of size two), while NUNITS is four (i.e., four scalars + of this type can be packed in a vector). The output vector will contain + two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES + will be 2). + + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. + + For example, NUNITS is four as before, and the group size is 8 + (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and + {s5, s6, s7, s8}. */ + + number_of_copies = nunits * number_of_vectors / group_size; + + number_of_places_left_in_vector = nunits; + constant_p = true; + elts = XALLOCAVEC (tree, nunits); + for (j = 0; j < number_of_copies; j++) + { + for (i = group_size - 1; stmts.iterate (i, &stmt); i--) + { + tree op = get_reduction_op (stmt, reduc_index); + loop = (gimple_bb (stmt))->loop_father; + def_stmt = SSA_NAME_DEF_STMT (op); + + gcc_assert (loop); + + /* Get the def before the loop. In reduction chain we have only + one initial value. */ + if ((j != (number_of_copies - 1) + || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) + && i != 0)) + && neutral_op) + op = neutral_op; + else + op = PHI_ARG_DEF_FROM_EDGE (def_stmt, + loop_preheader_edge (loop)); + + /* Create 'vect_ = {op0,op1,...,opn}'. */ + number_of_places_left_in_vector--; + elts[number_of_places_left_in_vector] = op; + if (!CONSTANT_CLASS_P (op)) + constant_p = false; + + if (number_of_places_left_in_vector == 0) + { + if (constant_p) + vec_cst = build_vector (vector_type, elts); + else + { + vec *v; + unsigned k; + vec_alloc (v, nunits); + for (k = 0; k < nunits; ++k) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); + vec_cst = build_constructor (vector_type, v); + } + tree init; + gimple_stmt_iterator gsi; + init = vect_init_vector (stmt, vec_cst, vector_type, NULL); + if (ctor_seq != NULL) + { + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); + gsi_insert_seq_before_without_update (&gsi, ctor_seq, + GSI_SAME_STMT); + ctor_seq = NULL; + } + voprnds.quick_push (init); + + number_of_places_left_in_vector = nunits; + constant_p = true; + } + } + } + + /* Since the vectors are created in the reverse order, we should invert + them. */ + vec_num = voprnds.length (); + for (j = vec_num; j != 0; j--) + { + vop = voprnds[j - 1]; + vec_oprnds->quick_push (vop); + } + + voprnds.release (); + + /* In case that VF is greater than the unrolling factor needed for the SLP + group of stmts, NUMBER_OF_VECTORS to be created is greater than + NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have + to replicate the vectors. */ + while (number_of_vectors > vec_oprnds->length ()) + { + tree neutral_vec = NULL; + + if (neutral_op) + { + if (!neutral_vec) + neutral_vec = build_vector_from_val (vector_type, neutral_op); + + vec_oprnds->quick_push (neutral_vec); + } + else + { + for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++) + vec_oprnds->quick_push (vop); + } + } +} + + /* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction @@ -4189,8 +4386,12 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, /* Get the loop-entry arguments. */ enum vect_def_type initial_def_dt = vect_unknown_def_type; if (slp_node) - vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, - NULL, slp_node, reduc_index); + { + unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + vec_initial_defs.reserve (vec_num); + get_initial_defs_for_reduction (slp_node, &vec_initial_defs, + vec_num, reduc_index, code); + } else { /* Get at the scalar def before the loop, that defines the initial value @@ -5939,7 +6140,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (op_type == ternary_op) slp_ops.quick_push (reduc_index == 2 ? NULL : ops[2]); - vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1); + vect_get_slp_defs (slp_ops, slp_node, &vec_defs); vec_oprnds0.safe_splice (vec_defs[reduc_index == 0 ? 1 : 0]); vec_defs[reduc_index == 0 ? 1 : 0].release (); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 0800a3f6b23..b755562ed76 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2976,8 +2976,7 @@ vect_mask_constant_operand_p (gimple *stmt, int opnum) static void vect_get_constant_vectors (tree op, slp_tree slp_node, vec *vec_oprnds, - unsigned int op_num, unsigned int number_of_vectors, - int reduc_index) + unsigned int op_num, unsigned int number_of_vectors) { vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); gimple *stmt = stmts[0]; @@ -2996,8 +2995,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, bool constant_p, is_store; tree neutral_op = NULL; enum tree_code code = gimple_expr_code (stmt); - gimple *def_stmt; - struct loop *loop; gimple_seq ctor_seq = NULL; /* Check if vector type is a boolean vector. */ @@ -3009,64 +3006,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); nunits = TYPE_VECTOR_SUBPARTS (vector_type); - if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def - && reduc_index != -1) - { - op_num = reduc_index; - op = gimple_op (stmt, op_num + 1); - /* For additional copies (see the explanation of NUMBER_OF_COPIES below) - we need either neutral operands or the original operands. See - get_initial_def_for_reduction() for details. */ - switch (code) - { - case WIDEN_SUM_EXPR: - case DOT_PROD_EXPR: - case SAD_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op))) - neutral_op = build_real (TREE_TYPE (op), dconst0); - else - neutral_op = build_int_cst (TREE_TYPE (op), 0); - - break; - - case MULT_EXPR: - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op))) - neutral_op = build_real (TREE_TYPE (op), dconst1); - else - neutral_op = build_int_cst (TREE_TYPE (op), 1); - - break; - - case BIT_AND_EXPR: - neutral_op = build_int_cst (TREE_TYPE (op), -1); - break; - - /* For MIN/MAX we don't have an easy neutral operand but - the initial values can be used fine here. Only for - a reduction chain we have to force a neutral element. */ - case MAX_EXPR: - case MIN_EXPR: - if (!GROUP_FIRST_ELEMENT (stmt_vinfo)) - neutral_op = NULL; - else - { - def_stmt = SSA_NAME_DEF_STMT (op); - loop = (gimple_bb (stmt))->loop_father; - neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt, - loop_preheader_edge (loop)); - } - break; - - default: - gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo)); - neutral_op = NULL; - } - } - if (STMT_VINFO_DATA_REF (stmt_vinfo)) { is_store = true; @@ -3154,25 +3093,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, } } - if (reduc_index != -1) - { - loop = (gimple_bb (stmt))->loop_father; - def_stmt = SSA_NAME_DEF_STMT (op); - - gcc_assert (loop); - - /* Get the def before the loop. In reduction chain we have only - one initial value. */ - if ((j != (number_of_copies - 1) - || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) - && i != 0)) - && neutral_op) - op = neutral_op; - else - op = PHI_ARG_DEF_FROM_EDGE (def_stmt, - loop_preheader_edge (loop)); - } - /* Create 'vect_ = {op0,op1,...,opn}'. */ number_of_places_left_in_vector--; tree orig_op = op; @@ -3340,7 +3260,7 @@ vect_get_slp_vect_defs (slp_tree slp_node, vec *vec_oprnds) void vect_get_slp_defs (vec ops, slp_tree slp_node, - vec > *vec_oprnds, int reduc_index) + vec > *vec_oprnds) { gimple *first_stmt; int number_of_vects = 0, i; @@ -3421,20 +3341,16 @@ vect_get_slp_defs (vec ops, slp_tree slp_node, /* For reduction defs we call vect_get_constant_vectors (), since we are looking for initial loop invariant values. */ - if (vectorized_defs && reduc_index == -1) + if (vectorized_defs) /* The defs are already vectorized. */ vect_get_slp_vect_defs (child, &vec_defs); else /* Build vectors from scalar defs. */ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, - number_of_vects, reduc_index); + number_of_vects); vec_oprnds->quick_push (vec_defs); - /* For reductions, we only need initial values. */ - if (reduc_index != -1) - return; - first_iteration = false; } } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index d1d97cb662e..e04390ec4b0 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1558,11 +1558,11 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, REDUC_INDEX is the index of reduction operand in case of reduction, and -1 otherwise. */ -void +static void vect_get_vec_defs (tree op0, tree op1, gimple *stmt, vec *vec_oprnds0, vec *vec_oprnds1, - slp_tree slp_node, int reduc_index) + slp_tree slp_node) { if (slp_node) { @@ -1574,7 +1574,7 @@ vect_get_vec_defs (tree op0, tree op1, gimple *stmt, if (op1) ops.quick_push (op1); - vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); + vect_get_slp_defs (ops, slp_node, &vec_defs); *vec_oprnds0 = vec_defs[0]; if (op1) @@ -2525,7 +2525,7 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, { /* Handle uses. */ if (j == 0) - vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); else vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); @@ -2860,7 +2860,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, for (i = 0; i < nargs; i++) vargs.quick_push (gimple_call_arg (stmt, i)); - vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vect_get_slp_defs (vargs, slp_node, &vec_defs); vec_oprnds0 = vec_defs[0]; /* Arguments are ready. Create the new vector stmt. */ @@ -2990,7 +2990,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, for (i = 0; i < nargs; i++) vargs.quick_push (gimple_call_arg (stmt, i)); - vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vect_get_slp_defs (vargs, slp_node, &vec_defs); vec_oprnds0 = vec_defs[0]; /* Arguments are ready. Create the new vector stmt. */ @@ -4401,8 +4401,7 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, for (j = 0; j < ncopies; j++) { if (j == 0) - vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, - -1); + vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); else vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); @@ -4462,11 +4461,11 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, vec_oprnds1.quick_push (vec_oprnd1); vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, - slp_node, -1); + slp_node); } else vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, - &vec_oprnds1, slp_node, -1); + &vec_oprnds1, slp_node); } else { @@ -4565,7 +4564,7 @@ vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, /* Handle uses. */ if (slp_node) vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, - slp_node, -1); + slp_node); else { vec_oprnds0.truncate (0); @@ -4748,7 +4747,7 @@ vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi, { /* Handle uses. */ if (j == 0) - vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); else vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); @@ -5155,10 +5154,10 @@ vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi, operand 1 should be of a vector type (the usual case). */ if (vec_oprnd1) vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, - slp_node, -1); + slp_node); else vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, - slp_node, -1); + slp_node); } else vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); @@ -5505,13 +5504,13 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi, { if (op_type == binary_op || op_type == ternary_op) vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, - slp_node, -1); + slp_node); else vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, - slp_node, -1); + slp_node); if (op_type == ternary_op) vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL, - slp_node, -1); + slp_node); } else { @@ -6076,7 +6075,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, if (slp) { vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL, - slp_node, -1); + slp_node); vec_oprnd = vec_oprnds[0]; } else @@ -6223,7 +6222,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, { /* Get vectorized arguments for SLP_NODE. */ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, - NULL, slp_node, -1); + NULL, slp_node); vec_oprnd = vec_oprnds[0]; } @@ -7976,7 +7975,7 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, } ops.safe_push (then_clause); ops.safe_push (else_clause); - vect_get_slp_defs (ops, slp_node, &vec_defs, -1); + vect_get_slp_defs (ops, slp_node, &vec_defs); vec_oprnds3 = vec_defs.pop (); vec_oprnds2 = vec_defs.pop (); if (!masked) @@ -8314,7 +8313,7 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi, ops.safe_push (rhs1); ops.safe_push (rhs2); - vect_get_slp_defs (ops, slp_node, &vec_defs, -1); + vect_get_slp_defs (ops, slp_node, &vec_defs); vec_oprnds1 = vec_defs.pop (); vec_oprnds0 = vec_defs.pop (); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 2e46467ee37..1ab12319d53 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1094,8 +1094,6 @@ extern void vect_get_load_cost (struct data_reference *, int, bool, extern void vect_get_store_cost (struct data_reference *, int, unsigned int *, stmt_vector_for_cost *); extern bool vect_supportable_shift (enum tree_code, tree); -extern void vect_get_vec_defs (tree, tree, gimple *, vec *, - vec *, slp_tree, int); extern tree vect_gen_perm_mask_any (tree, const unsigned char *); extern tree vect_gen_perm_mask_checked (tree, const unsigned char *); extern void optimize_mask_stores (struct loop*); @@ -1179,8 +1177,7 @@ extern bool vect_schedule_slp (vec_info *); extern bool vect_analyze_slp (vec_info *, unsigned); extern bool vect_make_slp_decision (loop_vec_info); extern void vect_detect_hybrid_slp (loop_vec_info); -extern void vect_get_slp_defs (vec , slp_tree, - vec > *, int); +extern void vect_get_slp_defs (vec , slp_tree, vec > *); extern bool vect_slp_bb (basic_block); extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); extern bool is_simple_and_all_uses_invariant (gimple *, loop_vec_info); -- 2.30.2