From 99f76d9bacfcef28546dda5a51cb3651ba422518 Mon Sep 17 00:00:00 2001 From: Alan Lawrence Date: Mon, 27 Oct 2014 14:04:43 +0000 Subject: [PATCH] [Vectorizer] Make REDUC_xxx_EXPR tree codes produce a scalar result PR tree-optimization/61114 * expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add extract_bit_field around optab result. * fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce scalar not vector. * tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type for REDUC_{MIN,MAX,PLUS}_EXPR. * tree-vect-loop.c (vect_analyze_loop): Update comment. (vect_create_epilog_for_reduction): For direct vector reduction, use result of tree code directly without extract_bit_field. * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update comment. From-SVN: r216736 --- gcc/ChangeLog | 19 +++++++++++++++++++ gcc/expr.c | 12 +++++++++++- gcc/fold-const.c | 6 +++--- gcc/tree-cfg.c | 15 ++++++++++++--- gcc/tree-vect-loop.c | 31 ++++++++++++++++++++++--------- gcc/tree.def | 7 +++---- 6 files changed, 70 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2c8b4655380..c2a25c9ab42 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2014-10-27 Alan Lawrence + + PR tree-optimization/61114 + * expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add + extract_bit_field around optab result. + + * fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce + scalar not vector. + + * tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type + for REDUC_{MIN,MAX,PLUS}_EXPR. + + * tree-vect-loop.c (vect_analyze_loop): Update comment. + (vect_create_epilog_for_reduction): For direct vector reduction, use + result of tree code directly without extract_bit_field. + + * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update + comment. + 2014-10-27 Andrew MacLeod * basic-block.h: Remove all includes. diff --git a/gcc/expr.c b/gcc/expr.c index 81c92190bca..e9cabbe412d 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9051,7 +9051,17 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, { op0 = expand_normal (treeop0); this_optab = optab_for_tree_code (code, type, optab_default); - temp = expand_unop (mode, this_optab, op0, target, unsignedp); + enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0)); + temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp); + gcc_assert (temp); + /* The tree code produces a scalar result, but (somewhat by convention) + the optab produces a vector with the result in element 0 if + little-endian, or element N-1 if big-endian. So pull the scalar + result out of that element. */ + int index = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (vec_mode) - 1 : 0; + int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (vec_mode)); + temp = extract_bit_field (temp, bitsize, bitsize * index, unsignedp, + target, mode, mode); gcc_assert (temp); return temp; } diff --git a/gcc/fold-const.c b/gcc/fold-const.c index be6241f7c5d..ba9b013db1c 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -8259,12 +8259,13 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0) case REDUC_MAX_EXPR: case REDUC_PLUS_EXPR: { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int nelts, i; tree *elts; enum tree_code subcode; if (TREE_CODE (op0) != VECTOR_CST) return NULL_TREE; + nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); elts = XALLOCAVEC (tree, nelts); if (!vec_cst_ctor_to_array (op0, elts)) @@ -8283,10 +8284,9 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0) elts[0] = const_binop (subcode, elts[0], elts[i]); if (elts[0] == NULL_TREE || !CONSTANT_CLASS_P (elts[0])) return NULL_TREE; - elts[i] = build_zero_cst (TREE_TYPE (type)); } - return build_vector (type, elts); + return elts[0]; } default: diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 097de235836..761d6f48fb9 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -3553,12 +3553,21 @@ verify_gimple_assign_unary (gimple stmt) return false; } - - case VEC_UNPACK_HI_EXPR: - case VEC_UNPACK_LO_EXPR: case REDUC_MAX_EXPR: case REDUC_MIN_EXPR: case REDUC_PLUS_EXPR: + if (!VECTOR_TYPE_P (rhs1_type) + || !useless_type_conversion_p (lhs_type, TREE_TYPE (rhs1_type))) + { + error ("reduction should convert from vector to element type"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; + } + return false; + + case VEC_UNPACK_HI_EXPR: + case VEC_UNPACK_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: /* FIXME. */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8acc6fe9342..b4847ab7b76 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1903,9 +1903,9 @@ vect_analyze_loop (struct loop *loop) Output: REDUC_CODE - the corresponding tree-code to be used to reduce the - vector of partial results into a single scalar result (which - will also reside in a vector) or ERROR_MARK if the operation is - a supported reduction operation, but does not have such tree-code. + vector of partial results into a single scalar result, or ERROR_MARK + if the operation is a supported reduction operation, but does not have + such a tree-code. Return FALSE if CODE currently cannot be vectorized as reduction. */ @@ -4179,6 +4179,7 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple stmt, if (reduc_code != ERROR_MARK && !slp_reduc) { tree tmp; + tree vec_elem_type; /*** Case 1: Create: v_out2 = reduc_expr */ @@ -4187,14 +4188,26 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple stmt, dump_printf_loc (MSG_NOTE, vect_location, "Reduce using direct vector reduction.\n"); - vec_dest = vect_create_destination_var (scalar_dest, vectype); - tmp = build1 (reduc_code, vectype, new_phi_result); - epilog_stmt = gimple_build_assign (vec_dest, tmp); - new_temp = make_ssa_name (vec_dest, epilog_stmt); + vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); + if (!useless_type_conversion_p (scalar_type, vec_elem_type)) + { + tree tmp_dest = + vect_create_destination_var (scalar_dest, vec_elem_type); + tmp = build1 (reduc_code, vec_elem_type, new_phi_result); + epilog_stmt = gimple_build_assign (tmp_dest, tmp); + new_temp = make_ssa_name (tmp_dest, epilog_stmt); + gimple_assign_set_lhs (epilog_stmt, new_temp); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + + tmp = build1 (NOP_EXPR, scalar_type, new_temp); + } + else + tmp = build1 (reduc_code, scalar_type, new_phi_result); + epilog_stmt = gimple_build_assign (new_scalar_dest, tmp); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); gimple_assign_set_lhs (epilog_stmt, new_temp); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - - extract_scalar_result = true; + scalar_results.safe_push (new_temp); } else { diff --git a/gcc/tree.def b/gcc/tree.def index bd39e4b89bd..c830e4bad84 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1161,10 +1161,9 @@ DEFTREECODE (TRANSACTION_EXPR, "transaction_expr", tcc_expression, 1) result (e.g. summing the elements of the vector, finding the minimum over the vector elements, etc). Operand 0 is a vector. - The expression returns a vector of the same type, with the first - element in the vector holding the result of the reduction of all elements - of the operand. The content of the other elements in the returned vector - is undefined. */ + The expression returns a scalar, with type the same as the elements of the + vector, holding the result of the reduction of all elements of the operand. + */ DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1) DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1) DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1) -- 2.30.2