From 12c667b5b449a9c86c763438fb96e6e029533fb7 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 12 Dec 2017 09:55:02 +0100 Subject: [PATCH] re PR tree-optimization/80631 (Compiling with -O3 -mavx2 gives wrong code) PR tree-optimization/80631 * tree-vect-loop.c (get_initial_def_for_reduction): Fix comment typo. (vect_create_epilog_for_reduction): Add INDUC_VAL and INDUC_CODE arguments, for INTEGER_INDUC_COND_REDUCTION use INDUC_VAL instead of hardcoding zero as the value if COND_EXPR is never true. For INTEGER_INDUC_COND_REDUCTION don't emit the final COND_EXPR if INDUC_VAL is equal to INITIAL_DEF, and use INDUC_CODE instead of hardcoding MAX_EXPR as the reduction operation. (is_nonwrapping_integer_induction): Allow negative step. (vectorizable_reduction): Compute INDUC_VAL and INDUC_CODE for vect_create_epilog_for_reduction, if no value is suitable, don't use INTEGER_INDUC_COND_REDUCTION for now. Formatting fixes. * gcc.dg/vect/pr80631-1.c: New test. * gcc.dg/vect/pr80631-2.c: New test. * gcc.dg/vect/pr65947-13.c: Expect integer induc cond reduction vectorization. From-SVN: r255574 --- gcc/ChangeLog | 15 +++ gcc/testsuite/ChangeLog | 8 ++ gcc/testsuite/gcc.dg/vect/pr65947-13.c | 5 +- gcc/testsuite/gcc.dg/vect/pr80631-1.c | 76 ++++++++++++++ gcc/testsuite/gcc.dg/vect/pr80631-2.c | 76 ++++++++++++++ gcc/tree-vect-loop.c | 139 ++++++++++++++++++------- 6 files changed, 280 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr80631-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr80631-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6ab0e47c2d0..1e26d7b9747 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2017-12-12 Jakub Jelinek + + PR tree-optimization/80631 + * tree-vect-loop.c (get_initial_def_for_reduction): Fix comment typo. + (vect_create_epilog_for_reduction): Add INDUC_VAL and INDUC_CODE + arguments, for INTEGER_INDUC_COND_REDUCTION use INDUC_VAL instead of + hardcoding zero as the value if COND_EXPR is never true. For + INTEGER_INDUC_COND_REDUCTION don't emit the final COND_EXPR if + INDUC_VAL is equal to INITIAL_DEF, and use INDUC_CODE instead of + hardcoding MAX_EXPR as the reduction operation. + (is_nonwrapping_integer_induction): Allow negative step. + (vectorizable_reduction): Compute INDUC_VAL and INDUC_CODE for + vect_create_epilog_for_reduction, if no value is suitable, don't + use INTEGER_INDUC_COND_REDUCTION for now. Formatting fixes. + 2017-12-12 Richard Biener PR tree-optimization/81889 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dfc5ed854ca..fcb4c6d8323 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2017-12-12 Jakub Jelinek + + PR tree-optimization/80631 + * gcc.dg/vect/pr80631-1.c: New test. + * gcc.dg/vect/pr80631-2.c: New test. + * gcc.dg/vect/pr65947-13.c: Expect integer induc cond reduction + vectorization. + 2017-12-12 Richard Biener PR tree-optimization/81889 diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index 061777af34c..ce290459c50 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -6,8 +6,7 @@ extern void abort (void) __attribute__ ((noreturn)); #define N 32 -/* Simple condition reduction with a reversed loop. - Will fail to vectorize to a simple case. */ +/* Simple condition reduction with a reversed loop. */ int condition_reduction (int *a, int min_v) @@ -42,4 +41,4 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c new file mode 100644 index 00000000000..4af741d5307 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c @@ -0,0 +1,76 @@ +/* PR tree-optimization/80631 */ +/* { dg-do run } */ + +#include "tree-vect.h" + +int v[8] = { 77, 1, 79, 3, 4, 3, 6, 7 }; + +__attribute__((noipa)) void +f1 (void) +{ + int k, r = -1; + for (k = 0; k < 8; k++) + if (v[k] == 77) + r = k; + if (r != 0) + abort (); +} + +__attribute__((noipa)) void +f2 (void) +{ + int k, r = 4; + for (k = 0; k < 8; k++) + if (v[k] == 79) + r = k; + if (r != 2) + abort (); +} + +__attribute__((noipa)) void +f3 (void) +{ + int k, r = -17; + for (k = 0; k < 8; k++) + if (v[k] == 78) + r = k; + if (r != -17) + abort (); +} + +__attribute__((noipa)) void +f4 (void) +{ + int k, r = 7; + for (k = 0; k < 8; k++) + if (v[k] == 78) + r = k; + if (r != 7) + abort (); +} + +__attribute__((noipa)) void +f5 (void) +{ + int k, r = -1; + for (k = 0; k < 8; k++) + if (v[k] == 3) + r = k; + if (r != 5) + abort (); +} + +int +main () +{ + check_vect (); + f1 (); + f2 (); + f3 (); + f4 (); + f5 (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c new file mode 100644 index 00000000000..777738ad763 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c @@ -0,0 +1,76 @@ +/* PR tree-optimization/80631 */ +/* { dg-do run } */ + +#include "tree-vect.h" + +int v[8] = { 77, 1, 79, 3, 4, 3, 6, 7 }; + +__attribute__((noipa)) void +f1 (void) +{ + int k, r = -1; + for (k = 7; k >= 0; k--) + if (v[k] == 77) + r = k; + if (r != 0) + abort (); +} + +__attribute__((noipa)) void +f2 (void) +{ + int k, r = 4; + for (k = 7; k >= 0; k--) + if (v[k] == 79) + r = k; + if (r != 2) + abort (); +} + +__attribute__((noipa)) void +f3 (void) +{ + int k, r = -17; + for (k = 7; k >= 0; k--) + if (v[k] == 78) + r = k; + if (r != -17) + abort (); +} + +__attribute__((noipa)) void +f4 (void) +{ + int k, r = 7; + for (k = 7; k >= 0; k--) + if (v[k] == 78) + r = k; + if (r != 7) + abort (); +} + +__attribute__((noipa)) void +f5 (void) +{ + int k, r = -1; + for (k = 7; k >= 0; k--) + if (v[k] == 3) + r = k; + if (r != 3) + abort (); +} + +int +main () +{ + check_vect (); + f1 (); + f2 (); + f3 (); + f4 (); + f5 (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition } } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3e4703a3600..a66c8cfccad 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4034,7 +4034,7 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, case MULT_EXPR: case BIT_AND_EXPR: { - /* ADJUSMENT_DEF is NULL when called from + /* ADJUSTMENT_DEF is NULL when called from vect_create_epilog_for_reduction to vectorize double reduction. */ if (adjustment_def) *adjustment_def = init_val; @@ -4283,6 +4283,11 @@ get_initial_defs_for_reduction (slp_tree slp_node, DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. SLP_NODE is an SLP node containing a group of reduction statements. The first one in this group is STMT. + INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case + when the COND_EXPR is never true in the loop. For MAX_EXPR, it needs to + be smaller than any value of the IV in the loop, for MIN_EXPR larger than + any value of the IV in the loop. + INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION. This function: 1. Creates the reduction def-use cycles: sets the arguments for @@ -4330,7 +4335,8 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, vec reduction_phis, bool double_reduc, slp_tree slp_node, - slp_instance slp_node_instance) + slp_instance slp_node_instance, + tree induc_val, enum tree_code induc_code) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info prev_phi_info; @@ -4419,6 +4425,18 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, gimple *def_stmt; initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, loop_preheader_edge (loop)); + /* Optimize: if initial_def is for REDUC_MAX smaller than the base + and we can't use zero for induc_val, use initial_def. Similarly + for REDUC_MIN and initial_def larger than the base. */ + if (TREE_CODE (initial_def) == INTEGER_CST + && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + == INTEGER_INDUC_COND_REDUCTION) + && !integer_zerop (induc_val) + && ((reduc_fn == IFN_REDUC_MAX + && tree_int_cst_lt (initial_def, induc_val)) + || (reduc_fn == IFN_REDUC_MIN + && tree_int_cst_lt (induc_val, initial_def)))) + induc_val = initial_def; vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt); vec_initial_def = get_initial_def_for_reduction (stmt, initial_def, &adjustment_def); @@ -4453,9 +4471,10 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, gcc_assert (i == 0); tree vec_init_def_type = TREE_TYPE (vec_init_def); - tree zero_vec = build_zero_cst (vec_init_def_type); + tree induc_val_vec + = build_vector_from_val (vec_init_def_type, induc_val); - add_phi_arg (as_a (phi), zero_vec, + add_phi_arg (as_a (phi), induc_val_vec, loop_preheader_edge (loop), UNKNOWN_LOCATION); } else @@ -4983,14 +5002,16 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, gimple_set_lhs (epilog_stmt, new_temp); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == INTEGER_INDUC_COND_REDUCTION) + if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + == INTEGER_INDUC_COND_REDUCTION) + && !operand_equal_p (initial_def, induc_val, 0)) { - /* Earlier we set the initial value to be zero. Check the result - and if it is zero then replace with the original initial - value. */ - tree zero = build_zero_cst (scalar_type); - tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); + /* Earlier we set the initial value to be a vector if induc_val + values. Check the result and if it is induc_val then replace + with the original initial value, unless induc_val is + the same as initial_def already. */ + tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, + induc_val); tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, @@ -5008,9 +5029,16 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); tree vec_temp; - /* COND reductions all do the final reduction with MAX_EXPR. */ + /* COND reductions all do the final reduction with MAX_EXPR + or MIN_EXPR. */ if (code == COND_EXPR) - code = MAX_EXPR; + { + if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + == INTEGER_INDUC_COND_REDUCTION) + code = induc_code; + else + code = MAX_EXPR; + } /* Regardless of whether we have a whole vector shift, if we're emulating the operation via tree-vect-generic, we don't want @@ -5110,7 +5138,7 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, else vec_temp = gimple_assign_lhs (new_phi); tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, - bitsize_zero_node); + bitsize_zero_node); epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); gimple_assign_set_lhs (epilog_stmt, new_temp); @@ -5179,14 +5207,16 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, scalar_results.safe_push (new_temp); } - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == INTEGER_INDUC_COND_REDUCTION) + if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + == INTEGER_INDUC_COND_REDUCTION) + && !operand_equal_p (initial_def, induc_val, 0)) { - /* Earlier we set the initial value to be zero. Check the result - and if it is zero then replace with the original initial - value. */ - tree zero = build_zero_cst (scalar_type); - tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); + /* Earlier we set the initial value to be a vector if induc_val + values. Check the result and if it is induc_val then replace + with the original initial value, unless induc_val is + the same as initial_def already. */ + tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, + induc_val); tree tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, @@ -5513,10 +5543,6 @@ is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop) || TREE_CODE (step) != INTEGER_CST) return false; - /* Check that the induction increments. */ - if (tree_int_cst_sgn (step) == -1) - return false; - /* Check that the max size of the loop will not wrap. */ if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) @@ -5608,6 +5634,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, tree new_temp = NULL_TREE; gimple *def_stmt; enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; + gimple *cond_reduc_def_stmt = NULL; + enum tree_code cond_reduc_op_code = ERROR_MARK; tree scalar_type; bool is_simple_use; gimple *orig_stmt; @@ -5879,9 +5907,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, cond_reduc_dt = dt; cond_reduc_val = ops[i]; } - if (dt == vect_induction_def && def_stmt != NULL + if (dt == vect_induction_def + && def_stmt != NULL && is_nonwrapping_integer_induction (def_stmt, loop)) - cond_reduc_dt = dt; + { + cond_reduc_dt = dt; + cond_reduc_def_stmt = def_stmt; + } } } @@ -5928,12 +5960,46 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, { if (cond_reduc_dt == vect_induction_def) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "condition expression based on " - "integer induction.\n"); - STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - = INTEGER_INDUC_COND_REDUCTION; + stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt); + tree base + = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo); + tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo); + + gcc_assert (TREE_CODE (base) == INTEGER_CST + && TREE_CODE (step) == INTEGER_CST); + cond_reduc_val = NULL_TREE; + /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR + above base; punt if base is the minimum value of the type for + MAX_EXPR or maximum value of the type for MIN_EXPR for now. */ + if (tree_int_cst_sgn (step) == -1) + { + cond_reduc_op_code = MIN_EXPR; + if (tree_int_cst_sgn (base) == -1) + cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); + else if (tree_int_cst_lt (base, + TYPE_MAX_VALUE (TREE_TYPE (base)))) + cond_reduc_val + = int_const_binop (PLUS_EXPR, base, integer_one_node); + } + else + { + cond_reduc_op_code = MAX_EXPR; + if (tree_int_cst_sgn (base) == 1) + cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); + else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)), + base)) + cond_reduc_val + = int_const_binop (MINUS_EXPR, base, integer_one_node); + } + if (cond_reduc_val) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "condition expression based on " + "integer induction.\n"); + STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + = INTEGER_INDUC_COND_REDUCTION; + } } /* Loop peeling modifies initial value of reduction PHI, which @@ -6127,8 +6193,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); } else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == INTEGER_INDUC_COND_REDUCTION) - orig_code = MAX_EXPR; + == INTEGER_INDUC_COND_REDUCTION) + orig_code = cond_reduc_op_code; } if (nested_cycle) @@ -6464,7 +6530,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt, epilog_copies, reduc_fn, phis, - double_reduc, slp_node, slp_node_instance); + double_reduc, slp_node, slp_node_instance, + cond_reduc_val, cond_reduc_op_code); return true; } -- 2.30.2