From 8da4c8d83b7a21127cbe464aa54c5f3e7c034feb Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 8 Dec 2017 08:06:31 +0000 Subject: [PATCH] re PR tree-optimization/81303 (410.bwaves regression caused by r249919) 2017-12-08 Richard Biener PR tree-optimization/81303 * tree-vect-stmts.c (vect_is_simple_cond): For invariant conditions try to create a comparison vector type matching the data vector type. (vectorizable_condition): Adjust. * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Leave invariant conditions alone in case we can vectorize those. * gcc.target/i386/vectorize9.c: New testcase. * gcc.target/i386/vectorize10.c: New testcase. From-SVN: r255497 --- gcc/ChangeLog | 10 ++++++++ gcc/testsuite/ChangeLog | 6 +++++ gcc/testsuite/gcc.target/i386/vectorize10.c | 16 +++++++++++++ gcc/testsuite/gcc.target/i386/vectorize9.c | 16 +++++++++++++ gcc/tree-vect-patterns.c | 26 +++++++++++++++++++++ gcc/tree-vect-stmts.c | 19 +++++++++++++-- 6 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vectorize10.c create mode 100644 gcc/testsuite/gcc.target/i386/vectorize9.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1d20dc5234c..a666d115e37 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2017-12-08 Richard Biener + + PR tree-optimization/81303 + * tree-vect-stmts.c (vect_is_simple_cond): For invariant + conditions try to create a comparison vector type matching + the data vector type. + (vectorizable_condition): Adjust. + * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): + Leave invariant conditions alone in case we can vectorize those. 
+ 2017-12-08 Julia Koval * config/i386/avx512vnniintrin.h (_mm512_dpwssd_epi32, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b13130d4454..8324d4f3ad4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-12-08 Richard Biener + + PR tree-optimization/81303 + * gcc.target/i386/vectorize9.c: New testcase. + * gcc.target/i386/vectorize10.c: New testcase. + 2017-12-08 Julia Koval * gcc.target/i386/avx512f-vnni-1.c: Add vdpwssd checks. diff --git a/gcc/testsuite/gcc.target/i386/vectorize10.c b/gcc/testsuite/gcc.target/i386/vectorize10.c new file mode 100644 index 00000000000..4353aec2aca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize10.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftree-loop-vectorize -fdump-tree-vect-details -fno-tree-loop-im -msse2 -mno-avx" } */ + +double x[1024][1024], red[1024]; +void foo (void) +{ + for (int i = 0; i < 1024; ++i) + for (int j = 0; j < 1024; ++j) + { + double v = i == 0 ? 0.0 : red[j]; + v = v + x[i][j]; + red[j] = v; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vectorize9.c b/gcc/testsuite/gcc.target/i386/vectorize9.c new file mode 100644 index 00000000000..3f02be427b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize9.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftree-loop-vectorize -fdump-tree-vect-details -fno-tree-loop-im -mavx2 -mprefer-vector-width=256" } */ + +double x[1024][1024], red[1024]; +void foo (void) +{ + for (int i = 0; i < 1024; ++i) + for (int j = 0; j < 1024; ++j) + { + double v = i == 0 ? 
0.0 : red[j]; + v = v + x[i][j]; + red[j] = v; + } +} + +/* { dg-final { scan-tree-dump "vectorization factor = 4" "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index e4051b68dd0..a2c629309e0 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -3976,6 +3976,32 @@ vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in, || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2)) return NULL; + /* If rhs1 is invariant and we can promote it leave the COND_EXPR + in place, we can handle it in vectorizable_condition. This avoids + unnecessary promotion stmts and increased vectorization factor. */ + if (COMPARISON_CLASS_P (rhs1) + && INTEGRAL_TYPE_P (rhs1_type) + && TYPE_VECTOR_SUBPARTS (vectype1) < TYPE_VECTOR_SUBPARTS (vectype2)) + { + gimple *dummy; + enum vect_def_type dt; + if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo, + &dummy, &dt) + && dt == vect_external_def + && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo, + &dummy, &dt) + && (dt == vect_external_def + || dt == vect_constant_def)) + { + tree wide_scalar_type = build_nonstandard_integer_type + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))), + TYPE_UNSIGNED (rhs1_type)); + tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type); + if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1))) + return NULL; + } + } + /* If rhs1 is a comparison we need to move it into a separate statement. 
*/ if (TREE_CODE (rhs1) != SSA_NAME) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ee2f1ca1ccd..2bebad152eb 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -7791,7 +7791,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, static bool vect_is_simple_cond (tree cond, vec_info *vinfo, - tree *comp_vectype, enum vect_def_type *dts) + tree *comp_vectype, enum vect_def_type *dts, + tree vectype) { tree lhs, rhs; tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; @@ -7844,6 +7845,20 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, return false; *comp_vectype = vectype1 ? vectype1 : vectype2; + /* Invariant comparison. */ + if (! *comp_vectype) + { + tree scalar_type = TREE_TYPE (lhs); + /* If we can widen the comparison to match vectype do so. */ + if (INTEGRAL_TYPE_P (scalar_type) + && tree_int_cst_lt (TYPE_SIZE (scalar_type), + TYPE_SIZE (TREE_TYPE (vectype)))) + scalar_type = build_nonstandard_integer_type + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), + TYPE_UNSIGNED (scalar_type)); + *comp_vectype = get_vectype_for_scalar_type (scalar_type); + } + return true; } @@ -7941,7 +7956,7 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, else_clause = gimple_assign_rhs3 (stmt); if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, - &comp_vectype, &dts[0]) + &comp_vectype, &dts[0], vectype) || !comp_vectype) return false; -- 2.30.2