From f53e9d40de7212413b361758d66aafb833173dd9 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Tue, 3 Nov 2020 09:53:11 +0100
Subject: [PATCH] tree-optimization/97678 - fix SLP induction epilogue
 vectorization

This restores not tracking SLP nodes for induction initial values
in the not-nested context because that interferes with peeling and
epilogue vectorization.

2020-11-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/97678
	* tree-vect-slp.c (vect_build_slp_tree_2): Do not track the
	initial values of inductions when not nested.
	* tree-vect-loop.c (vectorizable_induction): Look at the PHI
	node initial values again for SLP and not nested inductions.
	Handle LOOP_VINFO_MASK_SKIP_NITERS and cost invariants.

	* gcc.dg/vect/pr97678.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr97678.c | 29 +++++++++++++++++
 gcc/tree-vect-loop.c                | 49 ++++++++++++++++++++++++++---
 gcc/tree-vect-slp.c                 |  8 +++--
 3 files changed, 79 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr97678.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr97678.c b/gcc/testsuite/gcc.dg/vect/pr97678.c
new file mode 100644
index 00000000000..ebe4a35bb3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97678.c
@@ -0,0 +1,29 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int
+main ()
+{
+  unsigned int i = 0;
+  unsigned short b[158 * 2];
+
+  check_vect ();
+
+  for (i = 0; i < 158; i++)
+    {
+      b[i * 2] = i * 7;
+      b[i * 2 + 1] = i * 8;
+    }
+
+  for (i = 0; i < 158; ++i)
+    if (b[i*2] != (unsigned short)(i*7)
+        || b[i*2+1] != (unsigned short)(i*8))
+      abort ();
+
+  return 0;
+}
+
+/* The init loop should be vectorized with SLP.  */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index fcea28935bc..6fa185daa28 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7800,6 +7800,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 	= record_stmt_cost (cost_vec,
 			    SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
 			    vector_stmt, stmt_info, 0, vect_body);
+      /* prologue cost for vec_init (if not nested) and step.  */
+      prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+					scalar_to_vec,
+					stmt_info, 0, vect_prologue);
     }
   else /* if (!slp_node) */
     {
@@ -7858,9 +7862,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 	 cycles we have to reconstruct the step from SCEV data.  */
       unsigned group_size = SLP_TREE_LANES (slp_node);
       tree *steps = XALLOCAVEC (tree, group_size);
+      tree *inits = XALLOCAVEC (tree, group_size);
       stmt_vec_info phi_info;
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
-	steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+	{
+	  steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+	  if (!init_node)
+	    inits[i] = gimple_phi_arg_def (as_a <gphi *> (phi_info->stmt),
+					   pe->dest_idx);
+	}
 
       /* Now generate the IVs.  */
       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
@@ -7875,16 +7885,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 				     ? build_real_from_wide (stept, lup_mul,
 							     UNSIGNED)
 				     : build_int_cstu (stept, lup_mul));
+	  tree peel_mul = NULL_TREE;
+	  if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+	    {
+	      stmts = NULL;
+	      if (SCALAR_FLOAT_TYPE_P (stept))
+		peel_mul = gimple_build (&stmts, FLOAT_EXPR, stept,
+					 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+	      else
+		peel_mul = gimple_convert (&stmts, stept,
+					   LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+	      peel_mul = gimple_build_vector_from_val (&stmts, step_vectype, peel_mul);
+	      if (stmts)
+		{
+		  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+		  gcc_assert (!new_bb);
+		}
+	    }
 	  unsigned ivn;
 	  auto_vec<tree> vec_steps;
 	  for (ivn = 0; ivn < nivs; ++ivn)
 	    {
-	      tree_vector_builder elts (step_vectype, const_nunits, 1);
+	      tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+	      tree_vector_builder init_elts (vectype, const_nunits, 1);
 	      tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
 	      for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
 		{
 		  tree elt = steps[(ivn*const_nunits + eltn) % group_size];
-		  elts.quick_push (elt);
+		  step_elts.quick_push (elt);
+		  if (!init_node)
+		    {
+		      elt = inits[(ivn*const_nunits + eltn) % group_size];
+		      init_elts.quick_push (elt);
+		    }
 		  unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
 		  mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
 				       ? build_real_from_wide (stept,
@@ -7892,10 +7925,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 							       mul_elt, UNSIGNED)
 				       : build_int_cstu (stept, mul_elt));
 		}
 	      stmts = NULL;
-	      vec_step = gimple_build_vector (&stmts, &elts);
+	      vec_step = gimple_build_vector (&stmts, &step_elts);
 	      vec_step = gimple_convert (&stmts, step_vectype, vec_step);
 	      vec_steps.safe_push (vec_step);
 	      tree step_mul = gimple_build_vector (&stmts, &mul_elts);
+	      if (peel_mul)
+		step_mul = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+					 step_mul, peel_mul);
+	      if (!init_node)
+		vec_init = gimple_build_vector (&stmts, &init_elts);
 	      if (stmts)
 		{
 		  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
@@ -7926,7 +7964,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
 	      add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
 			   UNKNOWN_LOCATION);
 
-	      vec_init = vect_get_slp_vect_def (init_node, ivn);
+	      if (init_node)
+		vec_init = vect_get_slp_vect_def (init_node, ivn);
 	      if (!integer_zerop (step_mul))
 		{
 		  stmts = NULL;
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 63a59c0c8ed..e97fbe897a7 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1444,9 +1444,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
       if (def_type == vect_induction_def)
 	{
 	  /* Induction PHIs are not cycles but walk the initial
-	     value.  */
+	     value.  Only for inner loops though, for outer loops
+	     we need to pick up the value from the actual PHIs
+	     to more easily support peeling and epilogue vectorization.  */
 	  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-	  if (nested_in_vect_loop_p (loop, stmt_info))
+	  if (!nested_in_vect_loop_p (loop, stmt_info))
+	    skip_args[loop_preheader_edge (loop)->dest_idx] = true;
+	  else
 	    loop = loop->inner;
 	  skip_args[loop_latch_edge (loop)->dest_idx] = true;
 	}
-- 
2.30.2
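
For illustration, here is a standalone scalar sketch of the vector IV
construction the patched SLP code performs for the testcase's init loop.
The 8-lane width is an assumption (a 128-bit vector of unsigned short,
as -mavx2 targets would use or wider); LANES, GROUP, ITERS_PER_VEC and
the simulation itself are hypothetical names for this sketch, not GCC
code.  With group_size == 2 interleaved inductions (i*7 and i*8) and
const_nunits == 8, one vector IV covers four scalar iterations; its
initial lanes combine the scalar PHI initial values with the per-lane
step_mul, exactly as init_elts/step_elts/mul_elts are filled above.

  /* Sketch: simulate the SLP-vectorized induction for
       b[2*i]   = i * 7;
       b[2*i+1] = i * 8;
     with an assumed 8-lane vector; lane k carries induction k % GROUP
     at scalar iteration k / GROUP.  */
  #include <stdlib.h>

  #define LANES 8
  #define GROUP 2
  #define ITERS_PER_VEC (LANES / GROUP)	/* 4 scalar iterations per vector */

  int
  main (void)
  {
    unsigned short b[158 * 2], iv[LANES], step[LANES];
    unsigned int i, lane;

    /* Build the vector IV: init + step_mul * step per lane (init_elts,
       mul_elts), and the whole-vector step (step_elts scaled).  */
    for (lane = 0; lane < LANES; lane++)
      {
	unsigned int s = (lane % GROUP == 0) ? 7 : 8;	/* scalar steps */
	iv[lane] = (lane / GROUP) * s;			/* init + step_mul*s */
	step[lane] = ITERS_PER_VEC * s;			/* vector step */
      }

    /* Vectorized main loop ...  */
    for (i = 0; i + ITERS_PER_VEC <= 158; i += ITERS_PER_VEC)
      for (lane = 0; lane < LANES; lane++)
	{
	  b[i * GROUP + lane] = iv[lane];
	  iv[lane] += step[lane];
	}
    /* ... and the scalar epilogue, which restarts from the original
       scalar PHIs -- the reason the initial values must stay on the
       PHIs instead of becoming tracked SLP children.  */
    for (; i < 158; i++)
      {
	b[i * 2] = i * 7;
	b[i * 2 + 1] = i * 8;
      }

    for (i = 0; i < 158; i++)
      if (b[i * 2] != (unsigned short) (i * 7)
	  || b[i * 2 + 1] != (unsigned short) (i * 8))
	abort ();
    return 0;
  }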