From ac6affba97130bcbffb21bd9f8ca53c7aac89551 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Tue, 3 Nov 2020 11:52:47 +0100
Subject: [PATCH] tree-optimization/80928 - SLP vectorize nested loop induction

This adds SLP vectorization of nested inductions.
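
For instance, a loop nest like the following (a condensed sketch of the
new vect-outer-slp-3.c testcase; foo, image, k and l are taken from that
file) has two inner-loop inductions, k and l, whose initial values are
defined in the outer loop.  They now form a two-element SLP group instead
of making vectorizable_induction give up on outer-loop vectorization:

  double image[40];

  void
  foo (void)
  {
    for (int i = 0; i < 20; i++)
      {
        double suma = 0;
        double sumb = 0;
        /* Initial values defined by the outer loop.  */
        int k = image[2*i];
        int l = image[2*i+1];
        for (int j = 0; j < 40; j++)
          {
            suma += k+i;
            sumb += l+i;
            /* k and l are the nested inductions, now handled as one
               two-element SLP group.  */
            k++;
            l++;
          }
        image[2*i] = suma;
        image[2*i+1] = sumb;
      }
  }

Implementation-wise the IV initialization code is now collected in a
single init_stmts sequence that is inserted on the preheader edge once;
for the nested case we use one IV per vector stmt (nivs = nvects) and
do not need the step_mul adjustment of the initial values.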
gimple_stmt_iterator si; gphi *phi = dyn_cast (stmt_info->stmt); @@ -7726,10 +7725,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, return false; } - /* FORNOW: outer loop induction with SLP not supported. */ - if (STMT_SLP_TYPE (stmt_info)) - return false; - exit_phi = NULL; latch_e = loop_latch_edge (loop->inner); loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); @@ -7875,32 +7870,37 @@ vectorizable_induction (loop_vec_info loop_vinfo, /* Now generate the IVs. */ unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); gcc_assert ((const_nunits * nvects) % group_size == 0); - unsigned nivs = least_common_multiple (group_size, - const_nunits) / const_nunits; - unsigned lup_mul = (nvects * const_nunits) / group_size; + unsigned nivs; + if (nested_in_vect_loop) + nivs = nvects; + else + nivs = least_common_multiple (group_size, + const_nunits) / const_nunits; tree stept = TREE_TYPE (step_vectype); - tree lupdate_mul - = build_vector_from_val (step_vectype, - SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, lup_mul, - UNSIGNED) - : build_int_cstu (stept, lup_mul)); + tree lupdate_mul = NULL_TREE; + if (!nested_in_vect_loop) + { + /* The number of iterations covered in one vector iteration. */ + unsigned lup_mul = (nvects * const_nunits) / group_size; + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, lup_mul, + UNSIGNED) + : build_int_cstu (stept, lup_mul)); + } tree peel_mul = NULL_TREE; + gimple_seq init_stmts = NULL; if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) { - stmts = NULL; if (SCALAR_FLOAT_TYPE_P (stept)) - peel_mul = gimple_build (&stmts, FLOAT_EXPR, stept, + peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); else - peel_mul = gimple_convert (&stmts, stept, + peel_mul = gimple_convert (&init_stmts, stept, LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); - peel_mul = gimple_build_vector_from_val (&stmts, step_vectype, peel_mul); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } + peel_mul = gimple_build_vector_from_val (&init_stmts, + step_vectype, peel_mul); } unsigned ivn; auto_vec vec_steps; @@ -7911,34 +7911,31 @@ vectorizable_induction (loop_vec_info loop_vinfo, tree_vector_builder mul_elts (step_vectype, const_nunits, 1); for (unsigned eltn = 0; eltn < const_nunits; ++eltn) { + /* The scalar steps of the IVs. */ tree elt = steps[(ivn*const_nunits + eltn) % group_size]; step_elts.quick_push (elt); if (!init_node) { + /* The scalar inits of the IVs if not vectorized. */ elt = inits[(ivn*const_nunits + eltn) % group_size]; init_elts.quick_push (elt); } + /* The number of steps to add to the initial values. */ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) ? 
build_real_from_wide (stept, mul_elt, UNSIGNED) : build_int_cstu (stept, mul_elt)); } - stmts = NULL; - vec_step = gimple_build_vector (&stmts, &step_elts); - vec_step = gimple_convert (&stmts, step_vectype, vec_step); + vec_step = gimple_build_vector (&init_stmts, &step_elts); + vec_step = gimple_convert (&init_stmts, step_vectype, vec_step); vec_steps.safe_push (vec_step); - tree step_mul = gimple_build_vector (&stmts, &mul_elts); + tree step_mul = gimple_build_vector (&init_stmts, &mul_elts); if (peel_mul) - gimple_build (&stmts, PLUS_EXPR, step_vectype, - step_mul, peel_mul); + step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + step_mul, peel_mul); if (!init_node) - vec_init = gimple_build_vector (&stmts, &init_elts); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } + vec_init = gimple_build_vector (&init_stmts, &init_elts); /* Create the induction-phi that defines the induction-operand. */ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, @@ -7947,15 +7944,11 @@ vectorizable_induction (loop_vec_info loop_vinfo, induc_def = PHI_RESULT (induction_phi); /* Create the iv update inside the loop */ - stmts = NULL; - tree up = gimple_build (&stmts, MULT_EXPR, step_vectype, - vec_step, lupdate_mul); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - stmts = NULL; + tree up = vec_step; + if (lupdate_mul) + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, lupdate_mul); + gimple_seq stmts = NULL; vec_def = gimple_convert (&stmts, step_vectype, induc_def); vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up); @@ -7966,17 +7959,15 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (init_node) vec_init = vect_get_slp_vect_def (init_node, ivn); - if (!integer_zerop (step_mul)) + if (!nested_in_vect_loop + && !integer_zerop (step_mul)) { - stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, vec_init); - up = gimple_build (&stmts, MULT_EXPR, step_vectype, + vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, vec_step, step_mul); - vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, vec_def, up); - vec_init = gimple_convert (&stmts, vectype, vec_def); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + vec_init = gimple_convert (&init_stmts, vectype, vec_def); } /* Set the arguments of the phi node: */ @@ -8001,19 +7992,11 @@ vectorizable_induction (loop_vec_info loop_vinfo, { gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; tree def = gimple_get_lhs (iv); - gimple_seq stmts = NULL; if (ivn < 2*nivs) - { - vec_steps[ivn - nivs] - = gimple_build (&stmts, MULT_EXPR, step_vectype, - vec_steps[ivn - nivs], lupdate_mul); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - stmts = NULL; + vec_steps[ivn - nivs] + = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_steps[ivn - nivs], lupdate_mul); + gimple_seq stmts = NULL; def = gimple_convert (&stmts, step_vectype, def); def = gimple_build (&stmts, PLUS_EXPR, step_vectype, def, vec_steps[ivn % nivs]); @@ -8030,13 +8013,16 @@ vectorizable_induction (loop_vec_info loop_vinfo, } } + new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); + gcc_assert (!new_bb); + return true; } init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge 
(iv_loop)); - stmts = NULL; + gimple_seq stmts = NULL; if (!nested_in_vect_loop) { /* Convert the initial value to the IV update type. */ -- 2.30.2