From b17dc4d4e42c4366f60ad79cdc31f7b1260e058e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 15 Jun 2016 09:54:17 +0000 Subject: [PATCH] tree-vect-stmts.c (vectorizable_store): Remove strided grouped store restrictions. 2016-06-15 Richard Biener * tree-vect-stmts.c (vectorizable_store): Remove strided grouped store restrictions. * gcc.dg/vect/slp-45.c: New testcase. From-SVN: r237474 --- gcc/ChangeLog | 5 ++ gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.dg/vect/slp-45.c | 78 ++++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 67 +++++++++++++++---------- 4 files changed, 129 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-45.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f593bd52b70..ebc4d6eeca1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2016-06-15 Richard Biener + + * tree-vect-stmts.c (vectorizable_store): Remove strided grouped + store restrictions. + 2016-06-15 Richard Biener * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Do diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3eaaa91b033..679f0225109 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2016-06-15 Richard Biener + + * gcc.dg/vect/slp-45.c: New testcase. + 2016-06-15 Richard Biener * gcc.dg/vect/bb-slp-pattern-2.c: Disable loop vectorization. 
diff --git a/gcc/testsuite/gcc.dg/vect/slp-45.c b/gcc/testsuite/gcc.dg/vect/slp-45.c new file mode 100644 index 00000000000..be721cb7102 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-45.c @@ -0,0 +1,78 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +#include <string.h> +#include "tree-vect.h" + +#define FOO(T,N) \ +void __attribute__((noinline,noclone)) \ +foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \ +{ \ + T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \ + T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \ + for (int i = 0; i < 16; i++) \ + { \ + for (int j = 0; j < N; ++j) \ + out[j] = in[j]; \ + in += N; \ + out += s*N; \ + } \ +} + +#define TEST(T,N) \ + do { \ + memset (out, 0, 4096); \ + foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \ + if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \ + __builtin_abort (); \ + for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \ + if (out[i] != 0) \ + __builtin_abort (); \ + } while (0) + +FOO(char, 1) +FOO(char, 2) +FOO(char, 3) +FOO(char, 4) +FOO(char, 6) +FOO(char, 8) +FOO(int, 1) +FOO(int, 2) +FOO(int, 3) +FOO(int, 4) +FOO(int, 6) +FOO(int, 8) +FOO(int, 16) + +char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__))); +char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__))); + +int main() +{ + check_vect (); + + for (int i = 0; i < 4096; ++i) + { + in[i] = i; + __asm__ volatile ("" : : : "memory"); + } + + TEST(char, 1); + TEST(char, 2); + TEST(char, 3); + TEST(char, 4); + TEST(char, 6); + TEST(char, 8); + TEST(int, 1); + TEST(int, 2); + TEST(int, 3); + TEST(int, 4); + TEST(int, 6); + TEST(int, 8); + TEST(int, 16); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index bee064ef38e..c74f14f0205 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5234,6 +5234,7 @@ 
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, enum vect_def_type scatter_idx_dt = vect_unknown_def_type; enum vect_def_type scatter_src_dt = vect_unknown_def_type; gimple *new_stmt; + int vf; if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; @@ -5270,7 +5271,12 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); if (loop_vinfo) - loop = LOOP_VINFO_LOOP (loop_vinfo); + { + loop = LOOP_VINFO_LOOP (loop_vinfo); + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + } + else + vf = 1; /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in @@ -5365,16 +5371,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, return false; } - if (STMT_VINFO_STRIDED_P (stmt_info) - && slp - && (group_size > nunits - || nunits % group_size != 0)) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "unhandled strided group store\n"); - return false; - } - if (first_stmt == stmt) { /* STMT is the leader of the group. 
Check the operands of all the @@ -5653,23 +5649,31 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, */ unsigned nstores = nunits; + unsigned lnel = 1; tree ltype = elem_type; if (slp) { - nstores = nunits / group_size; - if (group_size < nunits) - ltype = build_vector_type (elem_type, group_size); - else - ltype = vectype; + if (group_size < nunits + && nunits % group_size == 0) + { + nstores = nunits / group_size; + lnel = group_size; + ltype = build_vector_type (elem_type, group_size); + } + else if (group_size >= nunits + && group_size % nunits == 0) + { + nstores = 1; + lnel = nunits; + ltype = vectype; + } ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_size = 1; } ivstep = stride_step; ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, - build_int_cst (TREE_TYPE (ivstep), - ncopies * nstores)); + build_int_cst (TREE_TYPE (ivstep), vf)); standard_iv_increment_position (loop, &incr_gsi, &insert_after); @@ -5700,6 +5704,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, vect_finish_stmt_generation (stmt, incr, gsi); running_off = newoff; } + unsigned int group_el = 0; + unsigned HOST_WIDE_INT + elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); for (j = 0; j < ncopies; j++) { /* We've set op and dt above, from gimple_assign_rhs1(stmt), @@ -5745,19 +5752,27 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, NULL_TREE, true, GSI_SAME_STMT); + tree this_off = build_int_cst (TREE_TYPE (alias_off), + group_el * elsz); newref = build2 (MEM_REF, ltype, - running_off, alias_off); + running_off, this_off); /* And store it to *running_off. 
*/ assign = gimple_build_assign (newref, elem); vect_finish_stmt_generation (stmt, assign, gsi); - newoff = copy_ssa_name (running_off, NULL); - incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, - running_off, stride_step); - vect_finish_stmt_generation (stmt, incr, gsi); + group_el += lnel; + if (! slp + || group_el == group_size) + { + newoff = copy_ssa_name (running_off, NULL); + incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, + running_off, stride_step); + vect_finish_stmt_generation (stmt, incr, gsi); - running_off = newoff; + running_off = newoff; + group_el = 0; + } if (g == group_size - 1 && !slp) { @@ -5771,6 +5786,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, } } next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); + if (slp) + break; } return true; } -- 2.30.2