From e09b4c37aca7a12184b3bbd1692601765769fb1b Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Wed, 8 Jun 2016 13:17:41 +0000
Subject: [PATCH] tree-vect-stmts.c (vectorizable_load): Remove restrictions
 on strided SLP loads and fall back to scalar loads in case...

2016-06-08  Richard Biener  <rguenther@suse.de>

	* tree-vect-stmts.c (vectorizable_load): Remove restrictions
	on strided SLP loads and fall back to scalar loads in case
	we can't chunk them.

	* gcc.dg/vect/slp-43.c: New testcase.

From-SVN: r237215
---
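Notes (illustrative material added in editing, not part of the commit):

The restriction removed here made vectorizable_load give up, with the
"unhandled strided group load" message, on a strided SLP group whose
size neither divides nor is a multiple of the number of vector
elements.  Below is a minimal sketch of such an access pattern,
assuming V16QI vectors (nunits == 16) so that a group size of 3 hits
the new scalar-load fallback; the function name mirrors the
testcase's foo_char_3 expansion but is otherwise an illustrative
assumption:

  /* An SLP group of 3 chars per outer iteration, loaded with stride
     s.  Since 16 % 3 != 0, the old code rejected this; after the
     patch the group is loaded element by element and the vectors are
     assembled with CONSTRUCTORs.  */
  void
  foo_char_3 (char *__restrict__ out, char *__restrict__ in, int s)
  {
    for (int i = 0; i < 16; i++)
      {
        out[3 * i + 0] = in[s * 3 * i + 0];
        out[3 * i + 1] = in[s * 3 * i + 1];
        out[3 * i + 2] = in[s * 3 * i + 2];
      }
  }

The new chunking logic in vectorizable_load picks one of three cases:
nunits / group_size sub-vector loads per vector when group_size
divides nunits, one whole-vector load when group_size is a multiple
of nunits, and otherwise nunits scalar element loads (nloads ==
nunits, lnel == 1).
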
 gcc/ChangeLog                      |  6 ++
 gcc/testsuite/ChangeLog            |  4 ++
 gcc/testsuite/gcc.dg/vect/slp-43.c | 78 +++++++++++++++++++++++++
 gcc/tree-vect-stmts.c              | 94 +++++++++++++++---------------
 4 files changed, 134 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-43.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4f5d3177f1e..474b064ff04 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-06-08  Richard Biener  <rguenther@suse.de>
+
+	* tree-vect-stmts.c (vectorizable_load): Remove restrictions
+	on strided SLP loads and fall back to scalar loads in case
+	we can't chunk them.
+
 2016-06-08  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/71452
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 28b17cf4d02..1d80915d65e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-06-08  Richard Biener  <rguenther@suse.de>
+
+	* gcc.dg/vect/slp-43.c: New testcase.
+
 2016-06-08  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/71452
diff --git a/gcc/testsuite/gcc.dg/vect/slp-43.c b/gcc/testsuite/gcc.dg/vect/slp-43.c
new file mode 100644
index 00000000000..4e8df46c863
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-43.c
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+#include <string.h>
+#include "tree-vect.h"
+
+#define FOO(T,N) \
+void __attribute__((noinline,noclone)) \
+foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+{ \
+  T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+  T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+  for (int i = 0; i < 16; i++) \
+    { \
+      for (int j = 0; j < N; ++j) \
+	out[j] = in[j]; \
+      in += s*N; \
+      out += N; \
+    } \
+}
+
+#define TEST(T,N) \
+ do { \
+  memset (out, 0, 4096); \
+  foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+  if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+    __builtin_abort (); \
+  for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+    if (out[i] != 0) \
+      __builtin_abort (); \
+ } while (0)
+
+FOO(char, 1)
+FOO(char, 2)
+FOO(char, 3)
+FOO(char, 4)
+FOO(char, 6)
+FOO(char, 8)
+FOO(int, 1)
+FOO(int, 2)
+FOO(int, 3)
+FOO(int, 4)
+FOO(int, 6)
+FOO(int, 8)
+FOO(int, 16)
+
+char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 4096; ++i)
+    {
+      in[i] = i;
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  TEST(char, 1);
+  TEST(char, 2);
+  TEST(char, 3);
+  TEST(char, 4);
+  TEST(char, 6);
+  TEST(char, 8);
+  TEST(int, 1);
+  TEST(int, 2);
+  TEST(int, 3);
+  TEST(int, 4);
+  TEST(int, 6);
+  TEST(int, 8);
+  TEST(int, 16);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 373ecd7278d..bee064ef38e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6440,17 +6440,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	}
     }
   else if (STMT_VINFO_STRIDED_P (stmt_info))
-    {
-      if (grouped_load
-	  && slp
-	  && (group_size > nunits
-	      || nunits % group_size != 0))
-	{
-	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			   "unhandled strided group load\n");
-	  return false;
-	}
-    }
+    ;
   else
     {
       negative = tree_int_cst_compare (nested_in_vect_loop
@@ -6744,16 +6734,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       running_off = offvar;
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
       int nloads = nunits;
+      int lnel = 1;
       tree ltype = TREE_TYPE (vectype);
       auto_vec<tree> dr_chain;
       if (slp)
 	{
-	  nloads = nunits / group_size;
-	  if (group_size < nunits)
-	    ltype = build_vector_type (TREE_TYPE (vectype), group_size);
-	  else
-	    ltype = vectype;
-	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+	  if (group_size < nunits
+	      && nunits % group_size == 0)
+	    {
+	      nloads = nunits / group_size;
+	      lnel = group_size;
+	      ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+	      ltype = build_aligned_type (ltype,
+					  TYPE_ALIGN (TREE_TYPE (vectype)));
+	    }
+	  else if (group_size >= nunits
+		   && group_size % nunits == 0)
+	    {
+	      nloads = 1;
+	      lnel = nunits;
+	      ltype = vectype;
+	      ltype = build_aligned_type (ltype,
+					  TYPE_ALIGN (TREE_TYPE (vectype)));
+	    }
 	  /* For SLP permutation support we need to load the whole group,
 	     not only the number of vector stmts the permutation result
 	     fits in.  */
@@ -6765,48 +6768,43 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	  else
 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
 	}
+      int group_el = 0;
+      unsigned HOST_WIDE_INT
+	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
       for (j = 0; j < ncopies; j++)
 	{
-	  tree vec_inv;
-	  if (nloads > 1)
+	  vec_alloc (v, nloads);
+	  for (i = 0; i < nloads; i++)
 	    {
-	      vec_alloc (v, nloads);
-	      for (i = 0; i < nloads; i++)
+	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
+					     group_el * elsz);
+	      new_stmt = gimple_build_assign (make_ssa_name (ltype),
+					      build2 (MEM_REF, ltype,
+						      running_off, this_off));
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      if (nloads > 1)
+		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+					gimple_assign_lhs (new_stmt));
+
+	      group_el += lnel;
+	      if (! slp
+		  || group_el == group_size)
 		{
-		  tree newref, newoff;
-		  gimple *incr;
-		  newref = build2 (MEM_REF, ltype, running_off, alias_off);
-
-		  newref = force_gimple_operand_gsi (gsi, newref, true,
-						     NULL_TREE, true,
-						     GSI_SAME_STMT);
-		  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
-		  newoff = copy_ssa_name (running_off);
-		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-					      running_off, stride_step);
+		  tree newoff = copy_ssa_name (running_off);
+		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+						      running_off, stride_step);
 		  vect_finish_stmt_generation (stmt, incr, gsi);
 		  running_off = newoff;
+		  group_el = 0;
 		}
-
-	      vec_inv = build_constructor (vectype, v);
-	      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
 	    }
-	  else
+	  if (nloads > 1)
 	    {
-	      new_stmt = gimple_build_assign (make_ssa_name (ltype),
-					      build2 (MEM_REF, ltype,
-						      running_off, alias_off));
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-	      tree newoff = copy_ssa_name (running_off);
-	      gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-						  running_off, stride_step);
-	      vect_finish_stmt_generation (stmt, incr, gsi);
-
-	      running_off = newoff;
+	      tree vec_inv = build_constructor (vectype, v);
+	      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
 	    }
 
 	  if (slp)
-- 
2.30.2
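
Illustrative follow-up, not part of the original submission: the
whole-vector chunking direction is what the int/16 variant of the
testcase exercises.  Assuming V4SI vectors (nunits == 4), a group
size of 16 is a multiple of nunits, so each vector is produced by a
single full-vector load (nloads == 1, lnel == nunits) rather than
being assembled from scalars, and the strided pointer is stepped once
per group instead of once per element:

  /* A group of 16 ints per outer iteration, loaded with stride s;
     with V4SI vectors the group is covered by four whole-vector
     loads before running_off advances by stride_step.  */
  void
  foo_int_16 (int *__restrict__ out, int *__restrict__ in, int s)
  {
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        out[16 * i + j] = in[s * 16 * i + j];
  }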