From b267968e676627af45f63c64f4c8c4a0f1939561 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 18 Jul 2017 13:55:47 +0000 Subject: [PATCH] re PR tree-optimization/81410 (-O3 breaks code) 2017-07-18 Richard Biener PR tree-optimization/81410 * tree-vect-stmts.c (vectorizable_load): Properly adjust for the gap in the ! slp_perm SLP case after each group. * gcc.dg/vect/pr81410.c: New testcase. From-SVN: r250312 --- gcc/ChangeLog | 6 +++++ gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.dg/vect/pr81410.c | 38 +++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 32 +++++++++++++++++++++--- 4 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr81410.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 601de6a110a..d6ffb757d01 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-07-18 Richard Biener + + PR tree-optimization/81410 + * tree-vect-stmts.c (vectorizable_load): Properly adjust for + the gap in the ! slp_perm SLP case after each group. + 2017-07-18 Jan Hubicka PR middle-end/81463 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 01f24abca7c..80d57b8773d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-07-18 Richard Biener + + PR tree-optimization/81410 + * gcc.dg/vect/pr81410.c: New testcase. 
+ 2017-07-18 Jan Hubicka PR middle-end/81462 diff --git a/gcc/testsuite/gcc.dg/vect/pr81410.c b/gcc/testsuite/gcc.dg/vect/pr81410.c new file mode 100644 index 00000000000..929850f2391 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr81410.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_long_long } */ + +#include "tree-vect.h" + +typedef long long uint64_t; +uint64_t x[24]; +uint64_t y[16]; +uint64_t z[8]; + +void __attribute__((noinline)) foo() +{ + for (int i = 0; i < 8; ++i) + { + y[2*i] = x[3*i]; + y[2*i + 1] = x[3*i + 1]; + z[i] = 1; + } +} + +int main() +{ + check_vect (); + + for (int i = 0; i < 24; ++i) + { + x[i] = i; + __asm__ volatile ("" : : : "memory"); + } + foo (); + for (int i = 0; i < 8; ++i) + if (y[2*i] != 3*i || y[2*i+1] != 3*i + 1) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 90770a35af1..c50555499d2 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -7118,6 +7118,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, { first_stmt = GROUP_FIRST_ELEMENT (stmt_info); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); /* For SLP vectorization we directly vectorize a subchain without permutation. */ if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) @@ -7153,10 +7154,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, not only the number of vector stmts the permutation result fits in. 
*/ if (slp_perm) - vec_num = (group_size * vf + nunits - 1) / nunits; + { + vec_num = (group_size * vf + nunits - 1) / nunits; + group_gap_adj = vf * group_size - nunits * vec_num; + } else - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_gap_adj = vf * group_size - nunits * vec_num; + { + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + group_gap_adj = group_gap; + } } else vec_num = group_size; @@ -7316,6 +7322,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, aggr_type = vectype; prev_stmt_info = NULL; + int group_elt = 0; for (j = 0; j < ncopies; j++) { /* 1. Create the vector or array pointer update chain. */ @@ -7603,10 +7610,27 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Store vector loads in the corresponding SLP_NODE. */ if (slp && !slp_perm) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + + /* With SLP permutation we load the gaps as well, without + we need to skip the gaps after we manage to fully load + all elements. group_gap_adj is GROUP_GAP here. */ + group_elt += nunits; + if (group_gap_adj != 0 && ! slp_perm + && group_elt == group_size - group_gap_adj) + { + bool ovf; + tree bump + = wide_int_to_tree (sizetype, + wi::smul (TYPE_SIZE_UNIT (elem_type), + group_gap_adj, &ovf)); + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, + stmt, bump); + group_elt = 0; + } } /* Bump the vector pointer to account for a gap or for excess elements loaded for a permuted SLP load. */ - if (group_gap_adj != 0) + if (group_gap_adj != 0 && slp_perm) { bool ovf; tree bump -- 2.30.2