From 9b999e8c82296dc7acee8e74b49cab47004c47a5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 8 Jun 2015 12:40:20 +0000 Subject: [PATCH] tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps at the end of a SLP load group properly. 2015-06-08 Richard Biener * tree-vect-stmts.c (vectorizable_load): Compute the pointer adjustment for gaps at the end of a SLP load group properly. * tree-vect-slp.c (vect_supported_load_permutation_p): Allow all permutations we can generate. (vect_transform_slp_perm_load): Use the correct group-size. * gcc.dg/vect/slp-perm-10.c: New testcase. * gcc.dg/vect/slp-23.c: Adjust. * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration vector pointer update. From-SVN: r224221 --- gcc/ChangeLog | 8 ++++ gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/gcc.dg/torture/pr53366-2.c | 23 +++++----- gcc/testsuite/gcc.dg/vect/slp-23.c | 3 +- gcc/testsuite/gcc.dg/vect/slp-perm-10.c | 53 ++++++++++++++++++++++++ gcc/tree-vect-slp.c | 41 +++--------------- gcc/tree-vect-stmts.c | 25 +++++------ 7 files changed, 101 insertions(+), 59 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-perm-10.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e7616d3226e..b84e325c716 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2015-06-08 Richard Biener + + * tree-vect-stmts.c (vectorizable_load): Compute the pointer + adjustment for gaps at the end of a SLP load group properly. + * tree-vect-slp.c (vect_supported_load_permutation_p): Allow + all permutations we can generate. + (vect_transform_slp_perm_load): Use the correct group-size. + 2015-06-08 Marc Glisse * genmatch.c (expr::gen_transform): For conditions, guess the type diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c7902233b31..46e207bb980 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2015-06-08 Richard Biener + + * gcc.dg/vect/slp-perm-10.c: New testcase. + * gcc.dg/vect/slp-23.c: Adjust. 
+ * gcc.dg/torture/pr53366-2.c: Also verify cross-iteration + vector pointer update. + 2015-06-08 Marek Polacek PR sanitizer/66452 diff --git a/gcc/testsuite/gcc.dg/torture/pr53366-2.c b/gcc/testsuite/gcc.dg/torture/pr53366-2.c index 6be6a5610bf..b141b5e717e 100644 --- a/gcc/testsuite/gcc.dg/torture/pr53366-2.c +++ b/gcc/testsuite/gcc.dg/torture/pr53366-2.c @@ -4,18 +4,18 @@ extern void abort (void); struct T { float r[3], i[3]; }; -struct U { struct T j[2]; }; +struct U { struct T j[4]; }; void __attribute__ ((noinline)) foo (struct U *__restrict y, const float _Complex *__restrict x) { int i, j; - for (j = 0; j < 2; ++j) + for (j = 0; j < 4; ++j) { float a = __real__ x[j]; float b = __imag__ x[j]; - float c = __real__ x[j + 2]; - float d = __imag__ x[j + 2]; + float c = __real__ x[j + 4]; + float d = __imag__ x[j + 4]; for (i = 0; i < 3; ++i) { y->j[j].r[i] = y->j[j].r[i] + a + c; @@ -24,20 +24,23 @@ foo (struct U *__restrict y, const float _Complex *__restrict x) } } -_Complex float x[4]; +_Complex float x[8]; struct U y; int main () { int i, j; - for (i = 0; i < 4; ++i) - x[i] = i + 1.0iF * (2 * i); + for (i = 0; i < 8; ++i) + { + x[i] = i + 1.0iF * (2 * i); + __asm__ volatile (""); + } foo (&y, x); - for (j = 0; j < 2; ++j) + for (j = 0; j < 4; ++j) for (i = 0; i < 3; ++i) - if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) - || y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) + if (y.j[j].r[i] != __real__ (x[j] + x[j + 4]) + || y.j[j].i[i] != __imag__ (x[j] + x[j + 4])) __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c index cc5df2a19b9..b1fe6e4272e 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-23.c +++ b/gcc/testsuite/gcc.dg/vect/slp-23.c @@ -108,5 +108,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! 
{ vect_strided8 || vect_no_align } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-10.c b/gcc/testsuite/gcc.dg/vect/slp-perm-10.c new file mode 100644 index 00000000000..1c2a8580d3c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-10.c @@ -0,0 +1,53 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +int a[256], b[256]; + +void __attribute__((noinline)) +foo (void) +{ + int i; + for (i = 0; i < 32; ++i) + { + b[i*8+0] = a[i*8+0]; + b[i*8+1] = a[i*8+0]; + b[i*8+2] = a[i*8+3]; + b[i*8+3] = a[i*8+3]; + b[i*8+4] = a[i*8+4]; + b[i*8+5] = a[i*8+6]; + b[i*8+6] = a[i*8+4]; + b[i*8+7] = a[i*8+6]; + } +} + +int main () +{ + int i; + + check_vect (); + + for (i = 0; i < 256; ++i) + { + a[i] = i; + __asm__ volatile (""); + } + + foo (); + + for (i = 0; i < 32; ++i) + if (b[i*8+0] != i*8+0 + || b[i*8+1] != i*8+0 + || b[i*8+2] != i*8+3 + || b[i*8+3] != i*8+3 + || b[i*8+4] != i*8+4 + || b[i*8+5] != i*8+6 + || b[i*8+6] != i*8+4 + || b[i*8+7] != i*8+6) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index b09c2c3300b..612099878d1 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1497,47 +1497,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn) return true; } - /* FORNOW: the only supported permutation is 0..01..1.. of length equal to - GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as - well (unless it's reduction). 
*/ - if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size) - return false; - FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) - if (!node->load_permutation.exists ()) - return false; - - load_index = sbitmap_alloc (group_size); - bitmap_clear (load_index); - FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) - { - unsigned int lidx = node->load_permutation[0]; - if (bitmap_bit_p (load_index, lidx)) - { - sbitmap_free (load_index); - return false; - } - bitmap_set_bit (load_index, lidx); - FOR_EACH_VEC_ELT (node->load_permutation, j, k) - if (k != lidx) - { - sbitmap_free (load_index); - return false; - } - } - for (i = 0; i < group_size; i++) - if (!bitmap_bit_p (load_index, i)) - { - sbitmap_free (load_index); - return false; - } - sbitmap_free (load_index); - + /* For loop vectorization verify we can generate the permutation. */ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true)) return false; + return true; } @@ -3282,6 +3249,8 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) return false; + stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); + /* Generate permutation masks for every NODE. Number of masks for each NODE is equal to GROUP_SIZE. 
E.g., we have a group of three nodes with three loads from the same @@ -3316,7 +3285,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, for (k = 0; k < group_size; k++) { i = SLP_TREE_LOAD_PERMUTATION (node)[k]; - first_mask_element = i + j * group_size; + first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info); if (!vect_get_mask_element (stmt, first_mask_element, 0, nunits, only_one_vec, index, mask, &current_mask_element, diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 75832407d5b..6f31312e5dc 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5802,7 +5802,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, gimple ptr_incr = NULL; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies; - int i, j, group_size = -1, group_gap; + int i, j, group_size = -1, group_gap_adj; tree msq = NULL_TREE, lsq; tree offset = NULL_TREE; tree byte_offset = NULL_TREE; @@ -6391,26 +6391,24 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + group_gap_adj = 0; /* VEC_NUM is the number of vect stmts to be created for this group. */ if (slp) { grouped_load = false; vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); + group_gap_adj = vf * group_size - nunits * vec_num; } else - { - vec_num = group_size; - group_gap = 0; - } + vec_num = group_size; } else { first_stmt = stmt; first_dr = dr; group_size = vec_num = 1; - group_gap = 0; + group_gap_adj = 0; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); @@ -6826,12 +6824,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (slp && !slp_perm) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); } - /* Bump the vector pointer to account for a gap. 
*/ - if (slp && group_gap != 0) + /* Bump the vector pointer to account for a gap or for excess + elements loaded for a permuted SLP load. */ + if (group_gap_adj != 0) { - tree bump = size_binop (MULT_EXPR, - TYPE_SIZE_UNIT (elem_type), - size_int (group_gap)); + bool ovf; + tree bump + = wide_int_to_tree (sizetype, + wi::smul (TYPE_SIZE_UNIT (elem_type), + group_gap_adj, &ovf)); dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, bump); } -- 2.30.2