From: Richard Sandiford Date: Wed, 22 Aug 2018 12:59:08 +0000 (+0000) Subject: Make the vectoriser drop to strided accesses for stores with gaps X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=203942b8af64926d787b4a545184866f9572978d;p=gcc.git Make the vectoriser drop to strided accesses for stores with gaps We could vectorise: for (...) { a[0] = ...; a[1] = ...; a[2] = ...; a[3] = ...; a += stride; } (including the case when stride == 8) but not: for (...) { a[0] = ...; a[1] = ...; a[2] = ...; a[3] = ...; a += 8; } (where the stride is always 8). The former was treated as a "grouped and strided" store, while the latter was treated as a grouped store with gaps, which we don't support. This patch makes us treat groups of stores with gaps at the end as strided groups too. I tried to go through all uses of STMT_VINFO_STRIDED_P and all vector uses of DR_STEP to see whether there were any hard-baked assumptions, but couldn't see any. I wondered whether we should relax: /* We do not have to consider dependences between accesses that belong to the same group, unless the stride could be smaller than the group size. */ if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a) && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a) == DR_GROUP_FIRST_ELEMENT (stmtinfo_b)) && !STMT_VINFO_STRIDED_P (stmtinfo_a)) return false; for cases in which the step is constant and the absolute step is known to be greater than the group size, but data dependence analysis should already return chrec_known for those cases. The new test is a version of vect-avg-15.c with the variable step replaced by a constant one. A natural follow-on would be to do the same for groups with gaps in the middle: /* Check that the distance between two accesses is equal to the type size. Otherwise, we have gaps. */ diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) - TREE_INT_CST_LOW (prev_init)) / type_size; if (diff != 1) { [...] if (DR_IS_WRITE (data_ref)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "interleaved store with gaps\n"); return false; } But I think we should do that separately and see what the fallout from this change is first. 2018-08-22 Richard Sandiford gcc/ * tree-vect-data-refs.c (vect_analyze_group_access_1): Convert grouped stores with gaps to a strided group. gcc/testsuite/ * gcc.dg/vect/vect-avg-16.c: New test. * gcc.dg/vect/slp-37.c: Expect the loop to be vectorized. * gcc.dg/vect/vect-strided-u8-i8-gap4.c, * gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for the second loop in main1. From-SVN: r263772 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 099cf968b14..6fdb1b62d2d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2018-08-22 Richard Sandiford + + * tree-vect-data-refs.c (vect_analyze_group_access_1): Convert + grouped stores with gaps to a strided group. + 2018-08-22 Richard Sandiford * tree-vect-stmts.c (get_group_load_store_type) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b958ad2369e..9c624ff0739 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2018-08-22 Richard Sandiford + + * gcc.dg/vect/vect-avg-16.c: New test. + * gcc.dg/vect/slp-37.c: Expect the loop to be vectorized. + * gcc.dg/vect/vect-strided-u8-i8-gap4.c, + * gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for + the second loop in main1. + 2018-08-22 Iain Sandoe * gcc.dg/lto/pr85248_0.c (test_alias): diff --git a/gcc/testsuite/gcc.dg/vect/slp-37.c b/gcc/testsuite/gcc.dg/vect/slp-37.c index b6a044dfd3a..54a5e18c51f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-37.c +++ b/gcc/testsuite/gcc.dg/vect/slp-37.c @@ -17,8 +17,8 @@ foo1 (s1 *arr) int i; s1 *ptr = arr; - /* Different constant types - not SLPable. The group size is not power of 2, - interleaving is not supported either. */ + /* Vectorized as a strided SLP pair of accesses to and a single + strided access to c. */ for (i = 0; i < N; i++) { ptr->a = 6; @@ -58,6 +58,5 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ - +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-avg-16.c b/gcc/testsuite/gcc.dg/vect/vect-avg-16.c new file mode 100644 index 00000000000..f3e3839a879 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-avg-16.c @@ -0,0 +1,52 @@ +/* { dg-additional-options "-O3" } */ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define N 80 + +void __attribute__ ((noipa)) +f (signed char *restrict a, signed char *restrict b, + signed char *restrict c, int n) +{ + for (int j = 0; j < n; ++j) + { + for (int i = 0; i < 16; ++i) + a[i] = (b[i] + c[i]) >> 1; + a += 20; + b += 20; + c += 20; + } +} + +#define BASE1 -126 +#define BASE2 -42 + +signed char a[N], b[N], c[N]; + +int +main (void) +{ + check_vect (); + + for (int i = 0; i < N; ++i) + { + a[i] = i; + b[i] = BASE1 + i * 3; + c[i] = BASE2 + i * 2; + asm volatile ("" ::: "memory"); + } + f (a, b, c, N / 20); + for (int i = 0; i < N; ++i) + { + int d = (BASE1 + BASE2 + i * 5) >> 1; + if (a[i] != (i % 20 < 16 ? d : i)) + __builtin_abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target vect_avg_qi } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c index 28ba4170fba..b5eb87f4b96 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c @@ -55,7 +55,7 @@ main1 (s *arr) } ptr = arr; - /* Not vectorizable: gap in store. */ + /* Vectorized as a strided SLP pair. */ for (i = 0; i < N; i++) { res[i].a = ptr->b; @@ -110,5 +110,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ - +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c index 901b1a925bd..f1d05a5aaf9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c @@ -53,7 +53,7 @@ main1 (s *arr) } ptr = arr; - /* Not vectorizable: gap in store. */ + /* Vectorized as a strided SLP pair. */ for (i = 0; i < N; i++) { res[i].a = ptr->b; @@ -97,5 +97,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ - +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_strided8 } } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index d70d207c7d2..e0d05333b45 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2633,10 +2633,8 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info) if (groupsize != count && !DR_IS_READ (dr)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "interleaved store with gaps\n"); - return false; + groupsize = count; + STMT_VINFO_STRIDED_P (stmt_info) = true; } /* If there is a gap after the last load in the group it is the @@ -2652,6 +2650,8 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info) "Detected interleaving "); if (DR_IS_READ (dr)) dump_printf (MSG_NOTE, "load "); + else if (STMT_VINFO_STRIDED_P (stmt_info)) + dump_printf (MSG_NOTE, "strided store "); else dump_printf (MSG_NOTE, "store "); dump_printf (MSG_NOTE, "of size %u starting with ",