From 2fd579ab530ca9ca5682eeba305c4946bf710c25 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 11 Apr 2019 07:30:59 +0000 Subject: [PATCH] re PR tree-optimization/90018 (r265453 miscompiled 527.cam4_r in SPEC CPU 2017) 2019-04-11 Richard Biener PR tree-optimization/90018 * tree-vect-data-refs.c (vect_preserves_scalar_order_p): Test both SLP and interleaving variants. * gcc.dg/vect/pr90018.c: New testcase. From-SVN: r270273 --- gcc/ChangeLog | 6 +++ gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.dg/vect/pr90018.c | 52 +++++++++++++++++++++++ gcc/tree-vect-data-refs.c | 64 ++++++++++++++++++++++------- 4 files changed, 112 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr90018.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a111272da86..f1ed98e6f66 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2019-04-11 Richard Biener + + PR tree-optimization/90018 + * tree-vect-data-refs.c (vect_preserves_scalar_order_p): + Test both SLP and interleaving variants. + 2019-04-11 Robin Dapp * config/s390/8561.md: New file. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 71ae61702e3..2443e443215 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-04-11 Richard Biener + + PR tree-optimization/90018 + * gcc.dg/vect/pr90018.c: New testcase. + 2018-04-10 Steve Ellcey PR rtl-optimization/87763 diff --git a/gcc/testsuite/gcc.dg/vect/pr90018.c b/gcc/testsuite/gcc.dg/vect/pr90018.c new file mode 100644 index 00000000000..d98b4c86c06 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr90018.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_double } */ + +#include "tree-vect.h" + +void __attribute__((noinline,noclone)) +foo (double *a4, int n) +{ + for (int i = 0; i < n; ++i) + { + /* We may not apply interleaving to the group (a), (b) because of (c). + Instead group (d) and (b). */ + double tem1 = a4[i*4] + a4[i*4+n*4] /* (a) */; + double tem2 = a4[i*4+2*n*4+1]; + a4[i*4+n*4+1] = tem1; /* (c) */ + a4[i*4+1] = tem2; + double tem3 = a4[i*4] - tem2; + double tem4 = tem3 + a4[i*4+n*4] /* (d) */; + a4[i*4+n*4+1] = tem4 + a4[i*4+n*4+1] /* (b) */; + } +} +int main(int argc, char **argv) +{ + int n = 11; + double a4[4 * n * 3]; + double a42[4 * n * 3]; + check_vect (); + for (int i = 0; i < 4 * n * 3; ++i) + { + a4[i] = a42[i] = i; + __asm__ volatile ("": : : "memory"); + } + foo (a4, n); + for (int i = 0; i < n; ++i) + { + double tem1 = a42[i*4] + a42[i*4+n*4]; + double tem2 = a42[i*4+2*n*4+1]; + a42[i*4+n*4+1] = tem1; + a42[i*4+1] = tem2; + double tem3 = a42[i*4] - tem2; + double tem4 = tem3 + a42[i*4+n*4]; + a42[i*4+n*4+1] = tem4 + a42[i*4+n*4+1]; + __asm__ volatile ("": : : "memory"); + } + for (int i = 0; i < 4 * n * 3; ++i) + if (a4[i] != a42[i]) + __builtin_abort (); + return 0; +} + +/* For v2df we try to use SLP and fail miserably. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sizes_32B_16B } } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 85997cf9617..d71a39ffd78 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -234,26 +234,60 @@ vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b) return true; /* STMT_A and STMT_B belong to overlapping groups. All loads in a - group are emitted at the position of the last scalar load and all - stores in a group are emitted at the position of the last scalar store. + SLP group are emitted at the position of the last scalar load and + all loads in an interleaving group are emitted at the position + of the first scalar load. + Stores in a group are emitted at the position of the last scalar store. Compute that position and check whether the resulting order matches - the current one. */ - stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); + the current one. + We have not yet decided between SLP and interleaving so we have + to conservatively assume both. */ + stmt_vec_info il_a; + stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); if (last_a) - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - last_a = get_later_stmt (last_a, s); + { + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + last_a = get_later_stmt (last_a, s); + if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a))) + { + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + if (get_later_stmt (il_a, s) == il_a) + il_a = s; + } + else + il_a = last_a; + } else - last_a = stmtinfo_a; - stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); + last_a = il_a = stmtinfo_a; + stmt_vec_info il_b; + stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); if (last_b) - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - last_b = get_later_stmt (last_b, s); + { + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + last_b = get_later_stmt (last_b, s); + if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b))) + { + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + if (get_later_stmt (il_b, s) == il_b) + il_b = s; + } + else + il_b = last_b; + } else - last_b = stmtinfo_b; - return ((get_later_stmt (last_a, last_b) == last_a) - == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a)); + last_b = il_b = stmtinfo_b; + bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a); + return (/* SLP */ + (get_later_stmt (last_a, last_b) == last_a) == a_after_b + /* Interleaving */ + && (get_later_stmt (il_a, il_b) == il_a) == a_after_b + /* Mixed */ + && (get_later_stmt (il_a, last_b) == il_a) == a_after_b + && (get_later_stmt (last_a, il_b) == last_a) == a_after_b); } /* A subroutine of vect_analyze_data_ref_dependence. Handle -- 2.30.2