From fb607032b8009d141409b8dc3c5e4df42c93a231 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 1 Dec 2017 13:33:40 +0000 Subject: [PATCH] re PR tree-optimization/83232 (fma3d spec2000 regression on zen with -Ofast (generic tuning) after r255268 by missed SLP opportunity) 2017-12-01 Richard Biener PR tree-optimization/83232 * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix detection of same access. Instead of breaking the group here do not consider the duplicate. Add comment explaining real fix. * gfortran.dg/vect/pr83232.f90: New testcase. From-SVN: r255307 --- gcc/ChangeLog | 7 +++++ gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gfortran.dg/vect/pr83232.f90 | 33 ++++++++++++++++++++++ gcc/tree-vect-data-refs.c | 29 +++++++++++++------ 4 files changed, 65 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/vect/pr83232.f90 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d8b2aebf205..0213e8b0929 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-12-01 Richard Biener + + PR tree-optimization/83232 + * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix + detection of same access. Instead of breaking the group here + do not consider the duplicate. Add comment explaining real fix. + 2017-12-01 Jonathan Wakely * doc/md.texi (Insn Splitting): Fix "central flowgraph" typo. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b51bd2a8ad6..245ab25ac10 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-12-01 Richard Biener + + PR tree-optimization/83232 + * gfortran.dg/vect/pr83232.f90: New testcase. + 2017-12-01 Sudakshina Das * gcc.target/arm/armv8_2-fp16-move-2.c: New test. diff --git a/gcc/testsuite/gfortran.dg/vect/pr83232.f90 b/gcc/testsuite/gfortran.dg/vect/pr83232.f90 new file mode 100644 index 00000000000..8fd9f0373eb --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr83232.f90 @@ -0,0 +1,33 @@ +! { dg-do compile } +! 
{ dg-require-effective-target vect_double } +! { dg-additional-options "-funroll-loops --param vect-max-peeling-for-alignment=0 -fdump-tree-slp-details" } + + SUBROUTINE MATERIAL_41_INTEGRATION ( STRESS,YLDC,EFPS, & + & DTnext,Dxx,Dyy,Dzz,Dxy,Dxz,Dyz,MatID,P1,P3 ) + REAL(KIND(0D0)), INTENT(INOUT) :: STRESS(6) + REAL(KIND(0D0)), INTENT(IN) :: DTnext + REAL(KIND(0D0)), INTENT(IN) :: Dxx,Dyy,Dzz,Dxy,Dxz,Dyz + REAL(KIND(0D0)) :: Einc(6) + REAL(KIND(0D0)) :: P1,P3 + + Einc(1) = DTnext * Dxx ! (1) + Einc(2) = DTnext * Dyy + Einc(3) = DTnext * Dzz + Einc(4) = DTnext * Dxy + Einc(5) = DTnext * Dxz + Einc(6) = DTnext * Dyz + DO i = 1,6 + STRESS(i) = STRESS(i) + P3*Einc(i) + ENDDO + STRESS(1) = STRESS(1) + (DTnext * P1 * (Dxx+Dyy+Dzz)) ! (2) + STRESS(2) = STRESS(2) + (DTnext * P1 * (Dxx+Dyy+Dzz)) + STRESS(3) = 0.0 + Einc(5) = 0.0 ! (3) + Einc(6) = 0.0 + call foo (Einc) + END SUBROUTINE + +! We should vectorize (1) and (2) +! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } } +! We fail to vectorize at (3), this can be fixed in the future +! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index ca864987a59..996d156a239 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2841,10 +2841,6 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0) break; - /* Do not place the same access in the interleaving chain twice. */ - if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0) - break; - /* Check the types are compatible. ??? We don't distinguish this during sorting. */ if (!types_compatible_p (TREE_TYPE (DR_REF (dra)), @@ -2854,7 +2850,25 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). 
*/ HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); - gcc_assert (init_a <= init_b); + HOST_WIDE_INT init_prev + = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1])); + gcc_assert (init_a <= init_b + && init_a <= init_prev + && init_prev <= init_b); + + /* Do not place the same access in the interleaving chain twice. */ + if (init_b == init_prev) + { + gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) + < gimple_uid (DR_STMT (drb))); + /* ??? For now we simply "drop" the later reference which is + otherwise the same rather than finishing off this group. + In the end we'd want to re-process duplicates forming + multiple groups from the refs, likely by just collecting + all candidates (including duplicates and split points + below) in a vector and then process them together. */ + continue; + } /* If init_b == init_a + the size of the type * k, we have an interleaving, and DRA is accessed before DRB. */ @@ -2866,10 +2880,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* If we have a store, the accesses are adjacent. This splits groups into chunks we support (we don't support vectorization of stores with gaps). */ - if (!DR_IS_READ (dra) - && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW - (DR_INIT (datarefs_copy[i-1])) - != type_size_a)) + if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a) break; /* If the step (if not zero or non-constant) is greater than the -- 2.30.2