+2017-12-01 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83232
+ * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
+ detection of same access. Instead of breaking the group here
+ do not consider the duplicate. Add comment explaining real fix.
+
2017-12-01 Jonathan Wakely <jwakely@redhat.com>
* doc/md.texi (Insn Splitting): Fix "central flowgraph" typo.
+2017-12-01 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83232
+ * gfortran.dg/vect/pr83232.f90: New testcase.
+
2017-12-01 Sudakshina Das <sudi.das@arm.com>
* gcc.target/arm/armv8_2-fp16-move-2.c: New test.
--- /dev/null
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+! { dg-additional-options "-funroll-loops --param vect-max-peeling-for-alignment=0 -fdump-tree-slp-details" }
+
+ SUBROUTINE MATERIAL_41_INTEGRATION ( STRESS,YLDC,EFPS, &
+ & DTnext,Dxx,Dyy,Dzz,Dxy,Dxz,Dyz,MatID,P1,P3 ) ! compile-only testcase for PR tree-optimization/83232
+ REAL(KIND(0D0)), INTENT(INOUT) :: STRESS(6)
+ REAL(KIND(0D0)), INTENT(IN) :: DTnext
+ REAL(KIND(0D0)), INTENT(IN) :: Dxx,Dyy,Dzz,Dxy,Dxz,Dyz
+ REAL(KIND(0D0)) :: Einc(6) ! local array; passed to foo at the end of the routine
+ REAL(KIND(0D0)) :: P1,P3 ! no INTENT given; dummies YLDC, EFPS, MatID are never declared or referenced
+
+ Einc(1) = DTnext * Dxx ! (1)
+ Einc(2) = DTnext * Dyy
+ Einc(3) = DTnext * Dzz
+ Einc(4) = DTnext * Dxy
+ Einc(5) = DTnext * Dxz
+ Einc(6) = DTnext * Dyz
+ DO i = 1,6 ! i is implicitly typed (this unit has no IMPLICIT NONE)
+ STRESS(i) = STRESS(i) + P3*Einc(i)
+ ENDDO
+ STRESS(1) = STRESS(1) + (DTnext * P1 * (Dxx+Dyy+Dzz)) ! (2)
+ STRESS(2) = STRESS(2) + (DTnext * P1 * (Dxx+Dyy+Dzz))
+ STRESS(3) = 0.0 ! STRESS(1:3) are stored again here, after the loop already wrote STRESS(1:6)
+ Einc(5) = 0.0 ! (3)
+ Einc(6) = 0.0 ! Einc(5:6) are stored a second time, after being read in the loop
+ call foo (Einc) ! foo has no visible interface in this unit; the test is compile-only
+ END SUBROUTINE
+
+! We should vectorize (1) and (2)
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
+! We fail to vectorize at (3); this can be fixed in the future.
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }
if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
break;
- /* Do not place the same access in the interleaving chain twice. */
- if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
- break;
-
/* Check the types are compatible.
??? We don't distinguish this during sorting. */
if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
- gcc_assert (init_a <= init_b);
+ HOST_WIDE_INT init_prev
+ = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
+ gcc_assert (init_a <= init_b
+ && init_a <= init_prev
+ && init_prev <= init_b);
+
+ /* Do not place the same access in the interleaving chain twice. */
+ if (init_b == init_prev)
+ {
+ gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
+ < gimple_uid (DR_STMT (drb)));
+ /* ??? For now we simply "drop" the later reference which is
+ otherwise the same rather than finishing off this group.
+ In the end we'd want to re-process duplicates forming
+ multiple groups from the refs, likely by just collecting
+ all candidates (including duplicates and split points
+ below) in a vector and then processing them together. */
+ continue;
+ }
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
/* If we have a store, the accesses are adjacent. This splits
groups into chunks we support (we don't support vectorization
of stores with gaps). */
- if (!DR_IS_READ (dra)
- && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
- (DR_INIT (datarefs_copy[i-1]))
- != type_size_a))
+ if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
/* If the step (if not zero or non-constant) is greater than the