Optimise WAR and WAW alias checks

author Richard Sandiford <richard.sandiford@arm.com>

Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)

committer Richard Sandiford <rsandifo@gcc.gnu.org>

Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)
author Richard Sandiford <richard.sandiford@arm.com>
Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index eac041016acdd6828efbaa975632d9a0bfbccaeb..6ca0c57e6c62276c3dd6e19e78725ebb67cb7ee4 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-data-ref.c (create_intersect_range_checks_index): If the
+       alias pair describes simple WAW and WAR dependencies, just check
+       whether the first B access overlaps later A accesses.
+       (create_waw_or_war_checks): New function that performs the same
+       optimization on addresses.
+       (create_intersect_range_checks): Call it.
+
  2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
  
         * lra-constraints.c (valid_address_p): Take the operand and a
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 163ea21842f43173a73aaa0606a96c1afa633b4b..12a0d08238cebb72999d31730ed3cd268b247de3 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.dg/vect/vect-alias-check-8.c: Expect WAR/WAW checks to be used.
+       * gcc.dg/vect/vect-alias-check-14.c: Likewise.
+       * gcc.dg/vect/vect-alias-check-15.c: Likewise.
+       * gcc.dg/vect/vect-alias-check-18.c: Likewise.
+       * gcc.dg/vect/vect-alias-check-19.c: Likewise.
+       * gcc.target/aarch64/sve/var_stride_1.c: Update expected sequence.
+       * gcc.target/aarch64/sve/var_stride_2.c: Likewise.
+       * gcc.target/aarch64/sve/var_stride_3.c: Likewise.
+       * gcc.target/aarch64/sve/var_stride_5.c: Likewise.
+
  2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
  
         * gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c: Remove XFAIL.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c

index 87045c92424c4d021740a73c4b43897697dcdbd3..1d148a04918d9a813b63ea57434bffa3670c3146 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
@@ -60,5 +60,5 @@ main (void)
  
  /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
  /* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
  /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c

index ad8d69e2fe0ebb28ab1a4e589d5e39fdcda93e47..fbe3f8431ff05e5727f7ce888d280a64670fbc4e 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
@@ -57,5 +57,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
  /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c

index e9fd31a14002cb5a1fd52a0cfb047be06bdab58e..9d0739151d956b1a2b136826d9260fe733f631d5 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
@@ -60,5 +60,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
  /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c

index 583e296f7f17ccaec747e67371c405c849d6c77b..7c0ff36a8c43f11197de413cb682bcd0a3afcae8 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
@@ -58,5 +58,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
  /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c

index 6cd0219495c9369811d0e0f0a4084596eb84efe8..7e5df1389991da8115df2c6784b52ff3e15f8124 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
@@ -60,5 +60,5 @@ main (void)
  }
  
  /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
  /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c

index 68baba9e9657482f4bc64b9cfc9e3bde86f59bb0..40ff2d561a812fb3924b31ee8a7ef6a760b1d7a9 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
@@ -15,13 +15,9 @@ f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
  /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
  /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
  /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
-/* { dg-final { scan-assembler-not {, 1024} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c

index 30f6d2691b8552eab8f38cf8e8a6bdd0576395aa..b8afea70207f8ec2ed8f07eaa531bc3a01f1c0f0 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
@@ -15,7 +15,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
  /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
  /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
  /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (257-1)*4 rather than (VF-1)*4.  */
+/* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4.  */
  /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */
  /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */
  /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c

index 70792ff9f332441a69124e6672df48ce2ce4ec32..5ab6859ad4e3b6d7adcbefc36072c39318e98c97 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, int n, long m __attribute__((unused)))
  /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
  /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
  /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
-/* { dg-final { scan-assembler-not {, 1024} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler {\tcmp\t} } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c

index 688f3be61d77df6d6322762c82a0d927776bfc1b..93c114193e90b87416c566fa6e14ef4d973506ee 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
  /* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
  /* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
  /* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
-/* Should multiply by (VF-1)*8 rather than (257-1)*8.  */
-/* { dg-final { scan-assembler-not {, 2048} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */
-/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler {\tcmp\t} } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*8 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #16\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c

index 4dfa33404e3da0b15f395678987fa326d08e6ea9..bad80e1a23deb86e78aa12bbbb706ba022e3cbf8 100644 (file)
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1805,6 +1805,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
                            abs_step, &niter_access2))
      return false;
  
+  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
+
    unsigned int i;
    for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
      {
@@ -1906,16 +1908,57 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
  
          Combining the tests requires limit to be computable in an unsigned
          form of the index type; if it isn't, we fall back to the usual
-        pointer-based checks.  */
-      poly_offset_int limit = (idx_len1 + idx_access1 - 1
-                              + idx_len2 + idx_access2 - 1);
+        pointer-based checks.
+
+        We can do better if DR_B is a write and if DR_A and DR_B are
+        well-ordered in both the original and the new code (see the
+        comment above the DR_ALIAS_* flags for details).  In this case
+        we know that for each i in [0, n-1], the write performed by
+        access i of DR_B occurs after access numbers j<=i of DR_A in
+        both the original and the new code.  Any write or anti
+        dependencies wrt those DR_A accesses are therefore maintained.
+
+        We just need to make sure that each individual write in DR_B does not
+        overlap any higher-indexed access in DR_A; such DR_A accesses happen
+        after the DR_B access in the original code but happen before it in
+        the new code.
+
+        We know the steps for both accesses are equal, so by induction, we
+        just need to test whether the first write of DR_B overlaps a later
+        access of DR_A.  In other words, we need to move min1 along by
+        one iteration:
+
+          min1' = min1 + idx_step
+
+        and use the ranges:
+
+          [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+
+        and:
+
+          [min2, min2 + idx_access2 - 1]
+
+        where:
+
+           low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+          high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
+      if (waw_or_war_p)
+       idx_len1 -= abs_idx_step;
+
+      poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+      if (!waw_or_war_p)
+       limit += idx_len2;
+
        tree utype = unsigned_type_for (TREE_TYPE (min1));
        if (!wi::fits_to_tree_p (limit, utype))
         return false;
  
        poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
-      poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
+      poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
        poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+      /* Equivalent to adding IDX_STEP to MIN1.  */
+      if (waw_or_war_p)
+       bias -= wi::to_offset (idx_step);
  
        tree subject = fold_build2 (MINUS_EXPR, utype,
                                   fold_convert (utype, min2),
@@ -1931,7 +1974,169 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
         *cond_expr = part_cond_expr;
      }
    if (dump_enabled_p ())
-    dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+    {
+      if (waw_or_war_p)
+       dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
+      else
+       dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+    }
+  return true;
+}
+
+/* A subroutine of create_intersect_range_checks, with a subset of the
+   same arguments.  Try to optimize cases in which the second access
+   is a write and in which some overlap is valid.  */
+
+static bool
+create_waw_or_war_checks (tree *cond_expr,
+                         const dr_with_seg_len_pair_t &alias_pair)
+{
+  const dr_with_seg_len& dr_a = alias_pair.first;
+  const dr_with_seg_len& dr_b = alias_pair.second;
+
+  /* Check for cases in which:
+
+     (a) DR_B is always a write;
+     (b) the accesses are well-ordered in both the original and new code
+        (see the comment above the DR_ALIAS_* flags for details); and
+     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
+  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+    return false;
+
+  /* Check for equal (but possibly variable) steps.  */
+  tree step = DR_STEP (dr_a.dr);
+  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+    return false;
+
+  /* Make sure that we can operate on sizetype without loss of precision.  */
+  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+    return false;
+
+  /* All addresses involved are known to have a common alignment ALIGN.
+     We can therefore subtract ALIGN from an exclusive endpoint to get
+     an inclusive endpoint.  In the best (and common) case, ALIGN is the
+     same as the access sizes of both DRs, and so subtracting ALIGN
+     cancels out the addition of an access size.  */
+  unsigned int align = MIN (dr_a.align, dr_b.align);
+  poly_uint64 last_chunk_a = dr_a.access_size - align;
+  poly_uint64 last_chunk_b = dr_b.access_size - align;
+
+  /* Get a boolean expression that is true when the step is negative.  */
+  tree indicator = dr_direction_indicator (dr_a.dr);
+  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+                              fold_convert (ssizetype, indicator),
+                              ssize_int (0));
+
+  /* Get lengths in sizetype.  */
+  tree seg_len_a
+    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
+  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
+
+  /* Each access has the following pattern:
+
+         <- |seg_len| ->
+         <--- A: -ve step --->
+         +-----+-------+-----+-------+-----+
+         | n-1 | ..... |  0  | ..... | n-1 |
+         +-----+-------+-----+-------+-----+
+                       <--- B: +ve step --->
+                       <- |seg_len| ->
+                       |
+                  base address
+
+     where "n" is the number of scalar iterations covered by the segment.
+
+     A is the range of bytes accessed when the step is negative,
+     B is the range when the step is positive.
+
+     We know that DR_B is a write.  We also know (from checking that
+     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
+     the write performed by access i of DR_B occurs after access numbers
+     j<=i of DR_A in both the original and the new code.  Any write or
+     anti dependencies wrt those DR_A accesses are therefore maintained.
+
+     We just need to make sure that each individual write in DR_B does not
+     overlap any higher-indexed access in DR_A; such DR_A accesses happen
+     after the DR_B access in the original code but happen before it in
+     the new code.
+
+     We know the steps for both accesses are equal, so by induction, we
+     just need to test whether the first write of DR_B overlaps a later
+     access of DR_A.  In other words, we need to move addr_a along by
+     one iteration:
+
+       addr_a' = addr_a + step
+
+     and check whether:
+
+       [addr_b, addr_b + last_chunk_b]
+
+     overlaps:
+
+       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
+
+     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
+
+       low_offset_a = +ve step ? 0 : seg_len_a - step
+       high_offset_a = +ve step ? seg_len_a - step : 0
+
+     This is equivalent to testing whether:
+
+       addr_a' + low_offset_a <= addr_b + last_chunk_b
+       && addr_b <= addr_a' + high_offset_a + last_chunk_a
+
+     Converting this into a single test, there is an overlap if:
+
+       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
+
+     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
+
+     If DR_A is performed, limit + |step| - last_chunk_b is known to be
+     less than the size of the object underlying DR_A.  We also know
+     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
+     guaranteed at compile time.  There can therefore be no overflow if
+     "limit" is calculated in an unsigned type with pointer precision.  */
+  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
+                                        DR_OFFSET (dr_a.dr));
+  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
+                                        DR_OFFSET (dr_b.dr));
+  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
+  addr_a = fold_build_pointer_plus (addr_a, step);
+  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
+                                          seg_len_a, step);
+  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
+    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
+
+  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
+                                  seg_len_a_minus_step, size_zero_node);
+  if (!CONSTANT_CLASS_P (low_offset_a))
+    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
+
+  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
+     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
+  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
+                                   low_offset_a);
+
+  /* The amount added to addr_b - addr_a'.  */
+  tree bias = fold_build2 (MINUS_EXPR, sizetype,
+                          size_int (last_chunk_b), low_offset_a);
+
+  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
+  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
+                      size_int (last_chunk_a + last_chunk_b));
+
+  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
+  subject = fold_build2 (PLUS_EXPR, sizetype,
+                        fold_convert (sizetype, subject), bias);
+
+  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
+  if (dump_enabled_p ())
+    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
    return true;
  }
  
@@ -2035,6 +2240,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr,
    if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
      return;
  
+  if (create_waw_or_war_checks (cond_expr, alias_pair))
+    return;
+
    unsigned HOST_WIDE_INT min_align;
    tree_code cmp_code;
    /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
author	Richard Sandiford <richard.sandiford@arm.com>
	Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Mon, 18 Nov 2019 15:26:55 +0000 (15:26 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c		patch \| blob \| history
gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c		patch \| blob \| history
gcc/tree-data-ref.c		patch \| blob \| history