vect: Tweak vect_better_loop_vinfo_p handling of variable VFs

author Richard Sandiford <richard.sandiford@arm.com>
Mon, 20 Apr 2020 16:13:29 +0000 (17:13 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Mon, 20 Apr 2020 16:13:29 +0000 (17:13 +0100)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 4c9de79c1fd56d5b78eba8c1778c7ea69b32c1eb..433b976077c60d129e908034e54206df49172c2f 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-vect-loop.c (vect_better_loop_vinfo_p): If old_loop_vinfo
+       has a variable VF, prefer new_loop_vinfo if it is cheaper for the
+       estimated VF and is no worse at double the estimated VF.
+
  2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
  
         PR target/94668
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 9bf3581b7700a151d62626fa4a3879f687dc2882..ea3ba36d97f676ae454dfbd8c4a0e3251f2f5099 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/cost_model_8.c: New test.
+       * gcc.target/aarch64/sve/cost_model_9.c: Likewise.
+       * gcc.target/aarch64/sve/pr89007-1.c: Add -msve-vector-bits=512.
+       * gcc.target/aarch64/sve/pr89007-2.c: Likewise.
+
  2020-04-20  Richard Sandiford  <richard.sandiford@arm.com>
  
         PR target/94668
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c

new file mode 100644 (file)

index 0000000..80c3a23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c
@@ -0,0 +1,12 @@
+/* { dg-options "-O3 -msve-vector-bits=scalable" } */
+
+void
+vset (int *restrict dst, int *restrict src, int count)
+{
+  for (int i = 0; i < count; ++i)
+#pragma GCC unroll 4
+    for (int j = 0; j < 4; ++j)
+      *dst++ = 1;
+}
+
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c

new file mode 100644 (file)

index 0000000..e7a1bac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O3 -msve-vector-bits=scalable" } */
+
+void
+vset (int *restrict dst, int *restrict src, int count)
+{
+  for (int i = 0; i < count; ++i)
+#pragma GCC unroll 8
+    for (int j = 0; j < 8; ++j)
+      *dst++ = 1;
+}
+
+/* { dg-final { scan-assembler-not {\tst1w\tz} } } */
+/* { dg-final { scan-assembler-times {\tstp\tq} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c

index af4aff4ec6d9495fb6bbfa704b073caa11e6f85d..ff9550c91091cc55e180094f7603eeafba0ac8d1 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
@@ -1,5 +1,5 @@
  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */
  /* { dg-final { check-function-bodies "**" "" } } */
  
  #define N 1024
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c

index 2ccdd0d353ebbbdeea44086b1954f8c893de1638..da345fe8bd6e744179fb910aa221e48574af1b61 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
@@ -1,5 +1,5 @@
  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=512 --save-temps" } */
  /* { dg-final { check-function-bodies "**" "" } } */
  
  #define N 1024
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

index 265bcfdc5afb0eee59d734aa3262b2357047d717..b6c3faeae5153e1cac91cfd2936479de773352f4 100644 (file)
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2414,7 +2414,36 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
    poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost
                              * poly_widest_int (new_vf));
    if (maybe_lt (rel_old, rel_new))
-    return false;
+    {
+      /* When old_loop_vinfo uses a variable vectorization factor,
+        we know that it has a lower cost for at least one runtime VF.
+        However, we don't know how likely that VF is.
+
+        One option would be to compare the costs for the estimated VFs.
+        The problem is that that can put too much pressure on the cost
+        model.  E.g. if the estimated VF is also the lowest possible VF,
+        and if old_loop_vinfo is 1 unit worse than new_loop_vinfo
+        for the estimated VF, we'd then choose new_loop_vinfo even
+        though (a) new_loop_vinfo might not actually be better than
+        old_loop_vinfo for that VF and (b) it would be significantly
+        worse at larger VFs.
+
+        Here we go for a hacky compromise: pick new_loop_vinfo if it is
+        no more expensive than old_loop_vinfo even after doubling the
+        estimated old_loop_vinfo VF.  For all but trivial loops, this
+        ensures that we only pick new_loop_vinfo if it is significantly
+        better than old_loop_vinfo at the estimated VF.  */
+      if (rel_new.is_constant ())
+       return false;
+
+      HOST_WIDE_INT new_estimated_vf = estimated_poly_value (new_vf);
+      HOST_WIDE_INT old_estimated_vf = estimated_poly_value (old_vf);
+      widest_int estimated_rel_new = (new_loop_vinfo->vec_inside_cost
+                                     * widest_int (old_estimated_vf));
+      widest_int estimated_rel_old = (old_loop_vinfo->vec_inside_cost
+                                     * widest_int (new_estimated_vf));
+      return estimated_rel_new * 2 <= estimated_rel_old;
+    }
    if (known_lt (rel_new, rel_old))
      return true;
author	Richard Sandiford <richard.sandiford@arm.com>
	Mon, 20 Apr 2020 16:13:29 +0000 (17:13 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Mon, 20 Apr 2020 16:13:29 +0000 (17:13 +0100)
gcc/ChangeLog		patch | blob | history
gcc/testsuite/ChangeLog		patch | blob | history
gcc/testsuite/gcc.target/aarch64/sve/cost_model_8.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/cost_model_9.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c		patch | blob | history
gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c		patch | blob | history
gcc/tree-vect-loop.c		patch | blob | history