[AArch64] Fix INDEX patterns for partial VNx2 modes
author Richard Sandiford <richard.sandiford@arm.com>
Tue, 10 Dec 2019 16:32:40 +0000 (16:32 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Tue, 10 Dec 2019 16:32:40 +0000 (16:32 +0000)
The INDEX patterns handle partial modes by choosing the container
size rather than the element size, so that the number of lanes
(and thus number of additions) matches the mode.  This means that
all VNx4 modes use .s and all VNx2 modes use .d, etc.

When adding this, I'd forgotten that the choice between Wn and Xn
registers would need to be updated to use the container size too.
For partial VNx2s, we were using .d containers with Wn rather than
Xn source registers.

2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/iterators.md (vccore): New iterator.
* config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
of vwcore.
(*vec_series<mode>_plus): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/mixed_size_6.c: New test.

From-SVN: r279173

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c [new file with mode: 0644]

index f559d3a6f8398e0170a04840e2f57bddf1182e24..577acb18d78d52fe8ad654674848f7c38588190f 100644 (file)
@@ -1,3 +1,10 @@
+2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/iterators.md (vccore): New iterator.
+       * config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
+       of vwcore.
+       (*vec_series<mode>_plus): Likewise.
+
 2019-12-10  Frederik Harwath  <frederik@codesourcery.com>
 
        * omp-low.c (scan_omp_for): Use clause location in warning.
index 4427609b57907c47f6abd23a6137babd65586e3f..1d9cdad23bdc3f73c2a6e01baa7376c73022886b 100644 (file)
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
   "TARGET_SVE"
   "@
-   index\t%0.<Vctype>, #%1, %<vwcore>2
-   index\t%0.<Vctype>, %<vwcore>1, #%2
-   index\t%0.<Vctype>, %<vwcore>1, %<vwcore>2"
+   index\t%0.<Vctype>, #%1, %<vccore>2
+   index\t%0.<Vctype>, %<vccore>1, #%2
+   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
 )
 
 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
   {
     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
-    return "index\t%0.<Vctype>, %<vwcore>1, #%2";
+    return "index\t%0.<Vctype>, %<vccore>1, #%2";
   }
 )
 
index 83a0d156e84baf7dde8f9e46eeeca4edfa1f9037..1ca5ed1ef1bc66a4ecb52ee240338f18fd560384 100644 (file)
                          (VNx2DI "x")
                          (VNx2DF "x")])
 
+;; Like vwcore, but for the container mode rather than the element mode.
+(define_mode_attr vccore [(VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "x")
+                         (VNx8HI "w") (VNx4HI "w") (VNx2HI "x")
+                         (VNx4SI "w") (VNx2SI "x")
+                         (VNx2DI "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
 
index e2454a65da27da48eabd4a00f05e8400e64c4e1d..c1f22f2c28ed79d95a3b650567ef23af58ff5233 100644 (file)
@@ -1,3 +1,7 @@
+2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/mixed_size_6.c: New test.
+
 2019-12-10  Frederik Harwath  <frederik@codesourcery.com>
 
        * c-c++-common/goacc/clause-locations.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c
new file mode 100644 (file)
index 0000000..837edec
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-options "-O3 -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *restrict ptr1, uint8_t *restrict ptr2, uint8_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 1;
+    }
+}
+
+void
+f2 (uint64_t *restrict ptr1, uint16_t *restrict ptr2, uint16_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 2;
+    }
+}
+
+void
+f3 (uint64_t *restrict ptr1, uint32_t *restrict ptr2, uint32_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 4;
+    }
+}
+
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #4\n} } } */
+
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #4\n} } } */