aarch64: Add support for unpacked SVE shifts
authorRichard Sandiford <richard.sandiford@arm.com>
Mon, 11 Jan 2021 18:03:20 +0000 (18:03 +0000)
committerRichard Sandiford <richard.sandiford@arm.com>
Mon, 11 Jan 2021 18:03:20 +0000 (18:03 +0000)
This patch adds support for unpacked SVE LSL, ASR and LSR.
For right shifts, the type suffix needs to be taken from the
element size rather than the container size.

gcc/
* config/aarch64/aarch64-sve.md (<ASHIFT:optab><mode>3)
(v<ASHIFT:optab><mode>3, @aarch64_pred_<optab><mode>)
(*post_ra_v<ASHIFT:optab><mode>3): Extend from SVE_FULL_I to SVE_I.

gcc/testsuite/
* gcc.target/aarch64/sve/shift_2.c: New test.

gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/gcc.target/aarch64/sve/shift_2.c [new file with mode: 0644]

index 2f5a5e3c9148cd7480022f61a4110b6cbd788243..a58324da869a3da7c8a9b0457f5b9332369b3dcd 100644 (file)
 ;; Unpredicated shift by a scalar, which expands into one of the vector
 ;; shifts below.
 (define_expand "<ASHIFT:optab><mode>3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (ASHIFT:SVE_FULL_I
-         (match_operand:SVE_FULL_I 1 "register_operand")
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (ASHIFT:SVE_I
+         (match_operand:SVE_I 1 "register_operand")
          (match_operand:<VEL> 2 "general_operand")))]
   "TARGET_SVE"
   {
 
 ;; Unpredicated shift by a vector.
 (define_expand "v<optab><mode>3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (unspec:SVE_I
          [(match_dup 3)
-          (ASHIFT:SVE_FULL_I
-            (match_operand:SVE_FULL_I 1 "register_operand")
-            (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
+          (ASHIFT:SVE_I
+            (match_operand:SVE_I 1 "register_operand")
+            (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
 ;; likely to gain much and would make the instruction seem less uniform
 ;; to the register allocator.
 (define_insn_and_split "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
-          (ASHIFT:SVE_FULL_I
-            (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
-            (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
+          (ASHIFT:SVE_I
+            (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
+            (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
   "&& reload_completed
    && !register_operand (operands[3], <MODE>mode)"
-  [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
+  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
   ""
   [(set_attr "movprfx" "*,*,*,yes")]
 )
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_v<optab><mode>3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
-       (ASHIFT:SVE_FULL_I
-         (match_operand:SVE_FULL_I 1 "register_operand" "w")
-         (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+       (ASHIFT:SVE_I
+         (match_operand:SVE_I 1 "register_operand" "w")
+         (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
   "TARGET_SVE && reload_completed"
   "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c
new file mode 100644 (file)
index 0000000..b7462c4
--- /dev/null
@@ -0,0 +1,81 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_SHIFT_IMM(TYPE, NAME, OP, AMT) \
+  TYPE NAME##_##TYPE##_##AMT (TYPE a) { return a OP AMT; }
+
+#define TEST_SHIFT(TYPE, NAME, OP, LIMIT) \
+  TYPE NAME##_##TYPE##_reg (TYPE a, TYPE b) { return a OP b; } \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, 1) \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, 5) \
+  TEST_SHIFT_IMM (TYPE, NAME, OP, LIMIT)
+
+#define TEST_TYPE(TYPE, SIZE, LIMIT) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_SHIFT (TYPE##SIZE, shl, <<, LIMIT) \
+  TEST_SHIFT (TYPE##SIZE, shr, >>, LIMIT) \
+
+TEST_TYPE (int8_t, 32, 7)
+TEST_TYPE (uint8_t, 32, 7)
+
+TEST_TYPE (int8_t, 64, 7)
+TEST_TYPE (uint8_t, 64, 7)
+TEST_TYPE (int16_t, 64, 15)
+TEST_TYPE (uint16_t, 64, 15)
+
+TEST_TYPE (int8_t, 128, 7)
+TEST_TYPE (uint8_t, 128, 7)
+TEST_TYPE (int16_t, 128, 15)
+TEST_TYPE (uint16_t, 128, 15)
+TEST_TYPE (int32_t, 128, 31)
+TEST_TYPE (uint32_t, 128, 31)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */