aarch64: Add support for unpacked SVE mult, max and min
authorRichard Sandiford <richard.sandiford@arm.com>
Mon, 11 Jan 2021 18:03:21 +0000 (18:03 +0000)
committerRichard Sandiford <richard.sandiford@arm.com>
Mon, 11 Jan 2021 18:03:21 +0000 (18:03 +0000)
This patch makes the SVE_INT_BINARY_IMM patterns support
unpacked arithmetic, covering MUL, SMAX, SMIN, UMAX and UMIN.
For min and max, the type suffix must be taken from the element
size rather than the container size.

The XFAILs are due to PR98602.

gcc/
* config/aarch64/aarch64-sve.md (<SVE_INT_BINARY_IMM:optab><mode>3)
(@aarch64_pred_<SVE_INT_BINARY_IMM:optab><mode>)
(*post_ra_<SVE_INT_BINARY_IMM:optab><mode>3): Extend from SVE_FULL_I
to SVE_I.

gcc/testsuite/
PR testsuite/98602
* g++.target/aarch64/sve/max_1.C: New test.
* g++.target/aarch64/sve/min_1.C: Likewise.
* gcc.target/aarch64/sve/mul_2.c: Likewise.

gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/g++.target/aarch64/sve/max_1.C [new file with mode: 0644]
gcc/testsuite/g++.target/aarch64/sve/min_1.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mul_2.c [new file with mode: 0644]

index a58324da869a3da7c8a9b0457f5b9332369b3dcd..697a55e1cad44764b670843c7467aacdff2bf633 100644 (file)
 
 ;; Unpredicated integer binary operations that have an immediate form.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (unspec:SVE_I
          [(match_dup 3)
-          (SVE_INT_BINARY_IMM:SVE_FULL_I
-            (match_operand:SVE_FULL_I 1 "register_operand")
-            (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
+          (SVE_INT_BINARY_IMM:SVE_I
+            (match_operand:SVE_I 1 "register_operand")
+            (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
 ;; and would make the instruction seem less uniform to the register
 ;; allocator.
 (define_insn_and_split "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
-          (SVE_INT_BINARY_IMM:SVE_FULL_I
-            (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
-            (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
+          (SVE_INT_BINARY_IMM:SVE_I
+            (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
+            (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   "@
   "&& reload_completed
    && !register_operand (operands[3], <MODE>mode)"
   [(set (match_dup 0)
-       (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
+       (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
   ""
   [(set_attr "movprfx" "*,*,yes,yes")]
 )
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
 (define_insn "*post_ra_<optab><mode>3"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (SVE_INT_BINARY_IMM:SVE_FULL_I
-         (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
-         (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (SVE_INT_BINARY_IMM:SVE_I
+         (match_operand:SVE_I 1 "register_operand" "0, w")
+         (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
   "TARGET_SVE && reload_completed"
   "@
    <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
diff --git a/gcc/testsuite/g++.target/aarch64/sve/max_1.C b/gcc/testsuite/g++.target/aarch64/sve/max_1.C
new file mode 100644 (file)
index 0000000..caf9d7c
--- /dev/null
@@ -0,0 +1,73 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP_IMM(TYPE, OP, NAME, AMT) \
+  TYPE test##_##TYPE##_##NAME (TYPE a) { return a > AMT ? a : AMT; }
+
+#define TEST_OP(TYPE, MINV, MAXV) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a > b ? a : b; } \
+  TEST_OP_IMM (TYPE, OP, a, MINV) \
+  TEST_OP_IMM (TYPE, OP, b, 50) \
+  TEST_OP_IMM (TYPE, OP, c, MAXV)
+
+#define TEST_TYPE(TYPE, SIZE, MINV, MAXV) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE, MINV, MAXV)
+
+TEST_TYPE (int8_t, 32, -100, 100)
+TEST_TYPE (uint8_t, 32, 2, 250)
+
+TEST_TYPE (int8_t, 64, -110, 110)
+TEST_TYPE (uint8_t, 64, 3, 253)
+TEST_TYPE (int16_t, 64, -128, 127)
+TEST_TYPE (uint16_t, 64, 4, 255)
+
+TEST_TYPE (int8_t, 128, -120, 120)
+TEST_TYPE (uint8_t, 128, 5, 251)
+TEST_TYPE (int16_t, 128, -128, 127)
+TEST_TYPE (uint16_t, 128, 6, 255)
+TEST_TYPE (int32_t, 128, -128, 127)
+TEST_TYPE (uint32_t, 128, 7, 255)
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-100\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-120\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-128\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-128\n} 1 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #50\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #50\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #50\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #120\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #127\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #127\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #7\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #50\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #50\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #50\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #250\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #251\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #253\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/min_1.C b/gcc/testsuite/g++.target/aarch64/sve/min_1.C
new file mode 100644 (file)
index 0000000..9c84690
--- /dev/null
@@ -0,0 +1,73 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP_IMM(TYPE, OP, NAME, AMT) \
+  TYPE test##_##TYPE##_##NAME (TYPE a) { return a < AMT ? a : AMT; }
+
+#define TEST_OP(TYPE, MINV, MAXV) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a < b ? a : b; } \
+  TEST_OP_IMM (TYPE, OP, a, MINV) \
+  TEST_OP_IMM (TYPE, OP, b, 50) \
+  TEST_OP_IMM (TYPE, OP, c, MAXV)
+
+#define TEST_TYPE(TYPE, SIZE, MINV, MAXV) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE, MINV, MAXV)
+
+TEST_TYPE (int8_t, 32, -100, 100)
+TEST_TYPE (uint8_t, 32, 2, 250)
+
+TEST_TYPE (int8_t, 64, -110, 110)
+TEST_TYPE (uint8_t, 64, 3, 253)
+TEST_TYPE (int16_t, 64, -128, 127)
+TEST_TYPE (uint16_t, 64, 4, 255)
+
+TEST_TYPE (int8_t, 128, -120, 120)
+TEST_TYPE (uint8_t, 128, 5, 251)
+TEST_TYPE (int16_t, 128, -128, 127)
+TEST_TYPE (uint16_t, 128, 6, 255)
+TEST_TYPE (int32_t, 128, -128, 127)
+TEST_TYPE (uint32_t, 128, 7, 255)
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-100\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-120\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-128\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-128\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #50\n} 3 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #50\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #50\n} 1 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #100\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #110\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #120\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #127\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #127\n} 1 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #3\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #4\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #6\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #7\n} 1 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #50\n} 3 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #50\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #50\n} 1 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #250\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #251\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #253\n} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_2.c
new file mode 100644 (file)
index 0000000..ff049f5
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP_IMM(TYPE, OP, NAME, AMT) \
+  TYPE test##_##TYPE##_##NAME (TYPE a) { return a * AMT; }
+
+#define TEST_OP(TYPE, MINV, MAXV) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a * b; } \
+  TEST_OP_IMM (TYPE, OP, a, MINV) \
+  TEST_OP_IMM (TYPE, OP, b, 50) \
+  TEST_OP_IMM (TYPE, OP, c, MAXV)
+
+#define TEST_TYPE(TYPE, SIZE, MINV, MAXV) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE, MINV, MAXV)
+
+TEST_TYPE (int8_t, 32, -100, 100)
+TEST_TYPE (uint8_t, 32, 2, 250)
+
+TEST_TYPE (int8_t, 64, -110, 110)
+TEST_TYPE (uint8_t, 64, 3, 253)
+TEST_TYPE (int16_t, 64, -128, 123)
+TEST_TYPE (uint16_t, 64, 3, 255)
+
+TEST_TYPE (int8_t, 128, -120, 120)
+TEST_TYPE (uint8_t, 128, 4, 251)
+TEST_TYPE (int16_t, 128, -128, 123)
+TEST_TYPE (uint16_t, 128, 2, 255)
+TEST_TYPE (int32_t, 128, -128, 123)
+TEST_TYPE (uint32_t, 128, 4, 255)
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-100\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-120\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #-128\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #-128\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #50\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #50\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #50\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #120\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #123\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #123\n} 1 } } */