[AArch64] Add support for SVE HF vconds
author Richard Sandiford <richard.sandiford@arm.com>
Wed, 14 Aug 2019 08:25:56 +0000 (08:25 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 14 Aug 2019 08:25:56 +0000 (08:25 +0000)
We were missing vcond patterns that had HF comparisons and HI or HF data.

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/iterators.md (SVE_HSD): New mode iterator.
(V_FP_EQUIV, v_fp_equiv): Handle VNx8HI and VNx8HF.
* config/aarch64/aarch64-sve.md (vcond<mode><v_fp_equiv>): Use
SVE_HSD instead of SVE_SD.

gcc/testsuite/
* gcc.target/aarch64/sve/vcond_17.c: New test.
* gcc.target/aarch64/sve/vcond_17_run.c: Likewise.

From-SVN: r274420

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c [new file with mode: 0644]

index 03577f147c2cc0eedd067f5ab1874c61489c3c4a..9f4efd78e6e06a12f59f253446c8b3646df13510 100644 (file)
@@ -1,3 +1,10 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/iterators.md (SVE_HSD): New mode iterator.
+       (V_FP_EQUIV, v_fp_equiv): Handle VNx8HI and VNx8HF.
+       * config/aarch64/aarch64-sve.md (vcond<mode><v_fp_equiv>): Use
+       SVE_HSD instead of SVE_SD.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
            Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
 
index b6466473dfa0ec433f3b3b85e9d220fb0e4e5ee9..d2583914be9c9cef2d9dc5e21f5b8f9d4f7412b7 100644 (file)
 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
 (define_expand "vcond<mode><v_fp_equiv>"
-  [(set (match_operand:SVE_SD 0 "register_operand")
-       (if_then_else:SVE_SD
+  [(set (match_operand:SVE_HSD 0 "register_operand")
+       (if_then_else:SVE_HSD
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
-         (match_operand:SVE_SD 1 "register_operand")
-         (match_operand:SVE_SD 2 "register_operand")))]
+         (match_operand:SVE_HSD 1 "register_operand")
+         (match_operand:SVE_HSD 2 "register_operand")))]
   "TARGET_SVE"
   {
     aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
index 077c3f373d5323466d41e942b170dc9185be4837..32c662f90b6a0041fdcf90fcd671066b7164eb66 100644 (file)
 ;; All SVE floating-point vector modes that have 16-bit or 32-bit elements.
 (define_mode_iterator SVE_HSF [VNx8HF VNx4SF])
 
+;; All SVE vector modes that have 16-bit, 32-bit or 64-bit elements.
+(define_mode_iterator SVE_HSD [VNx8HI VNx4SI VNx2DI VNx8HF VNx4SF VNx2DF])
+
 ;; All SVE vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
 
 ])
 
 ;; Floating-point equivalent of selected modes.
-(define_mode_attr V_FP_EQUIV [(VNx4SI "VNx4SF") (VNx4SF "VNx4SF")
+(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
+                             (VNx4SI "VNx4SF") (VNx4SF "VNx4SF")
                              (VNx2DI "VNx2DF") (VNx2DF "VNx2DF")])
-(define_mode_attr v_fp_equiv [(VNx4SI "vnx4sf") (VNx4SF "vnx4sf")
+(define_mode_attr v_fp_equiv [(VNx8HI "vnx8hf") (VNx8HF "vnx8hf")
+                             (VNx4SI "vnx4sf") (VNx4SF "vnx4sf")
                              (VNx2DI "vnx2df") (VNx2DF "vnx2df")])
 
 ;; Mode for vector conditional operations where the comparison has
index a502ffb254f5f2533cf40359e2d904e49bf2a348..52e25330087f452a8f0d499bae8c6668dec28572 100644 (file)
@@ -1,3 +1,8 @@
+2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/sve/vcond_17.c: New test.
+       * gcc.target/aarch64/sve/vcond_17_run.c: Likewise.
+
 2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/sve/spill_4.c: Expect all ptrues to be .Bs.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c
new file mode 100644 (file)
index 0000000..cabcfa7
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B)) /* eq .. ogt: plain C comparison operators.  */
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B)) /* ordered .. ugt: built on the __builtin_is* comparison builtins.  */
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B)) /* nueq .. nugt: logical negations of ueq .. ugt above.  */
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define DEF_LOOP(CMP, EXPECT_INVALID) /* Defines three loops per CMP; EXPECT_INVALID is unused here.  */                                 \
+  void __attribute__ ((noinline, noclone)) /* Compare a[i] with b[i].  */                            \
+  test_##CMP##_var (__fp16 *restrict dest, __fp16 *restrict src,       \
+                   __fp16 fallback, __fp16 *restrict a,                \
+                   __fp16 *restrict b, int count)                      \
+  {                                                                    \
+    for (int i = 0; i < count; ++i)                                    \
+      dest[i] = CMP (a[i], b[i]) ? src[i] : fallback;                  \
+  }                                                                    \
+                                                                       \
+  void __attribute__ ((noinline, noclone)) /* Compare a[i] with a constant zero.  */                            \
+  test_##CMP##_zero (__fp16 *restrict dest,  __fp16 *restrict src,     \
+                    __fp16 fallback, __fp16 *restrict a,               \
+                    int count)                                         \
+  {                                                                    \
+    for (int i = 0; i < count; ++i)                                    \
+      dest[i] = CMP (a[i], (__fp16) 0) ? src[i] : fallback;            \
+  }                                                                    \
+                                                                       \
+  void __attribute__ ((noinline, noclone)) /* Compare a[i] with scalar b; pick one of two scalars.  */                            \
+  test_##CMP##_sel (__fp16 *restrict dest, __fp16 if_true,             \
+                   __fp16 if_false, __fp16 *restrict a,                \
+                   __fp16 b, int count)                                \
+  {                                                                    \
+    for (int i = 0; i < count; ++i)                                    \
+      dest[i] = CMP (a[i], b) ? if_true : if_false;                    \
+  }
+
+#define TEST_ALL(T) /* Apply T (CMP, EXPECT_INVALID) to every comparison macro.  */                           \
+  T (eq, 0)                                    \
+  T (ne, 0)                                    \
+  T (olt, 1) /* Only olt, ole, oge and ogt pass 1 as EXPECT_INVALID.  */                                  \
+  T (ole, 1)                                   \
+  T (oge, 1)                                   \
+  T (ogt, 1)                                   \
+  T (ordered, 0)                               \
+  T (unordered, 0)                             \
+  T (ueq, 0)                                   \
+  T (ult, 0)                                   \
+  T (ule, 0)                                   \
+  T (uge, 0)                                   \
+  T (ugt, 0)                                   \
+  T (nueq, 0)                                  \
+  T (nult, 0)                                  \
+  T (nule, 0)                                  \
+  T (nuge, 0)                                  \
+  T (nugt, 0)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */
+/* { dg-final { scan-assembler {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c
new file mode 100644 (file)
index 0000000..4a228c8
--- /dev/null
@@ -0,0 +1,54 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#include <fenv.h>
+
+#include "vcond_17.c"
+
+#define N 401
+
+#define TEST_LOOP(CMP, EXPECT_INVALID) /* Run the three CMP loops, then check flags and results.  */                        \
+  {                                                            \
+    __fp16 dest1[N], dest2[N], dest3[N], src[N];               \
+    __fp16 a[N], b[N];                                         \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       src[i] = i * i;                                         \
+       if (i % 5 == 0)                                         \
+         a[i] = 0;                                             \
+       else if (i % 3)                                         \
+         a[i] = i * 0.1;                                       \
+       else                                                    \
+         a[i] = i;                                             \
+       if (i % 7 == 0)                                         \
+         b[i] = __builtin_nan (""); /* Every seventh b[i] is a NaN.  */                           \
+       else if (i % 6)                                         \
+         b[i] = i * 0.1;                                       \
+       else                                                    \
+         b[i] = i;                                             \
+       asm volatile ("" ::: "memory"); /* Memory clobber; presumably keeps the setup loop from being optimized -- confirm.  */                        \
+      }                                                                \
+    feclearexcept (FE_ALL_EXCEPT); /* Clear all flags so only the three calls below are observed.  */                            \
+    test_##CMP##_var (dest1, src, 11, a, b, N);                        \
+    test_##CMP##_zero (dest2, src, 22, a, N);                  \
+    test_##CMP##_sel (dest3, 33, 44, a, 9, N);                 \
+    if (!fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) /* Abort unless FE_INVALID is set exactly when EXPECT_INVALID is nonzero.  */      \
+      __builtin_abort ();                                      \
+    for (int i = 0; i < N; ++i) /* Recompute each expected element with the same CMP macro.  */                                       \
+      {                                                                \
+       if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11))       \
+         __builtin_abort ();                                   \
+       if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22))          \
+         __builtin_abort ();                                   \
+       if (dest3[i] != (CMP (a[i], 9) ? 33 : 44))              \
+         __builtin_abort ();                                   \
+      }                                                                \
+  }
+
+int __attribute__ ((optimize (1))) /* NOTE(review): lower optimization level for main only; presumably keeps the reference checks simple -- confirm.  */
+main (void)
+{
+  TEST_ALL (TEST_LOOP) /* Expands TEST_LOOP once for each comparison listed by TEST_ALL.  */
+  return 0;
+}