arm: Implement vceqq_p64, vceqz_p64 and vceqzq_p64 intrinsics

author Christophe Lyon <christophe.lyon@linaro.org>

Thu, 15 Oct 2020 17:13:59 +0000 (17:13 +0000)

committer Christophe Lyon <christophe.lyon@linaro.org>

Fri, 15 Jan 2021 12:39:19 +0000 (12:39 +0000)
author Christophe Lyon <christophe.lyon@linaro.org>
Thu, 15 Oct 2020 17:13:59 +0000 (17:13 +0000)
committer Christophe Lyon <christophe.lyon@linaro.org>
Fri, 15 Jan 2021 12:39:19 +0000 (12:39 +0000)
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h

index f99b9396cce3fb83be5a32cde3b43e5a3abcb1e7..dc28b92b5afeaa545571d65cd6d7dc56d4dcf7bc 100644 (file)
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -16912,6 +16912,37 @@ vceq_p64 (poly64x1_t __a, poly64x1_t __b)
    return vreinterpret_u64_u32 (__m);
  }
  
+__extension__ extern __inline uint64x1_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vceqz_p64 (poly64x1_t __a)
+{
+  poly64x1_t __b = vreinterpret_p64_u32 (vdup_n_u32 (0));
+  return vceq_p64 (__a, __b);
+}
+
+/* For vceqq_p64, we rely on vceq_p64 for each of the two elements.  */
+__extension__ extern __inline uint64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vceqq_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+  poly64_t __high_a = vget_high_p64 (__a);
+  poly64_t __high_b = vget_high_p64 (__b);
+  uint64x1_t __high = vceq_p64 (__high_a, __high_b);
+
+  poly64_t __low_a = vget_low_p64 (__a);
+  poly64_t __low_b = vget_low_p64 (__b);
+  uint64x1_t __low = vceq_p64 (__low_a, __low_b);
+  return vcombine_u64 (__low, __high);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vceqzq_p64 (poly64x2_t __a)
+{
+  poly64x2_t __b = vreinterpretq_p64_u32 (vdupq_n_u32 (0));
+  return vceqq_p64 (__a, __b);
+}
+
  /* The vtst_p64 intrinsic does not map to a single instruction.
     We emulate it in way similar to vceq_p64 above but here we do
     a reduction with max since if any two corresponding bits
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c

index a3210a94b1dcd00ea8b9f687a7805386ef5c0b08..6aed096fc6f3985d5ab15ed6378d75a7e8a89d7a 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -16,6 +16,11 @@ VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
  
  /* Expected results: vceq.  */
  VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(vceq_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+
+/* Expected results: vceqz.  */
+VECT_VAR_DECL(vceqz_expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(vceqz_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
  
  /* Expected results: vcombine.  */
  VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };
@@ -213,7 +218,7 @@ int main (void)
  
    /* vceq_p64 tests. */
  #undef TEST_MSG
-#define TEST_MSG "VCEQ"
+#define TEST_MSG "VCEQ/VCEQQ"
  
  #define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)                         \
    VECT_VAR(vceq_vector_res, T3, W, N) =                                        \
@@ -227,16 +232,55 @@ int main (void)
    DECL_VARIABLE(vceq_vector, poly, 64, 1);
    DECL_VARIABLE(vceq_vector2, poly, 64, 1);
    DECL_VARIABLE(vceq_vector_res, uint, 64, 1);
+  DECL_VARIABLE(vceq_vector, poly, 64, 2);
+  DECL_VARIABLE(vceq_vector2, poly, 64, 2);
+  DECL_VARIABLE(vceq_vector_res, uint, 64, 2);
  
    CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
  
    VLOAD(vceq_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vceq_vector, buffer, q, poly, p, 64, 2);
  
    VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);
+  VSET_LANE(vceq_vector2, q, poly, p, 64, 2, 0, 0x88);
+  VSET_LANE(vceq_vector2, q, poly, p, 64, 2, 1, 0xFFFFFFFFFFFFFFF1);
  
    TEST_VCOMP(vceq, , poly, p, uint, 64, 1);
+  TEST_VCOMP(vceq, q, poly, p, uint, 64, 2);
  
    CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vceq_expected, "");
+
+  /* vceqz_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VCEQZ/VCEQZQ"
+
+#define TEST_VCOMPZ1(INSN, Q, T1, T2, T3, W, N)                                \
+  VECT_VAR(vceqz_vector_res, T3, W, N) =                               \
+    INSN##Q##_##T2##W(VECT_VAR(vceqz_vector, T1, W, N));               \
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceqz_vector_res, T3, W, N))
+
+#define TEST_VCOMPZ(INSN, Q, T1, T2, T3, W, N)                         \
+  TEST_VCOMPZ1(INSN, Q, T1, T2, T3, W, N)
+
+  DECL_VARIABLE(vceqz_vector, poly, 64, 1);
+  DECL_VARIABLE(vceqz_vector_res, uint, 64, 1);
+  DECL_VARIABLE(vceqz_vector, poly, 64, 2);
+  DECL_VARIABLE(vceqz_vector_res, uint, 64, 2);
+
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
+
+  VLOAD(vceqz_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vceqz_vector, buffer, q, poly, p, 64, 2);
+  VSET_LANE(vceqz_vector, q, poly, p, 64, 2, 1, 0);
+
+  TEST_VCOMPZ(vceqz, , poly, p, uint, 64, 1);
+  TEST_VCOMPZ(vceqz, q, poly, p, uint, 64, 2);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceqz_expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vceqz_expected, "");
  
    /* vcombine_p64 tests.  */
  #undef TEST_MSG
author	Christophe Lyon <christophe.lyon@linaro.org>
	Thu, 15 Oct 2020 17:13:59 +0000 (17:13 +0000)
committer	Christophe Lyon <christophe.lyon@linaro.org>
	Fri, 15 Jan 2021 12:39:19 +0000 (12:39 +0000)
gcc/config/arm/arm_neon.h		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c		patch \| blob \| history