AArch64: Implement missing p128<->f64 reinterpret intrinsics
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 23 Sep 2020 16:37:58 +0000 (17:37 +0100)
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 23 Sep 2020 16:37:58 +0000 (17:37 +0100)
This patch implements the missing reinterprets to and from poly128_t and
float64x2_t.
I've plugged in the appropriate testing in the advsimd-intrinsics.exp
too.

Bootstrapped and tested on aarch64-none-linux-gnu.
Tested advsimd-intrinsics.exp on arm-none-eabi too to make sure arm
testing isn't affected.

gcc/
PR target/71233
* config/aarch64/arm_neon.h (vreinterpretq_f64_p128,
vreinterpretq_p128_f64): Define.

gcc/testsuite/
PR target/71233
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(clean_results): Add float64x2_t cleanup.
(DECL_VARIABLE_128BITS_VARIANTS): Add float64x2_t variable.
* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: Add
testing of vreinterpretq_f64_p128, vreinterpretq_p128_f64.

gcc/config/aarch64/arm_neon.h
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c

index b3c9b64201424d5b7aa8b34e8a6b548567812070..9a970e7ed1e3c394e30ae1e2aade734d3d6fa483 100644 (file)
@@ -6088,6 +6088,20 @@ vreinterpretq_u32_p128 (poly128_t __a)
   return (uint32x4_t)__a;
 }
 
+__extension__ extern __inline float64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f64_p128 (poly128_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f64 (float64x2_t __a)
+{
+  return (poly128_t) __a;
+}
+
 /* vset_lane  */
 
 __extension__ extern __inline float16x4_t
index fde6029b7fc1d2ccd9efad8f9abd043e12b78b37..791972c737e7605ed13ac973da07616a91222a27 100644 (file)
@@ -460,6 +460,8 @@ static void clean_results (void)
 #endif
   CLEAN(result, float, 32, 4);
 
+  AARCH64_ONLY(CLEAN(result, float, 64, 2));
+
 #if defined(__aarch64__)
   /* On AArch64, make sure to return DefaultNaN to have the same
      results as on AArch32.  */
@@ -544,7 +546,8 @@ static void clean_results (void)
   DECL_VARIABLE(VAR, poly, 16, 8);             \
   DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);      \
   DECL_VARIABLE(VAR, float, 16, 8);            \
-  DECL_VARIABLE(VAR, float, 32, 4)
+  DECL_VARIABLE(VAR, float, 32, 4);            \
+  AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2))
 #else
 #define DECL_VARIABLE_128BITS_VARIANTS(VAR)    \
   DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);  \
@@ -552,7 +555,8 @@ static void clean_results (void)
   DECL_VARIABLE(VAR, poly, 8, 16);             \
   DECL_VARIABLE(VAR, poly, 16, 8);             \
   DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);      \
-  DECL_VARIABLE(VAR, float, 32, 4)
+  DECL_VARIABLE(VAR, float, 32, 4);            \
+  AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2))
 #endif
 /* Declare all variants.  */
 #define DECL_VARIABLE_ALL_VARIANTS(VAR)                \
index 25b348223f3dbffa14176d5b1be9d80e63561381..67f809c706516ee2d2f5d97ef7b70e2dfdab7595 100644 (file)
@@ -33,6 +33,10 @@ VECT_VAR_DECL(vreint_expected_q_p128_f32,poly,64,2) [] = { 0xc1700000c1800000,
                                                           0xc1500000c1600000 };
 VECT_VAR_DECL(vreint_expected_q_p128_f16,poly,64,2) [] = { 0xca80cb00cb80cc00,
                                                           0xc880c900c980ca00 };
+#ifdef __aarch64__
+VECT_VAR_DECL(vreint_expected_q_p128_f64,poly,64,2) [] = { 0xc030000000000000,
+                                                          0xc02e000000000000 };
+#endif
 
 /* Expected results: vreinterpretq_*_p128.  */
 VECT_VAR_DECL(vreint_expected_q_s8_p128,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff,
@@ -75,6 +79,10 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
                                                             0xffff, 0xffff,
                                                             0xfff1, 0xffff,
                                                             0xffff, 0xffff };
+#ifdef __aarch64__
+VECT_VAR_DECL(vreint_expected_q_f64_p128,hfloat,64,2) [] = { 0xfffffffffffffff0,
+                                                            0xfffffffffffffff1 };
+#endif
 
 int main (void)
 {
@@ -90,6 +98,10 @@ int main (void)
 #endif
   VLOAD(vreint_vector, buffer, q, float, f, 32, 4);
 
+#ifdef __aarch64__
+  VLOAD(vreint_vector, buffer, q, float, f, 64, 2);
+#endif
+
   /* vreinterpretq_p128_* tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VREINTERPRETQ_P128_*"
@@ -121,6 +133,10 @@ int main (void)
 #endif
   TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32);
 
+#ifdef __aarch64__
+  TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 64, 2, vreint_expected_q_p128_f64);
+#endif
+
   /* vreinterpretq_*_p128 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VREINTERPRETQ_*_P128"
@@ -161,5 +177,8 @@ int main (void)
 #endif
   TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128);
 
+#ifdef __aarch64__
+  TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 64, 2, poly, p, 128, 1, vreint_expected_q_f64_p128);
+#endif
   return 0;
 }