aarch64: Reimplement vmovl_high_* intrinsics using builtins
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 1 Feb 2021 15:29:13 +0000 (15:29 +0000)
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 1 Feb 2021 16:45:05 +0000 (16:45 +0000)
The vmovl_high_* intrinsics map down to the SXTL2/UXTL2 instructions
that already have appropriately-named patterns and expanders,
so it's straightforward to wire them up.

gcc/ChangeLog:

* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi,
vec_unpacku_hi_): Define builtins.
* config/aarch64/arm_neon.h (vmovl_high_s8): Reimplement using
builtin.
(vmovl_high_s16): Likewise.
(vmovl_high_s32): Likewise.
(vmovl_high_u8): Likewise.
(vmovl_high_u16): Likewise.
(vmovl_high_u32): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/simd/vmovl_high_1.c: New test.

gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/arm_neon.h
gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c [new file with mode: 0644]

index 441a45648655db1f1b4460f9bf3998554b83e6c4..3115b73e7db3282e7e758573486dd71d0b39e570 100644 (file)
   /* Implemented by aarch64_xtn2<mode>.  */
   BUILTIN_VQN (UNOP, xtn2, 0, NONE)
 
+  /* Implemented by vec_unpack<su>_hi_<mode>.  */
+  BUILTIN_VQW (UNOP, vec_unpacks_hi_, 10, NONE)
+  BUILTIN_VQW (UNOPU, vec_unpacku_hi_, 10, NONE)
+
   /* Implemented by aarch64_reduc_plus_<mode>.  */
   BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, NONE)
 
index 0911ddbb387df8372621fe5f5ead91a29daefb14..691c0c056031d7330607977f56ce5bbaf3496749 100644 (file)
@@ -8125,72 +8125,42 @@ __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_s8 (int8x16_t __a)
 {
-  int16x8_t __result;
-  __asm__ ("sshll2 %0.8h,%1.16b,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacks_hi_v16qi (__a);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_s16 (int16x8_t __a)
 {
-  int32x4_t __result;
-  __asm__ ("sshll2 %0.4s,%1.8h,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacks_hi_v8hi (__a);
 }
 
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_s32 (int32x4_t __a)
 {
-  int64x2_t __result;
-  __asm__ ("sshll2 %0.2d,%1.4s,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacks_hi_v4si (__a);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_u8 (uint8x16_t __a)
 {
-  uint16x8_t __result;
-  __asm__ ("ushll2 %0.8h,%1.16b,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacku_hi_v16qi_uu (__a);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_u16 (uint16x8_t __a)
 {
-  uint32x4_t __result;
-  __asm__ ("ushll2 %0.4s,%1.8h,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacku_hi_v8hi_uu (__a);
 }
 
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmovl_high_u32 (uint32x4_t __a)
 {
-  uint64x2_t __result;
-  __asm__ ("ushll2 %0.2d,%1.4s,#0"
-           : "=w"(__result)
-           : "w"(__a)
-           : /* No clobbers */);
-  return __result;
+  return __builtin_aarch64_vec_unpacku_hi_v4si_uu (__a);
 }
 
 __extension__ extern __inline int16x8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c
new file mode 100644 (file)
index 0000000..d45bb83
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#include <arm_neon.h>
+
+#define FUNC(IT, OT, S)         \
+OT                              \
+foo_##S (IT a)                  \
+{                               \
+  return vmovl_high_##S (a);    \
+}
+
+FUNC (int8x16_t, int16x8_t, s8)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.8h, v0\.16b} 1} }  */
+
+FUNC (int16x8_t, int32x4_t, s16)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.4s, v0\.8h} 1} }  */
+
+FUNC (int32x4_t, int64x2_t, s32)
+/* { dg-final { scan-assembler-times {sxtl2\tv0\.2d, v0\.4s} 1} }  */
+
+FUNC (uint8x16_t, uint16x8_t, u8)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.8h, v0\.16b} 1} }  */
+
+FUNC (uint16x8_t, uint32x4_t, u16)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.4s, v0\.8h} 1} }  */
+
+FUNC (uint32x4_t, uint64x2_t, u32)
+/* { dg-final { scan-assembler-times {uxtl2\tv0\.2d, v0\.4s} 1} }  */
+