From: Kyrylo Tkachov Date: Mon, 1 Feb 2021 15:29:13 +0000 (+0000) Subject: aarch64: Reimplement vmovl_high_* intrinsics using builtins X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8bfdf51d8595537937f990947a7a36d3a63dca5f;p=gcc.git aarch64: Reimplement vmovl_high_* intrinsics using builtins The vmovl_high_* intrinsics map down to the SXTL2/UXTL2 instructions that already have appropriately-named patterns and expanders, so it's straightforward to wire them up. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi, vec_unpacku_hi_): Define builtins. * config/aarch64/arm_neon.h (vmovl_high_s8): Reimplement using builtin. (vmovl_high_s16): Likewise. (vmovl_high_s32): Likewise. (vmovl_high_u8): Likewise. (vmovl_high_u16): Likewise. (vmovl_high_u32): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vmovl_high_1.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 441a4564865..3115b73e7db 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -394,6 +394,10 @@ /* Implemented by aarch64_xtn2. */ BUILTIN_VQN (UNOP, xtn2, 0, NONE) + /* Implemented by vec_unpack_hi_. */ + BUILTIN_VQW (UNOP, vec_unpacks_hi_, 10, NONE) + BUILTIN_VQW (UNOPU, vec_unpacku_hi_, 10, NONE) + /* Implemented by aarch64_reduc_plus_. */ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, NONE) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0911ddbb387..691c0c05603 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8125,72 +8125,42 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s8 (int8x16_t __a) { - int16x8_t __result; - __asm__ ("sshll2 %0.8h,%1.16b,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v16qi (__a); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s16 (int16x8_t __a) { - int32x4_t __result; - __asm__ ("sshll2 %0.4s,%1.8h,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v8hi (__a); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_s32 (int32x4_t __a) { - int64x2_t __result; - __asm__ ("sshll2 %0.2d,%1.4s,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacks_hi_v4si (__a); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u8 (uint8x16_t __a) { - uint16x8_t __result; - __asm__ ("ushll2 %0.8h,%1.16b,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v16qi_uu (__a); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u16 (uint16x8_t __a) { - uint32x4_t __result; - __asm__ ("ushll2 %0.4s,%1.8h,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v8hi_uu (__a); } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmovl_high_u32 (uint32x4_t __a) { - uint64x2_t __result; - __asm__ ("ushll2 %0.2d,%1.4s,#0" - : "=w"(__result) - : "w"(__a) - : /* No clobbers */); - return __result; + return __builtin_aarch64_vec_unpacku_hi_v4si_uu (__a); } __extension__ extern __inline int16x8_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c new file mode 100644 index 00000000000..d45bb83e350 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmovl_high_1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include + +#include + +#define FUNC(IT, OT, S) \ +OT \ +foo_##S (IT a) \ +{ \ + return vmovl_high_##S (a); \ +} + +FUNC (int8x16_t, int16x8_t, s8) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.8h, v0\.16b} 1} } */ + +FUNC (int16x8_t, int32x4_t, s16) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.4s, v0\.8h} 1} } */ + +FUNC (int32x4_t, int64x2_t, s32) +/* { dg-final { scan-assembler-times {sxtl2\tv0\.2d, v0\.4s} 1} } */ + +FUNC (uint8x16_t, uint16x8_t, u8) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.8h, v0\.16b} 1} } */ + +FUNC (uint16x8_t, uint32x4_t, u16) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.4s, v0\.8h} 1} } */ + +FUNC (uint32x4_t, uint64x2_t, u32) +/* { dg-final { scan-assembler-times {uxtl2\tv0\.2d, v0\.4s} 1} } */ +