From 30957092db46d8798e632feefb5df634488dbb33 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov
Date: Tue, 22 Sep 2020 12:03:49 +0100
Subject: [PATCH] AArch64: Implement missing vcls intrinsics on unsigned types

This patch implements some missing intrinsics that perform a CLS on
unsigned SIMD types.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/
	PR target/71233
	* config/aarch64/arm_neon.h (vcls_u8, vcls_u16, vcls_u32,
	vclsq_u8, vclsq_u16, vclsq_u32): Define.

gcc/testsuite/
	PR target/71233
	* gcc.target/aarch64/simd/vcls_unsigned_1.c: New test.
---
 gcc/config/aarch64/arm_neon.h                 | 42 +++++++++++++++
 .../gcc.target/aarch64/simd/vcls_unsigned_1.c | 54 +++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index caeba10d435..341019bc648 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -14075,6 +14075,48 @@ vclsq_s32 (int32x4_t __a)
   return __builtin_aarch64_clrsbv4si (__a);
 }
 
+__extension__ extern __inline int8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u8 (uint8x8_t __a)
+{
+  return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a);
+}
+
+__extension__ extern __inline int16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u16 (uint16x4_t __a)
+{
+  return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u32 (uint32x2_t __a)
+{
+  return __builtin_aarch64_clrsbv2si ((int32x2_t) __a);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u8 (uint8x16_t __a)
+{
+  return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u16 (uint16x8_t __a)
+{
+  return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u32 (uint32x4_t __a)
+{
+  return __builtin_aarch64_clrsbv4si ((int32x4_t) __a);
+}
+
 /* vclz.  */
 
 __extension__ extern __inline int8x8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c
new file mode 100644
index 00000000000..f7078d1a67c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int16x8_t
+test_16x8 (uint16x8_t a)
+{
+  return vclsq_u16 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+
+
+int8x16_t
+test_8x16 (uint8x16_t a)
+{
+  return vclsq_u8 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+
+int32x4_t
+test_32x4 (uint32x4_t a)
+{
+  return vclsq_u32 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+int16x4_t
+test_16x4 (uint16x4_t a)
+{
+  return vcls_u16 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+
+int8x8_t
+test_8x8 (uint8x8_t a)
+{
+  return vcls_u8 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+
+int32x2_t
+test32x2 (uint32x2_t a)
+{
+  return vcls_u32 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+
-- 
2.30.2