AArch64: Implement missing vcls intrinsics on unsigned types
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 22 Sep 2020 11:03:49 +0000 (12:03 +0100)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 22 Sep 2020 11:03:49 +0000 (12:03 +0100)
This patch implements some missing intrinsics that perform a CLS (Count Leading Sign bits) operation on unsigned SIMD types.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/
PR target/71233
* config/aarch64/arm_neon.h (vcls_u8, vcls_u16, vcls_u32,
vclsq_u8, vclsq_u16, vclsq_u32): Define.

gcc/testsuite/
PR target/71233
* gcc.target/aarch64/simd/vcls_unsigned_1.c: New test.

gcc/config/aarch64/arm_neon.h
gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c [new file with mode: 0644]

index caeba10d435393b7a98fcb4946fdc60ada6159ec..341019bc648babcddb0311c36652814609aab22f 100644 (file)
@@ -14075,6 +14075,48 @@ vclsq_s32 (int32x4_t __a)
   return __builtin_aarch64_clrsbv4si (__a);
 }
 
+__extension__ extern __inline int8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u8 (uint8x8_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int8x8_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a);
+}
+
+__extension__ extern __inline int16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u16 (uint16x4_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int16x4_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcls_u32 (uint32x2_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int32x2_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv2si ((int32x2_t) __a);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u8 (uint8x16_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int8x16_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u16 (uint16x8_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int16x8_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vclsq_u32 (uint32x4_t __a) /* CLS (count leading sign bits) on each lane.  */
+{ /* __a is cast to int32x4_t, the type passed to the builtin.  */
+  return __builtin_aarch64_clrsbv4si ((int32x4_t) __a);
+}
+
 /* vclz.  */
 
 __extension__ extern __inline int8x8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c
new file mode 100644 (file)
index 0000000..f7078d1
--- /dev/null
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int16x8_t
+test_16x8 (uint16x8_t a) /* Expect exactly one CLS on .8h (see dg-final).  */
+{
+  return vclsq_u16 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+
+
+int8x16_t
+test_8x16 (uint8x16_t a) /* Expect exactly one CLS on .16b (see dg-final).  */
+{
+  return vclsq_u8 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+
+int32x4_t
+test_32x4 (uint32x4_t a) /* Expect exactly one CLS on .4s (see dg-final).  */
+{
+  return vclsq_u32 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+int16x4_t
+test_16x4 (uint16x4_t a) /* Expect exactly one CLS on .4h (see dg-final).  */
+{
+  return vcls_u16 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+
+int8x8_t
+test_8x8 (uint8x8_t a) /* Expect exactly one CLS on .8b (see dg-final).  */
+{
+  return vcls_u8 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+
+int32x2_t
+test32x2 (uint32x2_t a) /* Expect exactly one CLS on .2s (see dg-final).  */
+{
+  return vcls_u32 (a);
+}
+
+/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+