+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+
+ * config/aarch64/iterators.md (SVE_INT_UNARY): Add clrsb and clz.
+ (optab, sve_int_op): Handle them.
+ * config/aarch64/aarch64-sve.md: Expand comment.
+
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/predicates.md (const_1_to_3_operand): New predicate.
;; -------------------------------------------------------------------------
;; Includes:
;; - ABS
+;; - CLS (= clrsb)
+;; - CLZ
;; - CNT (= popcount)
;; - NEG
;; - NOT
(define_code_iterator FAC_COMPARISONS [lt le ge gt])
;; SVE integer unary operations.
-(define_code_iterator SVE_INT_UNARY [abs neg not popcount])
+(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount])
;; SVE integer binary operations.
(define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
(unsigned_fix "fixuns")
(float "float")
(unsigned_float "floatuns")
+ (clrsb "clrsb")
+ (clz "clz")
(popcount "popcount")
(and "and")
(ior "ior")
(ior "orr")
(xor "eor")
(not "not")
+ (clrsb "cls")
+ (clz "clz")
(popcount "cnt")])
(define_code_attr sve_int_op_rev [(plus "add")
+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/vect-clz.c: Force SVE off.
+ * gcc.target/aarch64/sve/clrsb_1.c: New test.
+ * gcc.target/aarch64/sve/clrsb_1_run.c: Likewise.
+ * gcc.target/aarch64/sve/clz_1.c: Likewise.
+ * gcc.target/aarch64/sve/clz_1_run.c: Likewise.
+
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/adr_1.c: New test.
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+/* Set DST[I] to __builtin_clrsb (SRC[I]) for each I in [0, SIZE).
+   The scan-assembler-times directives at the end of this file expect
+   fixed instruction counts, so changing the shape of this loop may
+   change what they match.  */
+void __attribute__ ((noinline, noclone))
+clrsb_32 (unsigned int *restrict dst, uint32_t *restrict src, int size)
+{
+ for (int i = 0; i < size; ++i)
+ dst[i] = __builtin_clrsb (src[i]);
+}
+/* Set DST[I] to __builtin_clrsbll (SRC[I]) for each I in [0, SIZE).
+   The inputs are 64 bits wide but each result is stored in an
+   unsigned int.  */
+void __attribute__ ((noinline, noclone))
+clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, int size)
+{
+ for (int i = 0; i < size; ++i)
+ dst[i] = __builtin_clrsbll (src[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "clrsb_1.c"
+
+extern void abort (void) __attribute__ ((noreturn));
+/* Test vectors, stored as { input, expected } pairs: main passes each
+   even-indexed entry to clrsb_32 and compares the result with the
+   odd-indexed entry that follows it.  main also reuses the same pairs
+   for clrsb_64 after shifting the input left by 32 bits.  */
+unsigned int data[] = {
+ 0xffffff80, 24,
+ 0xffffffff, 31,
+ 0x00000000, 31,
+ 0x80000000, 0,
+ 0x7fffffff, 0,
+ 0x000003ff, 21,
+ 0x1fffffff, 2,
+ 0x0000ffff, 15,
+ 0xffff0000, 15
+};
+/* Run clrsb_32 and clrsb_64 over the inputs in DATA and call abort on
+   the first result that differs from its expected value.  Return 0 if
+   every result matches.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ unsigned int count = sizeof (data) / sizeof (data[0]) / 2;  /* Pairs in DATA.  */
+ /* 32-bit case: the input is DATA[I * 2] and the expected result is
+    DATA[I * 2 + 1].  NOTE(review): the empty asm with a "memory"
+    clobber presumably keeps the compiler from optimizing the setup
+    loop together with the call under test; confirm.  */
+ uint32_t in32[count];
+ unsigned int out32[count];
+ for (unsigned int i = 0; i < count; ++i)
+ {
+ in32[i] = data[i * 2];
+ asm volatile ("" ::: "memory");
+ }
+ clrsb_32 (out32, in32, count);
+ for (unsigned int i = 0; i < count; ++i)
+ if (out32[i] != data[i * 2 + 1])
+ abort ();
+ /* 64-bit case: the input is DATA[I * 2] moved into the upper 32 bits.
+    The expected result is DATA[I * 2 + 1], except that a zero input
+    is expected to give 63.  */
+ uint64_t in64[count];
+ unsigned int out64[count];
+ for (unsigned int i = 0; i < count; ++i)
+ {
+ in64[i] = (uint64_t) data[i * 2] << 32;
+ asm volatile ("" ::: "memory");
+ }
+ clrsb_64 (out64, in64, count);
+ for (unsigned int i = 0; i < count; ++i)
+ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] : 63))
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+/* Set DST[I] to __builtin_clz (SRC[I]) for each I in [0, SIZE).
+   The scan-assembler-times directives at the end of this file expect
+   fixed instruction counts, so changing the shape of this loop may
+   change what they match.  */
+void __attribute__ ((noinline, noclone))
+clz_32 (unsigned int *restrict dst, uint32_t *restrict src, int size)
+{
+ for (int i = 0; i < size; ++i)
+ dst[i] = __builtin_clz (src[i]);
+}
+/* Set DST[I] to __builtin_clzll (SRC[I]) for each I in [0, SIZE).
+   The inputs are 64 bits wide but each result is stored in an
+   unsigned int.  */
+void __attribute__ ((noinline, noclone))
+clz_64 (unsigned int *restrict dst, uint64_t *restrict src, int size)
+{
+ for (int i = 0; i < size; ++i)
+ dst[i] = __builtin_clzll (src[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "clz_1.c"
+
+extern void abort (void) __attribute__ ((noreturn));
+/* Test vectors, stored as { input, expected } pairs: main passes each
+   even-indexed entry to clz_32 and compares the result with the
+   odd-indexed entry that follows it.  main also reuses the same pairs
+   for clz_64 after shifting the input left by 10 bits.
+   NOTE(review): GCC documents __builtin_clz and __builtin_clzll as
+   undefined for a zero argument, yet the zero entry expects 32 (and
+   main expects 64 in the 64-bit case).  Presumably this relies on the
+   behavior of the target's CLZ instruction; confirm.  */
+unsigned int data[] = {
+ 0xffffff80, 0,
+ 0xffffffff, 0,
+ 0x00000000, 32,
+ 0x80000000, 0,
+ 0x7fffffff, 1,
+ 0x000003ff, 22,
+ 0x1fffffff, 3,
+ 0x0000ffff, 16,
+ 0xffff0000, 0
+};
+/* Run clz_32 and clz_64 over the inputs in DATA and call abort on the
+   first result that differs from its expected value.  Return 0 if
+   every result matches.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ unsigned int count = sizeof (data) / sizeof (data[0]) / 2;  /* Pairs in DATA.  */
+ /* 32-bit case: the input is DATA[I * 2] and the expected result is
+    DATA[I * 2 + 1].  NOTE(review): the empty asm with a "memory"
+    clobber presumably keeps the compiler from optimizing the setup
+    loop together with the call under test; confirm.  */
+ uint32_t in32[count];
+ unsigned int out32[count];
+ for (unsigned int i = 0; i < count; ++i)
+ {
+ in32[i] = data[i * 2];
+ asm volatile ("" ::: "memory");
+ }
+ clz_32 (out32, in32, count);
+ for (unsigned int i = 0; i < count; ++i)
+ if (out32[i] != data[i * 2 + 1])
+ abort ();
+ /* 64-bit case: the input is DATA[I * 2] zero-extended to 64 bits and
+    shifted left by 10, which adds 32 - 10 = 22 leading zeros to a
+    nonzero value.  A zero input is expected to give 64.  */
+ uint64_t in64[count];
+ unsigned int out64[count];
+ for (unsigned int i = 0; i < count; ++i)
+ {
+ in64[i] = (uint64_t) data[i * 2] << 10;
+ asm volatile ("" ::: "memory");
+ }
+ clz_64 (out64, in64, count);
+ for (unsigned int i = 0; i < count; ++i)
+ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] + 22 : 64))
+ abort ();
+
+ return 0;
+}
/* { dg-do run } */
/* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */
+#pragma GCC target "+nosve"
+
extern void abort ();
void