+2014-12-07 Felix Yang <felix.yang@huawei.com>
+ Shanyao Chen <chenshanyao@huawei.com>
+
+ * config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
+ patterns.
+ * config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
+ builtins.
+ * config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
+ vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
+ vcntq_u8): Rewrite using builtin functions.
+
2014-12-07 Jan Hubicka <hubicka@ucw.cz>
* symtab.c (symtab_node::equal_address_to): New function.
DONE;
})
+;; Vector clrsb over the VDQ_BHSI modes.  Enabled under TARGET_SIMD and
+;; output as a single CLS using the <Vtype> arrangement for both the
+;; destination and the source SIMD register.
+(define_insn "clrsb<mode>2"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+ (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "cls\\t%0.<Vtype>, %1.<Vtype>"
+ [(set_attr "type" "neon_cls<q>")]
+)
+
(define_insn "clz<mode>2"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
[(set_attr "type" "neon_cls<q>")]
)
+;; Vector popcount over the VB modes.  Enabled under TARGET_SIMD and
+;; output as a single CNT using the <Vbtype> arrangement for both the
+;; destination and the source SIMD register.
+(define_insn "popcount<mode>2"
+ [(set (match_operand:VB 0 "register_operand" "=w")
+ (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
+ [(set_attr "type" "neon_cnt<q>")]
+)
+
;; 'across lanes' max and min ops.
;; Template for outputting a scalar, so we can create __builtins which can be
return result;
}
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vcls_s8 (int8x8_t a)
-{
- int8x8_t result;
- __asm__ ("cls %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vcls_s16 (int16x4_t a)
-{
- int16x4_t result;
- __asm__ ("cls %0.4h,%1.4h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vcls_s32 (int32x2_t a)
-{
- int32x2_t result;
- __asm__ ("cls %0.2s,%1.2s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vclsq_s8 (int8x16_t a)
-{
- int8x16_t result;
- __asm__ ("cls %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vclsq_s16 (int16x8_t a)
-{
- int16x8_t result;
- __asm__ ("cls %0.8h,%1.8h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vclsq_s32 (int32x4_t a)
-{
- int32x4_t result;
- __asm__ ("cls %0.4s,%1.4s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vcnt_p8 (poly8x8_t a)
-{
- poly8x8_t result;
- __asm__ ("cnt %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vcnt_s8 (int8x8_t a)
-{
- int8x8_t result;
- __asm__ ("cnt %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcnt_u8 (uint8x8_t a)
-{
- uint8x8_t result;
- __asm__ ("cnt %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vcntq_p8 (poly8x16_t a)
-{
- poly8x16_t result;
- __asm__ ("cnt %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vcntq_s8 (int8x16_t a)
-{
- int8x16_t result;
- __asm__ ("cnt %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcntq_u8 (uint8x16_t a)
-{
- uint8x16_t result;
- __asm__ ("cnt %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
#define vcopyq_lane_f32(a, b, c, d) \
__extension__ \
({ \
return __a < 0.0 ? -1ll : 0ll;
}
+/* vcls. */
+
+/* Return the result of the v8qi clrsb builtin applied to the int8x8_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcls_s8 (int8x8_t __a)
+{
+ return __builtin_aarch64_clrsbv8qi (__a);
+}
+
+/* Return the result of the v4hi clrsb builtin applied to the int16x4_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcls_s16 (int16x4_t __a)
+{
+ return __builtin_aarch64_clrsbv4hi (__a);
+}
+
+/* Return the result of the v2si clrsb builtin applied to the int32x2_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcls_s32 (int32x2_t __a)
+{
+ return __builtin_aarch64_clrsbv2si (__a);
+}
+
+/* Return the result of the v16qi clrsb builtin applied to the int8x16_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vclsq_s8 (int8x16_t __a)
+{
+ return __builtin_aarch64_clrsbv16qi (__a);
+}
+
+/* Return the result of the v8hi clrsb builtin applied to the int16x8_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vclsq_s16 (int16x8_t __a)
+{
+ return __builtin_aarch64_clrsbv8hi (__a);
+}
+
+/* Return the result of the v4si clrsb builtin applied to the int32x4_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vclsq_s32 (int32x4_t __a)
+{
+ return __builtin_aarch64_clrsbv4si (__a);
+}
+
/* vclz. */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
+/* vcnt. */
+
+/* Return the result of the v8qi popcount builtin applied to the poly8x8_t
+   vector __A.  The builtin is called with an int8x8_t operand, so __A is
+   cast to int8x8_t on the way in and the result is cast back to
+   poly8x8_t.  */
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vcnt_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
+}
+
+/* Return the result of the v8qi popcount builtin applied to the int8x8_t
+   vector __A.  Argument and result are passed through without casts.  */
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcnt_s8 (int8x8_t __a)
+{
+ return __builtin_aarch64_popcountv8qi (__a);
+}
+
+/* Return the result of the v8qi popcount builtin applied to the uint8x8_t
+   vector __A.  The builtin is called with an int8x8_t operand, so __A is
+   cast to int8x8_t on the way in and the result is cast back to
+   uint8x8_t.  */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcnt_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
+}
+
+/* Return the result of the v16qi popcount builtin applied to the
+   poly8x16_t vector __A.  The builtin is called with an int8x16_t operand,
+   so __A is cast to int8x16_t on the way in and the result is cast back
+   to poly8x16_t.  */
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vcntq_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
+}
+
+/* Return the result of the v16qi popcount builtin applied to the
+   int8x16_t vector __A.  Argument and result are passed through without
+   casts.  */
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vcntq_s8 (int8x16_t __a)
+{
+ return __builtin_aarch64_popcountv16qi (__a);
+}
+
+/* Return the result of the v16qi popcount builtin applied to the
+   uint8x16_t vector __A.  The builtin is called with an int8x16_t operand,
+   so __A is cast to int8x16_t on the way in and the result is cast back
+   to uint8x16_t.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcntq_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
+}
+
/* vcvt (double -> float). */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))