From: Felix Yang Date: Mon, 19 Jan 2015 13:22:41 +0000 (+0000) Subject: aarch64-simd.md (aarch64_p): New pattern. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7abab3d1d054c66bbec22a20405fcd6d4cebe028;p=gcc.git aarch64-simd.md (aarch64_p): New pattern. * config/aarch64/aarch64-simd.md (aarch64_p): New pattern. * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, uminp, smax_nanp, smin_nanp): New builtins. * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64, vpminnms_f32): Rewrite using builtin functions. From-SVN: r219840 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e7f4bc8c986..807d7d09d9c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2015-01-19 Felix Yang + + * config/aarch64/aarch64-simd.md (aarch64_p): New + pattern. + * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, + uminp, smax_nanp, smin_nanp): New builtins. + * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, + vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, + vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, + vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, + vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, + vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, + vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, + vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64, + vpminnms_f32): Rewrite using builtin functions. + 2015-01-19 Thomas Schwinge PR libgomp/64625 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b41d9f6d07e..1a1520c465b 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -250,6 +250,16 @@ BUILTIN_VDQF (BINOP, smax_nan, 3) BUILTIN_VDQF (BINOP, smin_nan, 3) + /* Implemented by aarch64_p. */ + BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) + BUILTIN_VDQ_BHSI (BINOP, sminp, 0) + BUILTIN_VDQ_BHSI (BINOP, umaxp, 0) + BUILTIN_VDQ_BHSI (BINOP, uminp, 0) + BUILTIN_VDQF (BINOP, smaxp, 0) + BUILTIN_VDQF (BINOP, sminp, 0) + BUILTIN_VDQF (BINOP, smax_nanp, 0) + BUILTIN_VDQF (BINOP, smin_nanp, 0) + /* Implemented by 2. */ BUILTIN_VDQF (UNOP, btrunc, 2) BUILTIN_VDQF (UNOP, ceil, 2) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9651b99c2b1..968f5b2b1cb 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -997,6 +997,28 @@ DONE; }) +;; Pairwise Integer Max/Min operations. +(define_insn "aarch64_p" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "p\t%0., %1., %2." + [(set_attr "type" "neon_minmax")] +) + +;; Pairwise FP Max/Min operations. +(define_insn "aarch64_p" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "p\t%0., %1., %2." + [(set_attr "type" "neon_minmax")] +) + ;; vec_concat gives a new vector with the low elements from operand 1, and ;; the high elements from operand 2. That is to say, given op1 = { a, b } ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index c679802e9e5..d4ce0b8a02c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8796,490 +8796,6 @@ vpadds_f32 (float32x2_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmax_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fmaxp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpmax_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("smaxp %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpmax_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("smaxp %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpmax_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("smaxp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpmax_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("umaxp %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpmax_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("umaxp %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpmax_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("umaxp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmaxnm_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpmaxnmq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpmaxnmq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpmaxnmqd_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fmaxnmp %d0,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmaxnms_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fmaxnmp %s0,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpmaxq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fmaxp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpmaxq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmaxp %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vpmaxq_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("smaxp %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpmaxq_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("smaxp %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpmaxq_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("smaxp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vpmaxq_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("umaxp %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpmaxq_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("umaxp %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpmaxq_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("umaxp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpmaxqd_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fmaxp %d0,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmaxs_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fmaxp %s0,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpmin_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fminp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vpmin_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("sminp %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vpmin_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("sminp %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vpmin_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("sminp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vpmin_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uminp %0.8b, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vpmin_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uminp %0.4h, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vpmin_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uminp %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vpminnm_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fminnmp %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpminnmq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fminnmp %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpminnmq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fminnmp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpminnmqd_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fminnmp %d0,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpminnms_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fminnmp %s0,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vpminq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fminp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vpminq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fminp %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vpminq_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("sminp %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vpminq_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("sminp %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vpminq_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("sminp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vpminq_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uminp %0.16b, %1.16b, %2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vpminq_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uminp %0.8h, %1.8h, %2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vpminq_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uminp %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vpminqd_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fminp %d0,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vpmins_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fminp %s0,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vqdmulh_n_s16 (int16x4_t a, int16_t b) { @@ -17928,6 +17444,290 @@ vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) (int32x4_t) __b); } +/* vpmax */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t a, int8x8_t b) +{ + return __builtin_aarch64_smaxpv8qi (a, b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmax_s16 (int16x4_t a, int16x4_t b) +{ + return __builtin_aarch64_smaxpv4hi (a, b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t a, int32x2_t b) +{ + return __builtin_aarch64_smaxpv2si (a, b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t a, uint8x8_t b) +{ + return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a, + (int8x8_t) b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t a, uint16x4_t b) +{ + return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a, + (int16x4_t) b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t a, uint32x2_t b) +{ + return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a, + (int32x2_t) b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpmaxq_s8 (int8x16_t a, int8x16_t b) +{ + return __builtin_aarch64_smaxpv16qi (a, b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpmaxq_s16 (int16x8_t a, int16x8_t b) +{ + return __builtin_aarch64_smaxpv8hi (a, b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpmaxq_s32 (int32x4_t a, int32x4_t b) +{ + return __builtin_aarch64_smaxpv4si (a, b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpmaxq_u8 (uint8x16_t a, uint8x16_t b) +{ + return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a, + (int8x16_t) b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpmaxq_u16 (uint16x8_t a, uint16x8_t b) +{ + return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a, + (int16x8_t) b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpmaxq_u32 (uint32x4_t a, uint32x4_t b) +{ + return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a, + (int32x4_t) b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t a, float32x2_t b) +{ + return __builtin_aarch64_smax_nanpv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxq_f32 (float32x4_t a, float32x4_t b) +{ + return __builtin_aarch64_smax_nanpv4sf (a, b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxq_f64 (float64x2_t a, float64x2_t b) +{ + return __builtin_aarch64_smax_nanpv2df (a, b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxqd_f64 (float64x2_t a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v2df (a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxs_f32 (float32x2_t a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a); +} + +/* vpmaxnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmaxnm_f32 (float32x2_t a, float32x2_t b) +{ + return __builtin_aarch64_smaxpv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxnmq_f32 (float32x4_t a, float32x4_t b) +{ + return __builtin_aarch64_smaxpv4sf (a, b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxnmq_f64 (float64x2_t a, float64x2_t b) +{ + return __builtin_aarch64_smaxpv2df (a, b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxnmqd_f64 (float64x2_t a) +{ + return __builtin_aarch64_reduc_smax_scal_v2df (a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxnms_f32 (float32x2_t a) +{ + return __builtin_aarch64_reduc_smax_scal_v2sf (a); +} + +/* vpmin */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmin_s8 (int8x8_t a, int8x8_t b) +{ + return __builtin_aarch64_sminpv8qi (a, b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t a, int16x4_t b) +{ + return __builtin_aarch64_sminpv4hi (a, b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t a, int32x2_t b) +{ + return __builtin_aarch64_sminpv2si (a, b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t a, uint8x8_t b) +{ + return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a, + (int8x8_t) b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t a, uint16x4_t b) +{ + return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a, + (int16x4_t) b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t a, uint32x2_t b) +{ + return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a, + (int32x2_t) b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpminq_s8 (int8x16_t a, int8x16_t b) +{ + return __builtin_aarch64_sminpv16qi (a, b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpminq_s16 (int16x8_t a, int16x8_t b) +{ + return __builtin_aarch64_sminpv8hi (a, b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpminq_s32 (int32x4_t a, int32x4_t b) +{ + return __builtin_aarch64_sminpv4si (a, b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpminq_u8 (uint8x16_t a, uint8x16_t b) +{ + return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a, + (int8x16_t) b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpminq_u16 (uint16x8_t a, uint16x8_t b) +{ + return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a, + (int16x8_t) b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpminq_u32 (uint32x4_t a, uint32x4_t b) +{ + return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a, + (int32x4_t) b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t a, float32x2_t b) +{ + return __builtin_aarch64_smin_nanpv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminq_f32 (float32x4_t a, float32x4_t b) +{ + return __builtin_aarch64_smin_nanpv4sf (a, b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminq_f64 (float64x2_t a, float64x2_t b) +{ + return __builtin_aarch64_smin_nanpv2df (a, b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminqd_f64 (float64x2_t a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v2df (a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmins_f32 (float32x2_t a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a); +} + +/* vpminnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpminnm_f32 (float32x2_t a, float32x2_t b) +{ + return __builtin_aarch64_sminpv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminnmq_f32 (float32x4_t a, float32x4_t b) +{ + return __builtin_aarch64_sminpv4sf (a, b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminnmq_f64 (float64x2_t a, float64x2_t b) +{ + return __builtin_aarch64_sminpv2df (a, b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminnmqd_f64 (float64x2_t a) +{ + return __builtin_aarch64_reduc_smin_scal_v2df (a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpminnms_f32 (float32x2_t a) +{ + return __builtin_aarch64_reduc_smin_scal_v2sf (a); +} + /* vmaxnm */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))