From 703bbcdfe9f2a442ecc58366d3fcd0672a14c367 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 25 Jul 2016 15:00:14 +0000 Subject: [PATCH] [AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics gcc/ * config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_, reduc_smin_scal_): Use VDQIF_F16. (reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF. * config/aarch64/aarch64-simd.md (reduc__scal_): Use VHSDF. (aarch64_reduc__internal): Likewise. * config/aarch64/iterators.md (VDQIF_F16): New. (vp): Support HF modes. * config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16, vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New. From-SVN: r238721 --- gcc/ChangeLog | 13 +++++ gcc/config/aarch64/aarch64-simd-builtins.def | 8 ++-- gcc/config/aarch64/aarch64-simd.md | 12 ++--- gcc/config/aarch64/arm_neon.h | 50 ++++++++++++++++++++ gcc/config/aarch64/iterators.md | 7 ++- 5 files changed, 78 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3481f752fc1..e9dd4f36aad 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2016-07-25 Jiong Wang + + * config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_, + reduc_smin_scal_): Use VDQIF_F16. + (reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF. + * config/aarch64/aarch64-simd.md (reduc__scal_): + Use VHSDF. + (aarch64_reduc__internal): Likewise. + * config/aarch64/iterators.md (VDQIF_F16): New. + (vp): Support HF modes. + * config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16, + vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New. + 2016-07-25 Jiong Wang * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b888fd64f04..363e131327d 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -234,12 +234,12 @@ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) /* Implemented by reduc__scal_ (producing scalar). */ - BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) - BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10) + BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10) - BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10) + BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10) /* Implemented by 3. smax variants map to fmaxnm, diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7d2e97f9f01..501858d9ac3 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2130,8 +2130,8 @@ ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). (define_expand "reduc__scal_" [(match_operand: 0 "register_operand") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] - FMAXMINV)] + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] + FMAXMINV)] "TARGET_SIMD" { rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0)); @@ -2178,12 +2178,12 @@ ) (define_insn "aarch64_reduc__internal" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] - FMAXMINV))] + [(set (match_operand:VHSDF 0 "register_operand" "=w") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] + FMAXMINV))] "TARGET_SIMD" "\\t%0, %1." - [(set_attr "type" "neon_fp_reduc_minmax_")] + [(set_attr "type" "neon_fp_reduc_minmax_")] ) ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 8b31e31d600..4382efda8c3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b) return vmulxq_f16 (__a, vdupq_n_f16 (__b)); } +/* ARMv8.2-A FP16 reduction vector intrinsics. */ + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vmaxnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smax_scal_v8hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmv_f16 (float16x4_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v4hf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vminnmvq_f16 (float16x8_t __a) +{ + return __builtin_aarch64_reduc_smin_scal_v8hf (__a); +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 8d4dc6cedd4..011b937105e 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -159,6 +159,8 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI + V4HF V8HF V2SF V4SF V2DF]) ;; Vector modes for S type. (define_mode_iterator VDQ_SI [V2SI V4SI]) @@ -760,8 +762,9 @@ (define_mode_attr vp [(V8QI "v") (V16QI "v") (V4HI "v") (V8HI "v") (V2SI "p") (V4SI "v") - (V2DI "p") (V2DF "p") - (V2SF "p") (V4SF "v")]) + (V2DI "p") (V2DF "p") + (V2SF "p") (V4SF "v") + (V4HF "v") (V8HF "v")]) (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) -- 2.30.2