From: Tamar Christina Date: Tue, 2 Aug 2016 09:25:19 +0000 (+0000) Subject: [PATCH AArch64] Add more AArch64 NEON intrinsics X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1efafef383b156074d4bd5ed35f656a509c7bf7a;p=gcc.git [PATCH AArch64] Add more AArch64 NEON intrinsics Add vmaxnm_f64, vminnm_f64, vmax_f64, vmin_f64. Committed on behalf of Tamar Christina . gcc/ * config/aarch64/aarch64-simd-builtins.def (__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF. (__builtin_aarch64_fmaxdf): Likewise. (__builtin_aarch64_smin_nandf): Likewise. (__builtin_aarch64_smax_nandf): Likewise. * config/aarch64/aarch64-simd.md (3): Remove. * config/aarch64/aarch64.md (3): Rename to... (3): ...this. * config/aarch64/arm_neon.h (vmaxnm_f64): New. (vminnm_f64): Likewise. (vmin_f64): Likewise. (vmax_f64): Likewise. * config/aarch64/iterators.md (FMAXMIN): Merge with... (FMAXMIN_UNS): ...this. (fmaxmin): Merged with (fmaxmin_op): ...this... (maxmin_uns_op): ...in to this. gcc/testsuite/ * gcc.target/aarch64/vminmaxnm.c: New. * gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t tests. From-SVN: r238977 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fa109062803..f2c81b077fb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-08-02 Tamar Christina + + * config/aarch64/aarch64-simd-builtins.def + (__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF. + (__builtin_aarch64_fmaxdf): Likewise. + (__builtin_aarch64_smin_nandf): Likewise. + (__builtin_aarch64_smax_nandf): Likewise. + * config/aarch64/aarch64-simd.md (3): Remove. + * config/aarch64/aarch64.md (3): Rename to... + (3): ...this. + * config/aarch64/arm_neon.h (vmaxnm_f64): New. + (vminnm_f64): Likewise. + (vmin_f64): Likewise. + (vmax_f64): Likewise. + * config/aarch64/iterators.md (FMAXMIN): Merge with... + (FMAXMIN_UNS): ...this. + (fmaxmin): Merged with + (fmaxmin_op): ...this... + (maxmin_uns_op): ...in to this. + 2016-08-01 Michael Meissner * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index c7fe08bb21a..e1154b4b278 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -241,19 +241,19 @@ BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10) BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10) - /* Implemented by 3. + /* Implemented by 3. smax variants map to fmaxnm, smax_nan variants map to fmax. */ BUILTIN_VDQ_BHSI (BINOP, smax, 3) BUILTIN_VDQ_BHSI (BINOP, smin, 3) BUILTIN_VDQ_BHSI (BINOP, umax, 3) BUILTIN_VDQ_BHSI (BINOP, umin, 3) - BUILTIN_VHSDF (BINOP, smax_nan, 3) - BUILTIN_VHSDF (BINOP, smin_nan, 3) + BUILTIN_VHSDF_DF (BINOP, smax_nan, 3) + BUILTIN_VHSDF_DF (BINOP, smin_nan, 3) - /* Implemented by 3. */ - BUILTIN_VHSDF (BINOP, fmax, 3) - BUILTIN_VHSDF (BINOP, fmin, 3) + /* Implemented by 3. */ + BUILTIN_VHSDF_HSDF (BINOP, fmax, 3) + BUILTIN_VHSDF_HSDF (BINOP, fmin, 3) /* Implemented by aarch64_p. */ BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) @@ -549,8 +549,4 @@ BUILTIN_GPI (UNOP, fix_truncdf, 2) BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) - BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) - - /* Implemented by 3. */ - VAR1 (BINOP, fmax, 3, hf) - VAR1 (BINOP, fmin, 3, hf) + BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) \ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 0bf3ac8a875..f2575a0f300 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2038,6 +2038,9 @@ [(set_attr "type" "neon_fp_minmax_")] ) +;; Vector forms for fmax, fmin, fmaxnm, fminnm. +;; fmaxnm and fminnm are used for the fmax3 standard pattern names, +;; which implement the IEEE fmax ()/fmin () functions. (define_insn "3" [(set (match_operand:VHSDF 0 "register_operand" "=w") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") @@ -2048,17 +2051,6 @@ [(set_attr "type" "neon_fp_minmax_")] ) -;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions -(define_insn "3" - [(set (match_operand:VHSDF 0 "register_operand" "=w") - (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") - (match_operand:VHSDF 2 "register_operand" "w")] - FMAXMIN))] - "TARGET_SIMD" - "\\t%0., %1., %2." - [(set_attr "type" "neon_fp_minmax_")] -) - ;; 'across lanes' add. (define_expand "reduc_plus_scal_" diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 9e87a0d532e..f15dd8d8672 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4841,14 +4841,16 @@ [(set_attr "type" "f_minmax")] ) -;; Scalar forms for the IEEE-754 fmax()/fmin() functions -(define_insn "3" +;; Scalar forms for fmax, fmin, fmaxnm, fminnm. +;; fmaxnm and fminnm are used for the fmax3 standard pattern names, +;; which implement the IEEE fmax ()/fmin () functions. +(define_insn "3" [(set (match_operand:GPF_F16 0 "register_operand" "=w") (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w") (match_operand:GPF_F16 2 "register_operand" "w")] - FMAXMIN))] + FMAXMIN_UNS))] "TARGET_FLOAT" - "\\t%0, %1, %2" + "\\t%0, %1, %2" [(set_attr "type" "f_minmax")] ) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ab3a00c9ec7..fcdc977d631 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -17201,6 +17201,14 @@ vmax_f32 (float32x2_t __a, float32x2_t __b) return __builtin_aarch64_smax_nanv2sf (__a, __b); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmax_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) + { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; +} + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vmax_s8 (int8x8_t __a, int8x8_t __b) { @@ -17692,6 +17700,14 @@ vmaxnm_f32 (float32x2_t __a, float32x2_t __b) return __builtin_aarch64_fmaxv2sf (__a, __b); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmaxnm_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) + { __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) { @@ -17824,6 +17840,14 @@ vmin_f32 (float32x2_t __a, float32x2_t __b) return __builtin_aarch64_smin_nanv2sf (__a, __b); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmin_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) + { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; +} + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vmin_s8 (int8x8_t __a, int8x8_t __b) { @@ -17922,6 +17946,14 @@ vminnm_f32 (float32x2_t __a, float32x2_t __b) return __builtin_aarch64_fminv2sf (__a, __b); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vminnm_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) + { __builtin_aarch64_fmind (vget_lane_f64 (__a, 0), + vget_lane_f64 (__b, 0)) }; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vminnmq_f32 (float32x4_t __a, float32x4_t __b) { diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 5e8b0ad9cee..187057f2da0 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1016,9 +1016,8 @@ (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2 UNSPEC_SUBHN2 UNSPEC_RSUBHN2]) -(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN]) - -(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM]) +(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN + UNSPEC_FMAXNM UNSPEC_FMINNM]) (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) @@ -1102,7 +1101,9 @@ (UNSPEC_FMAXV "smax_nan") (UNSPEC_FMIN "smin_nan") (UNSPEC_FMINNMV "smin") - (UNSPEC_FMINV "smin_nan")]) + (UNSPEC_FMINV "smin_nan") + (UNSPEC_FMAXNM "fmax") + (UNSPEC_FMINNM "fmin")]) (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin") @@ -1113,13 +1114,9 @@ (UNSPEC_FMAXV "fmax") (UNSPEC_FMIN "fmin") (UNSPEC_FMINNMV "fminnm") - (UNSPEC_FMINV "fmin")]) - -(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax") - (UNSPEC_FMINNM "fmin")]) - -(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm") - (UNSPEC_FMINNM "fminnm")]) + (UNSPEC_FMINV "fmin") + (UNSPEC_FMAXNM "fmaxnm") + (UNSPEC_FMINNM "fminnm")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3eb9df1a412..f5bd074cf18 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-08-02 Tamar Christina + + * gcc.target/aarch64/vminmaxnm.c: New. + * gcc.target/aarch64/simd/vminmaxnm_1.c (main): Add float64x1_t + tests. + 2016-08-01 Michael Meissner * gcc.target/powerpc/vec-extract-5.c: New tests to test diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c index 96608ebb283..192bad9879b 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c @@ -1,4 +1,4 @@ -/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */ +/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic. */ /* { dg-do run } */ /* { dg-options "-O2" } */ @@ -18,6 +18,7 @@ extern void abort (); int main (int argc, char **argv) { + /* v{min|max}nm_f32 normal. */ float32x2_t f32x2_input1 = vdup_n_f32 (-1.0); float32x2_t f32x2_input2 = vdup_n_f32 (0.0); float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0); @@ -28,6 +29,7 @@ main (int argc, char **argv) CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); + /* v{min|max}nm_f32 NaN. */ f32x2_input1 = vdup_n_f32 (__builtin_nanf ("")); f32x2_input2 = vdup_n_f32 (1.0); f32x2_exp_minnm = vdup_n_f32 (1.0); @@ -38,6 +40,7 @@ main (int argc, char **argv) CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); + /* v{min|max}nmq_f32 normal. */ float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0); float32x4_t f32x4_input2 = vdupq_n_f32 (77.0); float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0); @@ -48,6 +51,7 @@ main (int argc, char **argv) CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm); + /* v{min|max}nmq_f32 NaN. */ f32x4_input1 = vdupq_n_f32 (-__builtin_nanf ("")); f32x4_input2 = vdupq_n_f32 (-1.0); f32x4_exp_minnm = vdupq_n_f32 (-1.0); @@ -58,16 +62,57 @@ main (int argc, char **argv) CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm); + /* v{min|max}nm_f64 normal. */ + float64x1_t f64x1_input1 = vdup_n_f64 (1.23); + float64x1_t f64x1_input2 = vdup_n_f64 (4.56); + float64x1_t f64x1_exp_minnm = vdup_n_f64 (1.23); + float64x1_t f64x1_exp_maxnm = vdup_n_f64 (4.56); + float64x1_t f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2); + float64x1_t f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2); + CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); + CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); + + /* v{min|max}_f64 normal. */ + float64x1_t f64x1_exp_min = vdup_n_f64 (1.23); + float64x1_t f64x1_exp_max = vdup_n_f64 (4.56); + float64x1_t f64x1_ret_min = vmin_f64 (f64x1_input1, f64x1_input2); + float64x1_t f64x1_ret_max = vmax_f64 (f64x1_input1, f64x1_input2); + CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min); + CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max); + + /* v{min|max}nmq_f64 normal. */ float64x2_t f64x2_input1 = vdupq_n_f64 (1.23); float64x2_t f64x2_input2 = vdupq_n_f64 (4.56); float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23); float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56); float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2); float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2); - CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm); CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm); + /* v{min|max}nm_f64 NaN. */ + f64x1_input1 = vdup_n_f64 (-__builtin_nanf ("")); + f64x1_input2 = vdup_n_f64 (1.0); + f64x1_exp_minnm = vdup_n_f64 (1.0); + f64x1_exp_maxnm = vdup_n_f64 (1.0); + f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2); + f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2); + + CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); + CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); + + /* v{min|max}_f64 NaN. */ + f64x1_input1 = vdup_n_f64 (-__builtin_nanf ("")); + f64x1_input2 = vdup_n_f64 (1.0); + f64x1_exp_minnm = vdup_n_f64 (-__builtin_nanf ("")); + f64x1_exp_maxnm = vdup_n_f64 (-__builtin_nanf ("")); + f64x1_ret_minnm = vmin_f64 (f64x1_input1, f64x1_input2); + f64x1_ret_maxnm = vmax_f64 (f64x1_input1, f64x1_input2); + + CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); + CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); + + /* v{min|max}nmq_f64 NaN. */ f64x2_input1 = vdupq_n_f64 (-__builtin_nan ("")); f64x2_input2 = vdupq_n_f64 (1.0); f64x2_exp_minnm = vdupq_n_f64 (1.0); diff --git a/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c b/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c new file mode 100644 index 00000000000..bdaa5649971 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include "arm_neon.h" + +/* For each of these intrinsics, we map directly to an unspec in RTL. + We're just using the argument directly and returning the result, so we + can precisely specify the exact instruction pattern and register + allocations we expect. */ + +float64x1_t +test_vmaxnm_f64 (float64x1_t a, float64x1_t b) +{ + /* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */ + return vmaxnm_f64 (a, b); +} + +float64x1_t +test_vminnm_f64 (float64x1_t a, float64x1_t b) +{ + /* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */ + return vminnm_f64 (a, b); +} + +float64x1_t +test_vmax_f64 (float64x1_t a, float64x1_t b) +{ + /* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */ + return vmax_f64 (a, b); +} + +float64x1_t +test_vmin_f64 (float64x1_t a, float64x1_t b) +{ + /* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */ + return vmin_f64 (a, b); +} \ No newline at end of file