From 98b3a5f289f42d2d7e9616ccd02cce40440f2adf Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Tue, 17 May 2016 16:34:46 +0000 Subject: [PATCH] [AArch64, 1/4] Add the missing support of vfms_n_f32, vfmsq_n_f32, vfmsq_n_f64 gcc/ * config/aarch64/aarch64-simd.md (*aarch64_fma4_elt_to_128df): Rename to *aarch64_fma4_elt_from_dup. (*aarch64_fnma4_elt_to_128df): Rename to *aarch64_fnma4_elt_from_dup. * config/aarch64/arm_neon.h (vfma_n_f64): New. (vfms_n_f32): Likewise. (vfms_n_f64): Likewise. (vfmsq_n_f32): Likewise. (vfmsq_n_f64): Likewise. gcc/testsuite/ * gcc.target/aarch64/fmla_intrinsic_1.c: Allow ".d[index]" besides ".2d[index]" when scan the assembly. * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: New entry for float64x1. * gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c: New. From-SVN: r236331 --- gcc/ChangeLog | 12 + gcc/config/aarch64/aarch64-simd.md | 40 +- gcc/config/aarch64/arm_neon.h | 29 ++ gcc/testsuite/ChangeLog | 9 + .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 2 + .../aarch64/advsimd-intrinsics/vfms_vfma_n.c | 490 ++++++++++++++++++ .../gcc.target/aarch64/fmla_intrinsic_1.c | 2 +- .../gcc.target/aarch64/fmls_intrinsic_1.c | 2 +- 8 files changed, 564 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 83e27da411c..bb594964f56 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2016-05-17 Jiong Wang + + * config/aarch64/aarch64-simd.md (*aarch64_fma4_elt_to_128df): Rename + to *aarch64_fma4_elt_from_dup. + (*aarch64_fnma4_elt_to_128df): Rename to + *aarch64_fnma4_elt_from_dup. + * config/aarch64/arm_neon.h (vfma_n_f64): New. + (vfms_n_f32): Likewise. + (vfms_n_f64): Likewise. + (vfmsq_n_f32): Likewise. + (vfmsq_n_f64): Likewise. + 2016-05-17 Gerald Pfeifer * wide-int.h: Change fixed_wide_int_storage from class to struct. diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index ded8bff0973..b533846378b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1579,16 +1579,16 @@ [(set_attr "type" "neon_fp_mla__scalar")] ) -(define_insn "*aarch64_fma4_elt_to_128df" - [(set (match_operand:V2DF 0 "register_operand" "=w") - (fma:V2DF - (vec_duplicate:V2DF - (match_operand:DF 1 "register_operand" "w")) - (match_operand:V2DF 2 "register_operand" "w") - (match_operand:V2DF 3 "register_operand" "0")))] +(define_insn "*aarch64_fma4_elt_from_dup" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (fma:VMUL + (vec_duplicate:VMUL + (match_operand: 1 "register_operand" "w")) + (match_operand:VMUL 2 "register_operand" "w") + (match_operand:VMUL 3 "register_operand" "0")))] "TARGET_SIMD" - "fmla\\t%0.2d, %2.2d, %1.2d[0]" - [(set_attr "type" "neon_fp_mla_d_scalar_q")] + "fmla\t%0., %2., %1.[0]" + [(set_attr "type" "neon_mla__scalar")] ) (define_insn "*aarch64_fma4_elt_to_64v2df" @@ -1656,17 +1656,17 @@ [(set_attr "type" "neon_fp_mla__scalar")] ) -(define_insn "*aarch64_fnma4_elt_to_128df" - [(set (match_operand:V2DF 0 "register_operand" "=w") - (fma:V2DF - (neg:V2DF - (match_operand:V2DF 2 "register_operand" "w")) - (vec_duplicate:V2DF - (match_operand:DF 1 "register_operand" "w")) - (match_operand:V2DF 3 "register_operand" "0")))] - "TARGET_SIMD" - "fmls\\t%0.2d, %2.2d, %1.2d[0]" - [(set_attr "type" "neon_fp_mla_d_scalar_q")] +(define_insn "*aarch64_fnma4_elt_from_dup" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (fma:VMUL + (neg:VMUL + (match_operand:VMUL 2 "register_operand" "w")) + (vec_duplicate:VMUL + (match_operand: 1 "register_operand" "w")) + (match_operand:VMUL 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\t%0., %2., %1.[0]" + [(set_attr "type" "neon_mla__scalar")] ) (define_insn "*aarch64_fnma4_elt_to_64v2df" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 2612a325718..ca7ace5aa65 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -14456,6 +14456,12 @@ vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +{ + return (float64x1_t) {__b[0] * __c + __a[0]}; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { @@ -14597,6 +14603,29 @@ vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) return __builtin_aarch64_fmav2df (-__b, __c, __a); } +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav2sf (-__b, vdup_n_f32 (__c), __a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) +{ + return (float64x1_t) {-__b[0] * __c + __a[0]}; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return __builtin_aarch64_fmav4sf (-__b, vdupq_n_f32 (__c), __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) +{ + return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); +} /* vfms_lane */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cc3570ca7ba..837a8ff3d20 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-05-17 Jiong Wang + + * gcc.target/aarch64/fmla_intrinsic_1.c: Allow ".d[index]" besides + ".2d[index]" when scan the assembly. + * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise. + * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: New entry for + float64x1. + * gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c: New. + 2016-05-17 Richard Biener PR tree-optimization/71132 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 49fbd843e50..cf90825f873 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -136,6 +136,7 @@ static ARRAY(result, poly, 16, 4); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) static ARRAY(result, float, 16, 4); #endif +static ARRAY(result, float, 64, 1); static ARRAY(result, float, 32, 2); static ARRAY(result, int, 8, 16); static ARRAY(result, int, 16, 8); @@ -169,6 +170,7 @@ extern ARRAY(expected, poly, 8, 8); extern ARRAY(expected, poly, 16, 4); extern ARRAY(expected, hfloat, 16, 4); extern ARRAY(expected, hfloat, 32, 2); +extern ARRAY(expected, hfloat, 64, 1); extern ARRAY(expected, int, 8, 16); extern ARRAY(expected, int, 16, 8); extern ARRAY(expected, int, 32, 4); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c new file mode 100644 index 00000000000..26223763c59 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c @@ -0,0 +1,490 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define A0 123.4f +#define A1 -3.8f +#define A2 -29.4f +#define A3 (__builtin_inff ()) +#define A4 0.0f +#define A5 24.0f +#define A6 124.0f +#define A7 1024.0f + +#define B0 -5.8f +#define B1 -0.0f +#define B2 -10.8f +#define B3 10.0f +#define B4 23.4f +#define B5 -1234.8f +#define B6 8.9f +#define B7 4.0f + +#define E0 9.8f +#define E1 -1024.0f +#define E2 (-__builtin_inff ()) +#define E3 479.0f +float32_t elem0 = E0; +float32_t elem1 = E1; +float32_t elem2 = E2; +float32_t elem3 = E3; + +#define DA0 1231234.4 +#define DA1 -3.8 +#define DA2 -2980.4 +#define DA3 -5.8 +#define DA4 0.01123 +#define DA5 24.0 +#define DA6 124.12345 +#define DA7 1024.0 + +#define DB0 -5.8 +#define DB1 (__builtin_inf ()) +#define DB2 -105.8 +#define DB3 10.0 +#define DB4 (-__builtin_inf ()) +#define DB5 -1234.8 +#define DB6 848.9 +#define DB7 44444.0 + +#define DE0 9.8 +#define DE1 -1024.0 +#define DE2 105.8 +#define DE3 479.0 +float64_t delem0 = DE0; +float64_t delem1 = DE1; +float64_t delem2 = DE2; +float64_t delem3 = DE3; + +#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) + +/* Expected results for vfms_n. */ + +VECT_VAR_DECL(expectedfms0, float, 32, 2) [] = {A0 + -B0 * E0, A1 + -B1 * E0}; +VECT_VAR_DECL(expectedfms1, float, 32, 2) [] = {A2 + -B2 * E1, A3 + -B3 * E1}; +VECT_VAR_DECL(expectedfms2, float, 32, 2) [] = {A4 + -B4 * E2, A5 + -B5 * E2}; +VECT_VAR_DECL(expectedfms3, float, 32, 2) [] = {A6 + -B6 * E3, A7 + -B7 * E3}; +VECT_VAR_DECL(expectedfma0, float, 32, 2) [] = {A0 + B0 * E0, A1 + B1 * E0}; +VECT_VAR_DECL(expectedfma1, float, 32, 2) [] = {A2 + B2 * E1, A3 + B3 * E1}; +VECT_VAR_DECL(expectedfma2, float, 32, 2) [] = {A4 + B4 * E2, A5 + B5 * E2}; +VECT_VAR_DECL(expectedfma3, float, 32, 2) [] = {A6 + B6 * E3, A7 + B7 * E3}; + +hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 2); +hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 2) = + (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 2); + + +VECT_VAR_DECL(expectedfms0, float, 32, 4) [] = {A0 + -B0 * E0, A1 + -B1 * E0, + A2 + -B2 * E0, A3 + -B3 * E0}; +VECT_VAR_DECL(expectedfms1, float, 32, 4) [] = {A4 + -B4 * E1, A5 + -B5 * E1, + A6 + -B6 * E1, A7 + -B7 * E1}; +VECT_VAR_DECL(expectedfms2, float, 32, 4) [] = {A0 + -B0 * E2, A2 + -B2 * E2, + A4 + -B4 * E2, A6 + -B6 * E2}; +VECT_VAR_DECL(expectedfms3, float, 32, 4) [] = {A1 + -B1 * E3, A3 + -B3 * E3, + A5 + -B5 * E3, A7 + -B7 * E3}; +VECT_VAR_DECL(expectedfma0, float, 32, 4) [] = {A0 + B0 * E0, A1 + B1 * E0, + A2 + B2 * E0, A3 + B3 * E0}; +VECT_VAR_DECL(expectedfma1, float, 32, 4) [] = {A4 + B4 * E1, A5 + B5 * E1, + A6 + B6 * E1, A7 + B7 * E1}; +VECT_VAR_DECL(expectedfma2, float, 32, 4) [] = {A0 + B0 * E2, A2 + B2 * E2, + A4 + B4 * E2, A6 + B6 * E2}; +VECT_VAR_DECL(expectedfma3, float, 32, 4) [] = {A1 + B1 * E3, A3 + B3 * E3, + A5 + B5 * E3, A7 + B7 * E3}; + +hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 4); +hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 4) = + (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 4); + +VECT_VAR_DECL(expectedfms0, float, 64, 2) [] = {DA0 + -DB0 * DE0, + DA1 + -DB1 * DE0}; +VECT_VAR_DECL(expectedfms1, float, 64, 2) [] = {DA2 + -DB2 * DE1, + DA3 + -DB3 * DE1}; +VECT_VAR_DECL(expectedfms2, float, 64, 2) [] = {DA4 + -DB4 * DE2, + DA5 + -DB5 * DE2}; +VECT_VAR_DECL(expectedfms3, float, 64, 2) [] = {DA6 + -DB6 * DE3, + DA7 + -DB7 * DE3}; +VECT_VAR_DECL(expectedfma0, float, 64, 2) [] = {DA0 + DB0 * DE0, + DA1 + DB1 * DE0}; +VECT_VAR_DECL(expectedfma1, float, 64, 2) [] = {DA2 + DB2 * DE1, + DA3 + DB3 * DE1}; +VECT_VAR_DECL(expectedfma2, float, 64, 2) [] = {DA4 + DB4 * DE2, + DA5 + DB5 * DE2}; +VECT_VAR_DECL(expectedfma3, float, 64, 2) [] = {DA6 + DB6 * DE3, + DA7 + DB7 * DE3}; +hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 2); +hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 2) = + (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 2); + +VECT_VAR_DECL(expectedfms0, float, 64, 1) [] = {DA0 + -DB0 * DE0}; +VECT_VAR_DECL(expectedfms1, float, 64, 1) [] = {DA2 + -DB2 * DE1}; +VECT_VAR_DECL(expectedfms2, float, 64, 1) [] = {DA4 + -DB4 * DE2}; +VECT_VAR_DECL(expectedfms3, float, 64, 1) [] = {DA6 + -DB6 * DE3}; +VECT_VAR_DECL(expectedfma0, float, 64, 1) [] = {DA0 + DB0 * DE0}; +VECT_VAR_DECL(expectedfma1, float, 64, 1) [] = {DA2 + DB2 * DE1}; +VECT_VAR_DECL(expectedfma2, float, 64, 1) [] = {DA4 + DB4 * DE2}; +VECT_VAR_DECL(expectedfma3, float, 64, 1) [] = {DA6 + DB6 * DE3}; + +hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 1); +hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 1) = + (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 1); + +void exec_vfma_vfms_n (void) +{ +#undef TEST_MSG +#define TEST_MSG "VFMS_VFMA_N (FP32)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 32, 2); + DECL_VARIABLE(vsrc_2, float, 32, 2); + VECT_VAR_DECL (buf_src_1, float, 32, 2) [] = {A0, A1}; + VECT_VAR_DECL (buf_src_2, float, 32, 2) [] = {B0, B1}; + VLOAD (vsrc_1, buf_src_1, , float, f, 32, 2); + VLOAD (vsrc_2, buf_src_2, , float, f, 32, 2); + DECL_VARIABLE (vector_res, float, 32, 2) = + vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem0); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms0_static, ""); + VECT_VAR (vector_res, float, 32, 2) = + vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem0); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma0_static, ""); + + VECT_VAR_DECL (buf_src_3, float, 32, 2) [] = {A2, A3}; + VECT_VAR_DECL (buf_src_4, float, 32, 2) [] = {B2, B3}; + VLOAD (vsrc_1, buf_src_3, , float, f, 32, 2); + VLOAD (vsrc_2, buf_src_4, , float, f, 32, 2); + VECT_VAR (vector_res, float, 32, 2) = + vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem1); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms1_static, ""); + VECT_VAR (vector_res, float, 32, 2) = + vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem1); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma1_static, ""); + + VECT_VAR_DECL (buf_src_5, float, 32, 2) [] = {A4, A5}; + VECT_VAR_DECL (buf_src_6, float, 32, 2) [] = {B4, B5}; + VLOAD (vsrc_1, buf_src_5, , float, f, 32, 2); + VLOAD (vsrc_2, buf_src_6, , float, f, 32, 2); + VECT_VAR (vector_res, float, 32, 2) = + vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem2); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms2_static, ""); + VECT_VAR (vector_res, float, 32, 2) = + vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem2); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma2_static, ""); + + VECT_VAR_DECL (buf_src_7, float, 32, 2) [] = {A6, A7}; + VECT_VAR_DECL (buf_src_8, float, 32, 2) [] = {B6, B7}; + VLOAD (vsrc_1, buf_src_7, , float, f, 32, 2); + VLOAD (vsrc_2, buf_src_8, , float, f, 32, 2); + VECT_VAR (vector_res, float, 32, 2) = + vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem3); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms3_static, ""); + VECT_VAR (vector_res, float, 32, 2) = + vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), + VECT_VAR (vsrc_2, float, 32, 2), elem3); + vst1_f32 (VECT_VAR (result, float, 32, 2), + VECT_VAR (vector_res, float, 32, 2)); + CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMSQ_VFMAQ_N (FP32)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 32, 4); + DECL_VARIABLE(vsrc_2, float, 32, 4); + VECT_VAR_DECL (buf_src_1, float, 32, 4) [] = {A0, A1, A2, A3}; + VECT_VAR_DECL (buf_src_2, float, 32, 4) [] = {B0, B1, B2, B3}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 32, 4); + VLOAD (vsrc_2, buf_src_2, q, float, f, 32, 4); + DECL_VARIABLE (vector_res, float, 32, 4) = + vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem0); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms0_static, ""); + VECT_VAR (vector_res, float, 32, 4) = + vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem0); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma0_static, ""); + + VECT_VAR_DECL (buf_src_3, float, 32, 4) [] = {A4, A5, A6, A7}; + VECT_VAR_DECL (buf_src_4, float, 32, 4) [] = {B4, B5, B6, B7}; + VLOAD (vsrc_1, buf_src_3, q, float, f, 32, 4); + VLOAD (vsrc_2, buf_src_4, q, float, f, 32, 4); + VECT_VAR (vector_res, float, 32, 4) = + vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem1); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms1_static, ""); + VECT_VAR (vector_res, float, 32, 4) = + vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem1); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma1_static, ""); + + VECT_VAR_DECL (buf_src_5, float, 32, 4) [] = {A0, A2, A4, A6}; + VECT_VAR_DECL (buf_src_6, float, 32, 4) [] = {B0, B2, B4, B6}; + VLOAD (vsrc_1, buf_src_5, q, float, f, 32, 4); + VLOAD (vsrc_2, buf_src_6, q, float, f, 32, 4); + VECT_VAR (vector_res, float, 32, 4) = + vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem2); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms2_static, ""); + VECT_VAR (vector_res, float, 32, 4) = + vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem2); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma2_static, ""); + + VECT_VAR_DECL (buf_src_7, float, 32, 4) [] = {A1, A3, A5, A7}; + VECT_VAR_DECL (buf_src_8, float, 32, 4) [] = {B1, B3, B5, B7}; + VLOAD (vsrc_1, buf_src_7, q, float, f, 32, 4); + VLOAD (vsrc_2, buf_src_8, q, float, f, 32, 4); + VECT_VAR (vector_res, float, 32, 4) = + vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem3); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms3_static, ""); + VECT_VAR (vector_res, float, 32, 4) = + vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), + VECT_VAR (vsrc_2, float, 32, 4), elem3); + vst1q_f32 (VECT_VAR (result, float, 32, 4), + VECT_VAR (vector_res, float, 32, 4)); + CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMSQ_VFMAQ_N (FP64)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 64, 2); + DECL_VARIABLE(vsrc_2, float, 64, 2); + VECT_VAR_DECL (buf_src_1, float, 64, 2) [] = {DA0, DA1}; + VECT_VAR_DECL (buf_src_2, float, 64, 2) [] = {DB0, DB1}; + VLOAD (vsrc_1, buf_src_1, q, float, f, 64, 2); + VLOAD (vsrc_2, buf_src_2, q, float, f, 64, 2); + DECL_VARIABLE (vector_res, float, 64, 2) = + vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem0); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfms0_static, ""); + VECT_VAR (vector_res, float, 64, 2) = + vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem0); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfma0_static, ""); + + VECT_VAR_DECL (buf_src_3, float, 64, 2) [] = {DA2, DA3}; + VECT_VAR_DECL (buf_src_4, float, 64, 2) [] = {DB2, DB3}; + VLOAD (vsrc_1, buf_src_3, q, float, f, 64, 2); + VLOAD (vsrc_2, buf_src_4, q, float, f, 64, 2); + VECT_VAR (vector_res, float, 64, 2) = + vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem1); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfms1_static, ""); + VECT_VAR (vector_res, float, 64, 2) = + vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem1); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfma1_static, ""); + + VECT_VAR_DECL (buf_src_5, float, 64, 2) [] = {DA4, DA5}; + VECT_VAR_DECL (buf_src_6, float, 64, 2) [] = {DB4, DB5}; + VLOAD (vsrc_1, buf_src_5, q, float, f, 64, 2); + VLOAD (vsrc_2, buf_src_6, q, float, f, 64, 2); + VECT_VAR (vector_res, float, 64, 2) = + vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem2); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfms2_static, ""); + VECT_VAR (vector_res, float, 64, 2) = + vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem2); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfma2_static, ""); + + VECT_VAR_DECL (buf_src_7, float, 64, 2) [] = {DA6, DA7}; + VECT_VAR_DECL (buf_src_8, float, 64, 2) [] = {DB6, DB7}; + VLOAD (vsrc_1, buf_src_7, q, float, f, 64, 2); + VLOAD (vsrc_2, buf_src_8, q, float, f, 64, 2); + VECT_VAR (vector_res, float, 64, 2) = + vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem3); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfms3_static, ""); + VECT_VAR (vector_res, float, 64, 2) = + vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), + VECT_VAR (vsrc_2, float, 64, 2), delem3); + vst1q_f64 (VECT_VAR (result, float, 64, 2), + VECT_VAR (vector_res, float, 64, 2)); + CHECK_FP (TEST_MSG, float, 64, 2, PRIx16, expectedfma3_static, ""); + +#undef TEST_MSG +#define TEST_MSG "VFMS_VFMA_N (FP64)" + clean_results (); + + DECL_VARIABLE(vsrc_1, float, 64, 1); + DECL_VARIABLE(vsrc_2, float, 64, 1); + VECT_VAR_DECL (buf_src_1, float, 64, 1) [] = {DA0}; + VECT_VAR_DECL (buf_src_2, float, 64, 1) [] = {DB0}; + VLOAD (vsrc_1, buf_src_1, , float, f, 64, 1); + VLOAD (vsrc_2, buf_src_2, , float, f, 64, 1); + DECL_VARIABLE (vector_res, float, 64, 1) = + vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem0); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfms0_static, ""); + VECT_VAR (vector_res, float, 64, 1) = + vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem0); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfma0_static, ""); + + VECT_VAR_DECL (buf_src_3, float, 64, 1) [] = {DA2}; + VECT_VAR_DECL (buf_src_4, float, 64, 1) [] = {DB2}; + VLOAD (vsrc_1, buf_src_3, , float, f, 64, 1); + VLOAD (vsrc_2, buf_src_4, , float, f, 64, 1); + VECT_VAR (vector_res, float, 64, 1) = + vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem1); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfms1_static, ""); + VECT_VAR (vector_res, float, 64, 1) = + vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem1); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfma1_static, ""); + + VECT_VAR_DECL (buf_src_5, float, 64, 1) [] = {DA4}; + VECT_VAR_DECL (buf_src_6, float, 64, 1) [] = {DB4}; + VLOAD (vsrc_1, buf_src_5, , float, f, 64, 1); + VLOAD (vsrc_2, buf_src_6, , float, f, 64, 1); + VECT_VAR (vector_res, float, 64, 1) = + vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem2); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfms2_static, ""); + VECT_VAR (vector_res, float, 64, 1) = + vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem2); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfma2_static, ""); + + VECT_VAR_DECL (buf_src_7, float, 64, 1) [] = {DA6}; + VECT_VAR_DECL (buf_src_8, float, 64, 1) [] = {DB6}; + VLOAD (vsrc_1, buf_src_7, , float, f, 64, 1); + VLOAD (vsrc_2, buf_src_8, , float, f, 64, 1); + VECT_VAR (vector_res, float, 64, 1) = + vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem3); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfms3_static, ""); + VECT_VAR (vector_res, float, 64, 1) = + vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), + VECT_VAR (vsrc_2, float, 64, 1), delem3); + vst1_f64 (VECT_VAR (result, float, 64, 1), + VECT_VAR (vector_res, float, 64, 1)); + CHECK_FP (TEST_MSG, float, 64, 1, PRIx16, expectedfma3_static, ""); +} +#endif + +int +main (void) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) + exec_vfma_vfms_n (); +#endif + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c index 1ba1fed98a0..5b348827002 100644 --- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c +++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c @@ -110,6 +110,6 @@ main (int argc, char **argv) /* vfmaq_lane_f64. vfma_laneq_f64. vfmaq_laneq_f64. */ -/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */ +/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c index 887ebae10da..6c194a023d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c +++ b/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c @@ -111,6 +111,6 @@ main (int argc, char **argv) /* vfmsq_lane_f64. vfms_laneq_f64. vfmsq_laneq_f64. */ -/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */ +/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */ -- 2.30.2