From 496ea87db62abbf97da6d5791b4d5fb406350767 Mon Sep 17 00:00:00 2001 From: Bilyan Borisov Date: Tue, 3 Nov 2015 10:58:27 +0000 Subject: [PATCH] [AARCH64][PATCH 1/3] Implementing the variants of the vmulx_ NEON intrinsic gcc/ * config/aarch64/aarch64-simd-builtins.def (fmulx): New. * config/aarch64/aarch64-simd.md (aarch64_fmulx): New. * config/aarch64/arm_neon.h (vmulx_f32): Rewrite to call fmulx builtin. (vmulxq_f32): Likewise. (vmulx_f64): New. (vmulxq_f64): Rewrite to call fmulx builtin. (vmulxs_f32): Likewise. (vmulxd_f64): Likewise. (vmulx_lane_f32): Remove. * config/aarch64/iterators.md (UNSPEC): Add fmulx. gcc/testsuite/ * gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c: New. * gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c: New. * gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c: New. * gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c: New. * gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c: New. * gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c: New. From-SVN: r229702 --- gcc/ChangeLog | 14 +++ gcc/config/aarch64/aarch64-simd-builtins.def | 1 + gcc/config/aarch64/aarch64-simd.md | 12 ++ gcc/config/aarch64/arm_neon.h | 105 ++++++------------ gcc/config/aarch64/iterators.md | 1 + gcc/testsuite/ChangeLog | 9 ++ .../gcc.target/aarch64/simd/vmulx_f32_1.c | 52 +++++++++ .../gcc.target/aarch64/simd/vmulx_f64_1.c | 57 ++++++++++ .../gcc.target/aarch64/simd/vmulxd_f64_1.c | 36 ++++++ .../gcc.target/aarch64/simd/vmulxq_f32_1.c | 48 ++++++++ .../gcc.target/aarch64/simd/vmulxq_f64_1.c | 51 +++++++++ .../gcc.target/aarch64/simd/vmulxs_f32_1.c | 34 ++++++ 12 files changed, 352 insertions(+), 68 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 204716ecb0a..5459551b1f7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2015-11-03 Bilyan Borisov + + * config/aarch64/aarch64-simd-builtins.def (fmulx): New. + * config/aarch64/aarch64-simd.md (aarch64_fmulx): New. + * config/aarch64/arm_neon.h (vmulx_f32): Rewrite to call fmulx + builtin. + (vmulxq_f32): Likewise. + (vmulx_f64): New. + (vmulxq_f64): Rewrite to call fmulx builtin. + (vmulxs_f32): Likewise. + (vmulxd_f64): Likewise. + (vmulx_lane_f32): Remove. + * config/aarch64/iterators.md (UNSPEC): Add fmulx. + 2015-11-03 Alan Lawrence * config/aarch64/aarch64.md (*movhf_aarch64): Use diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 654e963fa53..04dac6f84db 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -41,6 +41,7 @@ BUILTIN_VDC (COMBINE, combine, 0) BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VALLF (BINOP, fmulx, 0) BUILTIN_VDQF_DF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 65a2b6fad64..269e00237bb 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2868,6 +2868,18 @@ [(set_attr "type" "neon_mul_")] ) +;; fmulx. + +(define_insn "aarch64_fmulx" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + UNSPEC_FMULX))] + "TARGET_SIMD" + "fmulx\t%0, %1, %2" + [(set_attr "type" "neon_fp_mul_")] +) + ;; q (define_insn "aarch64_" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e1863488b73..265c2667908 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8509,63 +8509,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmulx_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fmulx %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmulx_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vmulxd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("fmulx %d0, %d1, %d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmulxq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fmulx %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmulxq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmulx %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - #define vmulxq_lane_f32(a, b, c) \ __extension__ \ ({ \ @@ -8592,17 +8535,6 @@ vmulxq_f64 (float64x2_t a, float64x2_t b) result; \ }) -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmulxs_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("fmulx %s0, %s1, %s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vmvn_p8 (poly8x8_t a) { @@ -17739,6 +17671,43 @@ vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, (int32x4_t) __b); } +/* vmulx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fmulxv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fmulxv4sf (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmulx_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])}; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fmulxv2df (__a, __b); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmulxs_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_fmulxsf (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmulxd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_fmulxdf (__a, __b); +} /* vpmax */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 964f8f1af68..6f35036b013 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -239,6 +239,7 @@ UNSPEC_SQDMULH ; Used in aarch64-simd.md. UNSPEC_SQRDMULH ; Used in aarch64-simd.md. UNSPEC_PMUL ; Used in aarch64-simd.md. + UNSPEC_FMULX ; Used in aarch64-simd.md. UNSPEC_USQADD ; Used in aarch64-simd.md. UNSPEC_SUQADD ; Used in aarch64-simd.md. UNSPEC_SQXTUN ; Used in aarch64-simd.md. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 48185b38131..1de0ea186a8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2015-11-03 Bilyan Borisov + + * gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c: New. + * gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c: New. + * gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c: New. + * gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c: New. + * gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c: New. + * gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c: New. + 2015-11-03 Alan Lawrence * gcc.target/aarch64/fp16/set_zero_1.c: New. diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c new file mode 100644 index 00000000000..d0d26c5695e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c @@ -0,0 +1,52 @@ +/* Test the vmulx_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float32_t v1[2], float32_t v2[2], float32_t e[2]) +{ + int i; + float32x2_t vec1_1 = vld1_f32 (v1); + float32x2_t vec1_2 = vld1_f32 (v2); + + float32x2_t actual1 = vmulx_f32 (vec1_1, vec1_2); + float32_t actual[2]; + vst1_f32 (actual, actual1); + + for (i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +int +main (void) +{ + float32_t v1 = 3.14159265359; + float32_t v2 = 1.383894; + float32_t v3 = -2.71828; + float32_t v4 = -3.4891931; + + float32_t v1_1[] = {v1, v2}; + float32_t v1_2[] = {v3, v4}; + float32_t e1[] = {v1 * v3, v2 * v4}; + test_case (v1_1, v1_2, e1); + + float32_t v2_1[] = {0, -0.0}; + float32_t v2_2[] = {__builtin_huge_valf (), __builtin_huge_valf ()}; + float32_t e2[] = {2.0, -2.0}; + test_case (v2_1, v2_2, e2); + + float32_t v3_1[] = {0, -0.0}; + float32_t v3_2[] = {-__builtin_huge_valf (), -__builtin_huge_valf ()}; + float32_t e3[] = {-2.0, 2.0}; + test_case (v3_1, v3_2, e3); + + return 0; +} + +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2s, ?\[vV\]\[0-9\]+\.2s, ?\[vV\]\[0-9\]+\.2s\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c new file mode 100644 index 00000000000..5791d8a6b28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c @@ -0,0 +1,57 @@ +/* Test the vmulx_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float64_t v1[1], float64_t v2[1], float64_t e1[1]) +{ + float64x1_t vec1_1 = vld1_f64 (v1); + float64x1_t vec1_2 = vld1_f64 (v2); + + float64x1_t actual1 = vmulx_f64 (vec1_1, vec1_2); + float64_t actual[1]; + vst1_f64 (actual, actual1); + if (actual[0] != e1[0]) + abort (); +} + +int +main (void) +{ + float64_t v1 = 3.14159265359; + float64_t v2 = -2.71828; + + float64_t v1_1[] = {v1}; + float64_t v1_2[] = {v2}; + float64_t e1[] = {v1 * v2}; + test_case (v1_1, v1_2, e1); + + float64_t v2_1[] = {0}; + float64_t v2_2[] = {__builtin_huge_val ()}; + float64_t e2[] = {2.0}; + test_case (v2_1, v2_2, e2); + + float64_t v3_1[] = {0}; + float64_t v3_2[] = {-__builtin_huge_val ()}; + float64_t e3[] = {-2.0}; + test_case (v3_1, v3_2, e3); + + float64_t v4_1[] = {-0.0}; + float64_t v4_2[] = {__builtin_huge_val ()}; + float64_t e4[] = {-2.0}; + test_case (v4_1, v4_2, e4); + + float64_t v5_1[] = {-0.0}; + float64_t v5_2[] = {-__builtin_huge_val ()}; + float64_t e5[] = {2.0}; + test_case (v5_1, v5_2, e5); + + return 0; +} + +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c new file mode 100644 index 00000000000..d36e7428289 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c @@ -0,0 +1,36 @@ +/* Test the vmulxd_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float64_t v1, float64_t v2, float64_t e1) +{ + float64_t actual1 = vmulxd_f64 (v1, v2); + if (actual1 != e1) + abort (); +} + +int +main (void) +{ + int i; + float64_t v1 = 3.14159265359; + float64_t v2 = 1.383894; + float64_t v3 = -2.71828; + float64_t v4 = -3.4891931; + + test_case (v1, v2, v1 * v2); + test_case (0.0, __builtin_huge_val (), 2.0); + test_case (0.0, -__builtin_huge_val (), -2.0); + test_case (-0.0, __builtin_huge_val (), -2.0); + test_case (-0.0, -__builtin_huge_val (), 2.0); + + return 0; +} + +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c new file mode 100644 index 00000000000..7f8dbd9c8de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c @@ -0,0 +1,48 @@ +/* Test the vmulxq_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float32_t v1[4], float32_t v2[4], float32_t e[4]) +{ + int i; + float32x4_t vec1_1 = vld1q_f32 (v1); + float32x4_t vec1_2 = vld1q_f32 (v2); + + float32x4_t actual1 = vmulxq_f32 (vec1_1, vec1_2); + float32_t actual[4]; + vst1q_f32 (actual, actual1); + + for (i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +int +main (void) +{ + float32_t v1 = 3.14159265359; + float32_t v2 = 1.383894; + float32_t v3 = -2.71828; + float32_t v4 = -3.4891931; + + float32_t v1_1[] = {v1, v2, v3, v4}; + float32_t v1_2[] = {v3, v4, v1, v2}; + float32_t e1[] = {v1 * v3, v2 * v4, v3 * v1, v4 * v2}; + test_case (v1_1, v1_2, e1); + + float32_t v2_1[] = {0, -0.0, 0, -0.0}; + float32_t v2_2[] = {-__builtin_huge_valf (), -__builtin_huge_valf (), + __builtin_huge_valf (), __builtin_huge_valf () }; + float32_t e2[] = {-2.0, 2.0, 2.0, -2.0}; + test_case (v2_1, v2_2, e2); + + return 0; +} + +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4s, ?\[vV\]\[0-9\]+\.4s, ?\[vV\]\[0-9\]+\.4s\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c new file mode 100644 index 00000000000..f306bd05301 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c @@ -0,0 +1,51 @@ +/* Test the vmulxq_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float64_t v1[2], float64_t v2[2], float64_t e1[2]) +{ + int i; + float64x2_t vec1_1 = vld1q_f64 (v1); + float64x2_t vec1_2 = vld1q_f64 (v2); + + float64x2_t actual1 = vmulxq_f64 (vec1_1, vec1_2); + float64_t actual[2]; + vst1q_f64 (actual, actual1); + + for (i = 0; i < 2; ++i) + if (actual[i] != e1[i]) + abort (); +} + +int +main (void) +{ + int i; + float64_t v1 = 3.14159265359; + float64_t v2 = -2.71828; + + float64_t v1_1[] = {v1, v2}; + float64_t v1_2[] = {v2, v1}; + float64_t e1[] = {v1 * v2, v2* v1}; + test_case (v1_1, v1_2, e1); + + float64_t v2_1[] = {0, 0}; + float64_t v2_2[] = {__builtin_huge_val (), -__builtin_huge_val ()}; + float64_t e2[] = {2.0, -2.0}; + test_case (v2_1, v2_2, e2); + + float64_t v3_1[] = {-0.0, -0.0}; + float64_t v3_2[] = {__builtin_huge_val (), -__builtin_huge_val ()}; + float64_t e3[] = {-2.0, 2.0}; + test_case (v3_1, v3_2, e3); + + return 0; +} + +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\]\n" 1} } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c new file mode 100644 index 00000000000..fc08e143b87 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c @@ -0,0 +1,34 @@ +/* Test the vmulxs_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +void __attribute__ ((noinline)) +test_case (float32_t v1, float32_t v2, float32_t e) +{ + float32_t actual = vmulxs_f32 (v1, v2); + if (actual != e) + abort (); +} + +int +main (void) +{ + float32_t v1 = 3.14159265359; + float32_t v2 = 1.383894; + float32_t v3 = -2.71828; + float32_t v4 = -3.4891931; + + test_case (v1, v2, v1 * v2); + test_case (0.0, __builtin_huge_valf (), 2.0); + test_case (0.0, -__builtin_huge_valf (), -2.0); + test_case (-0.0, __builtin_huge_valf (), -2.0); + test_case (-0.0, -__builtin_huge_valf (), 2.0); + + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+\n" 1 } } */ -- 2.30.2