From 74dc11ed84af069ad388d919d47cf553b83c9f60 Mon Sep 17 00:00:00 2001 From: Alex Velenko Date: Tue, 22 Apr 2014 08:39:48 +0000 Subject: [PATCH] [AArch64] vrnd<*>_f64 patch This patch adds vrnd<*>_f64 aarch64 intrinsics. A testcase for those intrinsics is added. Run a complete LE and BE regression run with no regressions. From-SVN: r209559 --- gcc/ChangeLog | 17 +++ gcc/config/aarch64/aarch64-builtins.c | 2 + gcc/config/aarch64/aarch64-simd-builtins.def | 2 +- gcc/config/aarch64/aarch64-simd.md | 2 +- gcc/config/aarch64/aarch64.md | 2 +- gcc/config/aarch64/arm_neon.h | 43 +++++++ gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c | 105 ++++++++++++++++++ 8 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index df12253031f..4eab80869f5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2014-04-22 Alex Velenko + + * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro + added. + * config/aarch64/aarch64-simd-builtins.def (frintn): Use added + macro. + * config/aarch64/aarch64-simd.md (): Comment + corrected. + * config/aarch64/aarch64.md (): Likewise. + * config/aarch64/arm_neon.h (vrnd_f64): Added. + (vrnda_f64): Likewise. + (vrndi_f64): Likewise. + (vrndm_f64): Likewise. + (vrndn_f64): Likewise. + (vrndp_f64): Likewise. + (vrndx_f64): Likewise. + 2014-04-22 Zhenqiang Chen * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 55cfe0ab225..cd46260b6ac 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -311,6 +311,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) #define BUILTIN_VDQF(T, N, MAP) \ VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQF_DF(T, N, MAP) \ + VAR4 (T, N, MAP, v2sf, v4sf, v2df, df) #define BUILTIN_VDQH(T, N, MAP) \ VAR2 (T, N, MAP, v4hi, v8hi) #define BUILTIN_VDQHS(T, N, MAP) \ diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index c9b7570e565..c5e3b3e9fb3 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -265,7 +265,7 @@ BUILTIN_VDQF (UNOP, nearbyint, 2) BUILTIN_VDQF (UNOP, rint, 2) BUILTIN_VDQF (UNOP, round, 2) - BUILTIN_VDQF (UNOP, frintn, 2) + BUILTIN_VDQF_DF (UNOP, frintn, 2) /* Implemented by l2. */ VAR1 (UNOP, lbtruncv2sf, 2, v2si) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 73aee2c3df0..7fa76be9ae8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1452,7 +1452,7 @@ ) ;; Vector versions of the floating-point frint patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. (define_insn "2" [(set (match_operand:VDQF 0 "register_operand" "=w") (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c86a29d8e7f..9368742ca3d 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -3188,7 +3188,7 @@ ;; ------------------------------------------------------------------- ;; frint floating-point round to integral standard patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. (define_insn "2" [(set (match_operand:GPF 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 747a292ba9b..a3c15ac3da2 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -22481,6 +22481,12 @@ vrnd_f32 (float32x2_t __a) return __builtin_aarch64_btruncv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnd_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndq_f32 (float32x4_t __a) { @@ -22501,6 +22507,12 @@ vrnda_f32 (float32x2_t __a) return __builtin_aarch64_roundv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnda_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndaq_f32 (float32x4_t __a) { @@ -22521,6 +22533,12 @@ vrndi_f32 (float32x2_t __a) return __builtin_aarch64_nearbyintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndi_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndiq_f32 (float32x4_t __a) { @@ -22541,6 +22559,12 @@ vrndm_f32 (float32x2_t __a) return __builtin_aarch64_floorv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndm_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndmq_f32 (float32x4_t __a) { @@ -22560,6 +22584,13 @@ vrndn_f32 (float32x2_t __a) { return __builtin_aarch64_frintnv2sf (__a); } + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndn_f64 (float64x1_t __a) +{ + return __builtin_aarch64_frintndf (__a); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndnq_f32 (float32x4_t __a) { @@ -22580,6 +22611,12 @@ vrndp_f32 (float32x2_t __a) return __builtin_aarch64_ceilv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndp_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndpq_f32 (float32x4_t __a) { @@ -22600,6 +22637,12 @@ vrndx_f32 (float32x2_t __a) return __builtin_aarch64_rintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndx_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndxq_f32 (float32x4_t __a) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 58db6ef30ff..2b060294f39 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2014-04-22 Alex Velenko + + * gcc.target/aarch64/vrnd_f64_1.c : New file. + 2014-04-21 Michael Meissner PR target/60735 diff --git a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c new file mode 100644 index 00000000000..2451ecdcfb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c @@ -0,0 +1,105 @@ +/* Test vrnd_f64 works correctly. */ +/* { dg-do run } */ +/* { dg-options "--save-temps" } */ + +#include "arm_neon.h" + +extern void abort (void); + +/* Bit offset to round mode field in FPCR. */ +#define RMODE_START 22 + +#define FPROUNDING_ZERO 3 + +/* Set RMODE field of FPCR control register + to rounding mode passed. */ +void __inline __attribute__ ((__always_inline__)) +set_rounding_mode (uint32_t mode) +{ + uint32_t r; + + /* Read current FPCR. */ + asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :); + + /* Clear rmode. */ + r &= ~(3 << RMODE_START); + /* Calculate desired FPCR. */ + r |= mode << RMODE_START; + + /* Write desired FPCR back. */ + asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :); +} + +float64x1_t __attribute__ ((noinline)) +compare_f64 (float64x1_t passed, float64_t expected) +{ + return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected) + > __DBL_EPSILON__); +} + +void __attribute__ ((noinline)) +run_round_tests (float64x1_t *tests, + float64_t expectations[][6]) +{ + int i; + + for (i = 0; i < 6; i++) + { + if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i])) + abort (); + if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i])) + abort (); + if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i])) + abort (); + if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i])) + abort (); + if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i])) + abort (); + if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i])) + abort (); + if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i])) + abort (); + } +} + +int +main (int argc, char **argv) +{ + float64x1_t tests[6] = + { + vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */ + vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */ + vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */ + vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */ + vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */ + vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */ + }; + + float64_t expectations[7][6] = + { + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */ + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */ + { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */ + { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */ + { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */ + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */ + { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */ + }; + + /* Set floating point control register + to have predictable vrndx and vrndi behaviour. */ + set_rounding_mode (FPROUNDING_ZERO); + + run_round_tests (tests, expectations); + + return 0; +} + +/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ -- 2.30.2