From 77a205be4eac302ab5abda34f181fd11cc64cda8 Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Mon, 29 Apr 2013 10:23:15 +0000 Subject: [PATCH] [AArch64] Convert NEON frint implementations to use builtins. gcc/ * config/aarch64/arm_neon.h (vrndq_f<32, 64>): Rename to... (vrndq_f<32, 64>): ...This, implement using builtin. (vrnd_f32): Implement using builtins. (vrnd_f<32, 64>): New. gcc/testsuite/ * gcc.target/aarch64/vect-vrnd.c: New. From-SVN: r198396 --- gcc/ChangeLog | 7 + gcc/config/aarch64/arm_neon.h | 304 +++++++++---------- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.target/aarch64/vect-vrnd.c | 117 +++++++ 4 files changed, 267 insertions(+), 165 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-vrnd.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ae287824002..a46324a2671 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2013-04-29 James Greenhalgh + + * config/aarch64/arm_neon.h (vrndq_f<32, 64>): Rename to... + (vrndq_f<32, 64>): ...This, implement using builtin. + (vrnd_f32): Implement using builtins. + (vrnd_f<32, 64>): New. + 2013-04-29 James Greenhalgh * config/aarch64/aarch64-builtins.c diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 6f5ca8ec6d6..c868a4623b9 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -14941,171 +14941,6 @@ vrev64q_u32 (uint32x4_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnd_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintz %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnda_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frinta %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndm_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintm %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndn_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintn %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndp_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintp %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintz %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintz %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqa_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frinta %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqa_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frinta %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqm_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintm %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqm_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintm %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqn_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintn %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqn_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintn %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqp_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintp %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqp_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintp %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ @@ -23069,6 +22904,145 @@ vrecpxd_f64 (float64_t __a) return __builtin_aarch64_frecpxdf (__a); } +/* vrnd */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return __builtin_aarch64_btruncv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_btruncv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_btruncv2df (__a); +} + +/* vrnda */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return __builtin_aarch64_roundv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndaq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_roundv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndaq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_roundv2df (__a); +} + +/* vrndi */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndi_f32 (float32x2_t __a) +{ + return __builtin_aarch64_nearbyintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndiq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_nearbyintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndiq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_nearbyintv2df (__a); +} + +/* vrndm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return __builtin_aarch64_floorv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndmq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_floorv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndmq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_floorv2df (__a); +} + +/* vrndn */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frintnv2sf (__a); +} +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndnq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frintnv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndnq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frintnv2df (__a); +} + +/* vrndp */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return __builtin_aarch64_ceilv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndpq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_ceilv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndpq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_ceilv2df (__a); +} + +/* vrndx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndx_f32 (float32x2_t __a) +{ + return __builtin_aarch64_rintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndxq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_rintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndxq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_rintv2df (__a); +} + /* vrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e6cee3bb6b6..898bfdf15b4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-04-29 James Greenhalgh + + * gcc.target/aarch64/vect-vrnd.c: New. + 2013-04-29 Richard Biener PR tree-optimization/57081 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c b/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c new file mode 100644 index 00000000000..aa3fd9b401a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +extern double trunc (double); +extern double round (double); +extern double nearbyint (double); +extern double floor (double); +extern double ceil (double); +extern double rint (double); + +extern float truncf (float); +extern float roundf (float); +extern float nearbyintf (float); +extern float floorf (float); +extern float ceilf (float); +extern float rintf (float); + +#define NUM_TESTS 8 +#define DELTA 0.000001 + +float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f}; +double input_f64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5}; + +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +int \ +test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \ +{ \ + int ret = 1; \ + int i = 0; \ + int nlanes = LANES; \ + float##WIDTH##_t expected_out[NUM_TESTS]; \ + float##WIDTH##_t actual_out[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + expected_out[i] = C_FN##F (input_f##WIDTH[i]); \ + /* Don't vectorize this. */ \ + asm volatile ("" : : : "memory"); \ + } \ + \ + /* Prevent the compiler from noticing these two loops do the same \ + thing and optimizing away the comparison. */ \ + asm volatile ("" : : : "memory"); \ + \ + for (i = 0; i < NUM_TESTS; i+=nlanes) \ + { \ + float##WIDTH##x##LANES##_t out = \ + vrnd##SUFFIX##Q##_f##WIDTH \ + (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \ + vst1##Q##_f##WIDTH (actual_out + i, out); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \ + \ + return ret; \ +} \ + + +#define BUILD_VARIANTS(SUFFIX, C_FN) \ +TEST (SUFFIX, , 32, 2, C_FN, f) \ +TEST (SUFFIX, q, 32, 4, C_FN, f) \ +TEST (SUFFIX, q, 64, 2, C_FN, ) \ + +BUILD_VARIANTS ( , trunc) +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (a, round) +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (i, nearbyint) +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (m, floor) +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (p, ceil) +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (x, rint) +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +{ \ + if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS ( , trunc) + BUILD_VARIANTS (a, round) + BUILD_VARIANTS (i, nearbyint) + BUILD_VARIANTS (m, floor) + BUILD_VARIANTS (p, ceil) + BUILD_VARIANTS (x, rint) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ -- 2.30.2