From d5d27976c89bdc3d1429c741d30572bd85c0a92c Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Wed, 23 Sep 2015 11:39:48 +0000 Subject: [PATCH] [AArch64] Fix vcvt_high_f64_f32 and vcvt_figh_f32_f64 intrinsics. gcc/ * config/aarch64/aarch64-simd.md (aarch64_float_truncate_hi_v4sf): Rewrite as an expand. (aarch64_float_truncate_hi_v4sf_le): New. (aarch64_float_truncate_hi_v4sf_be): Likewise. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New. From-SVN: r228044 --- gcc/ChangeLog | 7 ++ gcc/config/aarch64/aarch64-simd.md | 38 ++++++- gcc/testsuite/ChangeLog | 4 + .../aarch64/advsimd-intrinsics/vcvt_high_1.c | 98 +++++++++++++++++++ 4 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c46bd6d26a7..27e527dc97a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2015-09-23 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_float_truncate_hi_v4sf): Rewrite as an expand. + (aarch64_float_truncate_hi_v4sf_le): New. + (aarch64_float_truncate_hi_v4sf_be): Likewise. + 2015-09-23 Richard Biener * tree-ssa-structalias.c (intra_create_variable_infos): Build diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index dbe52591b59..5ab2f2b7763 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1703,6 +1703,15 @@ [(set_attr "type" "neon_fp_cvt_widen_s")] ) +;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns +;; is inconsistent with vector ordering elsewhere in the compiler, in that +;; the meaning of HI and LO changes depending on the target endianness. +;; While elsewhere we map the higher numbered elements of a vector to +;; the lower architectural lanes of the vector, for these patterns we want +;; to always treat "hi" as referring to the higher architectural lanes. +;; Consequently, while the patterns below look inconsistent with our +;; other big-endian patterns their behaviour is as required. + (define_expand "vec_unpacks_lo_" [(match_operand: 0 "register_operand" "") (match_operand:VQ_HSF 1 "register_operand" "")] @@ -1757,17 +1766,42 @@ [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) -(define_insn "aarch64_float_truncate_hi_" +(define_insn "aarch64_float_truncate_hi__le" [(set (match_operand: 0 "register_operand" "=w") (vec_concat: (match_operand:VDF 1 "register_operand" "0") (float_truncate:VDF (match_operand: 2 "register_operand" "w"))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "fcvtn2\\t%0., %2" [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) +(define_insn "aarch64_float_truncate_hi__be" + [(set (match_operand: 0 "register_operand" "=w") + (vec_concat: + (float_truncate:VDF + (match_operand: 2 "register_operand" "w")) + (match_operand:VDF 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "fcvtn2\\t%0., %2" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_expand "aarch64_float_truncate_hi_" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VDF 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN + ? gen_aarch64_float_truncate_hi__be + : gen_aarch64_float_truncate_hi__le; + emit_insn (gen (operands[0], operands[1], operands[2])); + DONE; +} +) + (define_expand "vec_pack_trunc_v2df" [(set (match_operand:V4SF 0 "register_operand") (vec_concat:V4SF diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 36e30b6056e..3e722c2c89c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-09-23 James Greenhalgh + + * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New. + 2015-09-23 Richard Biener * g++.dg/tree-ssa/restrict2.C: Un-XFAIL testcase. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c new file mode 100644 index 00000000000..27c6b574abc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c @@ -0,0 +1,98 @@ +/* { dg-skip-if "" { arm*-*-* } } */ + +#include "arm_neon.h" + +void abort (void); + +void +foo (void) +{ + /* Test vcvt_high_f32_f64. */ + float32x2_t arg1; + float64x2_t arg2; + float32x4_t result; + arg1 = vcreate_f32 (UINT64_C (0x3f0db5793f6e1892)); + arg2 = vcombine_f64 (vcreate_f64 (UINT64_C (0x3fe8e49d23fb575d)), + vcreate_f64 (UINT64_C (0x3fd921291b3df73e))); + // Expect: "result" = 3ec909483f4724e93f0db5793f6e1892 + result = vcvt_high_f32_f64 (arg1, arg2); + float32_t got; + float32_t exp; + + /* Lane 0. */ + got = vgetq_lane_f32 (result, 0); + exp = ((float32_t) 0.9300624132156372); + if (((((exp / got) < ((float32_t) 0.999)) + || ((exp / got) > ((float32_t) 1.001))) + && (((exp - got) < ((float32_t) -1.0e-4)) + || ((exp - got) > ((float32_t) 1.0e-4))))) + abort (); + + /* Lane 1. */ + got = vgetq_lane_f32 (result, 1); + exp = ((float32_t) 0.5535503029823303); + if (((((exp / got) < ((float32_t) 0.999)) + || ((exp / got) > ((float32_t) 1.001))) + && (((exp - got) < ((float32_t) -1.0e-4)) + || ((exp - got) > ((float32_t) 1.0e-4))))) + abort (); + + /* Lane 2. */ + got = vgetq_lane_f32 (result, 2); + exp = ((float32_t) 0.7779069617051665); + if (((((exp / got) < ((float32_t) 0.999)) + || ((exp / got) > ((float32_t) 1.001))) + && (((exp - got) < ((float32_t) -1.0e-4)) + || ((exp - got) > ((float32_t) 1.0e-4))))) + abort (); + + /* Lane 3. */ + got = vgetq_lane_f32 (result, 3); + exp = ((float32_t) 0.3926489606891329); + if (((((exp / got) < ((float32_t) 0.999)) + || ((exp / got) > ((float32_t) 1.001))) + && (((exp - got) < ((float32_t) -1.0e-4)) + || ((exp - got) > ((float32_t) 1.0e-4))))) + abort (); +} + +void +bar (void) +{ + /* Test vcvt_high_f64_f32. */ + float32x4_t arg1; + float64x2_t result; + arg1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f7c5cf13f261f74)), + vcreate_f32 (UINT64_C (0x3e3a7bc03f6ccc1d))); + // Expect: "result" = 3fc74f78000000003fed9983a0000000 + result = vcvt_high_f64_f32 (arg1); + + float64_t got; + float64_t exp; + + /* Lane 0. */ + got = vgetq_lane_f64 (result, 0); + exp = 0.9249895215034485; + if (((((exp / got) < 0.999) + || ((exp / got) > 1.001)) + && (((exp - got) < -1.0e-4) + || ((exp - got) > 1.0e-4)))) + abort (); + + /* Lane 1. */ + got = vgetq_lane_f64 (result, 1); + exp = 0.1821126937866211; + if (((((exp / got) < 0.999) + || ((exp / got) > 1.001)) + && (((exp - got) < -1.0e-4) + || ((exp - got) > 1.0e-4)))) + abort (); +} + +int +main (int argc, char **argv) +{ + foo (); + bar (); + return 0; +} -- 2.30.2