From d7f33f07d88984cbe769047e3d07fc21067fbba9 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 25 Jul 2016 16:00:28 +0000 Subject: [PATCH] [AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics gcc/ * config.gcc (aarch64*-*-*): Install arm_fp16.h. * config/aarch64/aarch64-builtins.c (hi_UP): New. * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (aarch64_frsqrte): Extend to HF mode. (aarch64_frecp): Likewise. (aarch64_cm): Likewise. * config/aarch64/aarch64.md (2): Likewise. (l2): Likewise. (fix_trunc2): Likewise. (sqrt2): Likewise. (abs2): Likewise. (hf2): New pattern for HF mode. (hihf2): Likewise. * config/aarch64/arm_neon.h: Include arm_fp16.h. * config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New. (w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE): Support HF mode. * config/aarch64/arm_fp16.h: New file. (vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16, vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16, vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16, vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16, vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16, vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16, vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16, vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16, vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16, vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16, vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16, vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16, vsqrth_f16): New. From-SVN: r238722 --- gcc/ChangeLog | 35 ++ gcc/config.gcc | 2 +- gcc/config/aarch64/aarch64-builtins.c | 1 + gcc/config/aarch64/aarch64-simd-builtins.def | 54 ++- gcc/config/aarch64/aarch64-simd.md | 42 ++- gcc/config/aarch64/aarch64.md | 59 +-- gcc/config/aarch64/arm_fp16.h | 365 +++++++++++++++++++ gcc/config/aarch64/arm_neon.h | 2 + gcc/config/aarch64/iterators.md | 32 +- 9 files changed, 534 insertions(+), 58 deletions(-) create mode 100644 gcc/config/aarch64/arm_fp16.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e9dd4f36aad..a26721b8c51 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2016-07-25 Jiong Wang + + * config.gcc (aarch64*-*-*): Install arm_fp16.h. + * config/aarch64/aarch64-builtins.c (hi_UP): New. + * config/aarch64/aarch64-simd-builtins.def: Register new builtins. + * config/aarch64/aarch64-simd.md (aarch64_frsqrte): Extend to HF + mode. + (aarch64_frecp): Likewise. + (aarch64_cm): Likewise. + * config/aarch64/aarch64.md (2): Likewise. + (l2): Likewise. + (fix_trunc2): Likewise. + (sqrt2): Likewise. + (abs2): Likewise. + (hf2): New pattern for HF mode. + (hihf2): Likewise. + * config/aarch64/arm_neon.h: Include arm_fp16.h. + * config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New. + (w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE): + Support HF mode. + * config/aarch64/arm_fp16.h: New file. 
+ (vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16, + vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16, + vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16, + vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16, + vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16, + vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16, + vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16, + vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16, + vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16, + vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16, + vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16, + vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16, + vsqrth_f16): New. + 2016-07-25 Jiong Wang * config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_, diff --git a/gcc/config.gcc b/gcc/config.gcc index 1f75f178773..8827dc830d3 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -300,7 +300,7 @@ m32c*-*-*) ;; aarch64*-*-*) cpu_type=aarch64 - extra_headers="arm_neon.h arm_acle.h" + extra_headers="arm_fp16.h arm_neon.h arm_acle.h" c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o" diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index af5fac5b29c..ca91d9108ea 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -62,6 +62,7 @@ #define si_UP SImode #define sf_UP SFmode #define hi_UP HImode +#define hf_UP HFmode #define qi_UP QImode #define UP(X) X##_UP diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 363e131327d..6f50d8405d3 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -274,6 +274,14 @@ BUILTIN_VHSDF (UNOP, round, 2) BUILTIN_VHSDF_DF (UNOP, frintn, 2) + VAR1 (UNOP, btrunc, 2, hf) + VAR1 (UNOP, ceil, 2, hf) + VAR1 (UNOP, floor, 2, hf) + VAR1 (UNOP, frintn, 2, hf) + VAR1 (UNOP, nearbyint, 2, hf) + VAR1 (UNOP, rint, 2, hf) + VAR1 (UNOP, round, 2, hf) + /* Implemented by l2. */ VAR1 (UNOP, lbtruncv4hf, 2, v4hi) VAR1 (UNOP, lbtruncv8hf, 2, v8hi) @@ -292,7 +300,8 @@ VAR1 (UNOP, lroundv2sf, 2, v2si) VAR1 (UNOP, lroundv4sf, 2, v4si) VAR1 (UNOP, lroundv2df, 2, v2di) - /* Implemented by l2. */ + /* Implemented by l2. 
*/ + BUILTIN_GPI_I16 (UNOP, lroundhf, 2) VAR1 (UNOP, lroundsf, 2, si) VAR1 (UNOP, lrounddf, 2, di) @@ -301,6 +310,7 @@ VAR1 (UNOPUS, lrounduv2sf, 2, v2si) VAR1 (UNOPUS, lrounduv4sf, 2, v4si) VAR1 (UNOPUS, lrounduv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2) VAR1 (UNOPUS, lroundusf, 2, si) VAR1 (UNOPUS, lroundudf, 2, di) @@ -309,12 +319,14 @@ VAR1 (UNOP, lceilv2sf, 2, v2si) VAR1 (UNOP, lceilv4sf, 2, v4si) VAR1 (UNOP, lceilv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOP, lceilhf, 2) VAR1 (UNOPUS, lceiluv4hf, 2, v4hi) VAR1 (UNOPUS, lceiluv8hf, 2, v8hi) VAR1 (UNOPUS, lceiluv2sf, 2, v2si) VAR1 (UNOPUS, lceiluv4sf, 2, v4si) VAR1 (UNOPUS, lceiluv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2) VAR1 (UNOPUS, lceilusf, 2, si) VAR1 (UNOPUS, lceiludf, 2, di) @@ -323,12 +335,14 @@ VAR1 (UNOP, lfloorv2sf, 2, v2si) VAR1 (UNOP, lfloorv4sf, 2, v4si) VAR1 (UNOP, lfloorv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOP, lfloorhf, 2) VAR1 (UNOPUS, lflooruv4hf, 2, v4hi) VAR1 (UNOPUS, lflooruv8hf, 2, v8hi) VAR1 (UNOPUS, lflooruv2sf, 2, v2si) VAR1 (UNOPUS, lflooruv4sf, 2, v4si) VAR1 (UNOPUS, lflooruv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2) VAR1 (UNOPUS, lfloorusf, 2, si) VAR1 (UNOPUS, lfloorudf, 2, di) @@ -337,6 +351,7 @@ VAR1 (UNOP, lfrintnv2sf, 2, v2si) VAR1 (UNOP, lfrintnv4sf, 2, v4si) VAR1 (UNOP, lfrintnv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2) VAR1 (UNOP, lfrintnsf, 2, si) VAR1 (UNOP, lfrintndf, 2, di) @@ -345,6 +360,7 @@ VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si) VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si) VAR1 (UNOPUS, lfrintnuv2df, 2, v2di) + BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2) VAR1 (UNOPUS, lfrintnusf, 2, si) VAR1 (UNOPUS, lfrintnudf, 2, di) @@ -376,9 +392,9 @@ /* Implemented by aarch64_frecp. */ - BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF_F16 (UNOP, frecpe, 0) BUILTIN_GPF (BINOP, frecps, 0) - BUILTIN_GPF (UNOP, frecpx, 0) + BUILTIN_GPF_F16 (UNOP, frecpx, 0) BUILTIN_VDQ_SI (UNOP, urecpe, 0) @@ -389,6 +405,7 @@ only ever used for the int64x1_t intrinsic, there is no scalar version. */ BUILTIN_VSDQ_I_DI (UNOP, abs, 0) BUILTIN_VHSDF (UNOP, abs, 2) + VAR1 (UNOP, abs, 2, hf) BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10) VAR1 (BINOP, float_truncate_hi_, 0, v4sf) @@ -483,7 +500,7 @@ BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3) /* Implemented by aarch64_rsqrte. */ - BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0) + BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0) /* Implemented by aarch64_rsqrts. */ BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0) @@ -495,17 +512,34 @@ BUILTIN_VHSDF (BINOP, faddp, 0) /* Implemented by aarch64_cm. */ - BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0) - BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0) + BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0) /* Implemented by neg2. */ - BUILTIN_VHSDF (UNOP, neg, 2) + BUILTIN_VHSDF_HSDF (UNOP, neg, 2) /* Implemented by aarch64_fac. */ BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0) BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0) BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0) BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0) + + /* Implemented by sqrt2. */ + VAR1 (UNOP, sqrt, 2, hf) + + /* Implemented by hf2. 
*/ + VAR1 (UNOP, floatdi, 2, hf) + VAR1 (UNOP, floatsi, 2, hf) + VAR1 (UNOP, floathi, 2, hf) + VAR1 (UNOPUS, floatunsdi, 2, hf) + VAR1 (UNOPUS, floatunssi, 2, hf) + VAR1 (UNOPUS, floatunshi, 2, hf) + BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2) + BUILTIN_GPI (UNOP, fix_truncsf, 2) + BUILTIN_GPI (UNOP, fix_truncdf, 2) + BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) + BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2) + BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 501858d9ac3..2ed9d9f8ba8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -383,8 +383,8 @@ ) (define_insn "aarch64_rsqrte" - [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w") - (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")] + [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") + (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] UNSPEC_RSQRTE))] "TARGET_SIMD" "frsqrte\\t%0, %1" @@ -1755,6 +1755,32 @@ [(set_attr "type" "neon_fp_to_int_")] ) +;; HF Scalar variants of related SIMD instructions. +(define_insn "lhfhi2" + [(set (match_operand:HI 0 "register_operand" "=w") + (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")] + FCVT)))] + "TARGET_SIMD_F16INST" + "fcvt\t%h0, %h1" + [(set_attr "type" "neon_fp_to_int_s")] +) + +(define_insn "_trunchfhi2" + [(set (match_operand:HI 0 "register_operand" "=w") + (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))] + "TARGET_SIMD_F16INST" + "fcvtz\t%h0, %h1" + [(set_attr "type" "neon_fp_to_int_s")] +) + +(define_insn "hihf2" + [(set (match_operand:HF 0 "register_operand" "=w") + (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))] + "TARGET_SIMD_F16INST" + "cvtf\t%h0, %h1" + [(set_attr "type" "neon_int_to_fp_s")] +) + (define_insn "*aarch64_fcvt2_mult" [(set (match_operand: 0 "register_operand" "=w") (FIXUORS: (unspec: @@ -4297,8 +4323,8 @@ [(set (match_operand: 0 "register_operand" "=w,w") (neg: (COMPARISONS: - (match_operand:VHSDF_SDF 1 "register_operand" "w,w") - (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz") + (match_operand:VHSDF_HSDF 1 "register_operand" "w,w") + (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz") )))] "TARGET_SIMD" "@ @@ -5425,12 +5451,12 @@ ) (define_insn "aarch64_frecp" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] - FRECP))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] + FRECP))] "TARGET_SIMD" "frecp\\t%0, %1" - [(set_attr "type" "neon_fp_recp_")] + [(set_attr "type" "neon_fp_recp_")] ) (define_insn "aarch64_frecps" diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index bcb7db08657..56ad581da6c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4450,22 +4450,23 @@ ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. (define_insn "2" - [(set (match_operand:GPF 0 "register_operand" "=w") - (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] FRINT))] "TARGET_FLOAT" "frint\\t%0, %1" - [(set_attr "type" "f_rint")] + [(set_attr "type" "f_rint")] ) ;; frcvt floating-point round to integer and convert standard patterns. ;; Expands to lbtrunc, lceil, lfloor, lround. 
-(define_insn "l2" +(define_insn "l2" [(set (match_operand:GPI 0 "register_operand" "=r") - (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] - FCVT)))] + (FIXUORS:GPI + (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] + FCVT)))] "TARGET_FLOAT" - "fcvt\\t%0, %1" + "fcvt\\t%0, %1" [(set_attr "type" "f_cvtf2i")] ) @@ -4593,19 +4594,11 @@ [(set_attr "type" "f_cvt")] ) -(define_insn "fix_trunc2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] - "TARGET_FLOAT" - "fcvtzs\\t%0, %1" - [(set_attr "type" "f_cvtf2i")] -) - -(define_insn "fixuns_trunc2" +(define_insn "_trunc2" [(set (match_operand:GPI 0 "register_operand" "=r") - (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + (FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" - "fcvtzu\\t%0, %1" + "fcvtz\t%0, %1" [(set_attr "type" "f_cvtf2i")] ) @@ -4629,6 +4622,14 @@ [(set_attr "type" "f_cvti2f")] ) +(define_insn "hf2" + [(set (match_operand:HF 0 "register_operand" "=w") + (FLOATUORS:HF (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_FP_F16INST" + "cvtf\t%h0, %1" + [(set_attr "type" "f_cvti2f")] +) + ;; Convert between fixed-point and floating-point (scalar modes) (define_insn "3" @@ -4735,16 +4736,16 @@ ) (define_insn "neg2" - [(set (match_operand:GPF 0 "register_operand" "=w") - (neg:GPF (match_operand:GPF 1 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" "fneg\\t%0, %1" - [(set_attr "type" "ffarith")] + [(set_attr "type" "ffarith")] ) (define_expand "sqrt2" - [(set (match_operand:GPF 0 "register_operand") - (sqrt:GPF (match_operand:GPF 1 "register_operand")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" { if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) @@ -4752,19 +4753,19 @@ }) (define_insn "*sqrt2" - [(set (match_operand:GPF 0 "register_operand" "=w") - (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" "fsqrt\\t%0, %1" - [(set_attr "type" "fsqrt")] + [(set_attr "type" "fsqrt")] ) (define_insn "abs2" - [(set (match_operand:GPF 0 "register_operand" "=w") - (abs:GPF (match_operand:GPF 1 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand" "=w") + (abs:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" "fabs\\t%0, %1" - [(set_attr "type" "ffarith")] + [(set_attr "type" "ffarith")] ) ;; Given that smax/smin do not specify the result when either input is NaN, diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h new file mode 100644 index 00000000000..818aa61925b --- /dev/null +++ b/gcc/config/aarch64/arm_fp16.h @@ -0,0 +1,365 @@ +/* ARM FP16 scalar intrinsics include file. + + Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _AARCH64_FP16_H_ +#define _AARCH64_FP16_H_ + +#include + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef __fp16 float16_t; + +/* ARMv8.2-A FP16 one operand scalar intrinsics. */ + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vabsh_f16 (float16_t __a) +{ + return __builtin_aarch64_abshf (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vceqzh_f16 (float16_t __a) +{ + return __builtin_aarch64_cmeqhf_uss (__a, 0.0f); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcgezh_f16 (float16_t __a) +{ + return __builtin_aarch64_cmgehf_uss (__a, 0.0f); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcgtzh_f16 (float16_t __a) +{ + return __builtin_aarch64_cmgthf_uss (__a, 0.0f); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vclezh_f16 (float16_t __a) +{ + return __builtin_aarch64_cmlehf_uss (__a, 0.0f); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcltzh_f16 (float16_t __a) +{ + return __builtin_aarch64_cmlthf_uss (__a, 0.0f); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_s16 (int16_t __a) +{ + return __builtin_aarch64_floathihf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_s32 (int32_t __a) +{ + return __builtin_aarch64_floatsihf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_s64 (int64_t __a) +{ + return __builtin_aarch64_floatdihf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_u16 (uint16_t __a) +{ + return __builtin_aarch64_floatunshihf_us (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_u32 (uint32_t __a) +{ + return __builtin_aarch64_floatunssihf_us (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vcvth_f16_u64 (uint64_t __a) +{ + return __builtin_aarch64_floatunsdihf_us (__a); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvth_s16_f16 (float16_t __a) +{ + return __builtin_aarch64_fix_trunchfhi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvth_s32_f16 (float16_t __a) +{ + return __builtin_aarch64_fix_trunchfsi (__a); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvth_s64_f16 (float16_t __a) +{ + return __builtin_aarch64_fix_trunchfdi (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvth_u16_f16 (float16_t __a) +{ + return __builtin_aarch64_fixuns_trunchfhi_us (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvth_u32_f16 
(float16_t __a) +{ + return __builtin_aarch64_fixuns_trunchfsi_us (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvth_u64_f16 (float16_t __a) +{ + return __builtin_aarch64_fixuns_trunchfdi_us (__a); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvtah_s16_f16 (float16_t __a) +{ + return __builtin_aarch64_lroundhfhi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtah_s32_f16 (float16_t __a) +{ + return __builtin_aarch64_lroundhfsi (__a); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtah_s64_f16 (float16_t __a) +{ + return __builtin_aarch64_lroundhfdi (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvtah_u16_f16 (float16_t __a) +{ + return __builtin_aarch64_lrounduhfhi_us (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtah_u32_f16 (float16_t __a) +{ + return __builtin_aarch64_lrounduhfsi_us (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtah_u64_f16 (float16_t __a) +{ + return __builtin_aarch64_lrounduhfdi_us (__a); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvtmh_s16_f16 (float16_t __a) +{ + return __builtin_aarch64_lfloorhfhi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtmh_s32_f16 (float16_t __a) +{ + return __builtin_aarch64_lfloorhfsi (__a); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtmh_s64_f16 (float16_t __a) +{ + return __builtin_aarch64_lfloorhfdi (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvtmh_u16_f16 (float16_t __a) +{ + return __builtin_aarch64_lflooruhfhi_us (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtmh_u32_f16 (float16_t __a) +{ + return __builtin_aarch64_lflooruhfsi_us (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtmh_u64_f16 (float16_t __a) +{ + return __builtin_aarch64_lflooruhfdi_us (__a); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvtnh_s16_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnhfhi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtnh_s32_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnhfsi (__a); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtnh_s64_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnhfdi (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvtnh_u16_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnuhfhi_us (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtnh_u32_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnuhfsi_us (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtnh_u64_f16 (float16_t __a) +{ + return __builtin_aarch64_lfrintnuhfdi_us (__a); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vcvtph_s16_f16 (float16_t __a) +{ + return __builtin_aarch64_lceilhfhi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtph_s32_f16 (float16_t __a) +{ + return __builtin_aarch64_lceilhfsi (__a); +} + +__extension__ static __inline int64_t __attribute__ 
((__always_inline__)) +vcvtph_s64_f16 (float16_t __a) +{ + return __builtin_aarch64_lceilhfdi (__a); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vcvtph_u16_f16 (float16_t __a) +{ + return __builtin_aarch64_lceiluhfhi_us (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtph_u32_f16 (float16_t __a) +{ + return __builtin_aarch64_lceiluhfsi_us (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtph_u64_f16 (float16_t __a) +{ + return __builtin_aarch64_lceiluhfdi_us (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vnegh_f16 (float16_t __a) +{ + return __builtin_aarch64_neghf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrecpeh_f16 (float16_t __a) +{ + return __builtin_aarch64_frecpehf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrecpxh_f16 (float16_t __a) +{ + return __builtin_aarch64_frecpxhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndh_f16 (float16_t __a) +{ + return __builtin_aarch64_btrunchf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndah_f16 (float16_t __a) +{ + return __builtin_aarch64_roundhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndih_f16 (float16_t __a) +{ + return __builtin_aarch64_nearbyinthf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndmh_f16 (float16_t __a) +{ + return __builtin_aarch64_floorhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndnh_f16 (float16_t __a) +{ + return __builtin_aarch64_frintnhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndph_f16 (float16_t __a) +{ + return __builtin_aarch64_ceilhf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrndxh_f16 (float16_t __a) +{ + return __builtin_aarch64_rinthf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vrsqrteh_f16 (float16_t __a) +{ + return __builtin_aarch64_rsqrtehf (__a); +} + +__extension__ static __inline float16_t __attribute__ ((__always_inline__)) +vsqrth_f16 (float16_t __a) +{ + return __builtin_aarch64_sqrthf (__a); +} + +#pragma GCC pop_options + +#endif diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 4382efda8c3..fd555583b46 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26032,6 +26032,8 @@ __INTERLEAVE_LIST (zip) /* ARMv8.2-A FP16 intrinsics. */ +#include "arm_fp16.h" + #pragma GCC push_options #pragma GCC target ("arch=armv8.2-a+fp16") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 011b937105e..20d0f1bf615 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -26,6 +26,9 @@ ;; Iterator for General Purpose Integer registers (32- and 64-bit modes) (define_mode_iterator GPI [SI DI]) +;; Iterator for HI, SI, DI, some instructions can only work on these modes. 
+(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) + ;; Iterator for QI and HI modes (define_mode_iterator SHORT [QI HI]) @@ -38,6 +41,9 @@ ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes) (define_mode_iterator GPF [SF DF]) +;; Iterator for all scalar floating point modes (HF, SF, DF) +(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF]) + ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) @@ -102,6 +108,11 @@ (define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST") V2SF V4SF V2DF SF DF]) +(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST") + (V8HF "TARGET_SIMD_F16INST") + V2SF V4SF V2DF + (HF "TARGET_SIMD_F16INST") + SF DF]) ;; Vector single Float modes. (define_mode_iterator VDQSF [V2SF V4SF]) @@ -372,8 +383,8 @@ (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) ;; For inequal width int to float conversion -(define_mode_attr w1 [(SF "w") (DF "x")]) -(define_mode_attr w2 [(SF "x") (DF "w")]) +(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) +(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) (define_mode_attr short_mask [(HI "65535") (QI "255")]) @@ -385,7 +396,7 @@ ;; For scalar usage of vector/FP registers (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d") - (SF "s") (DF "d") + (HF "h") (SF "s") (DF "d") (V8QI "") (V16QI "") (V4HI "") (V8HI "") (V2SI "") (V4SI "") @@ -416,7 +427,7 @@ (define_mode_attr vas [(DI "") (SI ".2s")]) ;; Map a floating point mode to the appropriate register name prefix -(define_mode_attr s [(SF "s") (DF "d")]) +(define_mode_attr s [(HF "h") (SF "s") (DF "d")]) ;; Give the length suffix letter for a sign- or zero-extension. (define_mode_attr size [(QI "b") (HI "h") (SI "w")]) @@ -452,8 +463,8 @@ (V4SF ".4s") (V2DF ".2d") (DI "") (SI "") (HI "") (QI "") - (TI "") (SF "") - (DF "")]) + (TI "") (HF "") + (SF "") (DF "")]) ;; Register suffix narrowed modes for VQN. (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h") @@ -468,6 +479,7 @@ (V2DI "d") (V4HF "h") (V8HF "h") (V2SF "s") (V4SF "s") (V2DF "d") + (HF "h") (SF "s") (DF "d") (QI "b") (HI "h") (SI "s") (DI "d")]) @@ -639,7 +651,7 @@ (V4HF "V4HI") (V8HF "V8HI") (V2SF "V2SI") (V4SF "V4SI") (V2DF "V2DI") (DF "DI") - (SF "SI")]) + (SF "SI") (HF "HI")]) ;; Lower case mode of results of comparison operations. (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") @@ -702,8 +714,8 @@ ;; for the inequal width integer to fp conversions -(define_mode_attr fcvt_iesize [(SF "di") (DF "si")]) -(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")]) +(define_mode_attr fcvt_iesize [(HF "di") (SF "di") (DF "si")]) +(define_mode_attr FCVT_IESIZE [(HF "DI") (SF "DI") (DF "SI")]) (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") (V4HI "V8HI") (V8HI "V4HI") @@ -757,7 +769,7 @@ (V4HF "") (V8HF "_q") (V2SF "") (V4SF "_q") (V2DF "_q") - (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")]) + (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")]) (define_mode_attr vp [(V8QI "v") (V16QI "v") (V4HI "v") (V8HI "v") -- 2.30.2
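Editor's note (not part of the patch): a minimal usage sketch of a few of the new scalar FP16 intrinsics declared in the arm_fp16.h introduced above. This is an illustrative test program, not code from the commit; it assumes a toolchain built with this patch on an ARMv8.2-A FP16 target (the header's own "#pragma GCC target" covers the inline definitions, but building whole files for this feature would normally use -march=armv8.2-a+fp16). The instruction names in the comments are the expected mappings, as hinted by the patterns in the diff.

/* fp16_scalar_example.c -- hypothetical example, not part of the patch.  */
#include <arm_fp16.h>
#include <stdio.h>

int
main (void)
{
  float16_t x = 2.5f;

  float16_t a = vabsh_f16 (x);        /* FABS   Hd, Hn                      */
  float16_t n = vnegh_f16 (x);        /* FNEG   Hd, Hn                      */
  float16_t m = vrndmh_f16 (x);       /* FRINTM: round toward minus infinity */
  float16_t s = vsqrth_f16 (x);       /* FSQRT  Hd, Hn                      */

  int32_t   i = vcvth_s32_f16 (x);    /* FCVTZS Wd, Hn: truncate toward zero */
  uint16_t  z = vceqzh_f16 (n);       /* compare against 0.0: all-ones if
                                         equal, zero otherwise               */
  float16_t b = vcvth_f16_s32 (i);    /* SCVTF  Hd, Wn                      */

  printf ("abs=%f neg=%f floor=%f sqrt=%f trunc=%d eqz=%#x back=%f\n",
          (double) a, (double) n, (double) m, (double) s, (int) i,
          (unsigned) z, (double) b);
  return 0;
}

Because arm_neon.h now includes arm_fp16.h, the same intrinsics are also visible to code that only includes arm_neon.h.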