+2016-07-25 Jiong Wang <jiong.wang@arm.com>
+
+ * config.gcc (aarch64*-*-*): Install arm_fp16.h.
+ * config/aarch64/aarch64-builtins.c (hf_UP): New.
+ * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF
+ mode.
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
+ (aarch64_cm<optab><mode>): Likewise.
+ * config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
+ (l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
+ (fix_trunc<GPF:mode><GPI:mode>2): Likewise.
+ (sqrt<mode>2): Likewise.
+ (abs<mode>2): Likewise.
+ (<optab><mode>hf2): New pattern for HF mode.
+ (<optab>hihf2): Likewise.
+ * config/aarch64/arm_neon.h: Include arm_fp16.h.
+ * config/aarch64/iterators.md (GPF_F16, GPI_I16, VHSDF_HSDF): New.
+ (w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
+ Support HF mode.
+ * config/aarch64/arm_fp16.h: New file.
+ (vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
+ vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
+ vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
+ vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
+ vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
+ vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
+ vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
+ vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
+ vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
+ vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
+ vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
+ vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
+ vsqrth_f16): New.
+
2016-07-25 Jiong Wang <jiong.wang@arm.com>

* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
;;
aarch64*-*-*)
cpu_type=aarch64
- extra_headers="arm_neon.h arm_acle.h"
+ extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
#define si_UP SImode
#define sf_UP SFmode
#define hi_UP HImode
+#define hf_UP HFmode
#define qi_UP QImode
#define UP(X) X##_UP
BUILTIN_VHSDF (UNOP, round, 2)
BUILTIN_VHSDF_DF (UNOP, frintn, 2)
+ VAR1 (UNOP, btrunc, 2, hf)
+ VAR1 (UNOP, ceil, 2, hf)
+ VAR1 (UNOP, floor, 2, hf)
+ VAR1 (UNOP, frintn, 2, hf)
+ VAR1 (UNOP, nearbyint, 2, hf)
+ VAR1 (UNOP, rint, 2, hf)
+ VAR1 (UNOP, round, 2, hf)
+
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di)
- /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
+ /* Implemented by l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2. */
+ BUILTIN_GPI_I16 (UNOP, lroundhf, 2)
VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2)
VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di)
VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lceilhf, 2)
VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2)
VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di)
VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfloorhf, 2)
VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2)
VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di)
VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2)
VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2)
VAR1 (UNOPUS, lfrintnusf, 2, si)
VAR1 (UNOPUS, lfrintnudf, 2, di)
/* Implemented by
aarch64_frecp<FRECP:frecp_suffix><mode>. */
- BUILTIN_GPF (UNOP, frecpe, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpe, 0)
BUILTIN_GPF (BINOP, frecps, 0)
- BUILTIN_GPF (UNOP, frecpx, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
only ever used for the int64x1_t intrinsic, there is no scalar version. */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VHSDF (UNOP, abs, 2)
+ VAR1 (UNOP, abs, 2, hf)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */
- BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
+ BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
BUILTIN_VHSDF (BINOP, faddp, 0)
/* Implemented by aarch64_cm<optab><mode>. */
- BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0)
/* Implemented by neg<mode>2. */
- BUILTIN_VHSDF (UNOP, neg, 2)
+ BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
/* Implemented by aarch64_fac<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
+
+ /* Implemented by sqrt<mode>2. */
+ VAR1 (UNOP, sqrt, 2, hf)
+
+ /* Implemented by <optab><mode>hf2 and <optab>hihf2 (conversions to
+ floating-point), and by <optab>_trunc<GPF_F16:mode><GPI:mode>2 and
+ <optab>_trunchfhi2 (conversions to integer). */
+ VAR1 (UNOP, floatdi, 2, hf)
+ VAR1 (UNOP, floatsi, 2, hf)
+ VAR1 (UNOP, floathi, 2, hf)
+ VAR1 (UNOPUS, floatunsdi, 2, hf)
+ VAR1 (UNOPUS, floatunssi, 2, hf)
+ VAR1 (UNOPUS, floatunshi, 2, hf)
+ BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2)
+ BUILTIN_GPI (UNOP, fix_truncsf, 2)
+ BUILTIN_GPI (UNOP, fix_truncdf, 2)
+ BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
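
Reading aid, not part of the patch: each .def entry above registers a per-mode
"__builtin_aarch64_" function, with an extra "_us"/"_uss" tag for the
unsigned-qualifier variants, and the arm_fp16.h wrappers added later in this
patch call exactly those names.  A minimal sketch, assuming the patched
compiler and -march=armv8.2-a+fp16 (the wrapper names here are hypothetical):

#include <stdint.h>

typedef __fp16 float16_t;	/* mirrors the typedef arm_fp16.h introduces */

/* VAR1 (UNOP, sqrt, 2, hf) registers __builtin_aarch64_sqrthf.  */
static inline float16_t
my_sqrth_f16 (float16_t a)
{
  return __builtin_aarch64_sqrthf (a);
}

/* BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) registers the
   __builtin_aarch64_fixuns_trunchf{hi,si,di}_us family.  */
static inline uint32_t
my_cvtuh_u32_f16 (float16_t a)
{
  return __builtin_aarch64_fixuns_trunchfsi_us (a);
}
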
)
(define_insn "aarch64_rsqrte<mode>"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
+;; HF scalar variants of related SIMD instructions.
+(define_insn "l<fcvt_pattern><su_optab>hfhi2"
+ [(set (match_operand:HI 0 "register_operand" "=w")
+ (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
+ FCVT)))]
+ "TARGET_SIMD_F16INST"
+ "fcvt<frint_suffix><su>\t%h0, %h1"
+ [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>_trunchfhi2"
+ [(set (match_operand:HI 0 "register_operand" "=w")
+ (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
+ "TARGET_SIMD_F16INST"
+ "fcvtz<su>\t%h0, %h1"
+ [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>hihf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
+ "TARGET_SIMD_F16INST"
+ "<su_optab>cvtf\t%h0, %h1"
+ [(set_attr "type" "neon_int_to_fp_s")]
+)
+
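
Illustration, not part of the patch: the three TARGET_SIMD_F16INST patterns
above keep the 16-bit integer <-> HF conversions on the h registers; per their
output templates they emit forms such as "fcvtzs h0, h0" and "scvtf h0, h0".
A brief sketch using the builtins registered earlier in this patch, assuming
-march=armv8.2-a+fp16:

typedef __fp16 float16_t;	/* matches the typedef arm_fp16.h introduces */

/* vcvth_s16_f16 / vcvth_f16_s16 in arm_fp16.h reduce to these calls.  */
short to_s16 (float16_t x)     { return __builtin_aarch64_fix_trunchfhi (x); }
float16_t from_s16 (short x)   { return __builtin_aarch64_floathihf (x); }
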
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
- (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
- (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
+ (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))]
"TARGET_SIMD"
"@
)
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
- FRECP))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+ FRECP))]
"TARGET_SIMD"
"frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
- [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
+ [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)
(define_insn "aarch64_frecps<mode>"
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
FRINT))]
"TARGET_FLOAT"
"frint<frint_suffix>\\t%<s>0, %<s>1"
- [(set_attr "type" "f_rint<s>")]
+ [(set_attr "type" "f_rint<stype>")]
)
;; frcvt floating-point round to integer and convert standard patterns.
;; Expands to lbtrunc, lceil, lfloor, lround.
-(define_insn "l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2"
+(define_insn "l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
- FCVT)))]
+ (FIXUORS:GPI
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+ FCVT)))]
"TARGET_FLOAT"
- "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1"
+ "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF_F16:s>1"
[(set_attr "type" "f_cvtf2i")]
)
[(set_attr "type" "f_cvt")]
)
-(define_insn "fix_trunc<GPF:mode><GPI:mode>2"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
- "TARGET_FLOAT"
- "fcvtzs\\t%<GPI:w>0, %<GPF:s>1"
- [(set_attr "type" "f_cvtf2i")]
-)
-
-(define_insn "fixuns_trunc<GPF:mode><GPI:mode>2"
+(define_insn "<optab>_trunc<GPF_F16:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
+ (FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
- "fcvtzu\\t%<GPI:w>0, %<GPF:s>1"
+ "fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1"
[(set_attr "type" "f_cvtf2i")]
)
[(set_attr "type" "f_cvti2f")]
)
+(define_insn "<optab><mode>hf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (FLOATUORS:HF (match_operand:GPI 1 "register_operand" "r")))]
+ "TARGET_FP_F16INST"
+ "<su_optab>cvtf\t%h0, %<w>1"
+ [(set_attr "type" "f_cvti2f")]
+)
+
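
Illustration, not part of the patch: with TARGET_FP_F16INST the
<optab><mode>hf2 pattern just above and the
<optab>_trunc<GPF_F16:mode><GPI:mode>2 pattern earlier in this file provide
the standard floatsihf2 / fix_trunchfsi2 style names, so scalar conversions
between general registers and __fp16 can be selected as a single instruction
per the templates "<su_optab>cvtf\t%h0, %<w>1" and
"fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1".  A hedged sketch, assuming
-march=armv8.2-a+fp16; the exact output also depends on how the front end
handles __fp16 conversions:

__fp16 s32_to_h (int x)        { return (__fp16) x; }     /* expected: scvtf  h0, w0 */
unsigned h_to_u32 (__fp16 x)   { return (unsigned) x; }   /* expected: fcvtzu w0, h0 */
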
;; Convert between fixed-point and floating-point (scalar modes)
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3"
)
(define_insn "neg<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (neg:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fneg\\t%<s>0, %<s>1"
- [(set_attr "type" "ffarith<s>")]
+ [(set_attr "type" "ffarith<stype>")]
)
(define_expand "sqrt<mode>2"
- [(set (match_operand:GPF 0 "register_operand")
- (sqrt:GPF (match_operand:GPF 1 "register_operand")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
{
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
})
(define_insn "*sqrt<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fsqrt\\t%<s>0, %<s>1"
- [(set_attr "type" "fsqrt<s>")]
+ [(set_attr "type" "fsqrt<stype>")]
)
(define_insn "abs<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (abs:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (abs:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fabs\\t%<s>0, %<s>1"
- [(set_attr "type" "ffarith<s>")]
+ [(set_attr "type" "ffarith<stype>")]
)
;; Given that smax/smin do not specify the result when either input is NaN,
--- /dev/null
+/* ARM FP16 scalar intrinsics include file.
+
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _AARCH64_FP16_H_
+#define _AARCH64_FP16_H_
+
+#include <stdint.h>
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef __fp16 float16_t;
+
+/* ARMv8.2-A FP16 one operand scalar intrinsics. */
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vabsh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_abshf (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vceqzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgezh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgtzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vclezh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcltzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s16 (int16_t __a)
+{
+ return __builtin_aarch64_floathihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s32 (int32_t __a)
+{
+ return __builtin_aarch64_floatsihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s64 (int64_t __a)
+{
+ return __builtin_aarch64_floatdihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u16 (uint16_t __a)
+{
+ return __builtin_aarch64_floatunshihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u32 (uint32_t __a)
+{
+ return __builtin_aarch64_floatunssihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u64 (uint64_t __a)
+{
+ return __builtin_aarch64_floatunsdihf_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvth_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvth_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvth_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvth_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtah_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtah_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtah_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtah_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtah_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtah_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtmh_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtmh_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtmh_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtmh_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtmh_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtmh_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtnh_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtnh_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtnh_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtnh_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtnh_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtnh_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtph_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtph_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtph_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtph_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtph_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtph_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfdi_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vnegh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_neghf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpeh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frecpehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpxh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frecpxhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_btrunchf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndah_f16 (float16_t __a)
+{
+ return __builtin_aarch64_roundhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndih_f16 (float16_t __a)
+{
+ return __builtin_aarch64_nearbyinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndmh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_floorhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndnh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frintnhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndph_f16 (float16_t __a)
+{
+ return __builtin_aarch64_ceilhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndxh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_rinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrsqrteh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_rsqrtehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsqrth_f16 (float16_t __a)
+{
+ return __builtin_aarch64_sqrthf (__a);
+}
+
+#pragma GCC pop_options
+
+#endif
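
A small usage sketch (illustration only, not part of the patch); the intrinsic
and type names come from the header above, the -march string mirrors the
header's target pragma, and fp16_demo.c is just an example file name:

/* Compile with, e.g.:  gcc -O2 -march=armv8.2-a+fp16 -c fp16_demo.c  */
#include <arm_fp16.h>

float16_t half_abs_sqrt (float16_t x)
{
  /* vabsh_f16 maps to FABS and vsqrth_f16 to FSQRT, both on h registers.  */
  return vsqrth_f16 (vabsh_f16 (x));
}

int32_t half_round_to_int (float16_t x)
{
  /* vcvtnh_s32_f16 converts with round-to-nearest, ties to even (FCVTNS).  */
  return vcvtnh_s32_f16 (x);
}
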
/* ARMv8.2-A FP16 intrinsics. */
+#include "arm_fp16.h"
+
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
;; Iterator for General Purpose Integer registers (32- and 64-bit modes)
(define_mode_iterator GPI [SI DI])
+;; Iterator for HI, SI and DI; some instructions can only operate on these modes.
+(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+
;; Iterator for QI and HI modes
(define_mode_iterator SHORT [QI HI])
;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
(define_mode_iterator GPF [SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF)
+(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF SF DF])
+(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF
+ (HF "TARGET_SIMD_F16INST")
+ SF DF])
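
One consequence of gating the new HF entries in GPF_F16, GPI_I16 and
VHSDF_HSDF on AARCH64_ISA_F16 / TARGET_SIMD_F16INST (illustration, not part of
the patch): the HF instances of the affected patterns only exist when the FP16
extension is enabled, which arm_fp16.h arranges per-function through its
target pragma, so a file built for plain ARMv8-A can still include the header
and confine FP16 code to an FP16-enabled function.  A sketch, assuming the
patched compiler; the attribute string mirrors the header's pragma:

/* Built with plain -march=armv8-a.  */
#include <arm_fp16.h>

__attribute__ ((target ("arch=armv8.2-a+fp16")))
float16_t recip_estimate (float16_t x)
{
  /* vrecpeh_f16 expands through aarch64_frecpe<mode> with GPF_F16 (FRECPE).  */
  return vrecpeh_f16 (x);
}
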
;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
;; For unequal width int to float conversion
-(define_mode_attr w1 [(SF "w") (DF "x")])
-(define_mode_attr w2 [(SF "x") (DF "w")])
+(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
+(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
(define_mode_attr short_mask [(HI "65535") (QI "255")])
;; For scalar usage of vector/FP registers
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
- (SF "s") (DF "d")
+ (HF "h") (SF "s") (DF "d")
(V8QI "") (V16QI "")
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
(define_mode_attr vas [(DI "") (SI ".2s")])
;; Map a floating point mode to the appropriate register name prefix
-(define_mode_attr s [(SF "s") (DF "d")])
+(define_mode_attr s [(HF "h") (SF "s") (DF "d")])
;; Give the length suffix letter for a sign- or zero-extension.
(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
- (TI "") (SF "")
- (DF "")])
+ (TI "") (HF "")
+ (SF "") (DF "")])
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
(V2DI "d") (V4HF "h")
(V8HF "h") (V2SF "s")
(V4SF "s") (V2DF "d")
+ (HF "h")
(SF "s") (DF "d")
(QI "b") (HI "h")
(SI "s") (DI "d")])
(V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI") (DF "DI")
- (SF "SI")])
+ (SF "SI") (HF "HI")])
;; Lower case mode of results of comparison operations.
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
;; for the unequal width integer to fp conversions
-(define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
-(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")])
+(define_mode_attr fcvt_iesize [(HF "di") (SF "di") (DF "si")])
+(define_mode_attr FCVT_IESIZE [(HF "DI") (SF "DI") (DF "SI")])
(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
(V4HI "V8HI") (V8HI "V4HI")
(V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(V2DF "_q")
- (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
+ (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
(define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v")