+2016-07-25  Jiong Wang  <jiong.wang@arm.com>
+
+ * config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
+ * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
+ (neg<mode>2): Likewise.
+ (abs<mode>2): Likewise.
+ (<frint_pattern><mode>2): Likewise.
+ (l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
+ (<optab><VDQF:mode><fcvt_target>2): Likewise.
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
+ (ftrunc<VDQF:mode>2): Likewise.
+ (<optab><fcvt_target><VDQF:mode>2): Likewise.
+ (sqrt<mode>2): Likewise.
+ (*sqrt<mode>2): Likewise.
+ (aarch64_frecpe<mode>): Likewise.
+ (aarch64_cm<optab><mode>): Likewise.
+ * config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
+ false for V4HF and V8HF.
+ * config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
+	(VDQF_COND, fcvt_target, FCVT_TARGET, h_con): Extend mode attributes to HF modes.
+ (stype): New.
+ * config/aarch64/arm_neon.h (vdup_n_f16): New.
+ (vdupq_n_f16): Likewise.
+ (vld1_dup_f16): Use vdup_n_f16.
+ (vld1q_dup_f16): Use vdupq_n_f16.
+ (vabs_f16): New.
+ (vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
+ vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
+ vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
+ vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
+ vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
+ vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
+ vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
+ vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
+ vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
+ vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
+ vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
+ vsqrtq_f16): Likewise.
+
2016-07-25  Jiong Wang  <jiong.wang@arm.com>

* config/aarch64/aarch64-simd.md
= { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_none, qualifier_none };
+#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
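
Note: a BINOP_USS builtin returns an unsigned vector while taking two
non-unsigned (here floating-point) operands; the qualifier string is also
what produces the `_uss` suffix on the internal builtin names used by
arm_neon.h later in this patch.  A sketch of the effective signature,
shown for illustration only -- these internal builtins are not a
user-facing API:

  /* Effective signature of one BINOP_USS builtin (see vceqz_f16 below):

       uint16x4_t __builtin_aarch64_cmeqv4hf_uss (float16x4_t, float16x4_t);  */
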
BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VALLF (BINOP, fmulx, 0)
- BUILTIN_VDQF_DF (UNOP, sqrt, 2)
+ BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
BUILTIN_VDQF (BINOP, smin_nanp, 0)
/* Implemented by <frint_pattern><mode>2. */
- BUILTIN_VDQF (UNOP, btrunc, 2)
- BUILTIN_VDQF (UNOP, ceil, 2)
- BUILTIN_VDQF (UNOP, floor, 2)
- BUILTIN_VDQF (UNOP, nearbyint, 2)
- BUILTIN_VDQF (UNOP, rint, 2)
- BUILTIN_VDQF (UNOP, round, 2)
- BUILTIN_VDQF_DF (UNOP, frintn, 2)
+ BUILTIN_VHSDF (UNOP, btrunc, 2)
+ BUILTIN_VHSDF (UNOP, ceil, 2)
+ BUILTIN_VHSDF (UNOP, floor, 2)
+ BUILTIN_VHSDF (UNOP, nearbyint, 2)
+ BUILTIN_VHSDF (UNOP, rint, 2)
+ BUILTIN_VHSDF (UNOP, round, 2)
+ BUILTIN_VHSDF_DF (UNOP, frintn, 2)
- /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
+ /* Implemented by l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2. */
+ VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
+ VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lbtruncv2sf, 2, v2si)
VAR1 (UNOP, lbtruncv4sf, 2, v4si)
VAR1 (UNOP, lbtruncv2df, 2, v2di)
+ VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
+ VAR1 (UNOP, lroundv4hf, 2, v4hi)
+ VAR1 (UNOP, lroundv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di)
VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di)
+ VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di)
VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di)
+ VAR1 (UNOP, lceilv4hf, 2, v4hi)
+ VAR1 (UNOP, lceilv8hf, 2, v8hi)
VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di)
+ VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di)
VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di)
+ VAR1 (UNOP, lfloorv4hf, 2, v4hi)
+ VAR1 (UNOP, lfloorv8hf, 2, v8hi)
VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di)
+ VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di)
VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di)
+ VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
+ VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di)
VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di)
+ VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
VAR1 (UNOPUS, lfrintnudf, 2, di)
- /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
+ /* Implemented by <optab><fcvt_target><VHSDF:mode>2. */
+ VAR1 (UNOP, floatv4hi, 2, v4hf)
+ VAR1 (UNOP, floatv8hi, 2, v8hf)
VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df)
+ VAR1 (UNOP, floatunsv4hi, 2, v4hf)
+ VAR1 (UNOP, floatunsv8hi, 2, v8hf)
VAR1 (UNOP, floatunsv2si, 2, v2sf)
VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
- BUILTIN_VDQF (UNOP, frecpe, 0)
+ BUILTIN_VHSDF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0)
 /* Implemented by a mixture of abs2 patterns.  Note the DImode builtin is
    only ever used for the int64x1_t intrinsic; there is no scalar version.  */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
- BUILTIN_VDQF (UNOP, abs, 2)
+ BUILTIN_VHSDF (UNOP, abs, 2)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */
- BUILTIN_VALLF (UNOP, rsqrte, 0)
+ BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VALLF (BINOP, rsqrts, 0)
/* Implemented by aarch64_faddp<mode>. */
BUILTIN_VDQF (BINOP, faddp, 0)
+
+ /* Implemented by aarch64_cm<optab><mode>. */
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+
+ /* Implemented by neg<mode>2. */
+ BUILTIN_VHSDF (UNOP, neg, 2)
)
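
Note: the trailing digit in these entries selects the name-to-pattern
mapping described at the top of aarch64-simd-builtins.def; 2 maps
__builtin_aarch64_<name><mode> to the standard-named <name><mode>2
pattern.  A hedged, illustrative use of the abs entry above (assumes a
compiler with this patch and -march=armv8.2-a+fp16; user code should
call vabs_f16 from arm_neon.h rather than the internal builtin):

  #include <arm_neon.h>

  float16x4_t
  demo_abs (float16x4_t x)
  {
    return __builtin_aarch64_absv4hf (x);   /* resolved via absv4hf2  */
  }
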
(define_insn "aarch64_rsqrte<mode>"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+ [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w")
)
(define_insn "neg<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fneg\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_neg_<stype><q>")]
)
(define_insn "abs<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fabs\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
(define_insn "fma<mode>4"
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
- FRINT))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ FRINT))]
"TARGET_SIMD"
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_round_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_round_<stype><q>")]
)
;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
-(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
+(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand" "w")]
+ [(match_operand:VHSDF 1 "register_operand" "w")]
FCVT)))]
"TARGET_SIMD"
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
-(define_expand "<optab><VDQF:mode><fcvt_target>2"
+(define_expand "<optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ)))]
+ [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
-(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
+(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ)))]
+ [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
-(define_expand "ftrunc<VDQF:mode>2"
- [(set (match_operand:VDQF 0 "register_operand")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ))]
+(define_expand "ftrunc<VHSDF:mode>2"
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ))]
"TARGET_SIMD"
{})
-(define_insn "<optab><fcvt_target><VDQF:mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (FLOATUORS:VDQF
+(define_insn "<optab><fcvt_target><VHSDF:mode>2"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (FLOATUORS:VHSDF
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
+ [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
;; Conversions between vectors of floats and doubles.
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
- (match_operand:VALLF 1 "register_operand" "w,w")
- (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
+ (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))]
"TARGET_SIMD"
"@
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
- [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
;; fac(ge|gt)
;; sqrt
(define_expand "sqrt<mode>2"
- [(set (match_operand:VDQF 0 "register_operand")
- (sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})
(define_insn "*sqrt<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fsqrt\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
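
With *sqrt<mode>2 extended to VHSDF and the HF early-return added to
aarch64_emit_approx_sqrt below, an HF vector square root always expands
to the plain instruction rather than the frsqrte/frsqrts Newton-Raphson
sequence.  An indicative sketch (assumes -march=armv8.2-a+fp16; the
expected assembly is illustrative, not a guaranteed listing):

  #include <arm_neon.h>

  float16x8_t
  sqrt_demo (float16x8_t x)
  {
    return vsqrtq_f16 (x);   /* expected: fsqrt v0.8h, v0.8h  */
  }
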
;; Patterns for vector struct loads and stores.
)
(define_insn "aarch64_frecpe<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
- UNSPEC_FRECPE))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ UNSPEC_FRECPE))]
"TARGET_SIMD"
"frecpe\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{
machine_mode mode = GET_MODE (dst);
+
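+  /* No approximate sequence is implemented for HF modes; returning
+     false lets the sqrt<mode>2 expander fall through to the plain
+     fsqrt insn.  */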
+ if (GET_MODE_INNER (mode) == HFmode)
+ return false;
+
machine_mode mmsk = mode_for_vector
(int_mode_for_mode (GET_MODE_INNER (mode)),
GET_MODE_NUNITS (mode));
/* End of optimal implementations in approved order. */
+#pragma GCC pop_options
+
+/* ARMv8.2-A FP16 intrinsics. */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+/* ARMv8.2-A FP16 one operand vector intrinsics. */
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vabs_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_absv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vabsq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_absv8hf (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceqz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgez_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgezq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgtz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclez_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclezq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcltz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_s16 (int16x4_t __a)
+{
+ return __builtin_aarch64_floatv4hiv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_s16 (int16x8_t __a)
+{
+ return __builtin_aarch64_floatv8hiv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_u16 (uint16x4_t __a)
+{
+ return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_u16 (uint16x8_t __a)
+{
+ return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvt_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lbtruncv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lbtruncv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvt_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvta_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lroundv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtaq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lroundv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvta_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lrounduv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtaq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lrounduv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtm_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfloorv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtmq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfloorv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtm_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lflooruv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtmq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lflooruv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtn_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfrintnv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtnq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfrintnv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtn_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtnq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtp_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lceilv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtpq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lceilv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtp_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lceiluv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtpq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lceiluv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vneg_f16 (float16x4_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vnegq_f16 (float16x8_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrecpe_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_frecpev4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrecpeq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_frecpev8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnd_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_btruncv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_btruncv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnda_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_roundv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndaq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_roundv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndi_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_nearbyintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndiq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_nearbyintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndm_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_floorv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndmq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_floorv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndn_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_frintnv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndnq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_frintnv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndp_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_ceilv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndpq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_ceilv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndx_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_rintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndxq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_rintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrsqrte_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_rsqrtev4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrsqrteq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_rsqrtev8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vsqrt_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_sqrtv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsqrtq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_sqrtv8hf (__a);
+}
+
+#pragma GCC pop_options
+
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64
-#pragma GCC pop_options
-
#endif
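
Note: the five float-to-integer conversion families added to arm_neon.h
above differ only in rounding mode (fcvtz*: toward zero; fcvta*: to
nearest, ties away from zero; fcvtm*: toward minus infinity; fcvtn*: to
nearest, ties to even; fcvtp*: toward plus infinity).  A worked example
of the per-lane results; the function and variable names are
illustrative only (compile with -march=armv8.2-a+fp16):

  #include <arm_neon.h>

  void
  rounding_demo (void)
  {
    float16x4_t x = vdup_n_f16 (1.5);
    int16x4_t z = vcvt_s16_f16 (x);    /* all lanes 1: toward zero   */
    int16x4_t a = vcvta_s16_f16 (x);   /* all lanes 2: ties away     */
    int16x4_t m = vcvtm_s16_f16 (x);   /* all lanes 1: toward -inf   */
    int16x4_t n = vcvtn_s16_f16 (x);   /* all lanes 2: ties to even  */
    int16x4_t p = vcvtp_s16_f16 (x);   /* all lanes 2: toward +inf   */
    (void) z; (void) a; (void) m; (void) n; (void) p;
  }
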
;; Vector Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
-;; Vector Float modes, barring HF modes.
+;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
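+;; Vector Float modes, including the HF modes when the FP16 extension
+;; is available.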
+(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF])
;; Vector Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF SF DF])
;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
(V2DI "") (V2SF "")
- (V4SF "") (V2DF "")])
+ (V4SF "") (V4HF "")
+ (V8HF "") (V2DF "")])
;; For scalar usage of vector/FP registers, narrowing
(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s")
(QI "b") (HI "h")
(SI "s") (DI "d")])
+;; Vetype is used both in scheduling-type names and in assembly output,
+;; but the two are not always the same: HF modes, for example, are
+;; scheduled like SF modes on some instructions.  stype gives the
+;; scheduling type more accurately (e.g. V4HF has Vetype "h" but
+;; stype "s").
+(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
+ (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
+ (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
+ (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
+ (SI "s") (DI "d")])
+
;; Mode-to-bitwise operation type mapping.
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b")
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
- (SF "si") (DF "di") (SI "sf") (DI "df")])
+ (SF "si") (DF "di") (SI "sf") (DI "df")
+ (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
+ (V8HI "v8hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
- (SF "SI") (DF "DI") (SI "SF") (DI "DF")])
+ (SF "SI") (DF "DI") (SI "SF") (DI "DF")
+ (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
+ (V8HI "V8HF")])
;; for the unequal width integer to fp conversions
;; the 'x' constraint. All other modes may use the 'w' constraint.
(define_mode_attr h_con [(V2SI "w") (V4SI "w")
(V4HI "x") (V8HI "x")
+ (V4HF "w") (V8HF "w")
(V2SF "w") (V4SF "w")
(V2DF "w") (DF "w")])