+2016-07-25  Jiong Wang  <jiong.wang@arm.com>
+
+ * config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
+ * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes.
+ (neg<mode>2): Likewise.
+ (abs<mode>2): Likewise.
+ (<frint_pattern><mode>2): Likewise.
+ (l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise.
+ (<optab><VDQF:mode><fcvt_target>2): Likewise.
+ (<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise.
+ (ftrunc<VDQF:mode>2): Likewise.
+ (<optab><fcvt_target><VDQF:mode>2): Likewise.
+ (sqrt<mode>2): Likewise.
+ (*sqrt<mode>2): Likewise.
+ (aarch64_frecpe<mode>): Likewise.
+ (aarch64_cm<optab><mode>): Likewise.
+ * config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
+ false for V4HF and V8HF.
+ * config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
+	(VDQF_COND, fcvt_target, FCVT_TARGET, h_con): Extend mode attributes to HF modes.
+ (stype): New.
+ * config/aarch64/arm_neon.h (vdup_n_f16): New.
+ (vdupq_n_f16): Likewise.
+ (vld1_dup_f16): Use vdup_n_f16.
+ (vld1q_dup_f16): Use vdupq_n_f16.
+ (vabs_f16): New.
+ (vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
+ vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
+ vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
+ vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
+ vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
+ vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
+ vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
+ vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
+ vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
+ vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
+ vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
+ vsqrtq_f16): Likewise.
+
2016-07-25  Jiong Wang  <jiong.wang@arm.com>

* config/aarch64/aarch64-simd.md
= { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_none, qualifier_none };
+#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
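
Note: a BINOP_USS builtin returns an unsigned vector while taking two
non-unsigned (here floating-point) operands; the qualifier string is also
what produces the `_uss` suffix on the internal builtin names used by
arm_neon.h later in this patch.  A sketch of the effective signature,
shown for illustration only -- these internal builtins are not a
user-facing API:

  /* Effective signature of one BINOP_USS builtin (see vceqz_f16 below):

       uint16x4_t __builtin_aarch64_cmeqv4hf_uss (float16x4_t, float16x4_t);  */
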
BUILTIN_VDC (COMBINE, combine, 0)
BUILTIN_VB (BINOP, pmul, 0)
BUILTIN_VALLF (BINOP, fmulx, 0)
- BUILTIN_VDQF_DF (UNOP, sqrt, 2)
+ BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
BUILTIN_VDQF (BINOP, smin_nanp, 0)
/* Implemented by <frint_pattern><mode>2. */
- BUILTIN_VDQF (UNOP, btrunc, 2)
- BUILTIN_VDQF (UNOP, ceil, 2)
- BUILTIN_VDQF (UNOP, floor, 2)
- BUILTIN_VDQF (UNOP, nearbyint, 2)
- BUILTIN_VDQF (UNOP, rint, 2)
- BUILTIN_VDQF (UNOP, round, 2)
- BUILTIN_VDQF_DF (UNOP, frintn, 2)
+ BUILTIN_VHSDF (UNOP, btrunc, 2)
+ BUILTIN_VHSDF (UNOP, ceil, 2)
+ BUILTIN_VHSDF (UNOP, floor, 2)
+ BUILTIN_VHSDF (UNOP, nearbyint, 2)
+ BUILTIN_VHSDF (UNOP, rint, 2)
+ BUILTIN_VHSDF (UNOP, round, 2)
+ BUILTIN_VHSDF_DF (UNOP, frintn, 2)
- /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
+ /* Implemented by l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2. */
+ VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
+ VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lbtruncv2sf, 2, v2si)
VAR1 (UNOP, lbtruncv4sf, 2, v4si)
VAR1 (UNOP, lbtruncv2df, 2, v2di)
+ VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
+ VAR1 (UNOP, lroundv4hf, 2, v4hi)
+ VAR1 (UNOP, lroundv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di)
VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di)
+ VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di)
VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di)
+ VAR1 (UNOP, lceilv4hf, 2, v4hi)
+ VAR1 (UNOP, lceilv8hf, 2, v8hi)
VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di)
+ VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di)
VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di)
+ VAR1 (UNOP, lfloorv4hf, 2, v4hi)
+ VAR1 (UNOP, lfloorv8hf, 2, v8hi)
VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di)
+ VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di)
VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di)
+ VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
+ VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di)
VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di)
+ VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
VAR1 (UNOPUS, lfrintnudf, 2, di)
- /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
+ /* Implemented by <optab><fcvt_target><VHSDF:mode>2. */
+ VAR1 (UNOP, floatv4hi, 2, v4hf)
+ VAR1 (UNOP, floatv8hi, 2, v8hf)
VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df)
+ VAR1 (UNOP, floatunsv4hi, 2, v4hf)
+ VAR1 (UNOP, floatunsv8hi, 2, v8hf)
VAR1 (UNOP, floatunsv2si, 2, v2sf)
VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
- BUILTIN_VDQF (UNOP, frecpe, 0)
+ BUILTIN_VHSDF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0)
 /* Implemented by a mixture of abs2 patterns.  Note the DImode builtin is
    only ever used for the int64x1_t intrinsic; there is no scalar version.  */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
- BUILTIN_VDQF (UNOP, abs, 2)
+ BUILTIN_VHSDF (UNOP, abs, 2)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */
- BUILTIN_VALLF (UNOP, rsqrte, 0)
+ BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VALLF (BINOP, rsqrts, 0)
/* Implemented by aarch64_faddp<mode>. */
BUILTIN_VDQF (BINOP, faddp, 0)
+
+ /* Implemented by aarch64_cm<optab><mode>. */
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
+ BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+
+ /* Implemented by neg<mode>2. */
+ BUILTIN_VHSDF (UNOP, neg, 2)
)
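
Note: the trailing digit in these entries selects the name-to-pattern
mapping described at the top of aarch64-simd-builtins.def; 2 maps
__builtin_aarch64_<name><mode> to the standard-named <name><mode>2
pattern.  A hedged, illustrative use of the abs entry above (assumes a
compiler with this patch and -march=armv8.2-a+fp16; user code should
call vabs_f16 from arm_neon.h rather than the internal builtin):

  #include <arm_neon.h>

  float16x4_t
  demo_abs (float16x4_t x)
  {
    return __builtin_aarch64_absv4hf (x);   /* resolved via absv4hf2  */
  }
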
(define_insn "aarch64_rsqrte<mode>"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
+ (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+ [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "aarch64_rsqrts<mode>"
[(set (match_operand:VALLF 0 "register_operand" "=w")
)
(define_insn "neg<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fneg\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_neg_<stype><q>")]
)
(define_insn "abs<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fabs\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
(define_insn "fma<mode>4"
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
- FRINT))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ FRINT))]
"TARGET_SIMD"
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_round_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_round_<stype><q>")]
)
;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
-(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
+(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand" "w")]
+ [(match_operand:VHSDF 1 "register_operand" "w")]
FCVT)))]
"TARGET_SIMD"
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
-(define_expand "<optab><VDQF:mode><fcvt_target>2"
+(define_expand "<optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ)))]
+ [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
-(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
+(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
- [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ)))]
+ [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
-(define_expand "ftrunc<VDQF:mode>2"
- [(set (match_operand:VDQF 0 "register_operand")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
- UNSPEC_FRINTZ))]
+(define_expand "ftrunc<VHSDF:mode>2"
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
+ UNSPEC_FRINTZ))]
"TARGET_SIMD"
{})
-(define_insn "<optab><fcvt_target><VDQF:mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (FLOATUORS:VDQF
+(define_insn "<optab><fcvt_target><VHSDF:mode>2"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (FLOATUORS:VHSDF
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
+ [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
;; Conversions between vectors of floats and doubles.
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
- (match_operand:VALLF 1 "register_operand" "w,w")
- (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
+ (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))]
"TARGET_SIMD"
"@
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
- [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
;; fac(ge|gt)
;; sqrt
(define_expand "sqrt<mode>2"
- [(set (match_operand:VDQF 0 "register_operand")
- (sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})
(define_insn "*sqrt<mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fsqrt\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
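
With *sqrt<mode>2 extended to VHSDF and the HF early-return added to
aarch64_emit_approx_sqrt below, an HF vector square root always expands
to the plain instruction rather than the frsqrte/frsqrts Newton-Raphson
sequence.  An indicative sketch (assumes -march=armv8.2-a+fp16; the
expected assembly is illustrative, not a guaranteed listing):

  #include <arm_neon.h>

  float16x8_t
  sqrt_demo (float16x8_t x)
  {
    return vsqrtq_f16 (x);   /* expected: fsqrt v0.8h, v0.8h  */
  }
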
;; Patterns for vector struct loads and stores.
)
(define_insn "aarch64_frecpe<mode>"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
- UNSPEC_FRECPE))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+ UNSPEC_FRECPE))]
"TARGET_SIMD"
"frecpe\\t%0.<Vtype>, %1.<Vtype>"
- [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
+ [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{
machine_mode mode = GET_MODE (dst);
+
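+  /* No approximate sequence is implemented for HF modes; returning
+     false lets the sqrt<mode>2 expander fall through to the plain
+     fsqrt insn.  */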
+ if (GET_MODE_INNER (mode) == HFmode)
+ return false;
+
machine_mode mmsk = mode_for_vector
(int_mode_for_mode (GET_MODE_INNER (mode)),
GET_MODE_NUNITS (mode));
/* End of optimal implementations in approved order. */
+#pragma GCC pop_options
+
+/* ARMv8.2-A FP16 intrinsics. */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+/* ARMv8.2-A FP16 one operand vector intrinsics. */
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vabs_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_absv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vabsq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_absv8hf (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceqz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgez_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgezq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgtz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclez_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclezq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcltz_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f));
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltzq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_s16 (int16x4_t __a)
+{
+ return __builtin_aarch64_floatv4hiv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_s16 (int16x8_t __a)
+{
+ return __builtin_aarch64_floatv8hiv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vcvt_f16_u16 (uint16x4_t __a)
+{
+ return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vcvtq_f16_u16 (uint16x8_t __a)
+{
+ return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvt_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lbtruncv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lbtruncv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvt_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvta_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lroundv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtaq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lroundv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvta_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lrounduv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtaq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lrounduv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtm_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfloorv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtmq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfloorv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtm_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lflooruv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtmq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lflooruv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtn_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfrintnv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtnq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfrintnv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtn_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtnq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcvtp_s16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lceilv4hfv4hi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcvtpq_s16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lceilv8hfv8hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcvtp_u16_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_lceiluv4hfv4hi_us (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcvtpq_u16_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_lceiluv8hfv8hi_us (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vneg_f16 (float16x4_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vnegq_f16 (float16x8_t __a)
+{
+ return -__a;
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrecpe_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_frecpev4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrecpeq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_frecpev8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnd_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_btruncv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_btruncv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrnda_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_roundv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndaq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_roundv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndi_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_nearbyintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndiq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_nearbyintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndm_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_floorv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndmq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_floorv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndn_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_frintnv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndnq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_frintnv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndp_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_ceilv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndpq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_ceilv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrndx_f16 (float16x4_t __a)
+{
+ return __builtin_aarch64_rintv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrndxq_f16 (float16x8_t __a)
+{
+ return __builtin_aarch64_rintv8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vrsqrte_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_rsqrtev4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vrsqrteq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_rsqrtev8hf (__a);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vsqrt_f16 (float16x4_t __a)
+{
+  return __builtin_aarch64_sqrtv4hf (__a);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vsqrtq_f16 (float16x8_t __a)
+{
+  return __builtin_aarch64_sqrtv8hf (__a);
+}
+
+#pragma GCC pop_options
+
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64
-#pragma GCC pop_options
-
#endif
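
Note: the five float-to-integer conversion families added to arm_neon.h
above differ only in rounding mode (fcvtz*: toward zero; fcvta*: to
nearest, ties away from zero; fcvtm*: toward minus infinity; fcvtn*: to
nearest, ties to even; fcvtp*: toward plus infinity).  A worked example
of the per-lane results; the function and variable names are
illustrative only (compile with -march=armv8.2-a+fp16):

  #include <arm_neon.h>

  void
  rounding_demo (void)
  {
    float16x4_t x = vdup_n_f16 (1.5);
    int16x4_t z = vcvt_s16_f16 (x);    /* all lanes 1: toward zero   */
    int16x4_t a = vcvta_s16_f16 (x);   /* all lanes 2: ties away     */
    int16x4_t m = vcvtm_s16_f16 (x);   /* all lanes 1: toward -inf   */
    int16x4_t n = vcvtn_s16_f16 (x);   /* all lanes 2: ties to even  */
    int16x4_t p = vcvtp_s16_f16 (x);   /* all lanes 2: toward +inf   */
    (void) z; (void) a; (void) m; (void) n; (void) p;
  }
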
;; Vector Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
-;; Vector Float modes, barring HF modes.
+;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
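+;; Vector Float modes, including the HF modes when the FP16 extension
+;; is available.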
+(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF])
;; Vector Float modes, and DF.
(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF DF])
+(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF SF DF])
;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
(V2DI "") (V2SF "")
- (V4SF "") (V2DF "")])
+ (V4SF "") (V4HF "")
+ (V8HF "") (V2DF "")])
;; For scalar usage of vector/FP registers, narrowing
(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s")
(QI "b") (HI "h")
(SI "s") (DI "d")])
+;; Vetype is used both in scheduling-type names and in assembly output,
+;; but the two are not always the same: HF modes, for example, are
+;; scheduled like SF modes on some instructions.  stype gives the
+;; scheduling type more accurately (e.g. V4HF has Vetype "h" but
+;; stype "s").
+(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
+ (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
+ (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
+ (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
+ (SI "s") (DI "d")])
+
;; Mode-to-bitwise operation type mapping.
(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b")
(V4HI "8b") (V8HI "16b")
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
- (SF "si") (DF "di") (SI "sf") (DI "df")])
+ (SF "si") (DF "di") (SI "sf") (DI "df")
+ (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
+ (V8HI "v8hf")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
- (SF "SI") (DF "DI") (SI "SF") (DI "DF")])
+ (SF "SI") (DF "DI") (SI "SF") (DI "DF")
+ (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
+ (V8HI "V8HF")])
;; for the unequal width integer to fp conversions
;; the 'x' constraint. All other modes may use the 'w' constraint.
(define_mode_attr h_con [(V2SI "w") (V4SI "w")
(V4HI "x") (V8HI "x")
+ (V4HF "w") (V8HF "w")
(V2SF "w") (V4SF "w")
(V2DF "w") (DF "w")])