+2016-07-25 Jiong Wang <jiong.wang@arm.com>
+
+ * config.gcc (aarch64*-*-*): Install arm_fp16.h.
+ * config/aarch64/aarch64-builtins.c (hf_UP): New.
+ * config/aarch64/aarch64-simd-builtins.def: Register new builtins.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF
+ mode.
+ (aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
+ (aarch64_cm<optab><mode>): Likewise.
+ * config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
+ (l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
+ (fix_trunc<GPF:mode><GPI:mode>2): Likewise.
+ (sqrt<mode>2): Likewise.
+ (abs<mode>2): Likewise.
+ (<optab><mode>hf2): New pattern for HF mode.
+ (<optab>hihf2): Likewise.
+ * config/aarch64/arm_neon.h: Include arm_fp16.h.
+ * config/aarch64/iterators.md (GPF_F16, GPI_I16, VHSDF_HSDF): New.
+ (w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
+ Support HF mode.
+ * config/aarch64/arm_fp16.h: New file.
+ (vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
+ vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
+ vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
+ vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
+ vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
+ vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
+ vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
+ vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
+ vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
+ vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
+ vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
+ vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
+ vsqrth_f16): New.
+
2016-07-25 Jiong Wang <jiong.wang@arm.com>

* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
;;
aarch64*-*-*)
cpu_type=aarch64
- extra_headers="arm_neon.h arm_acle.h"
+ extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
#define si_UP SImode
#define sf_UP SFmode
#define hi_UP HImode
+#define hf_UP HFmode
#define qi_UP QImode
#define UP(X) X##_UP
BUILTIN_VHSDF (UNOP, round, 2)
BUILTIN_VHSDF_DF (UNOP, frintn, 2)
+ VAR1 (UNOP, btrunc, 2, hf)
+ VAR1 (UNOP, ceil, 2, hf)
+ VAR1 (UNOP, floor, 2, hf)
+ VAR1 (UNOP, frintn, 2, hf)
+ VAR1 (UNOP, nearbyint, 2, hf)
+ VAR1 (UNOP, rint, 2, hf)
+ VAR1 (UNOP, round, 2, hf)
+
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di)
- /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
+ /* Implemented by l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2. */
+ BUILTIN_GPI_I16 (UNOP, lroundhf, 2)
VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2)
VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di)
VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lceilhf, 2)
VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2)
VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di)
VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfloorhf, 2)
VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2)
VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di)
VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2)
VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2)
VAR1 (UNOPUS, lfrintnusf, 2, si)
VAR1 (UNOPUS, lfrintnudf, 2, di)
/* Implemented by
aarch64_frecp<FRECP:frecp_suffix><mode>. */
- BUILTIN_GPF (UNOP, frecpe, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpe, 0)
BUILTIN_GPF (BINOP, frecps, 0)
- BUILTIN_GPF (UNOP, frecpx, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
only ever used for the int64x1_t intrinsic, there is no scalar version. */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VHSDF (UNOP, abs, 2)
+ VAR1 (UNOP, abs, 2, hf)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
/* Implemented by aarch64_rsqrte<mode>. */
- BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
+ BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
/* Implemented by aarch64_rsqrts<mode>. */
BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
BUILTIN_VHSDF (BINOP, faddp, 0)
/* Implemented by aarch64_cm<optab><mode>. */
- BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
- BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0)
/* Implemented by neg<mode>2. */
- BUILTIN_VHSDF (UNOP, neg, 2)
+ BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
/* Implemented by aarch64_fac<optab><mode>. */
BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
+
+ /* Implemented by sqrt<mode>2. */
+ VAR1 (UNOP, sqrt, 2, hf)
+
+ /* Implemented by <optab><mode>hf2 and <optab>hihf2 (conversions to
+ floating-point), and by <optab>_trunc<GPF_F16:mode><GPI:mode>2 and
+ <optab>_trunchfhi2 (conversions to integer). */
+ VAR1 (UNOP, floatdi, 2, hf)
+ VAR1 (UNOP, floatsi, 2, hf)
+ VAR1 (UNOP, floathi, 2, hf)
+ VAR1 (UNOPUS, floatunsdi, 2, hf)
+ VAR1 (UNOPUS, floatunssi, 2, hf)
+ VAR1 (UNOPUS, floatunshi, 2, hf)
+ BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2)
+ BUILTIN_GPI (UNOP, fix_truncsf, 2)
+ BUILTIN_GPI (UNOP, fix_truncdf, 2)
+ BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
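
Reading aid, not part of the patch: each .def entry above registers a per-mode
"__builtin_aarch64_" function, with an extra "_us"/"_uss" tag for the
unsigned-qualifier variants, and the arm_fp16.h wrappers added later in this
patch call exactly those names.  A minimal sketch, assuming the patched
compiler and -march=armv8.2-a+fp16 (the wrapper names here are hypothetical):

#include <stdint.h>

typedef __fp16 float16_t;	/* mirrors the typedef arm_fp16.h introduces */

/* VAR1 (UNOP, sqrt, 2, hf) registers __builtin_aarch64_sqrthf.  */
static inline float16_t
my_sqrth_f16 (float16_t a)
{
  return __builtin_aarch64_sqrthf (a);
}

/* BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2) registers the
   __builtin_aarch64_fixuns_trunchf{hi,si,di}_us family.  */
static inline uint32_t
my_cvtuh_u32_f16 (float16_t a)
{
  return __builtin_aarch64_fixuns_trunchfsi_us (a);
}
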
)
(define_insn "aarch64_rsqrte<mode>"
- [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
- (unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
+;; HF scalar variants of related SIMD instructions.
+(define_insn "l<fcvt_pattern><su_optab>hfhi2"
+ [(set (match_operand:HI 0 "register_operand" "=w")
+ (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
+ FCVT)))]
+ "TARGET_SIMD_F16INST"
+ "fcvt<frint_suffix><su>\t%h0, %h1"
+ [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>_trunchfhi2"
+ [(set (match_operand:HI 0 "register_operand" "=w")
+ (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
+ "TARGET_SIMD_F16INST"
+ "fcvtz<su>\t%h0, %h1"
+ [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>hihf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
+ "TARGET_SIMD_F16INST"
+ "<su_optab>cvtf\t%h0, %h1"
+ [(set_attr "type" "neon_int_to_fp_s")]
+)
+
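
Illustration, not part of the patch: the three TARGET_SIMD_F16INST patterns
above keep the 16-bit integer <-> HF conversions on the h registers; per their
output templates they emit forms such as "fcvtzs h0, h0" and "scvtf h0, h0".
A brief sketch using the builtins registered earlier in this patch, assuming
-march=armv8.2-a+fp16:

typedef __fp16 float16_t;	/* matches the typedef arm_fp16.h introduces */

/* vcvth_s16_f16 / vcvth_f16_s16 in arm_fp16.h reduce to these calls.  */
short to_s16 (float16_t x)     { return __builtin_aarch64_fix_trunchfhi (x); }
float16_t from_s16 (short x)   { return __builtin_aarch64_floathihf (x); }
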
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
- (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
- (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
+ (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))]
"TARGET_SIMD"
"@
)
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
- FRECP))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+ FRECP))]
"TARGET_SIMD"
"frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
- [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
+ [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)
(define_insn "aarch64_frecps<mode>"
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
FRINT))]
"TARGET_FLOAT"
"frint<frint_suffix>\\t%<s>0, %<s>1"
- [(set_attr "type" "f_rint<s>")]
+ [(set_attr "type" "f_rint<stype>")]
)
;; frcvt floating-point round to integer and convert standard patterns.
;; Expands to lbtrunc, lceil, lfloor, lround.
-(define_insn "l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2"
+(define_insn "l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
- FCVT)))]
+ (FIXUORS:GPI
+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+ FCVT)))]
"TARGET_FLOAT"
- "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1"
+ "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF_F16:s>1"
[(set_attr "type" "f_cvtf2i")]
)
[(set_attr "type" "f_cvt")]
)
-(define_insn "fix_trunc<GPF:mode><GPI:mode>2"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
- "TARGET_FLOAT"
- "fcvtzs\\t%<GPI:w>0, %<GPF:s>1"
- [(set_attr "type" "f_cvtf2i")]
-)
-
-(define_insn "fixuns_trunc<GPF:mode><GPI:mode>2"
+(define_insn "<optab>_trunc<GPF_F16:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
+ (FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
- "fcvtzu\\t%<GPI:w>0, %<GPF:s>1"
+ "fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1"
[(set_attr "type" "f_cvtf2i")]
)
[(set_attr "type" "f_cvti2f")]
)
+(define_insn "<optab><mode>hf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (FLOATUORS:HF (match_operand:GPI 1 "register_operand" "r")))]
+ "TARGET_FP_F16INST"
+ "<su_optab>cvtf\t%h0, %<w>1"
+ [(set_attr "type" "f_cvti2f")]
+)
+
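
Illustration, not part of the patch: with TARGET_FP_F16INST the
<optab><mode>hf2 pattern just above and the
<optab>_trunc<GPF_F16:mode><GPI:mode>2 pattern earlier in this file provide
the standard floatsihf2 / fix_trunchfsi2 style names, so scalar conversions
between general registers and __fp16 can be selected as a single instruction
per the templates "<su_optab>cvtf\t%h0, %<w>1" and
"fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1".  A hedged sketch, assuming
-march=armv8.2-a+fp16; the exact output also depends on how the front end
handles __fp16 conversions:

__fp16 s32_to_h (int x)        { return (__fp16) x; }     /* expected: scvtf  h0, w0 */
unsigned h_to_u32 (__fp16 x)   { return (unsigned) x; }   /* expected: fcvtzu w0, h0 */
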
;; Convert between fixed-point and floating-point (scalar modes)
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3"
)
(define_insn "neg<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (neg:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fneg\\t%<s>0, %<s>1"
- [(set_attr "type" "ffarith<s>")]
+ [(set_attr "type" "ffarith<stype>")]
)
(define_expand "sqrt<mode>2"
- [(set (match_operand:GPF 0 "register_operand")
- (sqrt:GPF (match_operand:GPF 1 "register_operand")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
{
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
})
(define_insn "*sqrt<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fsqrt\\t%<s>0, %<s>1"
- [(set_attr "type" "fsqrt<s>")]
+ [(set_attr "type" "fsqrt<stype>")]
)
(define_insn "abs<mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (abs:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+ (abs:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
"TARGET_FLOAT"
"fabs\\t%<s>0, %<s>1"
- [(set_attr "type" "ffarith<s>")]
+ [(set_attr "type" "ffarith<stype>")]
)
;; Given that smax/smin do not specify the result when either input is NaN,
--- /dev/null
+/* ARM FP16 scalar intrinsics include file.
+
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _AARCH64_FP16_H_
+#define _AARCH64_FP16_H_
+
+#include <stdint.h>
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef __fp16 float16_t;
+
+/* ARMv8.2-A FP16 one operand scalar intrinsics. */
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vabsh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_abshf (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vceqzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgezh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgtzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vclezh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcltzh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s16 (int16_t __a)
+{
+ return __builtin_aarch64_floathihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s32 (int32_t __a)
+{
+ return __builtin_aarch64_floatsihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s64 (int64_t __a)
+{
+ return __builtin_aarch64_floatdihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u16 (uint16_t __a)
+{
+ return __builtin_aarch64_floatunshihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u32 (uint32_t __a)
+{
+ return __builtin_aarch64_floatunssihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u64 (uint64_t __a)
+{
+ return __builtin_aarch64_floatunsdihf_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvth_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvth_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fix_trunchfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvth_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvth_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_fixuns_trunchfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtah_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtah_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtah_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lroundhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtah_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtah_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtah_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lrounduhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtmh_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtmh_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtmh_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfloorhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtmh_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtmh_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtmh_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lflooruhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtnh_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtnh_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtnh_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtnh_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtnh_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtnh_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lfrintnuhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtph_s16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtph_s32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtph_s64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceilhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtph_u16_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtph_u32_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtph_u64_f16 (float16_t __a)
+{
+ return __builtin_aarch64_lceiluhfdi_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vnegh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_neghf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpeh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frecpehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpxh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frecpxhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_btrunchf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndah_f16 (float16_t __a)
+{
+ return __builtin_aarch64_roundhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndih_f16 (float16_t __a)
+{
+ return __builtin_aarch64_nearbyinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndmh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_floorhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndnh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_frintnhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndph_f16 (float16_t __a)
+{
+ return __builtin_aarch64_ceilhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndxh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_rinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrsqrteh_f16 (float16_t __a)
+{
+ return __builtin_aarch64_rsqrtehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsqrth_f16 (float16_t __a)
+{
+ return __builtin_aarch64_sqrthf (__a);
+}
+
+#pragma GCC pop_options
+
+#endif
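
A small usage sketch (illustration only, not part of the patch); the intrinsic
and type names come from the header above, the -march string mirrors the
header's target pragma, and fp16_demo.c is just an example file name:

/* Compile with, e.g.:  gcc -O2 -march=armv8.2-a+fp16 -c fp16_demo.c  */
#include <arm_fp16.h>

float16_t half_abs_sqrt (float16_t x)
{
  /* vabsh_f16 maps to FABS and vsqrth_f16 to FSQRT, both on h registers.  */
  return vsqrth_f16 (vabsh_f16 (x));
}

int32_t half_round_to_int (float16_t x)
{
  /* vcvtnh_s32_f16 converts with round-to-nearest, ties to even (FCVTNS).  */
  return vcvtnh_s32_f16 (x);
}
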
/* ARMv8.2-A FP16 intrinsics. */
+#include "arm_fp16.h"
+
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
;; Iterator for General Purpose Integer registers (32- and 64-bit modes)
(define_mode_iterator GPI [SI DI])
+;; Iterator for HI, SI and DI; some instructions can only operate on these modes.
+(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+
;; Iterator for QI and HI modes
(define_mode_iterator SHORT [QI HI])
;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
(define_mode_iterator GPF [SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF)
+(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF SF DF])
+(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")
+ V2SF V4SF V2DF
+ (HF "TARGET_SIMD_F16INST")
+ SF DF])
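
One consequence of gating the new HF entries in GPF_F16, GPI_I16 and
VHSDF_HSDF on AARCH64_ISA_F16 / TARGET_SIMD_F16INST (illustration, not part of
the patch): the HF instances of the affected patterns only exist when the FP16
extension is enabled, which arm_fp16.h arranges per-function through its
target pragma, so a file built for plain ARMv8-A can still include the header
and confine FP16 code to an FP16-enabled function.  A sketch, assuming the
patched compiler; the attribute string mirrors the header's pragma:

/* Built with plain -march=armv8-a.  */
#include <arm_fp16.h>

__attribute__ ((target ("arch=armv8.2-a+fp16")))
float16_t recip_estimate (float16_t x)
{
  /* vrecpeh_f16 expands through aarch64_frecpe<mode> with GPF_F16 (FRECPE).  */
  return vrecpeh_f16 (x);
}
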
;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
;; For unequal width int to float conversion
-(define_mode_attr w1 [(SF "w") (DF "x")])
-(define_mode_attr w2 [(SF "x") (DF "w")])
+(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
+(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
(define_mode_attr short_mask [(HI "65535") (QI "255")])
;; For scalar usage of vector/FP registers
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
- (SF "s") (DF "d")
+ (HF "h") (SF "s") (DF "d")
(V8QI "") (V16QI "")
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
(define_mode_attr vas [(DI "") (SI ".2s")])
;; Map a floating point mode to the appropriate register name prefix
-(define_mode_attr s [(SF "s") (DF "d")])
+(define_mode_attr s [(HF "h") (SF "s") (DF "d")])
;; Give the length suffix letter for a sign- or zero-extension.
(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
- (TI "") (SF "")
- (DF "")])
+ (TI "") (HF "")
+ (SF "") (DF "")])
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
(V2DI "d") (V4HF "h")
(V8HF "h") (V2SF "s")
(V4SF "s") (V2DF "d")
+ (HF "h")
(SF "s") (DF "d")
(QI "b") (HI "h")
(SI "s") (DI "d")])
(V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI") (DF "DI")
- (SF "SI")])
+ (SF "SI") (HF "HI")])
;; Lower case mode of results of comparison operations.
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
;; for the unequal width integer to fp conversions
-(define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
-(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")])
+(define_mode_attr fcvt_iesize [(HF "di") (SF "di") (DF "si")])
+(define_mode_attr FCVT_IESIZE [(HF "DI") (SF "DI") (DF "SI")])
(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
(V4HI "V8HI") (V8HI "V4HI")
(V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(V2DF "_q")
- (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
+ (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
(define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v")