From: Jiong Wang <jiong.wang@arm.com>
Date: Mon, 25 Jul 2016 16:00:28 +0000 (+0000)
Subject: [AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d7f33f07d88984cbe769047e3d07fc21067fbba9;p=gcc.git

[AArch64][7/10] ARMv8.2-A FP16 one operand scalar intrinsics

gcc/
	* config.gcc (aarch64*-*-*): Install arm_fp16.h.
	* config/aarch64/aarch64-builtins.c (hi_UP): New.
	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
	* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
	mode.
	(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
	(aarch64_cm<optab><mode>): Likewise.
	* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
	(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
	(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
	(sqrt<mode>2): Likewise.
	(abs<mode>2): Likewise.
	(<optab><mode>hf2): New pattern for HF mode.
	(<optab>hihf2): Likewise.
	* config/aarch64/arm_neon.h: Include arm_fp16.h.
	* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
	(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
	Support HF mode.
	* config/aarch64/arm_fp16.h: New file.
	(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
	vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
	vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
	vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
	vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
	vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
	vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
	vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
	vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
	vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
	vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
	vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
	vsqrth_f16): New.

From-SVN: r238722
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e9dd4f36aad..a26721b8c51 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2016-07-25  Jiong Wang  <jiong.wang@arm.com>
+
+	* config.gcc (aarch64*-*-*): Install arm_fp16.h.
+	* config/aarch64/aarch64-builtins.c (hi_UP): New.
+	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
+	* config/aarch64/aarch64-simd.md (aarch64_frsqrte<mode>): Extend to HF
+	mode.
+	(aarch64_frecp<FRECP:frecp_suffix><mode>): Likewise.
+	(aarch64_cm<optab><mode>): Likewise.
+	* config/aarch64/aarch64.md (<frint_pattern><mode>2): Likewise.
+	(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
+	(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
+	(sqrt<mode>2): Likewise.
+	(abs<mode>2): Likewise.
+	(<optab><mode>hf2): New pattern for HF mode.
+	(<optab>hihf2): Likewise.
+	* config/aarch64/arm_neon.h: Include arm_fp16.h.
+	* config/aarch64/iterators.md (GPF_F16, GPI_F16, VHSDF_HSDF): New.
+	(w1, w2, v, s, q, Vmtype, V_cmp_result, fcvt_iesize, FCVT_IESIZE):
+	Support HF mode.
+	* config/aarch64/arm_fp16.h: New file.
+	(vabsh_f16, vceqzh_f16, vcgezh_f16, vcgtzh_f16, vclezh_f16, vcltzh_f16,
+	vcvth_f16_s16, vcvth_f16_s32, vcvth_f16_s64, vcvth_f16_u16,
+	vcvth_f16_u32, vcvth_f16_u64, vcvth_s16_f16, vcvth_s32_f16,
+	vcvth_s64_f16, vcvth_u16_f16, vcvth_u32_f16, vcvth_u64_f16,
+	vcvtah_s16_f16, vcvtah_s32_f16, vcvtah_s64_f16, vcvtah_u16_f16,
+	vcvtah_u32_f16, vcvtah_u64_f16, vcvtmh_s16_f16, vcvtmh_s32_f16,
+	vcvtmh_s64_f16, vcvtmh_u16_f16, vcvtmh_u32_f16, vcvtmh_u64_f16,
+	vcvtnh_s16_f16, vcvtnh_s32_f16, vcvtnh_s64_f16, vcvtnh_u16_f16,
+	vcvtnh_u32_f16, vcvtnh_u64_f16, vcvtph_s16_f16, vcvtph_s32_f16,
+	vcvtph_s64_f16, vcvtph_u16_f16, vcvtph_u32_f16, vcvtph_u64_f16,
+	vnegh_f16, vrecpeh_f16, vrecpxh_f16, vrndh_f16, vrndah_f16, vrndih_f16,
+	vrndmh_f16, vrndnh_f16, vrndph_f16, vrndxh_f16, vrsqrteh_f16,
+	vsqrth_f16): New.
+
 2016-07-25  Jiong Wang  <jiong.wang@arm.com>
 
 	* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 1f75f178773..8827dc830d3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -300,7 +300,7 @@ m32c*-*-*)
         ;;
 aarch64*-*-*)
 	cpu_type=aarch64
-	extra_headers="arm_neon.h arm_acle.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
 	c_target_objs="aarch64-c.o"
 	cxx_target_objs="aarch64-c.o"
 	extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index af5fac5b29c..ca91d9108ea 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -62,6 +62,7 @@
 #define si_UP    SImode
 #define sf_UP    SFmode
 #define hi_UP    HImode
+#define hf_UP    HFmode
 #define qi_UP    QImode
 #define UP(X) X##_UP
 
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 363e131327d..6f50d8405d3 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -274,6 +274,14 @@
   BUILTIN_VHSDF (UNOP, round, 2)
   BUILTIN_VHSDF_DF (UNOP, frintn, 2)
 
+  VAR1 (UNOP, btrunc, 2, hf)
+  VAR1 (UNOP, ceil, 2, hf)
+  VAR1 (UNOP, floor, 2, hf)
+  VAR1 (UNOP, frintn, 2, hf)
+  VAR1 (UNOP, nearbyint, 2, hf)
+  VAR1 (UNOP, rint, 2, hf)
+  VAR1 (UNOP, round, 2, hf)
+
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
   VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
   VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
@@ -292,7 +300,8 @@
   VAR1 (UNOP, lroundv2sf, 2, v2si)
   VAR1 (UNOP, lroundv4sf, 2, v4si)
   VAR1 (UNOP, lroundv2df, 2, v2di)
-  /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2.  */
+  /* Implemented by l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2.  */
+  BUILTIN_GPI_I16 (UNOP, lroundhf, 2)
   VAR1 (UNOP, lroundsf, 2, si)
   VAR1 (UNOP, lrounddf, 2, di)
 
@@ -301,6 +310,7 @@
   VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
   VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
   VAR1 (UNOPUS, lrounduv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2)
   VAR1 (UNOPUS, lroundusf, 2, si)
   VAR1 (UNOPUS, lroundudf, 2, di)
 
@@ -309,12 +319,14 @@
   VAR1 (UNOP, lceilv2sf, 2, v2si)
   VAR1 (UNOP, lceilv4sf, 2, v4si)
   VAR1 (UNOP, lceilv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOP, lceilhf, 2)
 
   VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
   VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
   VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
   VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
   VAR1 (UNOPUS, lceiluv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2)
   VAR1 (UNOPUS, lceilusf, 2, si)
   VAR1 (UNOPUS, lceiludf, 2, di)
 
@@ -323,12 +335,14 @@
   VAR1 (UNOP, lfloorv2sf, 2, v2si)
   VAR1 (UNOP, lfloorv4sf, 2, v4si)
   VAR1 (UNOP, lfloorv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOP, lfloorhf, 2)
 
   VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
   VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
   VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
   VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
   VAR1 (UNOPUS, lflooruv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2)
   VAR1 (UNOPUS, lfloorusf, 2, si)
   VAR1 (UNOPUS, lfloorudf, 2, di)
 
@@ -337,6 +351,7 @@
   VAR1 (UNOP, lfrintnv2sf, 2, v2si)
   VAR1 (UNOP, lfrintnv4sf, 2, v4si)
   VAR1 (UNOP, lfrintnv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2)
   VAR1 (UNOP, lfrintnsf, 2, si)
   VAR1 (UNOP, lfrintndf, 2, di)
 
@@ -345,6 +360,7 @@
   VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
   VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
   VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
+  BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2)
   VAR1 (UNOPUS, lfrintnusf, 2, si)
   VAR1 (UNOPUS, lfrintnudf, 2, di)
 
@@ -376,9 +392,9 @@
 
   /* Implemented by
      aarch64_frecp<FRECP:frecp_suffix><mode>.  */
-  BUILTIN_GPF (UNOP, frecpe, 0)
+  BUILTIN_GPF_F16 (UNOP, frecpe, 0)
   BUILTIN_GPF (BINOP, frecps, 0)
-  BUILTIN_GPF (UNOP, frecpx, 0)
+  BUILTIN_GPF_F16 (UNOP, frecpx, 0)
 
   BUILTIN_VDQ_SI (UNOP, urecpe, 0)
 
@@ -389,6 +405,7 @@
      only ever used for the int64x1_t intrinsic, there is no scalar version.  */
   BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
   BUILTIN_VHSDF (UNOP, abs, 2)
+  VAR1 (UNOP, abs, 2, hf)
 
   BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
   VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
@@ -483,7 +500,7 @@
   BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
 
   /* Implemented by aarch64_rsqrte<mode>.  */
-  BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
+  BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
 
   /* Implemented by aarch64_rsqrts<mode>.  */
   BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
@@ -495,17 +512,34 @@
   BUILTIN_VHSDF (BINOP, faddp, 0)
 
   /* Implemented by aarch64_cm<optab><mode>.  */
-  BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
-  BUILTIN_VHSDF_SDF (BINOP_USS, cmge, 0)
-  BUILTIN_VHSDF_SDF (BINOP_USS, cmgt, 0)
-  BUILTIN_VHSDF_SDF (BINOP_USS, cmle, 0)
-  BUILTIN_VHSDF_SDF (BINOP_USS, cmlt, 0)
+  BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0)
+  BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0)
+  BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0)
+  BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0)
+  BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0)
 
   /* Implemented by neg<mode>2.  */
-  BUILTIN_VHSDF (UNOP, neg, 2)
+  BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
 
   /* Implemented by aarch64_fac<optab><mode>.  */
   BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
   BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
   BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
   BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
+
+  /* Implemented by sqrt<mode>2.  */
+  VAR1 (UNOP, sqrt, 2, hf)
+
+  /* Implemented by <optab><mode>hf2.  */
+  VAR1 (UNOP, floatdi, 2, hf)
+  VAR1 (UNOP, floatsi, 2, hf)
+  VAR1 (UNOP, floathi, 2, hf)
+  VAR1 (UNOPUS, floatunsdi, 2, hf)
+  VAR1 (UNOPUS, floatunssi, 2, hf)
+  VAR1 (UNOPUS, floatunshi, 2, hf)
+  BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2)
+  BUILTIN_GPI (UNOP, fix_truncsf, 2)
+  BUILTIN_GPI (UNOP, fix_truncdf, 2)
+  BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
+  BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
+  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 501858d9ac3..2ed9d9f8ba8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -383,8 +383,8 @@
 )
 
 (define_insn "aarch64_rsqrte<mode>"
-  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
-	(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
+  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 		     UNSPEC_RSQRTE))]
   "TARGET_SIMD"
   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
@@ -1755,6 +1755,32 @@
   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
 )
 
+;; HF Scalar variants of related SIMD instructions.
+(define_insn "l<fcvt_pattern><su_optab>hfhi2"
+  [(set (match_operand:HI 0 "register_operand" "=w")
+	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
+		      FCVT)))]
+  "TARGET_SIMD_F16INST"
+  "fcvt<frint_suffix><su>\t%h0, %h1"
+  [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>_trunchfhi2"
+  [(set (match_operand:HI 0 "register_operand" "=w")
+	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
+  "TARGET_SIMD_F16INST"
+  "fcvtz<su>\t%h0, %h1"
+  [(set_attr "type" "neon_fp_to_int_s")]
+)
+
+(define_insn "<optab>hihf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
+  "TARGET_SIMD_F16INST"
+  "<su_optab>cvtf\t%h0, %h1"
+  [(set_attr "type" "neon_int_to_fp_s")]
+)
+
 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
 	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
@@ -4297,8 +4323,8 @@
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
 	(neg:<V_cmp_result>
 	  (COMPARISONS:<V_cmp_result>
-	    (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
-	    (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
+	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
 	  )))]
   "TARGET_SIMD"
   "@
@@ -5425,12 +5451,12 @@
 )
 
 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
-		    FRECP))]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+	 FRECP))]
   "TARGET_SIMD"
   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
-  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
+  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
 )
 
 (define_insn "aarch64_frecps<mode>"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bcb7db08657..56ad581da6c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4450,22 +4450,23 @@
 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "<frint_pattern><mode>2"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
 	 FRINT))]
   "TARGET_FLOAT"
   "frint<frint_suffix>\\t%<s>0, %<s>1"
-  [(set_attr "type" "f_rint<s>")]
+  [(set_attr "type" "f_rint<stype>")]
 )
 
 ;; frcvt floating-point round to integer and convert standard patterns.
 ;; Expands to lbtrunc, lceil, lfloor, lround.
-(define_insn "l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2"
+(define_insn "l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
-		      FCVT)))]
+	(FIXUORS:GPI
+	  (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
+	   FCVT)))]
   "TARGET_FLOAT"
-  "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1"
+  "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF_F16:s>1"
   [(set_attr "type" "f_cvtf2i")]
 )
 
@@ -4593,19 +4594,11 @@
   [(set_attr "type" "f_cvt")]
 )
 
-(define_insn "fix_trunc<GPF:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
-  "TARGET_FLOAT"
-  "fcvtzs\\t%<GPI:w>0, %<GPF:s>1"
-  [(set_attr "type" "f_cvtf2i")]
-)
-
-(define_insn "fixuns_trunc<GPF:mode><GPI:mode>2"
+(define_insn "<optab>_trunc<GPF_F16:mode><GPI:mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-        (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
+	(FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
-  "fcvtzu\\t%<GPI:w>0, %<GPF:s>1"
+  "fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1"
   [(set_attr "type" "f_cvtf2i")]
 )
 
@@ -4629,6 +4622,14 @@
   [(set_attr "type" "f_cvti2f")]
 )
 
+(define_insn "<optab><mode>hf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+	(FLOATUORS:HF (match_operand:GPI 1 "register_operand" "r")))]
+  "TARGET_FP_F16INST"
+  "<su_optab>cvtf\t%h0, %<w>1"
+  [(set_attr "type" "f_cvti2f")]
+)
+
 ;; Convert between fixed-point and floating-point (scalar modes)
 
 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3"
@@ -4735,16 +4736,16 @@
 )
 
 (define_insn "neg<mode>2"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-        (neg:GPF (match_operand:GPF 1 "register_operand" "w")))]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
   "fneg\\t%<s>0, %<s>1"
-  [(set_attr "type" "ffarith<s>")]
+  [(set_attr "type" "ffarith<stype>")]
 )
 
 (define_expand "sqrt<mode>2"
-  [(set (match_operand:GPF 0 "register_operand")
-        (sqrt:GPF (match_operand:GPF 1 "register_operand")))]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
 {
   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
@@ -4752,19 +4753,19 @@
 })
 
 (define_insn "*sqrt<mode>2"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-        (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
   "fsqrt\\t%<s>0, %<s>1"
-  [(set_attr "type" "fsqrt<s>")]
+  [(set_attr "type" "fsqrt<stype>")]
 )
 
 (define_insn "abs<mode>2"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-        (abs:GPF (match_operand:GPF 1 "register_operand" "w")))]
+  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
+	(abs:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
   "fabs\\t%<s>0, %<s>1"
-  [(set_attr "type" "ffarith<s>")]
+  [(set_attr "type" "ffarith<stype>")]
 )
 
 ;; Given that smax/smin do not specify the result when either input is NaN,
diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h
new file mode 100644
index 00000000000..818aa61925b
--- /dev/null
+++ b/gcc/config/aarch64/arm_fp16.h
@@ -0,0 +1,365 @@
+/* ARM FP16 scalar intrinsics include file.
+
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_FP16_H_
+#define _AARCH64_FP16_H_
+
+#include <stdint.h>
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef __fp16 float16_t;
+
+/* ARMv8.2-A FP16 one operand scalar intrinsics.  */
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vabsh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_abshf (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vceqzh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgezh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcgtzh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vclezh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcltzh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s16 (int16_t __a)
+{
+  return __builtin_aarch64_floathihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s32 (int32_t __a)
+{
+  return __builtin_aarch64_floatsihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_s64 (int64_t __a)
+{
+  return __builtin_aarch64_floatdihf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u16 (uint16_t __a)
+{
+  return __builtin_aarch64_floatunshihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u32 (uint32_t __a)
+{
+  return __builtin_aarch64_floatunssihf_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vcvth_f16_u64 (uint64_t __a)
+{
+  return __builtin_aarch64_floatunsdihf_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvth_s16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fix_trunchfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvth_s32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fix_trunchfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvth_s64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fix_trunchfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvth_u16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fixuns_trunchfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvth_u32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fixuns_trunchfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvth_u64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_fixuns_trunchfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtah_s16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lroundhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtah_s32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lroundhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtah_s64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lroundhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtah_u16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lrounduhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtah_u32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lrounduhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtah_u64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lrounduhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtmh_s16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfloorhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtmh_s32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfloorhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtmh_s64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfloorhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtmh_u16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lflooruhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtmh_u32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lflooruhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtmh_u64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lflooruhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtnh_s16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtnh_s32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtnh_s64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtnh_u16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnuhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtnh_u32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnuhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtnh_u64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lfrintnuhfdi_us (__a);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vcvtph_s16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceilhfhi (__a);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vcvtph_s32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceilhfsi (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vcvtph_s64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceilhfdi (__a);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vcvtph_u16_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceiluhfhi_us (__a);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcvtph_u32_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceiluhfsi_us (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcvtph_u64_f16 (float16_t __a)
+{
+  return __builtin_aarch64_lceiluhfdi_us (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vnegh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_neghf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpeh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_frecpehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrecpxh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_frecpxhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_btrunchf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndah_f16 (float16_t __a)
+{
+  return __builtin_aarch64_roundhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndih_f16 (float16_t __a)
+{
+  return __builtin_aarch64_nearbyinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndmh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_floorhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndnh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_frintnhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndph_f16 (float16_t __a)
+{
+  return __builtin_aarch64_ceilhf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrndxh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_rinthf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vrsqrteh_f16 (float16_t __a)
+{
+  return __builtin_aarch64_rsqrtehf (__a);
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vsqrth_f16 (float16_t __a)
+{
+  return __builtin_aarch64_sqrthf (__a);
+}
+
+#pragma GCC pop_options
+
+#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4382efda8c3..fd555583b46 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26032,6 +26032,8 @@ __INTERLEAVE_LIST (zip)
 
 /* ARMv8.2-A FP16 intrinsics.  */
 
+#include "arm_fp16.h"
+
 #pragma GCC push_options
 #pragma GCC target ("arch=armv8.2-a+fp16")
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 011b937105e..20d0f1bf615 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -26,6 +26,9 @@
 ;; Iterator for General Purpose Integer registers (32- and 64-bit modes)
 (define_mode_iterator GPI [SI DI])
 
+;; Iterator for HI, SI, DI, some instructions can only work on these modes.
+(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+
 ;; Iterator for QI and HI modes
 (define_mode_iterator SHORT [QI HI])
 
@@ -38,6 +41,9 @@
 ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
 (define_mode_iterator GPF [SF DF])
 
+;; Iterator for all scalar floating point modes (HF, SF, DF)
+(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+
 ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
 (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
 
@@ -102,6 +108,11 @@
 (define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
 				 (V8HF "TARGET_SIMD_F16INST")
 				 V2SF V4SF V2DF SF DF])
+(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
+				  (V8HF "TARGET_SIMD_F16INST")
+				  V2SF V4SF V2DF
+				  (HF "TARGET_SIMD_F16INST")
+				  SF DF])
 
 ;; Vector single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
@@ -372,8 +383,8 @@
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
 ;; For inequal width int to float conversion
-(define_mode_attr w1 [(SF "w") (DF "x")])
-(define_mode_attr w2 [(SF "x") (DF "w")])
+(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
+(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
 
 (define_mode_attr short_mask [(HI "65535") (QI "255")])
 
@@ -385,7 +396,7 @@
 
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
-		    (SF "s") (DF "d")
+		    (HF  "h") (SF "s") (DF "d")
 		    (V8QI "") (V16QI "")
 		    (V4HI "") (V8HI "")
 		    (V2SI "") (V4SI  "")
@@ -416,7 +427,7 @@
 (define_mode_attr vas [(DI "") (SI ".2s")])
 
 ;; Map a floating point mode to the appropriate register name prefix
-(define_mode_attr s [(SF "s") (DF "d")])
+(define_mode_attr s [(HF "h") (SF "s") (DF "d")])
 
 ;; Give the length suffix letter for a sign- or zero-extension.
 (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
@@ -452,8 +463,8 @@
 			 (V4SF ".4s") (V2DF ".2d")
 			 (DI   "")    (SI   "")
 			 (HI   "")    (QI   "")
-			 (TI   "")    (SF   "")
-			 (DF   "")])
+			 (TI   "")    (HF   "")
+			 (SF   "")    (DF   "")])
 
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
@@ -468,6 +479,7 @@
 			  (V2DI "d") (V4HF "h")
 			  (V8HF "h") (V2SF  "s")
 			  (V4SF "s") (V2DF  "d")
+			  (HF   "h")
 			  (SF   "s") (DF  "d")
 			  (QI "b")   (HI "h")
 			  (SI "s")   (DI "d")])
@@ -639,7 +651,7 @@
 				(V4HF "V4HI") (V8HF  "V8HI")
 				(V2SF "V2SI") (V4SF  "V4SI")
 				(V2DF "V2DI") (DF    "DI")
-				(SF   "SI")])
+				(SF   "SI")   (HF    "HI")])
 
 ;; Lower case mode of results of comparison operations.
 (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
@@ -702,8 +714,8 @@
 
 
 ;; for the inequal width integer to fp conversions
-(define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
-(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")])
+(define_mode_attr fcvt_iesize [(HF "di") (SF "di") (DF "si")])
+(define_mode_attr FCVT_IESIZE [(HF "DI") (SF "DI") (DF "SI")])
 
 (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
 				(V4HI "V8HI") (V8HI  "V4HI")
@@ -757,7 +769,7 @@
 		     (V4HF "") (V8HF "_q")
 		     (V2SF "") (V4SF  "_q")
 			       (V2DF  "_q")
-		     (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
+		     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
 
 (define_mode_attr vp [(V8QI "v") (V16QI "v")
 		      (V4HI "v") (V8HI  "v")