From 0768b127e5ea675efb5377fc6bbd52e399a6f580 Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Fri, 23 Sep 2016 10:34:57 +0000 Subject: [PATCH] [PATCH 12/17][ARM] Add builtins for NEON FP16 intrinsics. 2016-09-23 Matthew Wahab * config/arm/arm_neon_builtins.def (vadd): New (v8hf, v4hf variants). (vmulf): New (v8hf, v4hf variants). (vfma): New (v8hf, v4hf variants). (vfms): New (v8hf, v4hf variants). (vsub): New (v8hf, v4hf variants). (vcage): New (v8hf, v4hf variants). (vcagt): New (v8hf, v4hf variants). (vcale): New (v8hf, v4hf variants). (vcalt): New (v8hf, v4hf variants). (vceq): New (v8hf, v4hf variants). (vcgt): New (v8hf, v4hf variants). (vcge): New (v8hf, v4hf variants). (vcle): New (v8hf, v4hf variants). (vclt): New (v8hf, v4hf variants). (vceqz): New (v8hf, v4hf variants). (vcgez): New (v8hf, v4hf variants). (vcgtz): New (v8hf, v4hf variants). (vcltz): New (v8hf, v4hf variants). (vclez): New (v8hf, v4hf variants). (vabd): New (v8hf, v4hf variants). (vmaxf): New (v8hf, v4hf variants). (vmaxnm): New (v8hf, v4hf variants). (vminf): New (v8hf, v4hf variants). (vminnm): New (v8hf, v4hf variants). (vpmaxf): New (v4hf variant). (vpminf): New (v4hf variant). (vpadd): New (v4hf variant). (vrecps): New (v8hf, v4hf variants). (vrsqrts): New (v8hf, v4hf variants). (vabs): New (v8hf, v4hf variants). (vneg): New (v8hf, v4hf variants). (vrecpe): New (v8hf, v4hf variants). (vrnd): New (v8hf, v4hf variants). (vrnda): New (v8hf, v4hf variants). (vrndm): New (v8hf, v4hf variants). (vrndn): New (v8hf, v4hf variants). (vrndp): New (v8hf, v4hf variants). (vrndx): New (v8hf, v4hf variants). (vrsqrte): New (v8hf, v4hf variants). (vmul_lane): Add v4hf and v8hf variants. (vmul_n): Add v4hf and v8hf variants. (vext): New (v8hf, v4hf variants). (vcvts): New (v8hi, v4hi variants). (vcvts): New (v8hf, v4hf variants). (vcvtu): New (v8hi, v4hi variants). (vcvtu): New (v8hf, v4hf variants). (vcvts_n): New (v8hf, v4hf variants). (vcvtu_n): New (v8hi, v4hi variants). (vcvts_n): New (v8hi, v4hi variants). (vcvtu_n): New (v8hf, v4hf variants). (vbsl): New (v8hf, v4hf variants). (vcvtas): New (v8hf, v4hf variants). (vcvtau): New (v8hf, v4hf variants). (vcvtms): New (v8hf, v4hf variants). (vcvtmu): New (v8hf, v4hf variants). (vcvtns): New (v8hf, v4hf variants). (vcvtnu): New (v8hf, v4hf variants). (vcvtps): New (v8hf, v4hf variants). (vcvtpu): New (v8hf, v4hf variants). From-SVN: r240422 --- gcc/ChangeLog | 63 ++++++++++++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 59 +++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9e76a72575e..9d306bd5104 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,66 @@ +2016-09-23 Matthew Wahab + + * config/arm/arm_neon_builtins.def (vadd): New (v8hf, v4hf + variants). + (vmulf): New (v8hf, v4hf variants). + (vfma): New (v8hf, v4hf variants). + (vfms): New (v8hf, v4hf variants). + (vsub): New (v8hf, v4hf variants). + (vcage): New (v8hf, v4hf variants). + (vcagt): New (v8hf, v4hf variants). + (vcale): New (v8hf, v4hf variants). + (vcalt): New (v8hf, v4hf variants). + (vceq): New (v8hf, v4hf variants). + (vcgt): New (v8hf, v4hf variants). + (vcge): New (v8hf, v4hf variants). + (vcle): New (v8hf, v4hf variants). + (vclt): New (v8hf, v4hf variants). + (vceqz): New (v8hf, v4hf variants). + (vcgez): New (v8hf, v4hf variants). + (vcgtz): New (v8hf, v4hf variants). + (vcltz): New (v8hf, v4hf variants). + (vclez): New (v8hf, v4hf variants). + (vabd): New (v8hf, v4hf variants). + (vmaxf): New (v8hf, v4hf variants). + (vmaxnm): New (v8hf, v4hf variants). + (vminf): New (v8hf, v4hf variants). + (vminnm): New (v8hf, v4hf variants). + (vpmaxf): New (v4hf variant). + (vpminf): New (v4hf variant). + (vpadd): New (v4hf variant). + (vrecps): New (v8hf, v4hf variants). + (vrsqrts): New (v8hf, v4hf variants). + (vabs): New (v8hf, v4hf variants). + (vneg): New (v8hf, v4hf variants). + (vrecpe): New (v8hf, v4hf variants). + (vrnd): New (v8hf, v4hf variants). + (vrnda): New (v8hf, v4hf variants). + (vrndm): New (v8hf, v4hf variants). + (vrndn): New (v8hf, v4hf variants). + (vrndp): New (v8hf, v4hf variants). + (vrndx): New (v8hf, v4hf variants). + (vrsqrte): New (v8hf, v4hf variants). + (vmul_lane): Add v4hf and v8hf variants. + (vmul_n): Add v4hf and v8hf variants. + (vext): New (v8hf, v4hf variants). + (vcvts): New (v8hi, v4hi variants). + (vcvts): New (v8hf, v4hf variants). + (vcvtu): New (v8hi, v4hi variants). + (vcvtu): New (v8hf, v4hf variants). + (vcvts_n): New (v8hf, v4hf variants). + (vcvtu_n): New (v8hi, v4hi variants). + (vcvts_n): New (v8hi, v4hi variants). + (vcvtu_n): New (v8hf, v4hf variants). + (vbsl): New (v8hf, v4hf variants). + (vcvtas): New (v8hf, v4hf variants). + (vcvtau): New (v8hf, v4hf variants). + (vcvtms): New (v8hf, v4hf variants). + (vcvtmu): New (v8hf, v4hf variants). + (vcvtns): New (v8hf, v4hf variants). + (vcvtnu): New (v8hf, v4hf variants). + (vcvtps): New (v8hf, v4hf variants). + (vcvtpu): New (v8hf, v4hf variants). + 2016-09-23 Matthew Wahab * config/arm/arm-builtins.c (hf_UP): New. diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a4ba516209c..b29aa91a64e 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -19,6 +19,7 @@ . */ VAR2 (BINOP, vadd, v2sf, v4sf) +VAR2 (BINOP, vadd, v8hf, v4hf) VAR3 (BINOP, vaddls, v8qi, v4hi, v2si) VAR3 (BINOP, vaddlu, v8qi, v4hi, v2si) VAR3 (BINOP, vaddws, v8qi, v4hi, v2si) @@ -32,12 +33,15 @@ VAR8 (BINOP, vqaddu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) VAR3 (BINOP, vraddhn, v8hi, v4si, v2di) VAR2 (BINOP, vmulf, v2sf, v4sf) +VAR2 (BINOP, vmulf, v8hf, v4hf) VAR2 (BINOP, vmulp, v8qi, v16qi) VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR3 (TERNOP, vmlals, v8qi, v4hi, v2si) VAR3 (TERNOP, vmlalu, v8qi, v4hi, v2si) VAR2 (TERNOP, vfma, v2sf, v4sf) +VAR2 (TERNOP, vfma, v4hf, v8hf) VAR2 (TERNOP, vfms, v2sf, v4sf) +VAR2 (TERNOP, vfms, v4hf, v8hf) VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR3 (TERNOP, vmlsls, v8qi, v4hi, v2si) VAR3 (TERNOP, vmlslu, v8qi, v4hi, v2si) @@ -94,6 +98,7 @@ VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR2 (BINOP, vsub, v2sf, v4sf) +VAR2 (BINOP, vsub, v8hf, v4hf) VAR3 (BINOP, vsubls, v8qi, v4hi, v2si) VAR3 (BINOP, vsublu, v8qi, v4hi, v2si) VAR3 (BINOP, vsubws, v8qi, v4hi, v2si) @@ -111,12 +116,27 @@ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vcage, v2sf, v4sf) VAR2 (BINOP, vcagt, v2sf, v4sf) +VAR2 (BINOP, vcage, v4hf, v8hf) +VAR2 (BINOP, vcagt, v4hf, v8hf) +VAR2 (BINOP, vcale, v4hf, v8hf) +VAR2 (BINOP, vcalt, v4hf, v8hf) +VAR2 (BINOP, vceq, v4hf, v8hf) +VAR2 (BINOP, vcge, v4hf, v8hf) +VAR2 (BINOP, vcgt, v4hf, v8hf) +VAR2 (BINOP, vcle, v4hf, v8hf) +VAR2 (BINOP, vclt, v4hf, v8hf) +VAR2 (UNOP, vceqz, v4hf, v8hf) +VAR2 (UNOP, vcgez, v4hf, v8hf) +VAR2 (UNOP, vcgtz, v4hf, v8hf) +VAR2 (UNOP, vclez, v4hf, v8hf) +VAR2 (UNOP, vcltz, v4hf, v8hf) VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabds, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vabdu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vabdf, v2sf, v4sf) VAR3 (BINOP, vabdls, v8qi, v4hi, v2si) VAR3 (BINOP, vabdlu, v8qi, v4hi, v2si) +VAR2 (BINOP, vabd, v8hf, v4hf) VAR6 (TERNOP, vabas, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (TERNOP, vabau, v8qi, v4hi, v2si, v16qi, v8hi, v4si) @@ -126,27 +146,38 @@ VAR3 (TERNOP, vabalu, v8qi, v4hi, v2si) VAR6 (BINOP, vmaxs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vmaxu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vmaxf, v2sf, v4sf) +VAR2 (BINOP, vmaxf, v8hf, v4hf) +VAR2 (BINOP, vmaxnm, v4hf, v8hf) VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vminf, v2sf, v4sf) +VAR2 (BINOP, vminf, v4hf, v8hf) +VAR2 (BINOP, vminnm, v8hf, v4hf) VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si) VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si) VAR1 (BINOP, vpmaxf, v2sf) +VAR1 (BINOP, vpmaxf, v4hf) VAR3 (BINOP, vpmins, v8qi, v4hi, v2si) VAR3 (BINOP, vpminu, v8qi, v4hi, v2si) VAR1 (BINOP, vpminf, v2sf) +VAR1 (BINOP, vpminf, v4hf) VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) +VAR1 (BINOP, vpadd, v4hf) VAR6 (UNOP, vpaddls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (UNOP, vpaddlu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vrecps, v2sf, v4sf) VAR2 (BINOP, vrsqrts, v2sf, v4sf) +VAR2 (BINOP, vrecps, v4hf, v8hf) +VAR2 (BINOP, vrsqrts, v4hf, v8hf) VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) +VAR2 (UNOP, vabs, v8hf, v4hf) +VAR2 (UNOP, vneg, v8hf, v4hf) VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) @@ -155,8 +186,16 @@ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di) VAR2 (UNOP, vcnt, v8qi, v16qi) VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vrecpe, v8hf, v4hf) VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vrsqrte, v4hf, v8hf) VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) +VAR2 (UNOP, vrnd, v8hf, v4hf) +VAR2 (UNOP, vrnda, v8hf, v4hf) +VAR2 (UNOP, vrndm, v8hf, v4hf) +VAR2 (UNOP, vrndn, v8hf, v4hf) +VAR2 (UNOP, vrndp, v8hf, v4hf) +VAR2 (UNOP, vrndx, v8hf, v4hf) /* FIXME: vget_lane supports more variants than this! */ VAR10 (GETLANE, vget_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) @@ -179,7 +218,7 @@ VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) VAR3 (UNOP, vmovls, v8qi, v4hi, v2si) VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si) -VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR8 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si) VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si) @@ -188,7 +227,7 @@ VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si) VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si) VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si) -VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR8 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) VAR2 (MAC_N, vmlals_n, v4hi, v2si) VAR2 (MAC_N, vmlalu_n, v4hi, v2si) @@ -204,9 +243,17 @@ VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) VAR2 (UNOP, vrev16, v8qi, v16qi) VAR4 (UNOP, vcvts, v2si, v2sf, v4si, v4sf) +VAR2 (UNOP, vcvts, v4hi, v8hi) +VAR2 (UNOP, vcvts, v4hf, v8hf) +VAR2 (UNOP, vcvtu, v4hi, v8hi) +VAR2 (UNOP, vcvtu, v4hf, v8hf) VAR4 (UNOP, vcvtu, v2si, v2sf, v4si, v4sf) VAR4 (BINOP, vcvts_n, v2si, v2sf, v4si, v4sf) VAR4 (BINOP, vcvtu_n, v2si, v2sf, v4si, v4sf) +VAR2 (BINOP, vcvts_n, v4hf, v8hf) +VAR2 (BINOP, vcvtu_n, v4hi, v8hi) +VAR2 (BINOP, vcvts_n, v4hi, v8hi) +VAR2 (BINOP, vcvtu_n, v4hf, v8hf) VAR1 (UNOP, vcvtv4sf, v4hf) VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, @@ -223,6 +270,14 @@ VAR1 (UNOP, vcvtav2sf, v2si) VAR1 (UNOP, vcvtav4sf, v4si) VAR1 (UNOP, vcvtauv2sf, v2si) VAR1 (UNOP, vcvtauv4sf, v4si) +VAR2 (UNOP, vcvtas, v4hf, v8hf) +VAR2 (UNOP, vcvtau, v4hf, v8hf) +VAR2 (UNOP, vcvtms, v4hf, v8hf) +VAR2 (UNOP, vcvtmu, v4hf, v8hf) +VAR2 (UNOP, vcvtns, v4hf, v8hf) +VAR2 (UNOP, vcvtnu, v4hf, v8hf) +VAR2 (UNOP, vcvtps, v4hf, v8hf) +VAR2 (UNOP, vcvtpu, v4hf, v8hf) VAR1 (UNOP, vcvtpv2sf, v2si) VAR1 (UNOP, vcvtpv4sf, v4si) VAR1 (UNOP, vcvtpuv2sf, v2si) -- 2.30.2