From: Carl Love Date: Mon, 24 Jul 2017 16:50:22 +0000 (+0000) Subject: rs6000-c.c: Add support for built-in functions vector float vec_extract_fp32_from_sho... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=26bca0ed48bcbd292d08bb2e83276658d6ea5434;p=gcc.git rs6000-c.c: Add support for built-in functions vector float vec_extract_fp32_from_shorth... gcc/ChangeLog: 2017-07-24 Carl Love * config/rs6000/rs6000-c.c: Add support for built-in functions vector float vec_extract_fp32_from_shorth (vector unsigned short); vector float vec_extract_fp32_from_shortl (vector unsigned short); * config/rs6000/altivec.h (vec_extract_fp_from_shorth, vec_extract_fp_from_shortl): Add defines for the two builtins. * config/rs6000/rs6000-builtin.def (VEXTRACT_FP_FROM_SHORTH, VEXTRACT_FP_FROM_SHORTL): Add BU_P9V_OVERLOAD_1 and BU_P9V_VSX_1 new builtins. * config/rs6000/vsx.md (vsx_xvcvhpsp): Add define_insn. (vextract_fp_from_shorth, vextract_fp_from_shortl): Add define_expands. * doc/extend.texi: Update the built-in documentation file for the new built-in function. gcc/testsuite/ChangeLog: 2017-07-24 Carl Love * gcc.target/powerpc/builtins-3-p9-runnable.c: Add new test file for the new built-ins. From-SVN: r250477 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ef43c77e164..c77ba24254c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2017-07-24 Carl Love + + * config/rs6000/rs6000-c.c: Add support for built-in functions + vector float vec_extract_fp32_from_shorth (vector unsigned short); + vector float vec_extract_fp32_from_shortl (vector unsigned short); + * config/rs6000/altivec.h (vec_extract_fp_from_shorth, + vec_extract_fp_from_shortl): Add defines for the two builtins. + * config/rs6000/rs6000-builtin.def (VEXTRACT_FP_FROM_SHORTH, + VEXTRACT_FP_FROM_SHORTL): Add BU_P9V_OVERLOAD_1 and BU_P9V_VSX_1 + new builtins. + * config/rs6000/vsx.md (vsx_xvcvhpsp): Add define_insn. + (vextract_fp_from_shorth, vextract_fp_from_shortl): Add define_expands. 
+ * doc/extend.texi: Update the built-in documentation file for the + new built-in function. + 2017-07-24 Jakub Jelinek PR bootstrap/81521 diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 71cdca523df..4d34a97c9ae 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -449,6 +449,9 @@ #define vec_insert_exp __builtin_vec_insert_exp #define vec_test_data_class __builtin_vec_test_data_class +#define vec_extract_fp_from_shorth __builtin_vec_vextract_fp_from_shorth +#define vec_extract_fp_from_shortl __builtin_vec_vextract_fp_from_shortl + #define scalar_extract_exp __builtin_vec_scalar_extract_exp #define scalar_extract_sig __builtin_vec_scalar_extract_sig #define scalar_insert_exp __builtin_vec_scalar_insert_exp diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index e161423662c..a043e70f93b 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2066,6 +2066,9 @@ BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") BU_P9V_OVERLOAD_1 (REVB, "revb") +BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth") +BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl") + /* ISA 3.0 vector scalar overloaded 2 argument functions. */ BU_P9V_OVERLOAD_2 (VSIEDP, "scalar_insert_exp") @@ -2084,6 +2087,8 @@ BU_P9V_VSX_1 (VEEDP, "extract_exp_dp", CONST, xvxexpdp) BU_P9V_VSX_1 (VEESP, "extract_exp_sp", CONST, xvxexpsp) BU_P9V_VSX_1 (VESDP, "extract_sig_dp", CONST, xvxsigdp) BU_P9V_VSX_1 (VESSP, "extract_sig_sp", CONST, xvxsigsp) +BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth", CONST, vextract_fp_from_shorth) +BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl", CONST, vextract_fp_from_shortl) /* 2 argument vsx vector functions added in ISA 3.0 (power9). 
*/ BU_P9V_VSX_2 (VIEDP, "insert_exp_dp", CONST, xviexpdp) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 937cda0aa2a..4fcd1513b5b 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -5184,6 +5184,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTL, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTL, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, RS6000_BTI_INTQI, RS6000_BTI_UINTSI, RS6000_BTI_V16QI, 0 }, diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index ff65caa35dc..4e57340aa3b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -326,6 +326,7 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVHPSP UNSPEC_VSX_CVSPDPN UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP @@ -348,6 +349,8 @@ UNSPEC_VSX_ROUND_I UNSPEC_VSX_ROUND_IC UNSPEC_VSX_SLDWI + UNSPEC_VSX_XXPERM + UNSPEC_VSX_XXSPLTW UNSPEC_VSX_XXSPLTD UNSPEC_VSX_DIVSD @@ -368,6 +371,8 @@ UNSPEC_VSX_SIEXPQP UNSPEC_VSX_SCMPEXPDP UNSPEC_VSX_STSTDC + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL UNSPEC_VSX_VXEXP UNSPEC_VSX_VXSIG UNSPEC_VSX_VIEXP @@ -1746,6 +1751,15 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; Generate xvcvhpsp instruction +(define_insn "vsx_xvcvhpsp" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVHPSP))] + "TARGET_P9_VECTOR" + "xvcvhpsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF ;; format of scalars is actually DF. 
(define_insn "vsx_xscvdpsp_scalar" @@ -4506,7 +4520,65 @@ "xxinsertw %x0,%x1,%3" [(set_attr "type" "vecperm")]) - +;; Generate vector extract four float 32 values from left four elements +;; of eight element vector of float 16 values. +(define_expand "vextract_fp_from_shorth" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))] + "TARGET_P9_VECTOR" +{ + int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; + int i; + + rtx rvals[16]; + rtx mask = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (V16QImode); + rtvec v; + + for (i = 0; i < 16; i++) + rvals[i] = GEN_INT (vals[i]); + + /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 + inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move + src half words 0,1,2,3 for the conversion instruction. */ + v = gen_rtvec_v (16, rvals); + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], + operands[1], mask)); + emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); + DONE; +}) + +;; Generate vector extract four float 32 values from right four elements +;; of eight element vector of float 16 values. +(define_expand "vextract_fp_from_shortl" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))] + "TARGET_P9_VECTOR" +{ + int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; + int i; + rtx rvals[16]; + rtx mask = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (V16QImode); + rtvec v; + + for (i = 0; i < 16; i++) + rvals[i] = GEN_INT (vals[i]); + + /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 + inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move + src half words 4,5,6,7 for the conversion instruction. 
*/ + v = gen_rtvec_v (16, rvals); + emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], + operands[1], mask)); + emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); + DONE; +}) + ;; Support for ISA 3.0 vector byte reverse ;; Swap all bytes with in a vector diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index da5b0af60fc..1dd53755128 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -18306,6 +18306,9 @@ vector bool short vec_cmpne (vector bool short, vector bool short); vector bool int vec_cmpne (vector bool int, vector bool int); vector bool long long vec_cmpne (vector bool long long, vector bool long long); +vector float vec_extract_fp32_from_shorth (vector unsigned short); +vector float vec_extract_fp32_from_shortl (vector unsigned short); + vector long long vec_vctz (vector long long); vector unsigned long long vec_vctz (vector unsigned long long); vector int vec_vctz (vector int); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 951cabb0aa5..bb4302f0541 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-07-24 Carl Love + + * gcc.target/powerpc/builtins-3-p9-runnable.c: Add new test file for + the new built-ins. + 2017-07-24 Jackson Woodruff * gcc.target/aarch64/simd/vmla_elem_1.c: New. 
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c new file mode 100644 index 00000000000..24589b55639 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c @@ -0,0 +1,35 @@ +/* { dg-do run { target { powerpc64*-*-* && { lp64 && p9vector_hw } } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mupper-regs-di" } */ + +#include <altivec.h> // vector + +void abort (void); + +int main() { + int i; + vector float vfr, vfexpt; + vector unsigned short vusha; + + /* 1.0, -2.0, 0.0, 8.5, 1.5, 0.5, 1.25, -0.25 */ + vusha = (vector unsigned short){0B011110000000000, 0B1100000000000000, + 0B000000000000000, 0B0100100001000000, + 0B011111000000000, 0B0011100000000000, + 0B011110100000000, 0B1011010000000000}; + + vfexpt = (vector float){1.0, -2.0, 0.0, 8.5}; + vfr = vec_extract_fp_from_shorth(vusha); + + for (i=0; i<4; i++) { + if (vfr[i] != vfexpt[i]) + abort(); + } + + vfexpt = (vector float){1.5, 0.5, 1.25, -0.25}; + vfr = vec_extract_fp_from_shortl(vusha); + + for (i=0; i<4; i++) { + if (vfr[i] != vfexpt[i]) + abort(); + } +}