From f1ad419ebfdcfaf26117e069b10bd1b154276049 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Fri, 4 Sep 2020 19:24:22 -0500 Subject: [PATCH] rs6000, vector integer multiply/divide/modulo instructions 2021-01-15 Carl Love gcc/ChangeLog: * config/rs6000/altivec.h (vec_mulh, vec_dive, vec_mod): New defines. * config/rs6000/altivec.md (VIlong): Move define to file vsx.md. * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI, DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI, DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI, MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI): Add builtin define. (MULH, DIVE, MOD): Add new BU_P10_OVERLOAD_2 definitions. * config/rs6000/rs6000-call.c (VSX_BUILTIN_VEC_DIV, P10_BUILTIN_VEC_DIVE, P10_BUILTIN_VEC_MOD, P10_BUILTIN_VEC_MULH): New overloaded definitions. (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI, P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI, P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI, P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI, P10V_BUILTIN_MULHU_V4SI]: Add case statement for builtins. * config/rs6000/rs6000.md (bits): Add new attribute sizes V4SI, V2DI. * config/rs6000/vsx.md (VIlong): Moved from config/rs6000/altivec.md. (UNSPEC_VDIVES, UNSPEC_VDIVEU): New unspec definitions. (vsx_mul_v2di): Add if TARGET_POWER10 statement. (vsx_udiv_v2di): Add if TARGET_POWER10 statement. (dives_<mode>, diveu_<mode>, div<mode>3, udiv<mode>3, mods_<mode>, modu_<mode>, mulhs_<mode>, mulhu_<mode>, mulv2di3): Add define_insn, mode is VIlong. * doc/extend.texi (vec_mulh, vec_mul, vec_div, vec_dive, vec_mod): Add builtin descriptions. gcc/testsuite/ChangeLog: * gcc.target/powerpc/builtins-1-p10-runnable.c: New test file. 
--- gcc/config/rs6000/altivec.h | 4 + gcc/config/rs6000/altivec.md | 2 - gcc/config/rs6000/rs6000-builtin.def | 21 + gcc/config/rs6000/rs6000-call.c | 53 +++ gcc/config/rs6000/rs6000.md | 5 +- gcc/config/rs6000/vsx.md | 211 +++++++--- gcc/doc/extend.texi | 120 ++++++ .../powerpc/builtins-1-p10-runnable.c | 398 ++++++++++++++++++ 8 files changed, 760 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 06f0d4d9f14..961621a0841 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -750,6 +750,10 @@ __altivec_scalar_pred(vec_any_nle, #define vec_strir_p(a) __builtin_vec_strir_p (a) #define vec_stril_p(a) __builtin_vec_stril_p (a) +#define vec_mulh(a, b) __builtin_vec_mulh ((a), (b)) +#define vec_dive(a, b) __builtin_vec_dive ((a), (b)) +#define vec_mod(a, b) __builtin_vec_mod ((a), (b)) + /* VSX Mask Manipulation builtin. */ #define vec_genbm __builtin_vec_mtvsrbm #define vec_genhm __builtin_vec_mtvsrhm diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index fc19a8fc807..27a269b9e72 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -193,8 +193,6 @@ ;; Short vec int modes (define_mode_iterator VIshort [V8HI V16QI]) -;; Longer vec int modes for rotate/mask ops -(define_mode_iterator VIlong [V2DI V4SI]) ;; Vec float modes (define_mode_iterator VF [V4SF]) ;; Vec modes, pity mode iterators are not composable diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 8aa31ad0a06..058a32abf4c 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2883,6 +2883,24 @@ BU_P10V_AV_3 (VSRDB_V8HI, "vsrdb_v8hi", CONST, vsrdb_v8hi) BU_P10V_AV_3 (VSRDB_V4SI, "vsrdb_v4si", CONST, vsrdb_v4si) BU_P10V_AV_3 (VSRDB_V2DI, "vsrdb_v2di", CONST, vsrdb_v2di) +BU_P10V_AV_2 (DIVES_V4SI, "vdivesw", CONST, 
dives_v4si) +BU_P10V_AV_2 (DIVES_V2DI, "vdivesd", CONST, dives_v2di) +BU_P10V_AV_2 (DIVEU_V4SI, "vdiveuw", CONST, diveu_v4si) +BU_P10V_AV_2 (DIVEU_V2DI, "vdiveud", CONST, diveu_v2di) +BU_P10V_AV_2 (DIVS_V4SI, "vdivsw", CONST, divv4si3) +BU_P10V_AV_2 (DIVS_V2DI, "vdivsd", CONST, divv2di3) +BU_P10V_AV_2 (DIVU_V4SI, "vdivuw", CONST, udivv4si3) +BU_P10V_AV_2 (DIVU_V2DI, "vdivud", CONST, udivv2di3) +BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, mods_v2di) +BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, mods_v4si) +BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, modu_v2di) +BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, modu_v4si) +BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di) +BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si) +BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di) +BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si) +BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3) + BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si) BU_P10V_VSX_1 (VXXSPLTIW_V4SF, "vxxspltiw_v4sf", CONST, xxspltiw_v4sf) @@ -2958,6 +2976,9 @@ BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p") BU_P10_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros") BU_P10_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_all_ones") +BU_P10_OVERLOAD_2 (MULH, "mulh") +BU_P10_OVERLOAD_2 (DIVE, "dive") +BU_P10_OVERLOAD_2 (MOD, "mod") BU_P10_OVERLOAD_1 (MTVSRBM, "mtvsrbm") BU_P10_OVERLOAD_1 (MTVSRHM, "mtvsrhm") diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 2308cc8b4a2..ae0c761f0a4 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -1069,6 +1069,40 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_DIV, 
P10V_BUILTIN_DIVU_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVU_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 }, { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVUXDDP, @@ -1909,6 +1943,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHS_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHU_V4SI, + RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHU_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, @@ -14438,6 +14483,14 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case P10V_BUILTIN_XXGENPCVM_V8HI: case P10V_BUILTIN_XXGENPCVM_V4SI: case P10V_BUILTIN_XXGENPCVM_V2DI: + case P10V_BUILTIN_DIVEU_V4SI: + case P10V_BUILTIN_DIVEU_V2DI: + case P10V_BUILTIN_DIVU_V4SI: + case P10V_BUILTIN_DIVU_V2DI: + case P10V_BUILTIN_MODU_V2DI: + case P10V_BUILTIN_MODU_V4SI: + case P10V_BUILTIN_MULHU_V2DI: + case P10V_BUILTIN_MULHU_V4SI: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bb9fb42f82a..976425361d9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -668,9 +668,10 @@ (V4SI "du") (V2DI "d")]) -;; How many bits in this mode? +;; How many bits (per element) in this mode? 
(define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64") - (SF "32") (DF "64")]) + (SF "32") (DF "64") + (V4SI "32") (V2DI "64")]) ; DImode bits (define_mode_attr dbits [(QI "56") (HI "48") (SI "32")]) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0c1bda522a9..3e0518631df 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -267,6 +267,10 @@ (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI]) (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI]) +;; Longer vec int modes for rotate/mask ops +;; and Vector Integer Multiply/Divide/Modulo Instructions +(define_mode_iterator VIlong [V2DI V4SI]) + ;; Constants for creating unspecs (define_c_enum "unspec" [UNSPEC_VSX_CONCAT @@ -363,6 +367,8 @@ UNSPEC_INSERTR UNSPEC_REPLACE_ELT UNSPEC_REPLACE_UN + UNSPEC_VDIVES + UNSPEC_VDIVEU ]) (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 @@ -1623,28 +1629,35 @@ rtx op0 = operands[0]; rtx op1 = operands[1]; rtx op2 = operands[2]; - rtx op3 = gen_reg_rtx (DImode); - rtx op4 = gen_reg_rtx (DImode); - rtx op5 = gen_reg_rtx (DImode); - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); - if (TARGET_POWERPC64) - emit_insn (gen_muldi3 (op5, op3, op4)); - else - { - rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); - emit_move_insn (op5, ret); - } - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); - if (TARGET_POWERPC64) - emit_insn (gen_muldi3 (op3, op3, op4)); + + if (TARGET_POWER10) + emit_insn (gen_mulv2di3 (op0, op1, op2) ); + else { - rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); - emit_move_insn (op3, ret); + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + if (TARGET_POWERPC64) + emit_insn (gen_muldi3 
(op5, op3, op4)); + else + { + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); + emit_move_insn (op5, ret); + } + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + if (TARGET_POWERPC64) + emit_insn (gen_muldi3 (op3, op3, op4)); + else + { + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); + emit_move_insn (op3, ret); + } + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); } - emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); DONE; } [(set_attr "type" "mul")]) @@ -1718,37 +1731,46 @@ rtx op0 = operands[0]; rtx op1 = operands[1]; rtx op2 = operands[2]; - rtx op3 = gen_reg_rtx (DImode); - rtx op4 = gen_reg_rtx (DImode); - rtx op5 = gen_reg_rtx (DImode); - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); - if (TARGET_POWERPC64) - emit_insn (gen_udivdi3 (op5, op3, op4)); - else - { - rtx libfunc = optab_libfunc (udiv_optab, DImode); - rtx target = emit_library_call_value (libfunc, - op5, LCT_NORMAL, DImode, - op3, DImode, - op4, DImode); - emit_move_insn (op5, target); - } - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); - if (TARGET_POWERPC64) - emit_insn (gen_udivdi3 (op3, op3, op4)); - else - { - rtx libfunc = optab_libfunc (udiv_optab, DImode); - rtx target = emit_library_call_value (libfunc, - op3, LCT_NORMAL, DImode, - op3, DImode, - op4, DImode); - emit_move_insn (op3, target); - } - emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); - DONE; + + if (TARGET_POWER10) + emit_insn (gen_udivv2di3 (op0, op1, op2) ); + else + { + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + + if (TARGET_POWERPC64) + emit_insn (gen_udivdi3 (op5, op3, op4)); + else + { + rtx libfunc = 
optab_libfunc (udiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op5, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op5, target); + } + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + + if (TARGET_POWERPC64) + emit_insn (gen_udivdi3 (op3, op3, op4)); + else + { + rtx libfunc = optab_libfunc (udiv_optab, DImode); + rtx target = emit_library_call_value (libfunc, + op3, LCT_NORMAL, DImode, + op3, DImode, + op4, DImode); + emit_move_insn (op3, target); + } + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); + } + DONE; } [(set_attr "type" "div")]) @@ -6104,3 +6126,92 @@ "TARGET_POWER10" "vexpandm %0,%1" [(set_attr "type" "vecsimple")]) + +(define_insn "dives_<mode>" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")] + UNSPEC_VDIVES))] + "TARGET_POWER10" + "vdives<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "<bits>")]) + +(define_insn "diveu_<mode>" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")] + UNSPEC_VDIVEU))] + "TARGET_POWER10" + "vdiveu<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "<bits>")]) + +(define_insn "div<mode>3" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + "vdivs<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "<bits>")]) + +(define_insn "udiv<mode>3" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + "vdivu<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size"
"<bits>")]) + +(define_insn "mods_<mode>" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + "vmods<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "<bits>")]) + +(define_insn "modu_<mode>" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") + (match_operand:VIlong 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + "vmodu<wd> %0,%1,%2" + [(set_attr "type" "vecdiv") + (set_attr "size" "<bits>")]) + +(define_insn "mulhs_<mode>" ; NOTE(review): RTL below reads as (op1 >> 32) * (op2 >> 32), not the high half of op1 * op2, and 32 is the full element width for V4SI; confirm, an unspec may be needed. + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (mult:VIlong (ashiftrt + (match_operand:VIlong 1 "vsx_register_operand" "v") + (const_int 32)) + (ashiftrt + (match_operand:VIlong 2 "vsx_register_operand" "v") + (const_int 32))))] + "TARGET_POWER10" + "vmulhs<wd> %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "mulhu_<mode>" ; NOTE(review): us_mult is the unsigned saturating multiply and the operands are arithmetically shifted by 32; confirm this models vmulhu, an unspec may be needed. + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (us_mult:VIlong (ashiftrt + (match_operand:VIlong 1 "vsx_register_operand" "v") + (const_int 32)) + (ashiftrt + (match_operand:VIlong 2 "vsx_register_operand" "v") + (const_int 32))))] + "TARGET_POWER10" + "vmulhu<wd> %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +;; Vector multiply low double word +(define_insn "mulv2di3" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") + (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v") + (match_operand:V2DI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + "vmulld %0,%1,%2" + [(set_attr "type" "veccomplex")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 2748e98462e..c5b1faff60b 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21642,6 +21642,126 @@ integer value between 0 and 255 inclusive. 
@exdent vector unsigned int vec_genpcvm (vector unsigned long long int, const int) @end smallexample + +Vector Integer Multiply/Divide/Modulo + +@smallexample +@exdent vector signed int +@exdent vec_mulh (vector signed int a, vector signed int b) +@exdent vector unsigned int +@exdent vec_mulh (vector unsigned int a, vector unsigned int b) +@end smallexample + +For each integer value @code{i} from 0 to 3, do the following. The integer +value in word element @code{i} of a is multiplied by the integer value in word +element @code{i} of b. The high-order 32 bits of the 64-bit product are placed +into word element @code{i} of the vector returned. + +@smallexample +@exdent vector signed long long +@exdent vec_mulh (vector signed long long a, vector signed long long b) +@exdent vector unsigned long long +@exdent vec_mulh (vector unsigned long long a, vector unsigned long long b) +@end smallexample + +For each integer value @code{i} from 0 to 1, do the following. The integer +value in doubleword element @code{i} of a is multiplied by the integer value in +doubleword element @code{i} of b. The high-order 64 bits of the 128-bit product +are placed into doubleword element @code{i} of the vector returned. + +@smallexample +@exdent vector unsigned long long +@exdent vec_mul (vector unsigned long long a, vector unsigned long long b) +@exdent vector signed long long +@exdent vec_mul (vector signed long long a, vector signed long long b) +@end smallexample + +For each integer value @code{i} from 0 to 1, do the following. The integer +value in doubleword element @code{i} of a is multiplied by the integer value in +doubleword element @code{i} of b. The low-order 64 bits of the 128-bit product +are placed into doubleword element @code{i} of the vector returned. 
+ +@smallexample +@exdent vector signed int +@exdent vec_div (vector signed int a, vector signed int b) +@exdent vector unsigned int +@exdent vec_div (vector unsigned int a, vector unsigned int b) +@end smallexample + +For each integer value @code{i} from 0 to 3, do the following. The integer in +word element @code{i} of a is divided by the integer in word element @code{i} +of b. The unique integer quotient is placed into the word element @code{i} of +the vector returned. If an attempt is made to perform any of the divisions +<anything> ÷ 0 then the quotient is undefined. + +@smallexample +@exdent vector signed long long +@exdent vec_div (vector signed long long a, vector signed long long b) +@exdent vector unsigned long long +@exdent vec_div (vector unsigned long long a, vector unsigned long long b) +@end smallexample + +For each integer value @code{i} from 0 to 1, do the following. The integer in +doubleword element @code{i} of a is divided by the integer in doubleword +element @code{i} of b. The unique integer quotient is placed into the +doubleword element @code{i} of the vector returned. If an attempt is made to +perform any of the divisions 0x8000_0000_0000_0000 ÷ -1 or <anything> ÷ 0 then +the quotient is undefined. + +@smallexample +@exdent vector signed int +@exdent vec_dive (vector signed int a, vector signed int b) +@exdent vector unsigned int +@exdent vec_dive (vector unsigned int a, vector unsigned int b) +@end smallexample + +For each integer value @code{i} from 0 to 3, do the following. The integer in +word element @code{i} of a is shifted left by 32 bits, then divided by the +integer in word element @code{i} of b. The unique integer quotient is placed +into the word element @code{i} of the vector returned. If the quotient cannot +be represented in 32 bits, or if an attempt is made to perform any of the +divisions <anything> ÷ 0 then the quotient is undefined. 
+ +@smallexample +@exdent vector signed long long +@exdent vec_dive (vector signed long long a, vector signed long long b) +@exdent vector unsigned long long +@exdent vec_dive (vector unsigned long long a, vector unsigned long long b) +@end smallexample + +For each integer value @code{i} from 0 to 1, do the following. The integer in +doubleword element @code{i} of a is shifted left by 64 bits, then divided by +the integer in doubleword element @code{i} of b. The unique integer quotient is +placed into the doubleword element @code{i} of the vector returned. If the +quotient cannot be represented in 64 bits, or if an attempt is made to perform +<anything> ÷ 0 then the quotient is undefined. + +@smallexample +@exdent vector signed int +@exdent vec_mod (vector signed int a, vector signed int b) +@exdent vector unsigned int +@exdent vec_mod (vector unsigned int a, vector unsigned int b) +@end smallexample + +For each integer value @code{i} from 0 to 3, do the following. The integer in +word element @code{i} of a is divided by the integer in word element @code{i} +of b. The unique integer remainder is placed into the word element @code{i} of +the vector returned. If an attempt is made to perform any of the divisions +0x8000_0000 ÷ -1 or <anything> ÷ 0 then the remainder is undefined. + +@smallexample +@exdent vector signed long long +@exdent vec_mod (vector signed long long a, vector signed long long b) +@exdent vector unsigned long long +@exdent vec_mod (vector unsigned long long a, vector unsigned long long b) +@end smallexample + +For each integer value @code{i} from 0 to 1, do the following. The integer in +doubleword element @code{i} of a is divided by the integer in doubleword +element @code{i} of b. The unique integer remainder is placed into the +doubleword element @code{i} of the vector returned. If an attempt is made to +perform <anything> ÷ 0 then the remainder is undefined. 
+
 Generate PCV from specified Mask size, as if implemented by the
 @code{xxgenpcvbm}, @code{xxgenpcvhm}, @code{xxgenpcvwm} instructions, where
 immediate value is either 0, 1, 2 or 3.
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
new file mode 100644
index 00000000000..222c8b3a409
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
@@ -0,0 +1,398 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
+
+/* { dg-final { scan-assembler-times {\mvdivsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivuw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivesw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveuw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivesd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmoduw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmulld\M} 2 } } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+#include <altivec.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+void abort (void);
+
+int main()
+{
+  int i;
+  vector int i_arg1, i_arg2;
+  vector unsigned int u_arg1, u_arg2;
+  vector long long int d_arg1, d_arg2;
+  vector long long unsigned int ud_arg1, ud_arg2;
+
+  vector int vec_i_expected, vec_i_result;
+  vector unsigned int vec_u_expected, vec_u_result;
+  vector long long int vec_d_expected, vec_d_result;
+  vector long long unsigned int vec_ud_expected, vec_ud_result;
+
+  /* Signed word divide */
+  i_arg1 = (vector int){ 20, 40, 60, 80};
+  i_arg2 = (vector int){ 2, 2, 2, 2};
+  vec_i_expected = (vector int){10, 20, 30, 40};
+
+  vec_i_result = vec_div (i_arg1, i_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_i_expected[i] != vec_i_result[i])
+#if DEBUG
+        printf("ERROR vec_div signed result[%d] = %d != "
+               "expected[%d] = %d\n",
+               i, vec_i_result[i], i, vec_i_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned word divide */
+  u_arg1 = (vector unsigned int){ 20, 40, 60, 80};
+  u_arg2 = (vector unsigned int){ 2, 2, 2, 2};
+  vec_u_expected = (vector unsigned int){10, 20, 30, 40};
+
+  vec_u_result = vec_div (u_arg1, u_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_u_expected[i] != vec_u_result[i])
+#if DEBUG
+        printf("ERROR vec_div unsigned result[%d] = %u != "
+               "expected[%d] = %u\n",
+               i, vec_u_result[i], i, vec_u_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed double word divide */
+  d_arg1 = (vector long long){ 24, 68};
+  d_arg2 = (vector long long){ 2, 2};
+  vec_d_expected = (vector long long){12, 34};
+
+  vec_d_result = vec_div (d_arg1, d_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_expected[i] != vec_d_result[i])
+#if DEBUG
+        printf("ERROR vec_div signed result[%d] = %lld != "
+               "expected[%d] = %lld\n",
+               i, vec_d_result[i], i, vec_d_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned double word divide */
+  ud_arg1 = (vector unsigned long long){ 24, 68};
+  ud_arg2 = (vector unsigned long long){ 2, 2};
+  vec_ud_expected = (vector unsigned long long){12, 34};
+
+  vec_ud_result = vec_div (ud_arg1, ud_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_ud_expected[i] != vec_ud_result[i])
+#if DEBUG
+        printf("ERROR vec_div unsigned result[%d] = %llu != "
+               "expected[%d] = %llu\n",
+               i, vec_ud_result[i], i, vec_ud_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Divide Extended signed word  result = (arg1 << 32)/arg2 */
+  i_arg1 = (vector int){ 2, 4, 6, 8};
+  i_arg2 = (vector int){ 2048, 2048, 2048, 2048};
+  vec_i_expected = (vector int){4194304, 8388608, 12582912, 16777216};
+
+  vec_i_result = vec_dive (i_arg1, i_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_i_expected[i] != vec_i_result[i])
+#if DEBUG
+        printf("ERROR vec_dive signed result[%d] = %d != "
+               "expected[%d] = %d\n",
+               i, vec_i_result[i], i, vec_i_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Divide Extended unsigned word  result = (arg1 << 32)/arg2 */
+  u_arg1 = (vector unsigned int){ 2, 4, 6, 8};
+  u_arg2 = (vector unsigned int){ 2048, 2048, 2048, 2048};
+  vec_u_expected = (vector unsigned int){4194304, 8388608,
+                                         12582912, 16777216};
+
+  vec_u_result = vec_dive (u_arg1, u_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_u_expected[i] != vec_u_result[i])
+#if DEBUG
+        printf("ERROR vec_dive unsigned result[%d] = %u != "
+               "expected[%d] = %u\n",
+               i, vec_u_result[i], i, vec_u_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Divide Extended double signed  result = (arg1 << 64)/arg2 */
+  d_arg1 = (vector long long int){ 2, 4};
+  d_arg2 = (vector long long int){ 4294967296, 4294967296};
+
+  vec_d_expected = (vector long long int){8589934592, 17179869184};
+
+  vec_d_result = vec_dive (d_arg1, d_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_expected[i] != vec_d_result[i])
+#if DEBUG
+        printf("ERROR vec_dive signed result[%d] = %lld != "
+               "expected[%d] = %lld\n",
+               i, vec_d_result[i], i, vec_d_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Divide Extended double unsigned result = (arg1 << 64)/arg2 */
+  ud_arg1 = (vector long long unsigned int){ 2, 4};
+  ud_arg2 = (vector long long unsigned int){ 4294967296, 4294967296};
+
+  vec_ud_expected = (vector long long unsigned int){8589934592,
+                                                    17179869184};
+
+  vec_ud_result = vec_dive (ud_arg1, ud_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_ud_expected[i] != vec_ud_result[i])
+#if DEBUG
+        printf("ERROR vec_dive unsigned result[%d] = %llu != "
+               "expected[%d] = %llu\n",
+               i, vec_ud_result[i], i, vec_ud_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed word modulo */
+  i_arg1 = (vector int){ 23, 45, 61, 89};
+  i_arg2 = (vector int){ 2, 2, 2, 2};
+  vec_i_expected = (vector int){1, 1, 1, 1};
+
+  vec_i_result = vec_mod (i_arg1, i_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_i_expected[i] != vec_i_result[i])
+#if DEBUG
+        printf("ERROR vec_mod signed result[%d] = %d != "
+               "expected[%d] = %d\n",
+               i, vec_i_result[i], i, vec_i_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned word modulo */
+  u_arg1 = (vector unsigned int){ 25, 41, 67, 86};
+  u_arg2 = (vector unsigned int){ 3, 3, 3, 3};
+  vec_u_expected = (vector unsigned int){1, 2, 1, 2};
+
+  vec_u_result = vec_mod (u_arg1, u_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_u_expected[i] != vec_u_result[i])
+#if DEBUG
+        printf("ERROR vec_mod unsigned result[%d] = %u != "
+               "expected[%d] = %u\n",
+               i, vec_u_result[i], i, vec_u_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed double word modulo */
+  d_arg1 = (vector long long){ 24, 68};
+  d_arg2 = (vector long long){ 7, 7};
+  vec_d_expected = (vector long long){3, 5};
+
+  vec_d_result = vec_mod (d_arg1, d_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_expected[i] != vec_d_result[i])
+#if DEBUG
+        printf("ERROR vec_mod signed result[%d] = %lld != "
+               "expected[%d] = %lld\n",
+               i, vec_d_result[i], i, vec_d_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned double word modulo */
+  ud_arg1 = (vector unsigned long long){ 24, 68};
+  ud_arg2 = (vector unsigned long long){ 8, 8};
+  vec_ud_expected = (vector unsigned long long){0, 4};
+
+  vec_ud_result = vec_mod (ud_arg1, ud_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_ud_expected[i] != vec_ud_result[i])
+#if DEBUG
+        printf("ERROR vec_mod unsigned result[%d] = %llu != "
+               "expected[%d] = %llu\n",
+               i, vec_ud_result[i], i, vec_ud_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed word multiply high */
+  i_arg1 = (vector int){ 2147483648, 2147483648, 2147483648, 2147483648 };
+  i_arg2 = (vector int){ 2, 3, 4, 5};
+  /* 2147483648 becomes -2^31 as an int; high words of -2^31 * {2,3,4,5}.  */
+  vec_i_expected = (vector int){-1, -2, -2, -3};
+
+  vec_i_result = vec_mulh (i_arg1, i_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_i_expected[i] != vec_i_result[i])
+#if DEBUG
+        printf("ERROR vec_mulh signed result[%d] = %d != "
+               "expected[%d] = %d\n",
+               i, vec_i_result[i], i, vec_i_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned word multiply high */
+  u_arg1 = (vector unsigned int){ 2147483648, 2147483648,
+                                  2147483648, 2147483648 };
+  u_arg2 = (vector unsigned int){ 4, 5, 6, 7 };
+  vec_u_expected = (vector unsigned int){2, 2, 3, 3 };
+
+  vec_u_result = vec_mulh (u_arg1, u_arg2);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_u_expected[i] != vec_u_result[i])
+#if DEBUG
+        printf("ERROR vec_mulh unsigned result[%d] = %u != "
+               "expected[%d] = %u\n",
+               i, vec_u_result[i], i, vec_u_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed double word multiply high */
+  d_arg1 = (vector long long int){ 2305843009213693951,
+                                   4611686018427387903 };
+  d_arg2 = (vector long long int){ 12, 20 };
+  vec_d_expected = (vector long long int){ 1, 4 };
+
+  vec_d_result = vec_mulh (d_arg1, d_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_expected[i] != vec_d_result[i])
+#if DEBUG
+        printf("ERROR vec_mulh signed result[%d] = %lld != "
+               "expected[%d] = %lld\n",
+               i, vec_d_result[i], i, vec_d_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned double word multiply high */
+  ud_arg1 = (vector unsigned long long int){ 2305843009213693951,
+                                             4611686018427387903 };
+  ud_arg2 = (vector unsigned long long int){ 32, 10 };
+  vec_ud_expected = (vector unsigned long long int){ 3, 2 };
+
+  vec_ud_result = vec_mulh (ud_arg1, ud_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_ud_expected[i] != vec_ud_result[i])
+#if DEBUG
+        printf("ERROR vec_mulh unsigned result[%d] = %llu != "
+               "expected[%d] = %llu\n",
+               i, vec_ud_result[i], i, vec_ud_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Unsigned double word multiply low */
+  ud_arg1 = (vector unsigned long long int){ 2048, 4096 };
+  ud_arg2 = (vector unsigned long long int){ 2, 4 };
+  vec_ud_expected = (vector unsigned long long int){ 4096, 16384 };
+
+  vec_ud_result = vec_mul (ud_arg1, ud_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_ud_expected[i] != vec_ud_result[i])
+#if DEBUG
+        printf("ERROR vec_mul unsigned result[%d] = %llu != "
+               "expected[%d] = %llu\n",
+               i, vec_ud_result[i], i, vec_ud_expected[i]);
+#else
+        abort();
+#endif
+    }
+
+  /* Signed double word multiply low */
+  d_arg1 = (vector signed long long int){ 2048, 4096 };
+  d_arg2 = (vector signed long long int){ 2, 4 };
+  vec_d_expected = (vector signed long long int){ 4096, 16384 };
+
+  vec_d_result = vec_mul (d_arg1, d_arg2);
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_expected[i] != vec_d_result[i])
+#if DEBUG
+        printf("ERROR vec_mul signed result[%d] = %lld != "
+               "expected[%d] = %lld\n",
+               i, vec_d_result[i], i, vec_d_expected[i]);
+#else
+        abort();
+#endif
+    }
+}
-- 
2.30.2