From 16370e798cada94eb7af14b4a17c785241f19b18 Mon Sep 17 00:00:00 2001
From: Michael Meissner
Date: Wed, 14 Dec 2016 16:08:07 +0000
Subject: [PATCH] rs6000.c (rs6000_split_vec_extract_var): On ISA 3.0/power9,
 add support to use the VEXTU{B,H,W}{L,R}X extract instructions.

[gcc]
2016-12-14  Michael Meissner

        * config/rs6000/rs6000.c (rs6000_split_vec_extract_var): On ISA
        3.0/power9, add support to use the VEXTU{B,H,W}{L,R}X extract
        instructions.
        * config/rs6000/vsx.md (VSr2): Add IEEE 128-bit floating point
        type constraint registers.
        (VSr3): Likewise.
        (FL_CONV): New mode iterator for binary floating types that have a
        direct conversion from 64-bit integer to floating point.
        (vsx_extract__p9): Add support for the ISA 3.0/power9
        VEXTU{B,H,W}{L,R}X extract instructions.
        (vsx_extract__p9 splitter): Add splitter to load up the
        extract byte position into the GPR if we are using the
        VEXTU{B,H,W}{L,R}X extract instructions.
        (vsx_extract__di_p9): Support extracts to GPRs.
        (vsx_extract__store_p9): Support extracting to GPRs so that
        we can use reg+offset address instructions.
        (vsx_extract__var): Support extracts to GPRs.
        (vsx_extract___var): New combiner insn to combine vector
        extracts with zero_extend.
        (vsx_ext__fl_): Optimize extracting a small integer vector
        element and converting it to a floating point type.
        (vsx_ext__ufl_): Likewise.
        (UNSPEC_XXEXTRACTUW): New unspec.
        (UNSPEC_XXINSERTW): Likewise.
        (vextract4b): Add support for the vec_vextract4b built-in
        function.
        (vextract4b_internal): Likewise.
        (vinsert4b): Add support for the vec_insert4b built-in function.
        Include both a version that inserts element 1 from a V4SI object
        and one that inserts a DI object.
        (vinsert4b_internal): Likewise.
        (vinsert4b_di): Likewise.
        (vinsert4b_di_internal): Likewise.
        * config/rs6000/predicates.md (const_0_to_11_operand): New
        predicate, match 0..11.
        * config/rs6000/rs6000-builtin.def (BU_P9V_VSX_3): Set built-in
        type to ternary, not binary.
        (BU_P9V_64BIT_VSX_3): Likewise.
        (P9V_BUILTIN_VEXTRACT4B): Add support for vec_vinsert4b and
        vec_extract4b non-overloaded built-in functions.
        (P9V_BUILTIN_VINSERT4B): Likewise.
        (P9V_BUILTIN_VINSERT4B_DI): Likewise.
        (P9V_BUILTIN_VEC_VEXTULX): Move to section that adds 2 operand ISA
        3.0 built-in functions.
        (P9V_BUILTIN_VEC_VEXTURX): Likewise.
        (P9V_BUILTIN_VEC_VEXTRACT4B): Add support for overloaded
        vec_insert4b and vec_extract4b built-in functions.
        (P9V_BUILTIN_VEC_VINSERT4B): Likewise.
        * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
        overloaded support for vec_vinsert4b and vec_extract4b.
        * config/rs6000/rs6000.c (altivec_expand_builtin): Add checks for
        the vec_insert4b and vec_extract4b byte number being a constant in
        the range 0..11.
        * config/rs6000/altivec.h (vec_vinsert4b): Support vec_vinsert4b
        and vec_extract4b built-in functions.
        * doc/extend.texi (PowerPC VSX built-in functions): Document
        vec_insert4b and vec_extract4b.

[gcc/testsuite]
2016-12-14  Michael Meissner

        * gcc/testsuite/gcc.target/powerpc/vec-extract.h: If DO_TRACE is
        defined, add tracing of the various extracts to stderr.  Add
        support for tests that convert the result to another type.
        * gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c: Likewise.
        * gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c: Likewise.
        * gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c: Add new
        tests that do an extract and then convert the values to double.
        * gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c: Likewise.
        * gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c: Likewise. * gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c: Likewise. * gcc.target/powerpc/p9-extract-1.c: Update test to check for VEXTU{B,H,W}{L,R}X instructions being generated by default instead of VEXTRACTU{B,H} and XXEXTRACTUW. * gcc.target/powerpc/p9-extract-3.c: New test for combination of vec_extract and convert to floating point. * gcc.target/powerpc/p9-vinsert4b-1.c: New test for vec_vinsert4b and vec_extract4b. * gcc.target/powerpc/p9-vinsert4b-2.c: Likewise. From-SVN: r243653 --- gcc/ChangeLog | 61 ++ gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/predicates.md | 5 + gcc/config/rs6000/rs6000-builtin.def | 23 +- gcc/config/rs6000/rs6000-c.c | 27 + gcc/config/rs6000/rs6000.c | 81 ++- gcc/config/rs6000/vsx.md | 285 +++++++-- gcc/doc/extend.texi | 9 + gcc/testsuite/ChangeLog | 23 + .../gcc.target/powerpc/p9-extract-1.c | 121 +++- .../gcc.target/powerpc/p9-extract-3.c | 108 ++++ .../gcc.target/powerpc/p9-vinsert4b-1.c | 39 ++ .../gcc.target/powerpc/p9-vinsert4b-2.c | 30 + .../gcc.target/powerpc/vec-extract-v16qi-df.c | 14 + .../powerpc/vec-extract-v16qiu-df.c | 13 + .../gcc.target/powerpc/vec-extract-v2df.c | 2 + .../gcc.target/powerpc/vec-extract-v4sf.c | 2 + .../gcc.target/powerpc/vec-extract-v4si-df.c | 12 + .../gcc.target/powerpc/vec-extract-v4siu-df.c | 12 + .../gcc.target/powerpc/vec-extract-v8hi-df.c | 12 + .../gcc.target/powerpc/vec-extract-v8hiu-df.c | 12 + .../gcc.target/powerpc/vec-extract.h | 570 ++++++++++-------- 22 files changed, 1144 insertions(+), 319 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-extract-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 25c01dba752..bfeb1c3a076 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2016-12-14 Michael Meissner + + * config/rs6000/rs6000.c (rs6000_split_vec_extract_var): On ISA + 3.0/power9, add support to use the VEXTU{B,H,W}{L,R}X extract + instructions. + * config/rs6000/vsx.md (VSr2): Add IEEE 128-bit floating point + type constraint registers. + (VSr3): Likewise. + (FL_CONV): New mode iterator for binary floating types that have a + direct conversion from 64-bit integer to floating point. + (vsx_extract__p9): Add support for the ISA 3.0/power9 + VEXTU{B,H,W}{L,R}X extract instructions. + (vsx_extract__p9 splitter): Add splitter to load up the + extract byte position into the GPR if we are using the + VEXTU{B,H,W}{L,R}X extract instructions. + (vsx_extract__di_p9): Support extracts to GPRs. + (vsx_extract__store_p9): Support extracting to GPRs so that + we can use reg+offset address instructions. + (vsx_extract__var): Support extracts to GPRs. + (vsx_extract___var): New combiner + insn to combine vector extracts with zero_extend. + (vsx_ext__fl_): Optimize + extracting a small integer vector element and converting it to a + floating point type. 
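As a rough illustration (not from the patch itself; the function names below are invented), this is the kind of source code the entries above target when built with -mcpu=power9 -O2 on a 64-bit target:

#include <altivec.h>

/* Variable-index extract whose result lands in a GPR; with this patch it is
   expected to use vextublx/vextubrx instead of spilling the vector to the
   stack.  */
unsigned int
byte_at (vector unsigned char v, long n)
{
  return vec_extract (v, n);
}

/* Extract of a small integer element followed by a conversion to double;
   expected to stay in the vector registers (e.g. vextractub, vextsb2d,
   xscvsxddp) rather than bouncing through a GPR.  */
double
elem3_as_double (vector signed char v)
{
  return (double) vec_extract (v, 3);
}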
+       (vsx_ext__ufl_): Likewise.
+       (UNSPEC_XXEXTRACTUW): New unspec.
+       (UNSPEC_XXINSERTW): Likewise.
+       (vextract4b): Add support for the vec_vextract4b built-in
+       function.
+       (vextract4b_internal): Likewise.
+       (vinsert4b): Add support for the vec_insert4b built-in function.
+       Include both a version that inserts element 1 from a V4SI object
+       and one that inserts a DI object.
+       (vinsert4b_internal): Likewise.
+       (vinsert4b_di): Likewise.
+       (vinsert4b_di_internal): Likewise.
+       * config/rs6000/predicates.md (const_0_to_11_operand): New
+       predicate, match 0..11.
+       * config/rs6000/rs6000-builtin.def (BU_P9V_VSX_3): Set built-in
+       type to ternary, not binary.
+       (BU_P9V_64BIT_VSX_3): Likewise.
+       (P9V_BUILTIN_VEXTRACT4B): Add support for vec_vinsert4b and
+       vec_extract4b non-overloaded built-in functions.
+       (P9V_BUILTIN_VINSERT4B): Likewise.
+       (P9V_BUILTIN_VINSERT4B_DI): Likewise.
+       (P9V_BUILTIN_VEC_VEXTULX): Move to section that adds 2 operand ISA
+       3.0 built-in functions.
+       (P9V_BUILTIN_VEC_VEXTURX): Likewise.
+       (P9V_BUILTIN_VEC_VEXTRACT4B): Add support for overloaded
+       vec_insert4b and vec_extract4b built-in functions.
+       (P9V_BUILTIN_VEC_VINSERT4B): Likewise.
+       * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
+       overloaded support for vec_vinsert4b and vec_extract4b.
+       * config/rs6000/rs6000.c (altivec_expand_builtin): Add checks for
+       the vec_insert4b and vec_extract4b byte number being a constant in
+       the range 0..11.
+       * config/rs6000/altivec.h (vec_vinsert4b): Support vec_vinsert4b
+       and vec_extract4b built-in functions.
+       * doc/extend.texi (PowerPC VSX built-in functions): Document
+       vec_insert4b and vec_extract4b.
+
 2016-12-14  Martin Liska

        * gimple-pretty-print.c (dump_probability): New function.
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index f8984d9902e..2eeeab18d48 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -394,6 +394,8 @@
 #define vec_vctzd __builtin_vec_vctzd
 #define vec_vctzh __builtin_vec_vctzh
 #define vec_vctzw __builtin_vec_vctzw
+#define vec_vextract4b __builtin_vec_vextract4b
+#define vec_vinsert4b __builtin_vec_vinsert4b
 #define vec_vprtyb __builtin_vec_vprtyb
 #define vec_vprtybd __builtin_vec_vprtybd
 #define vec_vprtybw __builtin_vec_vprtybw
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 57a463b2007..30b212392f6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -210,6 +210,11 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 7)")))

+;; Match op = 0..11
+(define_predicate "const_0_to_11_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 11)")))
+
 ;; Match op = 0..15
 (define_predicate "const_0_to_15_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 68f0936612b..702c2d04d15 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -877,7 +877,16 @@
                    "__builtin_vsx_" NAME,              /* NAME */      \
                    RS6000_BTM_P9_VECTOR,               /* MASK */      \
                    (RS6000_BTC_ ## ATTR                /* ATTR */      \
-                    | RS6000_BTC_BINARY),                              \
+                    | RS6000_BTC_TERNARY),                             \
+                   CODE_FOR_ ## ICODE)                 /* ICODE */
+
+#define BU_P9V_64BIT_VSX_3(ENUM, NAME, ATTR, ICODE)                    \
+  RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_vsx_" NAME,              /* NAME */      \
+                   (RS6000_BTM_64BIT                                   \
+                    | RS6000_BTM_P9_VECTOR),           /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_TERNARY),                             \
+                   CODE_FOR_ ## ICODE)                 /* ICODE */

 /* See the comment on
BU_ALTIVEC_P. */ @@ -1968,6 +1977,11 @@ BU_P9V_AV_2 (VEXTUHRX, "vextuhrx", CONST, vextuhrx) BU_P9V_AV_2 (VEXTUWLX, "vextuwlx", CONST, vextuwlx) BU_P9V_AV_2 (VEXTUWRX, "vextuwrx", CONST, vextuwrx) +/* Insert/extract 4 byte word into a vector. */ +BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) +BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b) +BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) + /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) @@ -2009,12 +2023,13 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", CONST, vector_nez_v4si_p) /* ISA 3.0 Vector scalar overloaded 2 argument functions */ BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") +BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") +BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") - -BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") -BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") +BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") /* Overloaded CMPNE support was implemented prior to Power 9, so is not mentioned here. */ diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index bf01a119bf8..6ec9d59a291 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -4686,6 +4686,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, RS6000_BTI_INTQI, RS6000_BTI_UINTSI, RS6000_BTI_V16QI, 0 }, @@ -4739,6 +4744,28 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, RS6000_BTI_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 042ffc64657..7076ca16a4f 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7522,8 +7522,52 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx 
element, rtx tmp_gpr, { int bit_shift = byte_shift + 3; rtx element2; + int dest_regno = regno_or_subregno (dest); + int src_regno = regno_or_subregno (src); + int element_regno = regno_or_subregno (element); + + gcc_assert (REG_P (tmp_gpr)); + + /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in + a general purpose register. */ + if (TARGET_P9_VECTOR + && (mode == V16QImode || mode == V8HImode || mode == V4SImode) + && INT_REGNO_P (dest_regno) + && ALTIVEC_REGNO_P (src_regno) + && INT_REGNO_P (element_regno)) + { + rtx dest_si = gen_rtx_REG (SImode, dest_regno); + rtx element_si = gen_rtx_REG (SImode, element_regno); + + if (mode == V16QImode) + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextublx (dest_si, element_si, src) + : gen_vextubrx (dest_si, element_si, src)); + + else if (mode == V8HImode) + { + rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextuhlx (dest_si, tmp_gpr_si, src) + : gen_vextuhrx (dest_si, tmp_gpr_si, src)); + } + + + else + { + rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); + emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); + emit_insn (VECTOR_ELT_ORDER_BIG + ? gen_vextuwlx (dest_si, tmp_gpr_si, src) + : gen_vextuwrx (dest_si, tmp_gpr_si, src)); + } + + return; + } - gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec)); + + gcc_assert (REG_P (tmp_altivec)); /* For little endian, adjust element ordering. For V2DI/V2DF, we can use an XOR, otherwise we need to subtract. The shift amount is so VSLO @@ -15550,7 +15594,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) size_t i; enum insn_code icode; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0; + tree arg0, arg1, arg2; rtx op0, pat; machine_mode tmode, mode0; enum rs6000_builtins fcode @@ -15770,6 +15814,39 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case VSX_BUILTIN_VEC_EXT_V1TI: return altivec_expand_vec_ext_builtin (exp, target); + case P9V_BUILTIN_VEXTRACT4B: + case P9V_BUILTIN_VEC_VEXTRACT4B: + arg1 = CALL_EXPR_ARG (exp, 1); + STRIP_NOPS (arg1); + + /* Generate a normal call if it is invalid. */ + if (arg1 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 11) + { + error ("second argument to vec_vextract4b must 0..11"); + return expand_call (exp, target, false); + } + break; + + case P9V_BUILTIN_VINSERT4B: + case P9V_BUILTIN_VINSERT4B_DI: + case P9V_BUILTIN_VEC_VINSERT4B: + arg2 = CALL_EXPR_ARG (exp, 2); + STRIP_NOPS (arg2); + + /* Generate a normal call if it is invalid. */ + if (arg2 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 11) + { + error ("third argument to vec_vinsert4b must 0..11"); + return expand_call (exp, target, false); + } + break; + default: break; /* Fall through. 
*/ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 1801bc05906..7aecbe6d645 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -119,13 +119,17 @@ (V4SF "wf") (DF "ws") (SF "ww") - (DI "wi")]) + (DI "wi") + (KF "wq") + (TF "wp")]) (define_mode_attr VSr3 [(V2DF "wa") (V4SF "wa") (DF "ws") (SF "ww") - (DI "wi")]) + (DI "wi") + (KF "wq") + (TF "wp")]) ;; Map the register class for sp<->dp float conversions, destination (define_mode_attr VSr4 [(SF "ws") @@ -298,6 +302,14 @@ || (FLOAT128_IEEE_P (TFmode) && TARGET_FLOAT128_HW)")]) +;; Mode iterator for binary floating types that have a direct conversion +;; from 64-bit integer to floating point +(define_mode_iterator FL_CONV [SF + DF + (KF "TARGET_FLOAT128_HW") + (TF "TARGET_FLOAT128_HW + && FLOAT128_IEEE_P (TFmode)")]) + ;; Iterator for the 2 short vector types to do a splat from an integer (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) @@ -366,6 +378,8 @@ UNSPEC_VCMPNEZH UNSPEC_VCMPNEW UNSPEC_VCMPNEZW + UNSPEC_XXEXTRACTUW + UNSPEC_XXINSERTW ]) ;; VSX moves @@ -2535,63 +2549,98 @@ }) (define_insn "vsx_extract__p9" - [(set (match_operand: 0 "gpc_reg_operand" "=") + [(set (match_operand: 0 "gpc_reg_operand" "=r,") (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "") - (parallel [(match_operand:QI 2 "" "n")])))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,") + (parallel [(match_operand:QI 2 "" "n,n")]))) + (clobber (match_scratch:SI 3 "=r,X"))] "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" { - HOST_WIDE_INT elt = INTVAL (operands[2]); - HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG - ? GET_MODE_NUNITS (mode) - 1 - elt - : elt); - - HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (mode); - HOST_WIDE_INT offset = unit_size * elt_adj; + if (which_alternative == 0) + return "#"; - operands[2] = GEN_INT (offset); - if (unit_size == 4) - return "xxextractuw %x0,%x1,%2"; else - return "vextractu %0,%1,%2"; + { + HOST_WIDE_INT elt = INTVAL (operands[2]); + HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG + ? GET_MODE_NUNITS (mode) - 1 - elt + : elt); + + HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (mode); + HOST_WIDE_INT offset = unit_size * elt_adj; + + operands[2] = GEN_INT (offset); + if (unit_size == 4) + return "xxextractuw %x0,%x1,%2"; + else + return "vextractu %0,%1,%2"; + } } [(set_attr "type" "vecsimple")]) +(define_split + [(set (match_operand: 0 "int_reg_operand") + (vec_select: + (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") + (parallel [(match_operand:QI 2 "const_int_operand")]))) + (clobber (match_operand:SI 3 "int_reg_operand"))] + "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER && reload_completed" + [(const_int 0)] +{ + rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (mode); + + emit_move_insn (op3, GEN_INT (offset)); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextulx (op0_si, op3, op1)); + else + emit_insn (gen_vexturx (op0_si, op3, op1)); + DONE; +}) + ;; Optimize zero extracts to eliminate the AND after the extract. 
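;; As a rough illustration (not from the patch itself): with -mcpu=power9 -O2,
;; a zero-extending extract such as
;;    unsigned long long get3 (vector unsigned char v) { return vec_extract (v, 3); }
;; is expected to become a single vextub{l,r}x when the result ends up in a
;; GPR, with no separate rlwinm/and needed to clear the upper bits.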
(define_insn_and_split "*vsx_extract__di_p9" - [(set (match_operand:DI 0 "gpc_reg_operand" "=") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,") (zero_extend:DI (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "") - (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) + (clobber (match_scratch:SI 3 "=r,X"))] "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" - [(set (match_dup 3) - (vec_select: - (match_dup 1) - (parallel [(match_dup 2)])))] + [(parallel [(set (match_dup 4) + (vec_select: + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 3))])] { - operands[3] = gen_rtx_REG (mode, REGNO (operands[0])); + operands[4] = gen_rtx_REG (mode, REGNO (operands[0])); }) ;; Optimize stores to use the ISA 3.0 scalar store instructions (define_insn_and_split "*vsx_extract__store_p9" - [(set (match_operand: 0 "memory_operand" "=Z") + [(set (match_operand: 0 "memory_operand" "=Z,m") (vec_select: - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "") - (parallel [(match_operand:QI 2 "const_int_operand" "n")]))) - (clobber (match_scratch: 3 "="))] + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" ",") + (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) + (clobber (match_scratch: 3 "=,&r")) + (clobber (match_scratch:SI 4 "=X,&r"))] "VECTOR_MEM_VSX_P (mode) && TARGET_VEXTRACTUB && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" - [(set (match_dup 3) - (vec_select: - (match_dup 1) - (parallel [(match_dup 2)]))) + [(parallel [(set (match_dup 3) + (vec_select: + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (match_dup 4))]) (set (match_dup 0) (match_dup 3))]) @@ -2721,13 +2770,13 @@ ;; Variable V16QI/V8HI/V4SI extract (define_insn_and_split "vsx_extract__var" - [(set (match_operand: 0 "gpc_reg_operand" "=r,r") + [(set (match_operand: 0 "gpc_reg_operand" "=r,r,r") (unspec: - [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r")] + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r,&b")) - (clobber (match_scratch:V2DI 4 "=&v,X"))] + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" @@ -2738,6 +2787,27 @@ DONE; }) +(define_insn_and_split "*vsx_extract___var" + [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r") + (zero_extend:SDI + (unspec: + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + UNSPEC_VSX_EXTRACT))) + (clobber (match_scratch:DI 3 "=r,r,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + machine_mode smode = mode; + rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), + operands[1], operands[2], + operands[3], operands[4]); + DONE; +}) + ;; VSX_EXTRACT optimizations ;; Optimize double d = (double) vec_extract (vi, ) ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP @@ -2839,6 +2909,56 @@ DONE; }) +;; Optimize f = () vec_extract (, ) +;; Where is SFmode, DFmode (and KFmode/TFmode if those types are IEEE +;; 128-bit hardware types) and is vector char, vector unsigned char, +;; vector 
short or vector unsigned short. +(define_insn_and_split "*vsx_ext__fl_" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=") + (float:FL_CONV + (vec_select: + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch: 3 "=v"))] + "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select: + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 4) + (sign_extend:DI (match_dup 3))) + (set (match_dup 0) + (float: (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +(define_insn_and_split "*vsx_ext__ufl_" + [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=") + (unsigned_float:FL_CONV + (vec_select: + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") + (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) + (clobber (match_scratch: 3 "=v"))] + "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) + (vec_select: + (match_dup 1) + (parallel [(match_dup 2)]))) + (clobber (scratch:SI))]) + (set (match_dup 0) + (float: (match_dup 4)))] +{ + operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + ;; V4SI/V8HI/V16QI set operation on ISA 3.0 (define_insn "vsx_set__p9" [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=") @@ -3686,3 +3806,94 @@ "TARGET_P9_VECTOR" "vextuwrx %0,%1,%2" [(set_attr "type" "vecsimple")]) + +;; Vector insert/extract word at arbitrary byte values. Note, the little +;; endian version needs to adjust the byte number, and the V4SI element in +;; vinsert4b. 
+(define_expand "vextract4b" + [(set (match_operand:DI 0 "gpc_reg_operand") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand") + (match_operand:QI 2 "const_0_to_11_operand")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (12 - INTVAL (operands[2])); +}) + +(define_insn_and_split "*vextract4b_internal" + [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v") + (match_operand:QI 2 "const_0_to_11_operand" "n,n")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" + "@ + xxextractuw %x0,%x1,%2 + #" + "&& reload_completed && int_reg_operand (operands[0], DImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); + rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1)); + + emit_move_insn (op0, op2); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si)); + else + emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si)); + DONE; +} + [(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_11_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + rtx op1 = operands[1]; + rtx v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxpermdi_v4si (v4si_tmp, op1, op1, const1_rtx)); + operands[1] = v4si_tmp; + operands[3] = GEN_INT (12 - INTVAL (operands[3])); + } +}) + +(define_insn "*vinsert4b_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_11_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + [(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b_di" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_11_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[3] = GEN_INT (12 - INTVAL (operands[3])); +}) + +(define_insn "*vinsert4b_di_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_11_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + [(set_attr "type" "vecperm")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 10913d50782..23b8898ad5a 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -17995,6 +17995,15 @@ vector unsigned short vec_vctzh (vector unsigned short); vector int vec_vctzw (vector int); vector unsigned int vec_vctzw (vector int); +long long vec_vextract4b (const vector signed char, const int); +long long vec_vextract4b (const vector unsigned char, const int); + +vector signed char vec_insert4b (vector int, vector signed char, const int); +vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char, + const int); +vector signed char vec_insert4b (long long, vector signed char, const int); +vector unsigned char vec_insert4b (long long, vector unsigned char, const int); + 
 vector int vec_vprtyb (vector int);
 vector unsigned int vec_vprtyb (vector unsigned int);
 vector long long vec_vprtyb (vector long long);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6e1c5174c52..9f7fa7e43c7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,26 @@
+2016-12-14  Michael Meissner
+
+       * gcc/testsuite/gcc.target/powerpc/vec-extract.h: If DO_TRACE is
+       defined, add tracing of the various extracts to stderr.  Add
+       support for tests that convert the result to another type.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c: Add new
+       tests that do an extract and then convert the values to double.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c: Likewise.
+       * gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c: Likewise.
+       * gcc.target/powerpc/p9-extract-1.c: Update test to check for
+       VEXTU{B,H,W}{L,R}X instructions being generated by default instead
+       of VEXTRACTU{B,H} and XXEXTRACTUW.
+       * gcc.target/powerpc/p9-extract-3.c: New test for combination of
+       vec_extract and convert to floating point.
+       * gcc.target/powerpc/p9-vinsert4b-1.c: New test for vec_vinsert4b
+       and vec_extract4b.
+       * gcc.target/powerpc/p9-vinsert4b-2.c: Likewise.
+
 2016-12-14  Toma Tabacu

        * gcc.target/mips/mips16-attributes.c: Remove dg-skip-if for
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c b/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
index fceb334195e..ecbe0ed660d 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
@@ -3,24 +3,107 @@
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mcpu=power9 -O2" } */

+/* Test to make sure VEXTU{B,H,W}{L,R}X is generated for various vector extract
+   operations for ISA 3.0 (-mcpu=power9).  In addition, make sure that neither
+   of the old methods of doing vector extracts is used: explicit stores to the
+   stack or direct move instructions.
*/ + #include -int extract_int_0 (vector int a) { return vec_extract (a, 0); } -int extract_int_3 (vector int a) { return vec_extract (a, 3); } - -int extract_short_0 (vector short a) { return vec_extract (a, 0); } -int extract_short_3 (vector short a) { return vec_extract (a, 7); } - -int extract_schar_0 (vector signed char a) { return vec_extract (a, 0); } -int extract_schar_3 (vector signed char a) { return vec_extract (a, 15); } - -/* { dg-final { scan-assembler "vextractub" } } */ -/* { dg-final { scan-assembler "vextractuh" } } */ -/* { dg-final { scan-assembler "xxextractuw" } } */ -/* { dg-final { scan-assembler "mfvsr" } } */ -/* { dg-final { scan-assembler-not "stxvd2x" } } */ -/* { dg-final { scan-assembler-not "stxv" } } */ -/* { dg-final { scan-assembler-not "lwa" } } */ -/* { dg-final { scan-assembler-not "lwz" } } */ -/* { dg-final { scan-assembler-not "lha" } } */ -/* { dg-final { scan-assembler-not "lhz" } } */ +int +extract_int_0 (vector int a) +{ + int b = vec_extract (a, 0); + return b; +} + +int +extract_int_3 (vector int a) +{ + int b = vec_extract (a, 3); + return b; +} + +unsigned int +extract_uint_0 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 0); + return b; +} + +unsigned int +extract_uint_3 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 3); + return b; +} + +short +extract_short_0 (vector short a) +{ + short b = vec_extract (a, 0); + return b; +} + +short +extract_short_7 (vector short a) +{ + short b = vec_extract (a, 7); + return b; +} + +unsigned short +extract_ushort_0 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 0); + return b; +} + +unsigned short +extract_ushort_7 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 7); + return b; +} + +signed char +extract_schar_0 (vector signed char a) +{ + signed char b = vec_extract (a, 0); + return b; +} + +signed char +extract_schar_15 (vector signed char a) +{ + signed char b = vec_extract (a, 15); + return b; +} + +unsigned char +extract_uchar_0 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 0); + return b; +} + +unsigned char +extract_uchar_15 (vector unsigned char a) +{ + signed char b = vec_extract (a, 15); + return b; +} + +/* { dg-final { scan-assembler "vextub\[lr\]x " } } */ +/* { dg-final { scan-assembler "vextuh\[lr\]x " } } */ +/* { dg-final { scan-assembler "vextuw\[lr\]x " } } */ +/* { dg-final { scan-assembler "extsb " } } */ +/* { dg-final { scan-assembler "extsh " } } */ +/* { dg-final { scan-assembler "extsw " } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsr" } } */ +/* { dg-final { scan-assembler-not "stxvd2x " } } */ +/* { dg-final { scan-assembler-not "stxv " } } */ +/* { dg-final { scan-assembler-not "lwa " } } */ +/* { dg-final { scan-assembler-not "lwz " } } */ +/* { dg-final { scan-assembler-not "lha " } } */ +/* { dg-final { scan-assembler-not "lhz " } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-extract-3.c b/gcc/testsuite/gcc.target/powerpc/p9-extract-3.c new file mode 100644 index 00000000000..90b3eae83a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-extract-3.c @@ -0,0 +1,108 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +/* Test that under ISA 3.0 (-mcpu=power9), the compiler optimizes conversion to + double after a vec_extract to use the VEXTRACTU{B,H} or 
XXEXTRACTUW + instructions (which leaves the result in a vector register), and not the + VEXTU{B,H,W}{L,R}X instructions (which needs a direct move to do the floating + point conversion). */ + +#include + +double +fpcvt_int_0 (vector int a) +{ + int b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_int_3 (vector int a) +{ + int b = vec_extract (a, 3); + return (double)b; +} + +double +fpcvt_uint_0 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_uint_3 (vector unsigned int a) +{ + unsigned int b = vec_extract (a, 3); + return (double)b; +} + +double +fpcvt_short_0 (vector short a) +{ + short b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_short_7 (vector short a) +{ + short b = vec_extract (a, 7); + return (double)b; +} + +double +fpcvt_ushort_0 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_ushort_7 (vector unsigned short a) +{ + unsigned short b = vec_extract (a, 7); + return (double)b; +} + +double +fpcvt_schar_0 (vector signed char a) +{ + signed char b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_schar_15 (vector signed char a) +{ + signed char b = vec_extract (a, 15); + return (double)b; +} + +double +fpcvt_uchar_0 (vector unsigned char a) +{ + unsigned char b = vec_extract (a, 0); + return (double)b; +} + +double +fpcvt_uchar_15 (vector unsigned char a) +{ + signed char b = vec_extract (a, 15); + return (double)b; +} + +/* { dg-final { scan-assembler "vextractu\[bh\] " } } */ +/* { dg-final { scan-assembler "vexts\[bh\]2d " } } */ +/* { dg-final { scan-assembler "vspltw " } } */ +/* { dg-final { scan-assembler "xscvsxddp " } } */ +/* { dg-final { scan-assembler "xvcvsxwdp " } } */ +/* { dg-final { scan-assembler "xvcvuxwdp " } } */ +/* { dg-final { scan-assembler-not "exts\[bhw\] " } } */ +/* { dg-final { scan-assembler-not "stxv" } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsrd " } } */ +/* { dg-final { scan-assembler-not "m\[ft\]vsrw\[az\] " } } */ +/* { dg-final { scan-assembler-not "l\[hw\]\[az\] " } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c new file mode 100644 index 00000000000..fa1ba754705 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +vector signed char +vins_v4si (vector int *vi, vector signed char *vc) +{ + return vec_vinsert4b (*vi, *vc, 1); +} + +vector unsigned char +vins_di (long di, vector unsigned char *vc) +{ + return vec_vinsert4b (di, *vc, 2); +} + +vector char +vins_di2 (long *p_di, vector char *vc) +{ + return vec_vinsert4b (*p_di, *vc, 3); +} + +vector unsigned char +vins_di0 (vector unsigned char *vc) +{ + return vec_vinsert4b (0, *vc, 4); +} + +long +vext (vector signed char *vc) +{ + return vec_vextract4b (*vc, 5); +} + +/* { dg-final { scan-assembler "xxextractuw\|vextuw\[lr\]x" } } */ +/* { dg-final { scan-assembler "xxinsertw" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c b/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c new file mode 100644 index 00000000000..653f6e0c5e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c @@ -0,0 +1,30 @@ +/* { dg-do 
compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +vector signed char +ins_v4si (vector int vi, vector signed char vc) +{ + return vec_vinsert4b (vi, vc, 12); /* { dg-error "vec_vinsert4b" } */ +} + +vector unsigned char +ins_di (long di, vector unsigned char vc, long n) +{ + return vec_vinsert4b (di, vc, n); /* { dg-error "vec_vinsert4b" } */ +} + +long +vext1 (vector signed char vc) +{ + return vec_vextract4b (vc, 12); /* { dg-error "vec_vextract4b" } */ +} + +long +vextn (vector unsigned char vc, long n) +{ + return vec_vextract4b (vc, n); /* { dg-error "vec_vextract4b" } */ +} diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c new file mode 100644 index 00000000000..cfcb8d7753e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qi-df.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE signed char +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 16 +#define INITIAL \ + { 10, -20, 30, -40, 50, -60, 70, -80, \ + 90, -100, 110, -120, 30, -40, 50, -60 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c new file mode 100644 index 00000000000..514de253a75 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v16qiu-df.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned char +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 16 +#define INITIAL \ + { 1, 2, 3, 4, 5, 6, 7, 8, 240, 241, 242, 243, 244, 245, 246, 247 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c index a8b255cc556..34c9b00b933 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v2df.c @@ -3,6 +3,8 @@ /* { dg-options "-O2 -mvsx" } */ #define TYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) #define ELEMENTS 2 #define INITIAL { 10.0, -20.0 } diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c index 93e8b10680a..518ff4ab73d 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4sf.c @@ -3,6 +3,8 @@ /* { dg-options "-O2 -mvsx" } */ #define TYPE float +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) #define ELEMENTS 4 #define INITIAL { 10.0f, -20.0f, 30.0f, -40.0f } diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c new file mode 100644 index 00000000000..afcc7de856d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4si-df.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE int +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) 
+#define ELEMENTS 4 +#define INITIAL { 10, -20, 30, -40 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c new file mode 100644 index 00000000000..071e492eb39 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v4siu-df.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned int +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 4 +#define INITIAL { 1, 2, 0xff03, 0xff04 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c new file mode 100644 index 00000000000..4a15fb6bfd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hi-df.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE short +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 8 +#define INITIAL { 10, -20, 30, -40, 50, -60, 70, 80 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c new file mode 100644 index 00000000000..ff45c172bf0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-v8hiu-df.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define TYPE unsigned short +#define RTYPE double +#define FAIL_FORMAT "%g" +#define FAIL_CAST(X) ((double)(X)) +#define ELEMENTS 8 +#define INITIAL { 1, 2, 3, 4, 0xf1, 0xf2, 0xf3, 0xf4 } + +#include "vec-extract.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract.h b/gcc/testsuite/gcc.target/powerpc/vec-extract.h index fcb4a403d61..a6f2dc48b3d 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-extract.h +++ b/gcc/testsuite/gcc.target/powerpc/vec-extract.h @@ -2,16 +2,53 @@ #include #include +#ifndef RTYPE +#define RTYPE TYPE +#endif + +#ifdef DO_TRACE +#include + +#define TRACE(STRING, NUM) \ +do \ + { \ + fprintf (stderr, "%s%s: %2d\n", (NUM == 0) ? 
"\n" : "", \ + STRING, (int)NUM); \ + fflush (stderr); \ + } \ +while (0) + +#ifndef FAIL_FORMAT +#define FAIL_FORMAT "%ld" +#define FAIL_CAST(X) ((long)(X)) +#endif + +#define FAIL(EXP, GOT) \ +do \ + { \ + fprintf (stderr, "Expected: " FAIL_FORMAT ", got " FAIL_FORMAT "\n", \ + FAIL_CAST (EXP), FAIL_CAST (GOT)); \ + fflush (stderr); \ + abort (); \ + } \ +while (0) + +#else +#define TRACE(STRING, NUM) +#define FAIL(EXP, GOT) abort () +#endif + +static void check (RTYPE, RTYPE) __attribute__((__noinline__)); +static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__)); +static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__)); + static void -check (TYPE expected, TYPE got) +check (RTYPE expected, RTYPE got) { if (expected != got) - abort (); + FAIL (expected, got); } -static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__)); -static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__)); - static vector TYPE deoptimize (vector TYPE a) { @@ -29,116 +66,116 @@ deoptimize_ptr (vector TYPE *p) /* Tests for the normal case of vec_extract where the vector is in a register and returning the result in a register as a return value. */ -TYPE +RTYPE get_auto_n (vector TYPE a, ssize_t n) { - return vec_extract (a, n); + return (RTYPE) vec_extract (a, n); } -TYPE +RTYPE get_auto_0 (vector TYPE a) { - return vec_extract (a, 0); + return (RTYPE) vec_extract (a, 0); } -TYPE +RTYPE get_auto_1 (vector TYPE a) { - return vec_extract (a, 1); + return (RTYPE) vec_extract (a, 1); } #if ELEMENTS >= 4 -TYPE +RTYPE get_auto_2 (vector TYPE a) { - return vec_extract (a, 2); + return (RTYPE) vec_extract (a, 2); } -TYPE +RTYPE get_auto_3 (vector TYPE a) { - return vec_extract (a, 3); + return (RTYPE) vec_extract (a, 3); } #if ELEMENTS >= 8 -TYPE +RTYPE get_auto_4 (vector TYPE a) { - return vec_extract (a, 4); + return (RTYPE) vec_extract (a, 4); } -TYPE +RTYPE get_auto_5 (vector TYPE a) { - return vec_extract (a, 5); + return (RTYPE) vec_extract (a, 5); } -TYPE +RTYPE get_auto_6 (vector TYPE a) { - return vec_extract (a, 6); + return (RTYPE) vec_extract (a, 6); } -TYPE +RTYPE get_auto_7 (vector TYPE a) { - return vec_extract (a, 7); + return (RTYPE) vec_extract (a, 7); } #if ELEMENTS >= 16 -TYPE +RTYPE get_auto_8 (vector TYPE a) { - return vec_extract (a, 8); + return (RTYPE) vec_extract (a, 8); } -TYPE +RTYPE get_auto_9 (vector TYPE a) { - return vec_extract (a, 9); + return (RTYPE) vec_extract (a, 9); } -TYPE +RTYPE get_auto_10 (vector TYPE a) { - return vec_extract (a, 10); + return (RTYPE) vec_extract (a, 10); } -TYPE +RTYPE get_auto_11 (vector TYPE a) { - return vec_extract (a, 11); + return (RTYPE) vec_extract (a, 11); } -TYPE +RTYPE get_auto_12 (vector TYPE a) { - return vec_extract (a, 12); + return (RTYPE) vec_extract (a, 12); } -TYPE +RTYPE get_auto_13 (vector TYPE a) { - return vec_extract (a, 13); + return (RTYPE) vec_extract (a, 13); } -TYPE +RTYPE get_auto_14 (vector TYPE a) { - return vec_extract (a, 14); + return (RTYPE) vec_extract (a, 14); } -TYPE +RTYPE get_auto_15 (vector TYPE a) { - return vec_extract (a, 15); + return (RTYPE) vec_extract (a, 15); } #endif #endif #endif -typedef TYPE (*auto_func_type) (vector TYPE); +typedef RTYPE (*auto_func_type) (vector TYPE); static auto_func_type get_auto_const[] = { get_auto_0, @@ -173,7 +210,10 @@ do_auto (vector TYPE a) size_t i; for (i = 0; i < sizeof (get_auto_const) / sizeof (get_auto_const[0]); i++) - check (get_auto_n (a, i), (get_auto_const[i]) (a)); + { + TRACE ("auto", i); + 
check (get_auto_n (a, i), (get_auto_const[i]) (a)); + } } @@ -182,115 +222,115 @@ do_auto (vector TYPE a) in the right position to use a scalar store). */ void -get_store_n (TYPE *p, vector TYPE a, ssize_t n) +get_store_n (RTYPE *p, vector TYPE a, ssize_t n) { - *p = vec_extract (a, n); + *p = (RTYPE) vec_extract (a, n); } void -get_store_0 (TYPE *p, vector TYPE a) +get_store_0 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 0); + *p = (RTYPE) vec_extract (a, 0); } void -get_store_1 (TYPE *p, vector TYPE a) +get_store_1 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 1); + *p = (RTYPE) vec_extract (a, 1); } #if ELEMENTS >= 4 void -get_store_2 (TYPE *p, vector TYPE a) +get_store_2 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 2); + *p = (RTYPE) vec_extract (a, 2); } void -get_store_3 (TYPE *p, vector TYPE a) +get_store_3 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 3); + *p = (RTYPE) vec_extract (a, 3); } #if ELEMENTS >= 8 void -get_store_4 (TYPE *p, vector TYPE a) +get_store_4 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 4); + *p = (RTYPE) vec_extract (a, 4); } void -get_store_5 (TYPE *p, vector TYPE a) +get_store_5 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 5); + *p = (RTYPE) vec_extract (a, 5); } void -get_store_6 (TYPE *p, vector TYPE a) +get_store_6 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 6); + *p = (RTYPE) vec_extract (a, 6); } void -get_store_7 (TYPE *p, vector TYPE a) +get_store_7 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 7); + *p = (RTYPE) vec_extract (a, 7); } #if ELEMENTS >= 16 void -get_store_8 (TYPE *p, vector TYPE a) +get_store_8 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 8); + *p = (RTYPE) vec_extract (a, 8); } void -get_store_9 (TYPE *p, vector TYPE a) +get_store_9 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 9); + *p = (RTYPE) vec_extract (a, 9); } void -get_store_10 (TYPE *p, vector TYPE a) +get_store_10 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 10); + *p = (RTYPE) vec_extract (a, 10); } void -get_store_11 (TYPE *p, vector TYPE a) +get_store_11 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 11); + *p = (RTYPE) vec_extract (a, 11); } void -get_store_12 (TYPE *p, vector TYPE a) +get_store_12 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 12); + *p = (RTYPE) vec_extract (a, 12); } void -get_store_13 (TYPE *p, vector TYPE a) +get_store_13 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 13); + *p = (RTYPE) vec_extract (a, 13); } void -get_store_14 (TYPE *p, vector TYPE a) +get_store_14 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 14); + *p = (RTYPE) vec_extract (a, 14); } void -get_store_15 (TYPE *p, vector TYPE a) +get_store_15 (RTYPE *p, vector TYPE a) { - *p = vec_extract (a, 15); + *p = (RTYPE) vec_extract (a, 15); } #endif #endif #endif -typedef void (*store_func_type) (TYPE *, vector TYPE); +typedef void (*store_func_type) (RTYPE *, vector TYPE); static store_func_type get_store_const[] = { get_store_0, @@ -323,10 +363,11 @@ void do_store (vector TYPE a) { size_t i; - TYPE result_var, result_const; + RTYPE result_var, result_const; for (i = 0; i < sizeof (get_store_const) / sizeof (get_store_const[0]); i++) { + TRACE ("store", i); get_store_n (&result_var, a, i); (get_store_const[i]) (&result_const, a); check (result_var, result_const); @@ -337,116 +378,116 @@ do_store (vector TYPE a) /* Tests for vec_extract where the vector comes from memory (the compiler can optimize this by doing a scalar load without having to load the whole vector). 
 */
-TYPE
+RTYPE
 get_pointer_n (vector TYPE *p, ssize_t n)
 {
-  return vec_extract (*p, n);
+  return (RTYPE) vec_extract (*p, n);
 }

-TYPE
+RTYPE
 get_pointer_0 (vector TYPE *p)
 {
-  return vec_extract (*p, 0);
+  return (RTYPE) vec_extract (*p, 0);
 }

-TYPE
+RTYPE
 get_pointer_1 (vector TYPE *p)
 {
-  return vec_extract (*p, 1);
+  return (RTYPE) vec_extract (*p, 1);
 }

 #if ELEMENTS >= 4
-TYPE
+RTYPE
 get_pointer_2 (vector TYPE *p)
 {
-  return vec_extract (*p, 2);
+  return (RTYPE) vec_extract (*p, 2);
 }

-TYPE
+RTYPE
 get_pointer_3 (vector TYPE *p)
 {
-  return vec_extract (*p, 3);
+  return (RTYPE) vec_extract (*p, 3);
 }

 #if ELEMENTS >= 8
-TYPE
+RTYPE
 get_pointer_4 (vector TYPE *p)
 {
-  return vec_extract (*p, 4);
+  return (RTYPE) vec_extract (*p, 4);
 }

-static TYPE
+RTYPE
 get_pointer_5 (vector TYPE *p)
 {
-  return vec_extract (*p, 5);
+  return (RTYPE) vec_extract (*p, 5);
 }

-TYPE
+RTYPE
 get_pointer_6 (vector TYPE *p)
 {
-  return vec_extract (*p, 6);
+  return (RTYPE) vec_extract (*p, 6);
 }

-TYPE
+RTYPE
 get_pointer_7 (vector TYPE *p)
 {
-  return vec_extract (*p, 7);
+  return (RTYPE) vec_extract (*p, 7);
 }

 #if ELEMENTS >= 16
-TYPE
+RTYPE
 get_pointer_8 (vector TYPE *p)
 {
-  return vec_extract (*p, 8);
+  return (RTYPE) vec_extract (*p, 8);
 }

-TYPE
+RTYPE
 get_pointer_9 (vector TYPE *p)
 {
-  return vec_extract (*p, 9);
+  return (RTYPE) vec_extract (*p, 9);
 }

-TYPE
+RTYPE
 get_pointer_10 (vector TYPE *p)
 {
-  return vec_extract (*p, 10);
+  return (RTYPE) vec_extract (*p, 10);
 }

-TYPE
+RTYPE
 get_pointer_11 (vector TYPE *p)
 {
-  return vec_extract (*p, 11);
+  return (RTYPE) vec_extract (*p, 11);
 }

-TYPE
+RTYPE
 get_pointer_12 (vector TYPE *p)
 {
-  return vec_extract (*p, 12);
+  return (RTYPE) vec_extract (*p, 12);
 }

-TYPE
+RTYPE
 get_pointer_13 (vector TYPE *p)
 {
-  return vec_extract (*p, 13);
+  return (RTYPE) vec_extract (*p, 13);
 }

-TYPE
+RTYPE
 get_pointer_14 (vector TYPE *p)
 {
-  return vec_extract (*p, 14);
+  return (RTYPE) vec_extract (*p, 14);
 }

-TYPE
+RTYPE
 get_pointer_15 (vector TYPE *p)
 {
-  return vec_extract (*p, 15);
+  return (RTYPE) vec_extract (*p, 15);
 }
 #endif
 #endif
 #endif

-typedef TYPE (*pointer_func_type) (vector TYPE *);
+typedef RTYPE (*pointer_func_type) (vector TYPE *);

 static pointer_func_type get_pointer_const[] = {
   get_pointer_0,
@@ -481,7 +522,10 @@ do_pointer (vector TYPE *p)
   size_t i;
   for (i = 0; i < sizeof (get_pointer_const) / sizeof (get_pointer_const[0]);
        i++)
-    check (get_pointer_n (p, i), (get_pointer_const[i]) (p));
+    {
+      TRACE ("pointer", i);
+      check (get_pointer_n (p, i), (get_pointer_const[i]) (p));
+    }
 }

@@ -489,116 +533,116 @@ do_pointer (vector TYPE *p)
    operation.  This is to make sure that if the compiler optimizes
    vec_extract from memory to be a scalar load, the address is correctly
    adjusted.  */
-TYPE
+RTYPE
 get_indexed_n (vector TYPE *p, size_t x, ssize_t n)
 {
-  return vec_extract (p[x], n);
+  return (RTYPE) vec_extract (p[x], n);
 }

-TYPE
+RTYPE
 get_indexed_0 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 0);
+  return (RTYPE) vec_extract (p[x], 0);
 }

-TYPE
+RTYPE
 get_indexed_1 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 1);
+  return (RTYPE) vec_extract (p[x], 1);
 }

 #if ELEMENTS >= 4
-TYPE
+RTYPE
 get_indexed_2 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 2);
+  return (RTYPE) vec_extract (p[x], 2);
 }

-TYPE
+RTYPE
 get_indexed_3 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 3);
+  return (RTYPE) vec_extract (p[x], 3);
 }

 #if ELEMENTS >= 8
-TYPE
+RTYPE
 get_indexed_4 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 4);
+  return (RTYPE) vec_extract (p[x], 4);
 }

-static TYPE
+RTYPE
 get_indexed_5 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 5);
+  return (RTYPE) vec_extract (p[x], 5);
 }

-TYPE
+RTYPE
 get_indexed_6 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 6);
+  return (RTYPE) vec_extract (p[x], 6);
 }

-TYPE
+RTYPE
 get_indexed_7 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 7);
+  return (RTYPE) vec_extract (p[x], 7);
 }

 #if ELEMENTS >= 16
-TYPE
+RTYPE
 get_indexed_8 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 8);
+  return (RTYPE) vec_extract (p[x], 8);
 }

-TYPE
+RTYPE
 get_indexed_9 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 9);
+  return (RTYPE) vec_extract (p[x], 9);
 }

-TYPE
+RTYPE
 get_indexed_10 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 10);
+  return (RTYPE) vec_extract (p[x], 10);
 }

-TYPE
+RTYPE
 get_indexed_11 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 11);
+  return (RTYPE) vec_extract (p[x], 11);
 }

-TYPE
+RTYPE
 get_indexed_12 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 12);
+  return (RTYPE) vec_extract (p[x], 12);
 }

-TYPE
+RTYPE
 get_indexed_13 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 13);
+  return (RTYPE) vec_extract (p[x], 13);
 }

-TYPE
+RTYPE
 get_indexed_14 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 14);
+  return (RTYPE) vec_extract (p[x], 14);
 }

-TYPE
+RTYPE
 get_indexed_15 (vector TYPE *p, size_t x)
 {
-  return vec_extract (p[x], 15);
+  return (RTYPE) vec_extract (p[x], 15);
 }
 #endif
 #endif
 #endif

-typedef TYPE (*indexed_func_type) (vector TYPE *, size_t);
+typedef RTYPE (*indexed_func_type) (vector TYPE *, size_t);

 static indexed_func_type get_indexed_const[] = {
   get_indexed_0,
@@ -633,7 +677,10 @@ do_indexed (vector TYPE *p, size_t x)
   size_t i;
   for (i = 0; i < sizeof (get_indexed_const) / sizeof (get_indexed_const[0]);
        i++)
-    check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x));
+    {
+      TRACE ("indexed", i);
+      check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x));
+    }
 }

@@ -641,116 +688,116 @@ do_indexed (vector TYPE *p, size_t x)
    with a pointer and a constant offset.  This will occur in ISA 3.0 which
    added d-form memory addressing for vectors.  */
-TYPE
+RTYPE
 get_ptr_plus1_n (vector TYPE *p, ssize_t n)
 {
-  return vec_extract (p[1], n);
+  return (RTYPE) vec_extract (p[1], n);
 }

-TYPE
+RTYPE
 get_ptr_plus1_0 (vector TYPE *p)
 {
-  return vec_extract (p[1], 0);
+  return (RTYPE) vec_extract (p[1], 0);
 }

-TYPE
+RTYPE
 get_ptr_plus1_1 (vector TYPE *p)
 {
-  return vec_extract (p[1], 1);
+  return (RTYPE) vec_extract (p[1], 1);
 }

 #if ELEMENTS >= 4
-TYPE
+RTYPE
 get_ptr_plus1_2 (vector TYPE *p)
 {
-  return vec_extract (p[1], 2);
+  return (RTYPE) vec_extract (p[1], 2);
 }

-TYPE
+RTYPE
 get_ptr_plus1_3 (vector TYPE *p)
 {
-  return vec_extract (p[1], 3);
+  return (RTYPE) vec_extract (p[1], 3);
 }

 #if ELEMENTS >= 8
-TYPE
+RTYPE
 get_ptr_plus1_4 (vector TYPE *p)
 {
-  return vec_extract (p[1], 4);
+  return (RTYPE) vec_extract (p[1], 4);
 }

-static TYPE
+RTYPE
 get_ptr_plus1_5 (vector TYPE *p)
 {
-  return vec_extract (p[1], 5);
+  return (RTYPE) vec_extract (p[1], 5);
 }

-TYPE
+RTYPE
 get_ptr_plus1_6 (vector TYPE *p)
 {
-  return vec_extract (p[1], 6);
+  return (RTYPE) vec_extract (p[1], 6);
 }

-TYPE
+RTYPE
 get_ptr_plus1_7 (vector TYPE *p)
 {
-  return vec_extract (p[1], 7);
+  return (RTYPE) vec_extract (p[1], 7);
 }

 #if ELEMENTS >= 16
-TYPE
+RTYPE
 get_ptr_plus1_8 (vector TYPE *p)
 {
-  return vec_extract (p[1], 8);
+  return (RTYPE) vec_extract (p[1], 8);
 }

-TYPE
+RTYPE
 get_ptr_plus1_9 (vector TYPE *p)
 {
-  return vec_extract (p[1], 9);
+  return (RTYPE) vec_extract (p[1], 9);
 }

-TYPE
+RTYPE
 get_ptr_plus1_10 (vector TYPE *p)
 {
-  return vec_extract (p[1], 10);
+  return (RTYPE) vec_extract (p[1], 10);
 }

-TYPE
+RTYPE
 get_ptr_plus1_11 (vector TYPE *p)
 {
-  return vec_extract (p[1], 11);
+  return (RTYPE) vec_extract (p[1], 11);
 }

-TYPE
+RTYPE
 get_ptr_plus1_12 (vector TYPE *p)
 {
-  return vec_extract (p[1], 12);
+  return (RTYPE) vec_extract (p[1], 12);
 }

-TYPE
+RTYPE
 get_ptr_plus1_13 (vector TYPE *p)
 {
-  return vec_extract (p[1], 13);
+  return (RTYPE) vec_extract (p[1], 13);
 }

-TYPE
+RTYPE
 get_ptr_plus1_14 (vector TYPE *p)
 {
-  return vec_extract (p[1], 14);
+  return (RTYPE) vec_extract (p[1], 14);
 }

-TYPE
+RTYPE
 get_ptr_plus1_15 (vector TYPE *p)
 {
-  return vec_extract (p[1], 15);
+  return (RTYPE) vec_extract (p[1], 15);
 }
 #endif
 #endif
 #endif

-typedef TYPE (*pointer_func_type) (vector TYPE *);
+typedef RTYPE (*pointer_func_type) (vector TYPE *);

 static pointer_func_type get_ptr_plus1_const[] = {
   get_ptr_plus1_0,
@@ -785,7 +832,10 @@ do_ptr_plus1 (vector TYPE *p)
   size_t i;
   for (i = 0; i < sizeof (get_ptr_plus1_const) / sizeof (get_ptr_plus1_const[0]);
        i++)
-    check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p));
+    {
+      TRACE ("ptr_plus1", i);
+      check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p));
+    }
 }

@@ -793,116 +843,116 @@ do_ptr_plus1 (vector TYPE *p)

 static vector TYPE s;

-TYPE
+RTYPE
 get_static_n (ssize_t n)
 {
-  return vec_extract (s, n);
+  return (RTYPE) vec_extract (s, n);
 }

-TYPE
+RTYPE
 get_static_0 (void)
 {
-  return vec_extract (s, 0);
+  return (RTYPE) vec_extract (s, 0);
 }

-TYPE
+RTYPE
 get_static_1 (void)
 {
-  return vec_extract (s, 1);
+  return (RTYPE) vec_extract (s, 1);
 }

 #if ELEMENTS >= 4
-TYPE
+RTYPE
 get_static_2 (void)
 {
-  return vec_extract (s, 2);
+  return (RTYPE) vec_extract (s, 2);
 }

-TYPE
+RTYPE
 get_static_3 (void)
 {
-  return vec_extract (s, 3);
+  return (RTYPE) vec_extract (s, 3);
 }

 #if ELEMENTS >= 8
-TYPE
+RTYPE
 get_static_4 (void)
 {
-  return vec_extract (s, 4);
+  return (RTYPE) vec_extract (s, 4);
 }

-TYPE
+RTYPE
 get_static_5 (void)
 {
-  return vec_extract (s, 5);
+  return (RTYPE) vec_extract (s, 5);
 }

-TYPE
+RTYPE
 get_static_6 (void)
 {
-  return vec_extract (s, 6);
+  return (RTYPE) vec_extract (s, 6);
 }

-TYPE
+RTYPE
 get_static_7 (void)
 {
-  return vec_extract (s, 7);
+  return (RTYPE) vec_extract (s, 7);
 }

 #if ELEMENTS >= 16
-TYPE
+RTYPE
 get_static_8 (void)
 {
-  return vec_extract (s, 8);
+  return (RTYPE) vec_extract (s, 8);
 }

-TYPE
+RTYPE
 get_static_9 (void)
 {
-  return vec_extract (s, 9);
+  return (RTYPE) vec_extract (s, 9);
 }

-TYPE
+RTYPE
 get_static_10 (void)
 {
-  return vec_extract (s, 10);
+  return (RTYPE) vec_extract (s, 10);
 }

-TYPE
+RTYPE
 get_static_11 (void)
 {
-  return vec_extract (s, 11);
+  return (RTYPE) vec_extract (s, 11);
 }

-TYPE
+RTYPE
 get_static_12 (void)
 {
-  return vec_extract (s, 12);
+  return (RTYPE) vec_extract (s, 12);
 }

-TYPE
+RTYPE
 get_static_13 (void)
 {
-  return vec_extract (s, 13);
+  return (RTYPE) vec_extract (s, 13);
 }

-TYPE
+RTYPE
 get_static_14 (void)
 {
-  return vec_extract (s, 14);
+  return (RTYPE) vec_extract (s, 14);
 }

-TYPE
+RTYPE
 get_static_15 (void)
 {
-  return vec_extract (s, 15);
+  return (RTYPE) vec_extract (s, 15);
 }
 #endif
 #endif
 #endif

-typedef TYPE (*static_func_type) (void);
+typedef RTYPE (*static_func_type) (void);

 static static_func_type get_static_const[] = {
   get_static_0,
@@ -937,7 +987,10 @@ do_static (void)
   size_t i;
   for (i = 0; i < sizeof (get_static_const) / sizeof (get_static_const[0]);
        i++)
-    check (get_static_n (i), (get_static_const[i]) ());
+    {
+      TRACE ("static", i);
+      check (get_static_n (i), (get_static_const[i]) ());
+    }
 }

@@ -945,116 +998,116 @@ do_static (void)

 vector TYPE g;

-TYPE
+RTYPE
 get_global_n (ssize_t n)
 {
-  return vec_extract (g, n);
+  return (RTYPE) vec_extract (g, n);
 }

-TYPE
+RTYPE
 get_global_0 (void)
 {
-  return vec_extract (g, 0);
+  return (RTYPE) vec_extract (g, 0);
 }

-TYPE
+RTYPE
 get_global_1 (void)
 {
-  return vec_extract (g, 1);
+  return (RTYPE) vec_extract (g, 1);
 }

 #if ELEMENTS >= 4
-TYPE
+RTYPE
 get_global_2 (void)
 {
-  return vec_extract (g, 2);
+  return (RTYPE) vec_extract (g, 2);
 }

-TYPE
+RTYPE
 get_global_3 (void)
 {
-  return vec_extract (g, 3);
+  return (RTYPE) vec_extract (g, 3);
 }

 #if ELEMENTS >= 8
-TYPE
+RTYPE
 get_global_4 (void)
 {
-  return vec_extract (g, 4);
+  return (RTYPE) vec_extract (g, 4);
 }

-TYPE
+RTYPE
 get_global_5 (void)
 {
-  return vec_extract (g, 5);
+  return (RTYPE) vec_extract (g, 5);
 }

-TYPE
+RTYPE
 get_global_6 (void)
 {
-  return vec_extract (g, 6);
+  return (RTYPE) vec_extract (g, 6);
 }

-TYPE
+RTYPE
 get_global_7 (void)
 {
-  return vec_extract (g, 7);
+  return (RTYPE) vec_extract (g, 7);
 }

 #if ELEMENTS >= 16
-TYPE
+RTYPE
 get_global_8 (void)
 {
-  return vec_extract (g, 8);
+  return (RTYPE) vec_extract (g, 8);
 }

-TYPE
+RTYPE
 get_global_9 (void)
 {
-  return vec_extract (g, 9);
+  return (RTYPE) vec_extract (g, 9);
 }

-TYPE
+RTYPE
 get_global_10 (void)
 {
-  return vec_extract (g, 10);
+  return (RTYPE) vec_extract (g, 10);
 }

-TYPE
+RTYPE
 get_global_11 (void)
 {
-  return vec_extract (g, 11);
+  return (RTYPE) vec_extract (g, 11);
 }

-TYPE
+RTYPE
 get_global_12 (void)
 {
-  return vec_extract (g, 12);
+  return (RTYPE) vec_extract (g, 12);
 }

-TYPE
+RTYPE
 get_global_13 (void)
 {
-  return vec_extract (g, 13);
+  return (RTYPE) vec_extract (g, 13);
 }

-TYPE
+RTYPE
 get_global_14 (void)
 {
-  return vec_extract (g, 14);
+  return (RTYPE) vec_extract (g, 14);
 }

-TYPE
+RTYPE
 get_global_15 (void)
 {
-  return vec_extract (g, 15);
+  return (RTYPE) vec_extract (g, 15);
 }
 #endif
 #endif
 #endif

-typedef TYPE (*global_func_type) (void);
+typedef RTYPE (*global_func_type) (void);

 static global_func_type get_global_const[] = {
   get_global_0,
@@ -1089,7 +1142,10 @@ do_global (void)
   size_t i;
   for (i = 0; i < sizeof (get_global_const) / sizeof (get_global_const[0]);
        i++)
-    check (get_global_n (i), (get_global_const[i]) ());
+    {
+      TRACE ("global", i);
+      check (get_global_n (i), (get_global_const[i]) ());
+    }
 }

-- 
2.30.2
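The note below is not part of the patch; it sits after the "-- " trailer, so git am
ignores it.  It is a minimal standalone sketch of the pattern the updated
vec-extract.h tests exercise: extract a vector element at a run-time index,
convert it to a different result type (the RTYPE cast above), and optionally
trace each access to stderr.  The TRACE definition and the helper name
get_element_as_double are stand-ins invented for this illustration, not
definitions taken from the patch; something like gcc -mcpu=power9 -maltivec on
a POWER target is assumed.

/* Illustrative sketch only; this TRACE is a stand-in, not the macro the
   patch adds to vec-extract.h.  */
#include <altivec.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#ifdef DO_TRACE
#define TRACE(NAME, N) fprintf (stderr, "%s: element %ld\n", NAME, (long) (N))
#else
#define TRACE(NAME, N) ((void) 0)
#endif

/* Extract element N of a V4SI vector with a variable index and widen it to
   double, mirroring the (RTYPE) vec_extract (...) idiom in the tests.  */
static double
get_element_as_double (vector int v, ssize_t n)
{
  return (double) vec_extract (v, n);
}

int
main (void)
{
  vector int v = { 10, 20, 30, 40 };
  ssize_t i;

  for (i = 0; i < 4; i++)
    {
      TRACE ("global", i);
      if (get_element_as_double (v, i) != (double) (10 * (i + 1)))
	abort ();
    }

  return 0;
}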