From 5998f1bbeb336d0b9aee86726291269fd8bb6a66 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Fri, 12 Jun 2020 10:35:31 -0500 Subject: [PATCH] rs6000 Add vector blend, permute builtin support GCC maintainers: The following patch adds support for the vec_blendv and vec_permx builtins. The patch has been compiled and tested on powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64le-unknown-linux-gnu (Power 9 LE) with no regression errors. The test cases were compiled on a Power 9 system and then tested on Mambo. Carl Love rs6000 RFC2609 vector blend, permute instructions gcc/ChangeLog 2020-08-04 Carl Love * config/rs6000/altivec.h (vec_blendv, vec_permx): Add define. * config/rs6000/altivec.md (UNSPEC_XXBLEND, UNSPEC_XXPERMX): New unspecs. (VM3): New define_mode_iterator. (VM3_char): New define_mode_attr. (xxblend_<mode> mode VM3): New define_insn. (xxpermx): New define_expand. (xxpermx_inst): New define_insn. * config/rs6000/rs6000-builtin.def (VXXBLEND_V16QI, VXXBLEND_V8HI, VXXBLEND_V4SI, VXXBLEND_V2DI, VXXBLEND_V4SF, VXXBLEND_V2DF): New BU_P10V_3 definitions. (XXBLEND): New BU_P10_OVERLOAD_3 definition. (XXPERMX): New BU_P10_OVERLOAD_4 definition. * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): (P10_BUILTIN_VXXPERMX): Add if statement. * config/rs6000/rs6000-call.c (P10_BUILTIN_VXXBLEND_V16QI, P10_BUILTIN_VXXBLEND_V8HI, P10_BUILTIN_VXXBLEND_V4SI, P10_BUILTIN_VXXBLEND_V2DI, P10_BUILTIN_VXXBLEND_V4SF, P10_BUILTIN_VXXBLEND_V2DF, P10_BUILTIN_VXXPERMX): Define overloaded arguments. (rs6000_expand_quaternop_builtin): Add if case for CODE_FOR_xxpermx. (builtin_quaternary_function_type): Add v16uqi_type and xxpermx_type variables, add case statement for P10_BUILTIN_VXXPERMX. (builtin_function_type): Add case statements for P10_BUILTIN_VXXBLEND_V16QI, P10_BUILTIN_VXXBLEND_V8HI, P10_BUILTIN_VXXBLEND_V4SI, P10_BUILTIN_VXXBLEND_V2DI. * doc/extend.texi: Add documentation for vec_blendv and vec_permx. 
gcc/testsuite/ChangeLog 2020-08-04 Carl Love * gcc.target/powerpc/vec-blend-runnable.c: New test. * gcc.target/powerpc/vec-permute-ext-runnable.c: New test. --- gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/altivec.md | 71 +++++ gcc/config/rs6000/rs6000-builtin.def | 11 + gcc/config/rs6000/rs6000-c.c | 24 +- gcc/config/rs6000/rs6000-call.c | 95 ++++++ gcc/doc/extend.texi | 63 ++++ .../gcc.target/powerpc/vec-blend-runnable.c | 276 ++++++++++++++++ .../powerpc/vec-permute-ext-runnable.c | 294 ++++++++++++++++++ 8 files changed, 830 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 71249b50ef8..bf2240f16a2 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -712,6 +712,8 @@ __altivec_scalar_pred(vec_any_nle, #define vec_splati(a) __builtin_vec_xxspltiw (a) #define vec_splatid(a) __builtin_vec_xxspltid (a) #define vec_splati_ins(a, b, c) __builtin_vec_xxsplti32dx (a, b, c) +#define vec_blendv(a, b, c) __builtin_vec_xxblend (a, b, c) +#define vec_permx(a, b, c, d) __builtin_vec_xxpermx (a, b, c, d) #define vec_gnb(a, b) __builtin_vec_gnb (a, b) #define vec_clrl(a, b) __builtin_vec_clrl (a, b) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2a5a254e676..0a2e634d6b0 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -178,6 +178,8 @@ UNSPEC_XXSPLTIW UNSPEC_XXSPLTID UNSPEC_XXSPLTI32DX + UNSPEC_XXBLEND + UNSPEC_XXPERMX ]) (define_c_enum "unspecv" @@ -220,6 +222,21 @@ (KF "FLOAT128_VECTOR_P (KFmode)") (TF "FLOAT128_VECTOR_P (TFmode)")]) +;; Like VM2, just do char, short, int, long, float and double +(define_mode_iterator VM3 [V4SI + V8HI + V16QI + V4SF + V2DF + V2DI]) + +(define_mode_attr VM3_char [(V2DI "d") + (V4SI "w") + (V8HI "h") + (V16QI "b") + (V2DF "d") + (V4SF "w")]) + ;; 
Map the Vector convert single precision to double precision for integer ;; versus floating point (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")]) @@ -907,6 +924,60 @@ "xxsplti32dx %x0,%2,%3" [(set_attr "type" "vecsimple")]) +(define_insn "xxblend_<mode>" + [(set (match_operand:VM3 0 "register_operand" "=wa") + (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa") + (match_operand:VM3 2 "register_operand" "wa") + (match_operand:VM3 3 "register_operand" "wa")] + UNSPEC_XXBLEND))] + "TARGET_POWER10" + "xxblendv<VM3_char> %x0,%x1,%x2,%x3" + [(set_attr "type" "vecsimple")]) + +(define_expand "xxpermx" + [(set (match_operand:V2DI 0 "register_operand" "+wa") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa") + (match_operand:V2DI 2 "register_operand" "wa") + (match_operand:V16QI 3 "register_operand" "wa") + (match_operand:QI 4 "u8bit_cint_operand" "n")] + UNSPEC_XXPERMX))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_xxpermx_inst (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + else + { + /* Reverse value of byte element indexes by XORing with 0xFF. + Reverse the 32-byte section identifier match by subtracting bits [0:2] + of element from 7. 
*/ + int value = INTVAL (operands[4]); + rtx vreg = gen_reg_rtx (V16QImode); + + emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1))); + emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg)); + value = 7 - value; + emit_insn (gen_xxpermx_inst (operands[0], operands[2], + operands[1], operands[3], + GEN_INT (value))); + } + + DONE; +} + [(set_attr "type" "vecsimple")]) + +(define_insn "xxpermx_inst" + [(set (match_operand:V2DI 0 "register_operand" "+v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v") + (match_operand:QI 4 "u3bit_cint_operand" "n")] + UNSPEC_XXPERMX))] + "TARGET_POWER10" + "xxpermx %x0,%x1,%x2,%x3,%4" + [(set_attr "type" "vecsimple")]) + (define_expand "vstrir_<mode>" [(set (match_operand:VIshort 0 "altivec_register_operand") (unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")] diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index c44587ae8d5..f9f0fece549 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2782,6 +2782,15 @@ BU_P10V_1 (VXXSPLTID, "vxxspltidp", CONST, xxspltidp_v2df) BU_P10V_3 (VXXSPLTI32DX_V4SI, "vxxsplti32dx_v4si", CONST, xxsplti32dx_v4si) BU_P10V_3 (VXXSPLTI32DX_V4SF, "vxxsplti32dx_v4sf", CONST, xxsplti32dx_v4sf) +BU_P10V_3 (VXXBLEND_V16QI, "xxblend_v16qi", CONST, xxblend_v16qi) +BU_P10V_3 (VXXBLEND_V8HI, "xxblend_v8hi", CONST, xxblend_v8hi) +BU_P10V_3 (VXXBLEND_V4SI, "xxblend_v4si", CONST, xxblend_v4si) +BU_P10V_3 (VXXBLEND_V2DI, "xxblend_v2di", CONST, xxblend_v2di) +BU_P10V_3 (VXXBLEND_V4SF, "xxblend_v4sf", CONST, xxblend_v4sf) +BU_P10V_3 (VXXBLEND_V2DF, "xxblend_v2df", CONST, xxblend_v2df) + +BU_P10V_4 (VXXPERMX, "xxpermx", CONST, xxpermx) + BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi) BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi) @@ -2852,6 +2861,8 @@ 
BU_P10_OVERLOAD_1 (VEXTRACTM, "vextractm") BU_P10_OVERLOAD_1 (XXSPLTIW, "xxspltiw") BU_P10_OVERLOAD_1 (XXSPLTID, "xxspltid") BU_P10_OVERLOAD_3 (XXSPLTI32DX, "xxsplti32dx") +BU_P10_OVERLOAD_3 (XXBLEND, "xxblend") +BU_P10_OVERLOAD_4 (XXPERMX, "xxpermx") /* 1 argument crypto functions. */ BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox_v2di) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index cb7d34dcdb5..2fad3d94706 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -1800,22 +1800,34 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, unsupported_builtin = true; } } - else if (fcode == P10_BUILTIN_VEC_XXEVAL) + else if ((fcode == P10_BUILTIN_VEC_XXEVAL) + || (fcode == P10_BUILTIN_VXXPERMX)) { - /* Need to special case __builtin_vec_xxeval because this takes - 4 arguments, and the existing infrastructure handles no - more than three. */ + signed char op3_type; + + /* Need to special case P10_BUILTIN_VEC_XXEVAL and + P10_BUILTIN_VXXPERMX because they take 4 arguments and the + existing infrastructure only handles three. */ if (nargs != 4) { - error ("builtin %qs requires 4 arguments", - "__builtin_vec_xxeval"); + const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ? 
+ "__builtin_vec_xxeval":"__builtin_vec_xxpermx"; + + error ("builtin %qs requires 4 arguments", name); return error_mark_node; } + for ( ; desc->code == fcode; desc++) { + if (fcode == P10_BUILTIN_VEC_XXEVAL) + op3_type = desc->op3; + else /* P10_BUILTIN_VXXPERMX */ + op3_type = RS6000_BTI_V16QI; + if (rs6000_builtin_type_compatible (types[0], desc->op1) && rs6000_builtin_type_compatible (types[1], desc->op2) && rs6000_builtin_type_compatible (types[2], desc->op3) + && rs6000_builtin_type_compatible (types[2], op3_type) && rs6000_builtin_type_compatible (types[3], RS6000_BTI_UINTSI)) { diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 838f518f578..189497efb45 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5569,6 +5569,39 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + /* The overloaded XXPERMX definitions are handled specially because the + fourth unsigned char operand is not encoded in this table. 
*/ + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, + RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_EXTRACTL, P10_BUILTIN_VEXTRACTBL, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI }, @@ -5710,6 +5743,37 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_XXSPLTI32DX, P10_BUILTIN_VXXSPLTI32DX_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_UINTQI, RS6000_BTI_float }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, + RS6000_BTI_unsigned_V16QI }, + { 
P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, + RS6000_BTI_unsigned_V8HI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, + RS6000_BTI_unsigned_V4SI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, + RS6000_BTI_unsigned_V2DI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, + RS6000_BTI_unsigned_V4SI }, + { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, + RS6000_BTI_unsigned_V2DI }, + { P10_BUILTIN_VEC_SRDB, P10_BUILTIN_VSRDB_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_UINTQI }, @@ -10162,6 +10226,19 @@ rs6000_expand_quaternop_builtin (enum insn_code icode, tree exp, rtx target) return CONST0_RTX (tmode); } } + + else if (icode == CODE_FOR_xxpermx) + { + /* Only allow 3-bit unsigned literals. 
*/ + STRIP_NOPS (arg3); + if (TREE_CODE (arg3) != INTEGER_CST + || TREE_INT_CST_LOW (arg3) & ~0x7) + { + error ("argument 4 must be a 3-bit unsigned literal"); + return CONST0_RTX (tmode); + } + } + else if (icode == CODE_FOR_vreplace_elt_v4si || icode == CODE_FOR_vreplace_elt_v4sf) { @@ -13847,12 +13924,17 @@ builtin_quaternary_function_type (machine_mode mode_ret, tree function_type = NULL; static tree v2udi_type = builtin_mode_to_type[V2DImode][1]; + static tree v16uqi_type = builtin_mode_to_type[V16QImode][1]; static tree uchar_type = builtin_mode_to_type[QImode][1]; static tree xxeval_type = build_function_type_list (v2udi_type, v2udi_type, v2udi_type, v2udi_type, uchar_type, NULL_TREE); + static tree xxpermx_type = + build_function_type_list (v2udi_type, v2udi_type, v2udi_type, + v16uqi_type, uchar_type, NULL_TREE); + switch (builtin) { case P10_BUILTIN_XXEVAL: @@ -13864,6 +13946,15 @@ builtin_quaternary_function_type (machine_mode mode_ret, function_type = xxeval_type; break; + case P10_BUILTIN_VXXPERMX: + gcc_assert ((mode_ret == V2DImode) + && (mode_arg0 == V2DImode) + && (mode_arg1 == V2DImode) + && (mode_arg2 == V16QImode) + && (mode_arg3 == QImode)); + function_type = xxpermx_type; + break; + default: /* A case for each quaternary built-in must be provided above. */ gcc_unreachable (); @@ -14059,6 +14150,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case P10_BUILTIN_VREPLACE_ELT_UV2DI: case P10_BUILTIN_VREPLACE_UN_UV4SI: case P10_BUILTIN_VREPLACE_UN_UV2DI: + case P10_BUILTIN_VXXBLEND_V16QI: + case P10_BUILTIN_VXXBLEND_V8HI: + case P10_BUILTIN_VXXBLEND_V4SI: + case P10_BUILTIN_VXXBLEND_V2DI: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 54a71c632e5..37a675aa2a5 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21228,6 +21228,69 @@ result. The other words of argument 1 are unchanged. 
@findex vec_splati_ins +Vector Blend Variable + +@smallexample +@exdent vector signed char vec_blendv (vector signed char, vector signed char, +vector unsigned char); +@exdent vector unsigned char vec_blendv (vector unsigned char, +vector unsigned char, vector unsigned char); +@exdent vector signed short vec_blendv (vector signed short, +vector signed short, vector unsigned short); +@exdent vector unsigned short vec_blendv (vector unsigned short, +vector unsigned short, vector unsigned short); +@exdent vector signed int vec_blendv (vector signed int, vector signed int, +vector unsigned int); +@exdent vector unsigned int vec_blendv (vector unsigned int, +vector unsigned int, vector unsigned int); +@exdent vector signed long long vec_blendv (vector signed long long, +vector signed long long, vector unsigned long long); +@exdent vector unsigned long long vec_blendv (vector unsigned long long, +vector unsigned long long, vector unsigned long long); +@exdent vector float vec_blendv (vector float, vector float, +vector unsigned int); +@exdent vector double vec_blendv (vector double, vector double, +vector unsigned long long); +@end smallexample + +Blend the first and second argument vectors according to the sign bits of the +corresponding elements of the third argument vector. This is similar to the +@code{vsel} and @code{xxsel} instructions but for bigger elements. 
+ +@findex vec_blendv + +Vector Permute Extended + +@smallexample +@exdent vector signed char vec_permx (vector signed char, vector signed char, +vector unsigned char, const int); +@exdent vector unsigned char vec_permx (vector unsigned char, +vector unsigned char, vector unsigned char, const int); +@exdent vector signed short vec_permx (vector signed short, +vector signed short, vector unsigned char, const int); +@exdent vector unsigned short vec_permx (vector unsigned short, +vector unsigned short, vector unsigned char, const int); +@exdent vector signed int vec_permx (vector signed int, vector signed int, +vector unsigned char, const int); +@exdent vector unsigned int vec_permx (vector unsigned int, +vector unsigned int, vector unsigned char, const int); +@exdent vector signed long long vec_permx (vector signed long long, +vector signed long long, vector unsigned char, const int); +@exdent vector unsigned long long vec_permx (vector unsigned long long, +vector unsigned long long, vector unsigned char, const int); +@exdent vector float vec_permx (vector float, vector float, +vector unsigned char, const int); +@exdent vector double vec_permx (vector double, vector double, +vector unsigned char, const int); +@end smallexample + +Perform a partial permute of the first two arguments, which form a 32-byte +section of an emulated vector up to 256 bytes wide, using the partial permute +control vector in the third argument. The fourth argument (constrained to +values of 0-7) identifies which 32-byte section of the emulated vector is +contained in the first two arguments. 
+@findex vec_permx + @smallexample @exdent vector unsigned long long int @exdent vec_pext (vector unsigned long long int, vector unsigned long long int) diff --git a/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c new file mode 100644 index 00000000000..0c3d47234ed --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-blend-runnable.c @@ -0,0 +1,276 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10" } */ +#include <altivec.h> + +#define DEBUG 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +extern void abort (void); + +int +main (int argc, char *argv []) +{ + int i; + vector signed char vsrc_a_char, vsrc_b_char; + vector signed char vresult_char; + vector signed char expected_vresult_char; + + vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar; + vector unsigned char vresult_uchar; + vector unsigned char expected_vresult_uchar; + + vector signed short vsrc_a_short, vsrc_b_short, vsrc_c_short; + vector signed short vresult_short; + vector signed short expected_vresult_short; + + vector unsigned short vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort; + vector unsigned short vresult_ushort; + vector unsigned short expected_vresult_ushort; + + vector int vsrc_a_int, vsrc_b_int, vsrc_c_int; + vector int vresult_int; + vector int expected_vresult_int; + + vector unsigned int vsrc_a_uint, vsrc_b_uint, vsrc_c_uint; + vector unsigned int vresult_uint; + vector unsigned int expected_vresult_uint; + + vector long long int vsrc_a_ll, vsrc_b_ll, vsrc_c_ll; + vector long long int vresult_ll; + vector long long int expected_vresult_ll; + + vector unsigned long long int vsrc_a_ull, vsrc_b_ull, vsrc_c_ull; + vector unsigned long long int vresult_ull; + vector unsigned long long int expected_vresult_ull; + + vector float vresult_f; + vector float expected_vresult_f; + vector float vsrc_a_f, vsrc_b_f; + + vector double vsrc_a_d, vsrc_b_d; + vector double vresult_d; + 
vector double expected_vresult_d; + + /* Vector blend */ + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; + + vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29 }; + vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16, + 18, 20, 22, 24, 26, 28, 30, 32 }; + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; + vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + expected_vresult_char = (vector signed char) { -1, -4, 5, 8, + 9, 12, 13, 16, + 17, 20, 21, 24, + 25, 28, 29, 32 }; + + vresult_char = vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar); + + if (!vec_all_eq (vresult_char, expected_vresult_char)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n"); + for(i = 0; i < 16; i++) + printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n", + i, vresult_char[i], i, expected_vresult_char[i]); +#else + abort(); +#endif + } + + vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29 }; + vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 22, 24, 26, 28, 30, 32 }; + vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, + 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 }; + vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + expected_vresult_uchar = (vector unsigned char) { 1, 4, 5, 8, + 9, 12, 13, 16, + 17, 20, 21, 24, + 25, 28, 29, 32 }; + + vresult_uchar = vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar); + + if (!vec_all_eq (vresult_uchar, expected_vresult_uchar)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n"); + for(i = 0; i < 16; i++) + printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n", + i, vresult_uchar[i], i, 
expected_vresult_uchar[i]); +#else + abort(); +#endif + } + + vsrc_a_short = (vector signed short) { -1, 3, 5, 7, 9, 11, 13, 15 }; + vsrc_b_short = (vector signed short) { 2, -4, 6, 8, 10, 12, 14, 16 }; + vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000, + 0, 0x8000, 0, 0x8000 }; + vresult_short = (vector signed short) { 0, 0, 0, 0, 0, 0, 0, 0 }; + expected_vresult_short = (vector signed short) { -1, -4, 5, 8, + 9, 12, 13, 16 }; + + vresult_short = vec_blendv (vsrc_a_short, vsrc_b_short, vsrc_c_ushort); + + if (!vec_all_eq (vresult_short, expected_vresult_short)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_short, vsrc_b_short, vsrc_c_ushort)\n"); + for(i = 0; i < 8; i++) + printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n", + i, vresult_short[i], i, expected_vresult_short[i]); +#else + abort(); +#endif + } + + vsrc_a_ushort = (vector unsigned short) { 1, 3, 5, 7, 9, 11, 13, 15 }; + vsrc_b_ushort = (vector unsigned short) { 2, 4, 6, 8, 10, 12, 14, 16 }; + vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000, + 0, 0x8000, 0, 0x8000 }; + vresult_ushort = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 }; + expected_vresult_ushort = (vector unsigned short) { 1, 4, 5, 8, + 9, 12, 13, 16 }; + + vresult_ushort = vec_blendv (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort); + + if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort)\n"); + for(i = 0; i < 8; i++) + printf(" vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n", + i, vresult_ushort[i], i, expected_vresult_ushort[i]); +#else + abort(); +#endif + } + + vsrc_a_int = (vector signed int) { -1, -3, -5, -7 }; + vsrc_b_int = (vector signed int) { 2, 4, 6, 8 }; + vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000}; + vresult_int = (vector signed int) { 0, 0, 0, 0 }; + expected_vresult_int = (vector signed int) { -1, 4, -5, 8 }; + + vresult_int = vec_blendv 
(vsrc_a_int, vsrc_b_int, vsrc_c_uint); + + if (!vec_all_eq (vresult_int, expected_vresult_int)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_int, vsrc_b_int, vsrc_c_uint)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", + i, vresult_int[i], i, expected_vresult_int[i]); +#else + abort(); +#endif + } + + vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 }; + vsrc_b_uint = (vector unsigned int) { 2, 4, 6, 8 }; + vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000 }; + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; + expected_vresult_uint = (vector unsigned int) { 1, 4, 5, 8 }; + + vresult_uint = vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint); + + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", + i, vresult_uint[i], i, expected_vresult_uint[i]); +#else + abort(); +#endif + } + + vsrc_a_ll = (vector signed long long int) { -1, -3 }; + vsrc_b_ll = (vector signed long long int) { 2, 4, }; + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; + vresult_ll = (vector signed long long int) { 0, 0 }; + expected_vresult_ll = (vector signed long long int) { -1, 4 }; + + vresult_ll = vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull); + + if (!vec_all_eq (vresult_ll, expected_vresult_ll)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_ll[%d] = %d, expected_vresult_ll[%d] = %d\n", + i, vresult_ll[i], i, expected_vresult_ll[i]); +#else + abort(); +#endif + } + + vsrc_a_ull = (vector unsigned long long) { 1, 3 }; + vsrc_b_ull = (vector unsigned long long) { 2, 4 }; + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; + vresult_ull = (vector unsigned long long) { 0, 0 }; + expected_vresult_ull = (vector 
unsigned long long) { 1, 4 }; + + vresult_ull = vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull); + + if (!vec_all_eq (vresult_ull, expected_vresult_ull)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_ull[%d] = %d, expected_vresult_ull[%d] = %d\n", + i, vresult_ull[i], i, expected_vresult_ull[i]); +#else + abort(); +#endif + } + + vsrc_a_f = (vector float) { -1.0, -3.0, -5.0, -7.0 }; + vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 }; + vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000}; + vresult_f = (vector float) { 0, 0, 0, 0 }; + expected_vresult_f = (vector float) { -1, 4, -5, 8 }; + + vresult_f = vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint); + + if (!vec_all_eq (vresult_f, expected_vresult_f)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_f[%d] = %d, expected_vresult_f[%d] = %d\n", + i, vresult_f[i], i, expected_vresult_f[i]); +#else + abort(); +#endif + } + + vsrc_a_d = (vector double) { -1.0, -3.0 }; + vsrc_b_d = (vector double) { 2.0, 4.0 }; + vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL }; + vresult_d = (vector double) { 0, 0 }; + expected_vresult_d = (vector double) { -1, 4 }; + + vresult_d = vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull); + + if (!vec_all_eq (vresult_d, expected_vresult_d)) { +#if DEBUG + printf("ERROR, vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_d[%d] = %d, expected_vresult_d[%d] = %d\n", + i, vresult_d[i], i, expected_vresult_d[i]); +#else + abort(); +#endif + } + + return 0; +} + +/* { dg-final { scan-assembler-times {\mxxblendvb\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxblendvh\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxblendvw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxxblendvd\M} 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c
new file mode 100644
index 00000000000..c2dcd48b4c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-permute-ext-runnable.c
@@ -0,0 +1,313 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
+
+/* Runtime check of vec_permx for every element type exercised below.
+   Each case builds two source vectors and a byte-wise control vector,
+   calls vec_permx with a last argument of 0 and compares the result with
+   a hand-computed expectation.  As the expectations show, a control byte
+   of 0x00-0x0F picks from the first source and 0x10-0x1F from the second.
+   -save-temps keeps the assembly around for the dg-final scan at the end
+   of the file.  */
+
+#include <altivec.h>
+
+/* Set to 1 to print every mismatching case instead of aborting.  */
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  vector signed char vsrc_a_char, vsrc_b_char;
+  vector signed char vresult_char;
+  vector signed char expected_vresult_char;
+
+  vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar;
+  vector unsigned char vresult_uchar;
+  vector unsigned char expected_vresult_uchar;
+
+  vector signed short vsrc_a_short, vsrc_b_short;
+  vector signed short vresult_short;
+  vector signed short expected_vresult_short;
+
+  vector unsigned short vsrc_a_ushort, vsrc_b_ushort;
+  vector unsigned short vresult_ushort;
+  vector unsigned short expected_vresult_ushort;
+
+  vector int vsrc_a_int, vsrc_b_int;
+  vector int vresult_int;
+  vector int expected_vresult_int;
+
+  vector unsigned int vsrc_a_uint, vsrc_b_uint;
+  vector unsigned int vresult_uint;
+  vector unsigned int expected_vresult_uint;
+
+  vector long long int vsrc_a_ll, vsrc_b_ll;
+  vector long long int vresult_ll;
+  vector long long int expected_vresult_ll;
+
+  vector unsigned long long int vsrc_a_ull, vsrc_b_ull;
+  vector unsigned long long int vresult_ull;
+  vector unsigned long long int expected_vresult_ull;
+
+  vector float vresult_f;
+  vector float expected_vresult_f;
+  vector float vsrc_a_f, vsrc_b_f;
+
+  vector double vsrc_a_d, vsrc_b_d;
+  vector double vresult_d;
+  vector double expected_vresult_d;
+
+  /* Vector permx, signed char: every control byte indexes the first source.  */
+  vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15,
+                                       17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16,
+                                       18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1,
+                                          0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 };
+  vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                        0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_char = (vector signed char) { -1, 15, -1, 11,
+                                                 -1, 7, -1, 3,
+                                                 -1, 5, -1, 9,
+                                                 -1, 13, -1, -1 };
+
+  vresult_char = vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_char, expected_vresult_char)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n",
+             i, vresult_char[i], i, expected_vresult_char[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned char: same control pattern as the signed case.  */
+  vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15,
+                                          17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16,
+                                          18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1,
+                                          0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 };
+  vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                           0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_uchar = (vector unsigned char) { 1, 15, 1, 11,
+                                                    1, 7, 1, 3,
+                                                    1, 5, 1, 9,
+                                                    1, 13, 1, 1 };
+
+  vresult_uchar = vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_uchar, expected_vresult_uchar)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n",
+             i, vresult_uchar[i], i, expected_vresult_uchar[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed short: 0x1E/0x1F pull one lane from the second source.  */
+  vsrc_a_short = (vector signed short int) { 1, -3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_short = (vector signed short int) { 2, 4, -6, 8, 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x2, 0x3,
+                                          0x8, 0x9, 0x2, 0x3,
+                                          0x1E, 0x1F, 0x2, 0x3 };
+  vresult_short = (vector signed short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_short = (vector signed short int) { 1, -3, 5, -3,
+                                                       9, -3, 16, -3 };
+
+  vresult_short = vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_short, expected_vresult_short)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n",
+             i, vresult_short[i], i, expected_vresult_short[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned short: same control pattern as the signed case.  */
+  vsrc_a_ushort = (vector unsigned short int) { 1, 3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_ushort = (vector unsigned short int) { 2, 4, 6, 8, 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x2, 0x3,
+                                          0x8, 0x9, 0x2, 0x3,
+                                          0x1E, 0x1F, 0x2, 0x3 };
+  vresult_ushort = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ushort = (vector unsigned short int) { 1, 3, 5, 3,
+                                                          9, 3, 16, 3 };
+
+  vresult_ushort = vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n",
+             i, vresult_ushort[i], i, expected_vresult_ushort[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed int: low half from the first source, high from the second.  */
+  vsrc_a_int = (vector signed int) { 1, -3, 5, 7 };
+  vsrc_b_int = (vector signed int) { 2, 4, -6, 8 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_int = (vector signed int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector signed int) { 1, -3, -6, 8 };
+
+  vresult_int = vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+             i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned int: lanes are unsigned, so print them with %u.  */
+  vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 };
+  vsrc_b_uint = (vector unsigned int) { 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_uint = (vector unsigned int) { 1, 3, 14, 16 };
+
+  vresult_uint = vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_uint[%d] = %u, expected_vresult_uint[%d] = %u\n",
+             i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed long long: one doubleword from each source.  */
+  vsrc_a_ll = (vector signed long long int) { 1, -3 };
+  vsrc_b_ll = (vector signed long long int) { 2, -4 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_ll = (vector signed long long int) { 0, 0};
+  expected_vresult_ll = (vector signed long long int) { 1, -4 };
+
+  vresult_ll = vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ll, expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %lld, expected_vresult_ll[%d] = %lld\n",
+             i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned long long: 64-bit lanes need %llu, not %d.  */
+  vsrc_a_ull = (vector unsigned long long int) { 1, 3 };
+  vsrc_b_ull = (vector unsigned long long int) { 10, 12 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_ull = (vector unsigned long long int) { 0, 0 };
+  expected_vresult_ull = (vector unsigned long long int) { 1, 12 };
+
+  vresult_ull = vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ull, expected_vresult_ull)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ull[%d] = %llu, expected_vresult_ull[%d] = %llu\n",
+             i, vresult_ull[i], i, expected_vresult_ull[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, float: two words from each source.  */
+  vsrc_a_f = (vector float) { -3.0, 5.0, 7.0, 9.0 };
+  vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_f = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+  expected_vresult_f = (vector float) { -3.0, 5.0, 6.0, 8.0 };
+
+  vresult_f = vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_f, expected_vresult_f)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_f[%d] = %f, expected_vresult_f[%d] = %f\n",
+             i, vresult_f[i], i, expected_vresult_f[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, double: control bytes 0x18-0x1F select a whole doubleword.  */
+  vsrc_a_d = (vector double) { 1.0, -3.0 };
+  vsrc_b_d = (vector double) { 2.0, -4.0 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_d = (vector double) { 0.0, 0.0 };
+  expected_vresult_d = (vector double) { 1.0, -4.0 };
+
+  vresult_d = vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_d, expected_vresult_d)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_d[%d] = %f, expected_vresult_d[%d] = %f\n",
+             i, vresult_d[i], i, expected_vresult_d[i]);
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermx\M} 10 } } */
+ + -- 2.30.2