#define vec_splati(a) __builtin_vec_xxspltiw (a)
#define vec_splatid(a) __builtin_vec_xxspltid (a)
#define vec_splati_ins(a, b, c) __builtin_vec_xxsplti32dx (a, b, c)
+#define vec_blendv(a, b, c) __builtin_vec_xxblend (a, b, c)
+#define vec_permx(a, b, c, d) __builtin_vec_xxpermx (a, b, c, d)
#define vec_gnb(a, b) __builtin_vec_gnb (a, b)
#define vec_clrl(a, b) __builtin_vec_clrl (a, b)
UNSPEC_XXSPLTIW
UNSPEC_XXSPLTID
UNSPEC_XXSPLTI32DX
+ UNSPEC_XXBLEND
+ UNSPEC_XXPERMX
])
(define_c_enum "unspecv"
(KF "FLOAT128_VECTOR_P (KFmode)")
(TF "FLOAT128_VECTOR_P (TFmode)")])
+;; Like VM2, but restricted to the vector modes with char, short, int and
+;; 64-bit integer elements (V16QI, V8HI, V4SI, V2DI) plus float (V4SF) and
+;; double (V2DF).  The xxblend_<mode> pattern iterates over this list.
+(define_mode_iterator VM3 [V4SI
+ V8HI
+ V16QI
+ V4SF
+ V2DF
+ V2DI])
+
+;; One-letter element-size suffix for each VM3 mode.  It is appended to
+;; "xxblendv" in the xxblend_<mode> output template, so the floating-point
+;; modes share the suffix of the integer mode with the same element width
+;; (V4SF with V4SI, V2DF with V2DI).
+(define_mode_attr VM3_char [(V2DI "d")
+ (V4SI "w")
+ (V8HI "h")
+ (V16QI "b")
+ (V2DF "d")
+ (V4SF "w")])
+
;; Map the Vector convert single precision to double precision for integer
;; versus floating point
(define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")])
"xxsplti32dx %x0,%2,%3"
[(set_attr "type" "vecsimple")])
+;; Vector blend.  Instantiated once per VM3 mode; the result and the three
+;; inputs all have that same mode and use the "wa" register constraint.  The
+;; operation is kept opaque as UNSPEC_XXBLEND and is emitted as
+;; xxblendv<VM3_char> (xxblendvb/h/w/d).  Only enabled for TARGET_POWER10.
+(define_insn "xxblend_<mode>"
+ [(set (match_operand:VM3 0 "register_operand" "=wa")
+ (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa")
+ (match_operand:VM3 2 "register_operand" "wa")
+ (match_operand:VM3 3 "register_operand" "wa")]
+ UNSPEC_XXBLEND))]
+ "TARGET_POWER10"
+ "xxblendv<VM3_char> %x0,%x1,%x2,%x3"
+ [(set_attr "type" "vecsimple")])
+
+;; Expander behind the xxpermx built-in.  Operands 1 and 2 are the two source
+;; vectors, operand 3 is the byte permute-control vector and operand 4 is the
+;; immediate section selector.  On big-endian targets the operands are passed
+;; straight to xxpermx_inst.  On little-endian targets the two sources are
+;; swapped, the control vector is complemented and the selector is replaced
+;; by 7 - selector before xxpermx_inst is emitted.
+(define_expand "xxpermx"
+  [(set (match_operand:V2DI 0 "register_operand" "+wa")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
+                      (match_operand:V2DI 2 "register_operand" "wa")
+                      (match_operand:V16QI 3 "register_operand" "wa")
+                      (match_operand:QI 4 "u8bit_cint_operand" "n")]
+                     UNSPEC_XXPERMX))]
+  "TARGET_POWER10"
+{
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_xxpermx_inst (operands[0], operands[1],
+                                 operands[2], operands[3],
+                                 operands[4]));
+  else
+    {
+      /* Reverse the value of the byte element indexes by XORing the control
+         vector with 0xFF.  Reverse the 32-byte section identifier match by
+         subtracting bits [0:2] of each element from 7; the immediate
+         selector is adjusted the same way.
+
+         The complemented control vector is built in a fresh pseudo.
+         operands[3] is an input of this pattern and may still be live in
+         the caller (for example when the same control vector feeds several
+         vec_permx calls), so it must not be overwritten here.  */
+      int value = INTVAL (operands[4]);
+      rtx all_ones = gen_reg_rtx (V16QImode);
+      rtx inverted_ctl = gen_reg_rtx (V16QImode);
+
+      emit_insn (gen_xxspltib_v16qi (all_ones, GEN_INT (-1)));
+      emit_insn (gen_xorv16qi3 (inverted_ctl, operands[3], all_ones));
+      value = 7 - value;
+      emit_insn (gen_xxpermx_inst (operands[0], operands[2],
+                                   operands[1], inverted_ctl,
+                                   GEN_INT (value)));
+    }
+
+  DONE;
+}
+  [(set_attr "type" "vecsimple")])
+
+;; The xxpermx instruction proper, as emitted by the "xxpermx" expander.
+;; Operands 1 and 2 are the V2DI source vectors, operand 3 is the V16QI
+;; permute-control vector and operand 4 is a constant accepted by
+;; u3bit_cint_operand.  All register operands use the "v" constraint and the
+;; operation is kept opaque as UNSPEC_XXPERMX.  Only enabled for
+;; TARGET_POWER10.
+(define_insn "xxpermx_inst"
+ [(set (match_operand:V2DI 0 "register_operand" "+v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
+ (match_operand:V2DI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")
+ (match_operand:QI 4 "u3bit_cint_operand" "n")]
+ UNSPEC_XXPERMX))]
+ "TARGET_POWER10"
+ "xxpermx %x0,%x1,%x2,%x3,%4"
+ [(set_attr "type" "vecsimple")])
+
(define_expand "vstrir_<mode>"
[(set (match_operand:VIshort 0 "altivec_register_operand")
(unspec:VIshort [(match_operand:VIshort 1 "altivec_register_operand")]
BU_P10V_3 (VXXSPLTI32DX_V4SI, "vxxsplti32dx_v4si", CONST, xxsplti32dx_v4si)
BU_P10V_3 (VXXSPLTI32DX_V4SF, "vxxsplti32dx_v4sf", CONST, xxsplti32dx_v4sf)
+BU_P10V_3 (VXXBLEND_V16QI, "xxblend_v16qi", CONST, xxblend_v16qi)
+BU_P10V_3 (VXXBLEND_V8HI, "xxblend_v8hi", CONST, xxblend_v8hi)
+BU_P10V_3 (VXXBLEND_V4SI, "xxblend_v4si", CONST, xxblend_v4si)
+BU_P10V_3 (VXXBLEND_V2DI, "xxblend_v2di", CONST, xxblend_v2di)
+BU_P10V_3 (VXXBLEND_V4SF, "xxblend_v4sf", CONST, xxblend_v4sf)
+BU_P10V_3 (VXXBLEND_V2DF, "xxblend_v2df", CONST, xxblend_v2df)
+
+BU_P10V_4 (VXXPERMX, "xxpermx", CONST, xxpermx)
+
BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
BU_P10_OVERLOAD_1 (XXSPLTIW, "xxspltiw")
BU_P10_OVERLOAD_1 (XXSPLTID, "xxspltid")
BU_P10_OVERLOAD_3 (XXSPLTI32DX, "xxsplti32dx")
+BU_P10_OVERLOAD_3 (XXBLEND, "xxblend")
+BU_P10_OVERLOAD_4 (XXPERMX, "xxpermx")
/* 1 argument crypto functions. */
BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox_v2di)
unsupported_builtin = true;
}
}
- else if (fcode == P10_BUILTIN_VEC_XXEVAL)
+ else if ((fcode == P10_BUILTIN_VEC_XXEVAL)
+ || (fcode == P10_BUILTIN_VXXPERMX))
{
- /* Need to special case __builtin_vec_xxeval because this takes
- 4 arguments, and the existing infrastructure handles no
- more than three. */
+ signed char op3_type;
+
+ /* Need to special case P10_BUILTIN_VEC_XXEVAL and
+ P10_BUILTIN_VXXPERMX because they take 4 arguments and the
+ existing infrastructure only handles three. */
if (nargs != 4)
{
- error ("builtin %qs requires 4 arguments",
- "__builtin_vec_xxeval");
+ const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ?
+ "__builtin_vec_xxeval":"__builtin_vec_xxpermx";
+
+ error ("builtin %qs requires 4 arguments", name);
return error_mark_node;
}
+
for ( ; desc->code == fcode; desc++)
{
+ if (fcode == P10_BUILTIN_VEC_XXEVAL)
+ op3_type = desc->op3;
+ else /* P10_BUILTIN_VXXPERMX */
+ op3_type = RS6000_BTI_V16QI;
+
if (rs6000_builtin_type_compatible (types[0], desc->op1)
&& rs6000_builtin_type_compatible (types[1], desc->op2)
&& rs6000_builtin_type_compatible (types[2], desc->op3)
+ && rs6000_builtin_type_compatible (types[2], op3_type)
&& rs6000_builtin_type_compatible (types[3],
RS6000_BTI_UINTSI))
{
RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+ /* The overloaded XXPERMX definitions are handled specially because the
+ fourth unsigned char operand is not encoded in this table. */
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXPERMX, P10_BUILTIN_VXXPERMX,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF,
+ RS6000_BTI_unsigned_V16QI },
+
{ P10_BUILTIN_VEC_EXTRACTL, P10_BUILTIN_VEXTRACTBL,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTQI },
{ P10_BUILTIN_VEC_XXSPLTI32DX, P10_BUILTIN_VXXSPLTI32DX_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_UINTQI, RS6000_BTI_float },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI,
+ RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI,
+ RS6000_BTI_unsigned_V8HI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI,
+ RS6000_BTI_unsigned_V4SI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI,
+ RS6000_BTI_unsigned_V2DI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF,
+ RS6000_BTI_unsigned_V4SI },
+ { P10_BUILTIN_VEC_XXBLEND, P10_BUILTIN_VXXBLEND_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF,
+ RS6000_BTI_unsigned_V2DI },
+
{ P10_BUILTIN_VEC_SRDB, P10_BUILTIN_VSRDB_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_UINTQI },
return CONST0_RTX (tmode);
}
}
+
+ else if (icode == CODE_FOR_xxpermx)
+ {
+ /* Only allow 3-bit unsigned literals. */
+ STRIP_NOPS (arg3);
+ if (TREE_CODE (arg3) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg3) & ~0x7)
+ {
+ error ("argument 4 must be a 3-bit unsigned literal");
+ return CONST0_RTX (tmode);
+ }
+ }
+
else if (icode == CODE_FOR_vreplace_elt_v4si
|| icode == CODE_FOR_vreplace_elt_v4sf)
{
tree function_type = NULL;
static tree v2udi_type = builtin_mode_to_type[V2DImode][1];
+ static tree v16uqi_type = builtin_mode_to_type[V16QImode][1];
static tree uchar_type = builtin_mode_to_type[QImode][1];
static tree xxeval_type =
build_function_type_list (v2udi_type, v2udi_type, v2udi_type,
v2udi_type, uchar_type, NULL_TREE);
+ static tree xxpermx_type =
+ build_function_type_list (v2udi_type, v2udi_type, v2udi_type,
+ v16uqi_type, uchar_type, NULL_TREE);
+
switch (builtin) {
case P10_BUILTIN_XXEVAL:
function_type = xxeval_type;
break;
+ case P10_BUILTIN_VXXPERMX:
+ gcc_assert ((mode_ret == V2DImode)
+ && (mode_arg0 == V2DImode)
+ && (mode_arg1 == V2DImode)
+ && (mode_arg2 == V16QImode)
+ && (mode_arg3 == QImode));
+ function_type = xxpermx_type;
+ break;
+
default:
/* A case for each quaternary built-in must be provided above. */
gcc_unreachable ();
case P10_BUILTIN_VREPLACE_ELT_UV2DI:
case P10_BUILTIN_VREPLACE_UN_UV4SI:
case P10_BUILTIN_VREPLACE_UN_UV2DI:
+ case P10_BUILTIN_VXXBLEND_V16QI:
+ case P10_BUILTIN_VXXBLEND_V8HI:
+ case P10_BUILTIN_VXXBLEND_V4SI:
+ case P10_BUILTIN_VXXBLEND_V2DI:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@findex vec_splati_ins
+Vector Blend Variable
+
+@smallexample
+@exdent vector signed char vec_blendv (vector signed char, vector signed char,
+vector unsigned char);
+@exdent vector unsigned char vec_blendv (vector unsigned char,
+vector unsigned char, vector unsigned char);
+@exdent vector signed short vec_blendv (vector signed short,
+vector signed short, vector unsigned short);
+@exdent vector unsigned short vec_blendv (vector unsigned short,
+vector unsigned short, vector unsigned short);
+@exdent vector signed int vec_blendv (vector signed int, vector signed int,
+vector unsigned int);
+@exdent vector unsigned int vec_blendv (vector unsigned int,
+vector unsigned int, vector unsigned int);
+@exdent vector signed long long vec_blendv (vector signed long long,
+vector signed long long, vector unsigned long long);
+@exdent vector unsigned long long vec_blendv (vector unsigned long long,
+vector unsigned long long, vector unsigned long long);
+@exdent vector float vec_blendv (vector float, vector float,
+vector unsigned int);
+@exdent vector double vec_blendv (vector double, vector double,
+vector unsigned long long);
+@end smallexample
+
+Blend the first and second argument vectors according to the sign bits of the
+corresponding elements of the third argument vector. This is similar to the
+@code{vsel} and @code{xxsel} instructions but for bigger elements.
+
+@findex vec_blendv
+
+Vector Permute Extended
+
+@smallexample
+@exdent vector signed char vec_permx (vector signed char, vector signed char,
+vector unsigned char, const int);
+@exdent vector unsigned char vec_permx (vector unsigned char,
+vector unsigned char, vector unsigned char, const int);
+@exdent vector signed short vec_permx (vector signed short,
+vector signed short, vector unsigned char, const int);
+@exdent vector unsigned short vec_permx (vector unsigned short,
+vector unsigned short, vector unsigned char, const int);
+@exdent vector signed int vec_permx (vector signed int, vector signed int,
+vector unsigned char, const int);
+@exdent vector unsigned int vec_permx (vector unsigned int,
+vector unsigned int, vector unsigned char, const int);
+@exdent vector signed long long vec_permx (vector signed long long,
+vector signed long long, vector unsigned char, const int);
+@exdent vector unsigned long long vec_permx (vector unsigned long long,
+vector unsigned long long, vector unsigned char, const int);
+@exdent vector float vec_permx (vector float, vector float,
+vector unsigned char, const int);
+@exdent vector double vec_permx (vector double, vector double,
+vector unsigned char, const int);
+@end smallexample
+
+Perform a partial permute of the first two arguments, which form a 32-byte
+section of an emulated vector up to 256 bytes wide, using the partial permute
+control vector in the third argument. The fourth argument (constrained to
+values of 0-7) identifies which 32-byte section of the emulated vector is
+contained in the first two arguments.
+@findex vec_permx
+
@smallexample
@exdent vector unsigned long long int
@exdent vec_pext (vector unsigned long long int, vector unsigned long long int)
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
+
+/* Run-time check of vec_blendv for every supported element type.  For each
+   type the mask has the sign bit set in the odd-numbered elements, so the
+   expected result takes even elements from the first source and odd
+   elements from the second.  -save-temps keeps the assembly output around
+   for the scan-assembler-times checks at the end of the file.
+
+   Build with DEBUG set to 1 to print the mismatching vectors instead of
+   aborting.  */
+#include <altivec.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  vector signed char vsrc_a_char, vsrc_b_char;
+  vector signed char vresult_char;
+  vector signed char expected_vresult_char;
+
+  vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar;
+  vector unsigned char vresult_uchar;
+  vector unsigned char expected_vresult_uchar;
+
+  vector signed short vsrc_a_short, vsrc_b_short;
+  vector signed short vresult_short;
+  vector signed short expected_vresult_short;
+
+  vector unsigned short vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort;
+  vector unsigned short vresult_ushort;
+  vector unsigned short expected_vresult_ushort;
+
+  vector int vsrc_a_int, vsrc_b_int;
+  vector int vresult_int;
+  vector int expected_vresult_int;
+
+  vector unsigned int vsrc_a_uint, vsrc_b_uint, vsrc_c_uint;
+  vector unsigned int vresult_uint;
+  vector unsigned int expected_vresult_uint;
+
+  vector long long int vsrc_a_ll, vsrc_b_ll;
+  vector long long int vresult_ll;
+  vector long long int expected_vresult_ll;
+
+  vector unsigned long long int vsrc_a_ull, vsrc_b_ull, vsrc_c_ull;
+  vector unsigned long long int vresult_ull;
+  vector unsigned long long int expected_vresult_ull;
+
+  vector float vresult_f;
+  vector float expected_vresult_f;
+  vector float vsrc_a_f, vsrc_b_f;
+
+  vector double vsrc_a_d, vsrc_b_d;
+  vector double vresult_d;
+  vector double expected_vresult_d;
+
+  /* Vector blend, signed char.  */
+  vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15,
+                                       17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16,
+                                       18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80,
+                                          0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 };
+  vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                        0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_char = (vector signed char) { -1, -4, 5, 8,
+                                                 9, 12, 13, 16,
+                                                 17, 20, 21, 24,
+                                                 25, 28, 29, 32 };
+
+  vresult_char = vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar);
+
+  if (!vec_all_eq (vresult_char, expected_vresult_char)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n",
+             i, vresult_char[i], i, expected_vresult_char[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, unsigned char.  */
+  vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15,
+                                          17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16,
+                                          18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80,
+                                          0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 };
+  vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                           0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_uchar = (vector unsigned char) { 1, 4, 5, 8,
+                                                    9, 12, 13, 16,
+                                                    17, 20, 21, 24,
+                                                    25, 28, 29, 32 };
+
+  vresult_uchar = vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar);
+
+  if (!vec_all_eq (vresult_uchar, expected_vresult_uchar)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n",
+             i, vresult_uchar[i], i, expected_vresult_uchar[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, signed short.  */
+  vsrc_a_short = (vector signed short) { -1, 3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_short = (vector signed short) { 2, -4, 6, 8, 10, 12, 14, 16 };
+  vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000,
+                                            0, 0x8000, 0, 0x8000 };
+  vresult_short = (vector signed short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_short = (vector signed short) { -1, -4, 5, 8,
+                                                   9, 12, 13, 16 };
+
+  vresult_short = vec_blendv (vsrc_a_short, vsrc_b_short, vsrc_c_ushort);
+
+  if (!vec_all_eq (vresult_short, expected_vresult_short)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_short, vsrc_b_short, vsrc_c_ushort)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n",
+             i, vresult_short[i], i, expected_vresult_short[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, unsigned short.  */
+  vsrc_a_ushort = (vector unsigned short) { 1, 3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_ushort = (vector unsigned short) { 2, 4, 6, 8, 10, 12, 14, 16 };
+  vsrc_c_ushort = (vector unsigned short) { 0, 0x8000, 0, 0x8000,
+                                            0, 0x8000, 0, 0x8000 };
+  vresult_ushort = (vector unsigned short) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ushort = (vector unsigned short) { 1, 4, 5, 8,
+                                                      9, 12, 13, 16 };
+
+  vresult_ushort = vec_blendv (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort);
+
+  if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_ushort)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n",
+             i, vresult_ushort[i], i, expected_vresult_ushort[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, signed int.  */
+  vsrc_a_int = (vector signed int) { -1, -3, -5, -7 };
+  vsrc_b_int = (vector signed int) { 2, 4, 6, 8 };
+  vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000};
+  vresult_int = (vector signed int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector signed int) { -1, 4, -5, 8 };
+
+  vresult_int = vec_blendv (vsrc_a_int, vsrc_b_int, vsrc_c_uint);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_int, vsrc_b_int, vsrc_c_uint)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+             i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, unsigned int.  */
+  vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 };
+  vsrc_b_uint = (vector unsigned int) { 2, 4, 6, 8 };
+  vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000 };
+  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_uint = (vector unsigned int) { 1, 4, 5, 8 };
+
+  vresult_uint = vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint);
+
+  if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_uint, vsrc_b_uint, vsrc_c_uint)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_uint[%d] = %u, expected_vresult_uint[%d] = %u\n",
+             i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, signed long long.  */
+  vsrc_a_ll = (vector signed long long int) { -1, -3 };
+  vsrc_b_ll = (vector signed long long int) { 2, 4, };
+  vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL };
+  vresult_ll = (vector signed long long int) { 0, 0 };
+  expected_vresult_ll = (vector signed long long int) { -1, 4 };
+
+  vresult_ll = vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull);
+
+  if (!vec_all_eq (vresult_ll, expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_ll, vsrc_b_ll, vsrc_c_ull)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %lld, expected_vresult_ll[%d] = %lld\n",
+             i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, unsigned long long.  */
+  vsrc_a_ull = (vector unsigned long long) { 1, 3 };
+  vsrc_b_ull = (vector unsigned long long) { 2, 4 };
+  vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL };
+  vresult_ull = (vector unsigned long long) { 0, 0 };
+  expected_vresult_ull = (vector unsigned long long) { 1, 4 };
+
+  vresult_ull = vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull);
+
+  if (!vec_all_eq (vresult_ull, expected_vresult_ull)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_ull, vsrc_b_ull, vsrc_c_ull)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ull[%d] = %llu, expected_vresult_ull[%d] = %llu\n",
+             i, vresult_ull[i], i, expected_vresult_ull[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, float.  */
+  vsrc_a_f = (vector float) { -1.0, -3.0, -5.0, -7.0 };
+  vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 };
+  vsrc_c_uint = (vector unsigned int) { 0, 0x80000000, 0, 0x80000000};
+  vresult_f = (vector float) { 0, 0, 0, 0 };
+  expected_vresult_f = (vector float) { -1, 4, -5, 8 };
+
+  vresult_f = vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint);
+
+  if (!vec_all_eq (vresult_f, expected_vresult_f)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_f, vsrc_b_f, vsrc_c_uint)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_f[%d] = %f, expected_vresult_f[%d] = %f\n",
+             i, vresult_f[i], i, expected_vresult_f[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector blend, double.  */
+  vsrc_a_d = (vector double) { -1.0, -3.0 };
+  vsrc_b_d = (vector double) { 2.0, 4.0 };
+  vsrc_c_ull = (vector unsigned long long int) { 0, 0x8000000000000000ULL };
+  vresult_d = (vector double) { 0, 0 };
+  expected_vresult_d = (vector double) { -1, 4 };
+
+  vresult_d = vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull);
+
+  if (!vec_all_eq (vresult_d, expected_vresult_d)) {
+#if DEBUG
+    printf("ERROR, vec_blendv (vsrc_a_d, vsrc_b_d, vsrc_c_ull)\n");
+    /* A vector double has only two elements.  */
+    for(i = 0; i < 2; i++)
+      printf(" vresult_d[%d] = %f, expected_vresult_d[%d] = %f\n",
+             i, vresult_d[i], i, expected_vresult_d[i]);
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {\mxxblendvb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxblendvh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxblendvw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxblendvd\M} 3 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
+
+/* Run-time check of vec_permx for every supported element type, always with
+   section selector 0.  Each control byte below 0x20 picks that byte of the
+   32-byte concatenation of the two source vectors.  -save-temps keeps the
+   assembly output around for the scan-assembler-times check at the end of
+   the file.
+
+   Build with DEBUG set to 1 to print the mismatching vectors instead of
+   aborting.  */
+#include <altivec.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  vector signed char vsrc_a_char, vsrc_b_char;
+  vector signed char vresult_char;
+  vector signed char expected_vresult_char;
+
+  vector unsigned char vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar;
+  vector unsigned char vresult_uchar;
+  vector unsigned char expected_vresult_uchar;
+
+  vector signed short vsrc_a_short, vsrc_b_short;
+  vector signed short vresult_short;
+  vector signed short expected_vresult_short;
+
+  vector unsigned short vsrc_a_ushort, vsrc_b_ushort;
+  vector unsigned short vresult_ushort;
+  vector unsigned short expected_vresult_ushort;
+
+  vector int vsrc_a_int, vsrc_b_int;
+  vector int vresult_int;
+  vector int expected_vresult_int;
+
+  vector unsigned int vsrc_a_uint, vsrc_b_uint;
+  vector unsigned int vresult_uint;
+  vector unsigned int expected_vresult_uint;
+
+  vector long long int vsrc_a_ll, vsrc_b_ll;
+  vector long long int vresult_ll;
+  vector long long int expected_vresult_ll;
+
+  vector unsigned long long int vsrc_a_ull, vsrc_b_ull;
+  vector unsigned long long int vresult_ull;
+  vector unsigned long long int expected_vresult_ull;
+
+  vector float vresult_f;
+  vector float expected_vresult_f;
+  vector float vsrc_a_f, vsrc_b_f;
+
+  vector double vsrc_a_d, vsrc_b_d;
+  vector double vresult_d;
+  vector double expected_vresult_d;
+
+  /* Vector permx, signed char.  */
+  vsrc_a_char = (vector signed char) { -1, 3, 5, 7, 9, 11, 13, 15,
+                                       17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_char = (vector signed char) { 2, -4, 6, 8, 10, 12, 14, 16,
+                                       18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1,
+                                          0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 };
+  vresult_char = (vector signed char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                        0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_char = (vector signed char) { -1, 15, -1, 11,
+                                                 -1, 7, -1, 3,
+                                                 -1, 5, -1, 9,
+                                                 -1, 13, -1, -1 };
+
+  vresult_char = vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_char, expected_vresult_char)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_char, vsrc_b_char, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_char[%d] = %d, expected_vresult_char[%d] = %d\n",
+             i, vresult_char[i], i, expected_vresult_char[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned char.  */
+  vsrc_a_uchar = (vector unsigned char) { 1, 3, 5, 7, 9, 11, 13, 15,
+                                          17, 19, 21, 23, 25, 27, 29, 31 };
+  vsrc_b_uchar = (vector unsigned char) { 2, 4, 6, 8, 10, 12, 14, 16,
+                                          18, 20, 22, 24, 26, 28, 30, 32 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x7, 0, 0x5, 0, 0x3, 0, 0x1,
+                                          0, 0x2, 0, 0x4, 0, 0x6, 0, 0x0 };
+  vresult_uchar = (vector unsigned char) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                           0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_uchar = (vector unsigned char) { 1, 15, 1, 11,
+                                                    1, 7, 1, 3,
+                                                    1, 5, 1, 9,
+                                                    1, 13, 1, 1 };
+
+  vresult_uchar = vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_uchar, expected_vresult_uchar)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_uchar, vsrc_b_uchar, vsrc_c_uchar)\n");
+    for(i = 0; i < 16; i++)
+      printf(" vresult_uchar[%d] = %d, expected_vresult_uchar[%d] = %d\n",
+             i, vresult_uchar[i], i, expected_vresult_uchar[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed short.  */
+  vsrc_a_short = (vector signed short int) { 1, -3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_short = (vector signed short int) { 2, 4, -6, 8, 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x2, 0x3,
+                                          0x8, 0x9, 0x2, 0x3,
+                                          0x1E, 0x1F, 0x2, 0x3 };
+  vresult_short = (vector signed short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_short = (vector signed short int) { 1, -3, 5, -3,
+                                                       9, -3, 16, -3 };
+
+  vresult_short = vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_short, expected_vresult_short)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_short, vsrc_b_short, vsrc_c_uchar)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_short[%d] = %d, expected_vresult_short[%d] = %d\n",
+             i, vresult_short[i], i, expected_vresult_short[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned short.  */
+  vsrc_a_ushort = (vector unsigned short int) { 1, 3, 5, 7, 9, 11, 13, 15 };
+  vsrc_b_ushort = (vector unsigned short int) { 2, 4, 6, 8, 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x2, 0x3,
+                                          0x8, 0x9, 0x2, 0x3,
+                                          0x1E, 0x1F, 0x2, 0x3 };
+  vresult_ushort = (vector unsigned short int) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  expected_vresult_ushort = (vector unsigned short int) { 1, 3, 5, 3,
+                                                          9, 3, 16, 3 };
+
+  vresult_ushort = vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ushort, expected_vresult_ushort)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ushort, vsrc_b_ushort, vsrc_c_uchar)\n");
+    for(i = 0; i < 8; i++)
+      printf(" vresult_ushort[%d] = %d, expected_vresult_ushort[%d] = %d\n",
+             i, vresult_ushort[i], i, expected_vresult_ushort[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed int.  */
+  vsrc_a_int = (vector signed int) { 1, -3, 5, 7 };
+  vsrc_b_int = (vector signed int) { 2, 4, -6, 8 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_int = (vector signed int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector signed int) { 1, -3, -6, 8 };
+
+  vresult_int = vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_int, vsrc_b_int, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+             i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned int.  */
+  vsrc_a_uint = (vector unsigned int) { 1, 3, 5, 7 };
+  vsrc_b_uint = (vector unsigned int) { 10, 12, 14, 16 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_uint = (vector unsigned int) { 1, 3, 14, 16 };
+
+  vresult_uint = vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_uint, vsrc_b_uint, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_uint[%d] = %u, expected_vresult_uint[%d] = %u\n",
+             i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, signed long long.  */
+  vsrc_a_ll = (vector signed long long int) { 1, -3 };
+  vsrc_b_ll = (vector signed long long int) { 2, -4 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_ll = (vector signed long long int) { 0, 0};
+  expected_vresult_ll = (vector signed long long int) { 1, -4 };
+
+  vresult_ll = vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ll, expected_vresult_ll)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ll, vsrc_b_ll, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ll[%d] = %lld, expected_vresult_ll[%d] = %lld\n",
+             i, vresult_ll[i], i, expected_vresult_ll[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, unsigned long long.  */
+  vsrc_a_ull = (vector unsigned long long int) { 1, 3 };
+  vsrc_b_ull = (vector unsigned long long int) { 10, 12 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_ull = (vector unsigned long long int) { 0, 0 };
+  expected_vresult_ull = (vector unsigned long long int) { 1, 12 };
+
+  vresult_ull = vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_ull, expected_vresult_ull)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_ull, vsrc_b_ull, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_ull[%d] = %llu, expected_vresult_ull[%d] = %llu\n",
+             i, vresult_ull[i], i, expected_vresult_ull[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, float.  */
+  vsrc_a_f = (vector float) { -3.0, 5.0, 7.0, 9.0 };
+  vsrc_b_f = (vector float) { 2.0, 4.0, 6.0, 8.0 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_f = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+  expected_vresult_f = (vector float) { -3.0, 5.0, 6.0, 8.0 };
+
+  vresult_f = vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_f, expected_vresult_f)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_f, vsrc_b_f, vsrc_c_uchar)\n");
+    for(i = 0; i < 4; i++)
+      printf(" vresult_f[%d] = %f, expected_vresult_f[%d] = %f\n",
+             i, vresult_f[i], i, expected_vresult_f[i]);
+#else
+    abort();
+#endif
+  }
+
+  /* Vector permx, double.  Bytes 0x18-0x1F select the whole second element
+     of vsrc_b_d, matching the control vectors of the other 64-bit cases.  */
+  vsrc_a_d = (vector double) { 1.0, -3.0 };
+  vsrc_b_d = (vector double) { 2.0, -4.0 };
+  vsrc_c_uchar = (vector unsigned char) { 0x0, 0x1, 0x2, 0x3,
+                                          0x4, 0x5, 0x6, 0x7,
+                                          0x18, 0x19, 0x1A, 0x1B,
+                                          0x1C, 0x1D, 0x1E, 0x1F };
+  vresult_d = (vector double) { 0.0, 0.0 };
+  expected_vresult_d = (vector double) { 1.0, -4.0 };
+
+  vresult_d = vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar, 0);
+
+  if (!vec_all_eq (vresult_d, expected_vresult_d)) {
+#if DEBUG
+    printf("ERROR, vec_permx (vsrc_a_d, vsrc_b_d, vsrc_c_uchar)\n");
+    for(i = 0; i < 2; i++)
+      printf(" vresult_d[%d] = %f, expected_vresult_d[%d] = %f\n",
+             i, vresult_d[i], i, expected_vresult_d[i]);
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermx\M} 10 } } */
+
+