+2020-04-20 Andreas Krebbel <krebbel@linux.ibm.com>
+
+ PR target/94613
+ * config/s390/s390-builtin-types.def: Add 3 new function modes.
+ * config/s390/s390-builtins.def: Add mode dependent low-level
+ builtins and map the overloaded builtins to these.
+ * config/s390/vx-builtins.md ("vec_sel<mode>"): Rename to ...
+ ("vsel<mode>"): ... this and rewrite the pattern with bitops.
+
2020-04-20 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-loop.c (vect_better_loop_vinfo_p): If old_loop_vinfo
DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_UV2DI, BT_ULONGLONG, BT_INT)
DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI)
DEF_FN_TYPE_3 (BT_FN_UV2DI_UV4SI_UV4SI_UV2DI, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI)
DEF_FN_TYPE_3 (BT_FN_UV4SI_UV2DI_UV2DI_INTPTR, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UINT_INT, BT_UV4SI, BT_UV4SI, BT_UINT, BT_INT)
DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_UCHAR_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR, BT_UCHAR)
DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_UINT_UINT, BT_V2DF, BT_V2DF, BT_UINT, BT_UINT)
DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_INT, BT_V2DF, BT_V2DF, BT_V2DF, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV2DI)
DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF, BT_V2DF)
DEF_FN_TYPE_3 (BT_FN_V2DI_UV2DI_UV2DI_INTPTR, BT_V2DI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_INT_INTPTR, BT_V2DI, BT_V2DF, BT_INT, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_FLT_INT, BT_V4SF, BT_V4SF, BT_FLT, BT_INT)
DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_UCHAR_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR, BT_UCHAR)
DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_INT, BT_V4SF, BT_V4SF, BT_V4SF, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_UV4SI, BT_V4SF, BT_V4SF, BT_V4SF, BT_UV4SI)
DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
DEF_FN_TYPE_3 (BT_FN_V4SI_UV4SI_UV4SI_INTPTR, BT_V4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
DEF_FN_TYPE_3 (BT_FN_V4SI_V2DI_V2DI_INTPTR, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR)
/* First two operands are swapped in s390-c.c */
OB_DEF (s390_vec_sel, s390_vec_sel_b8_a, s390_vec_sel_dbl_b, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
-OB_DEF_VAR (s390_vec_sel_b8_a, s390_vsel, 0, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_b8_b, s390_vsel, 0, 0, BT_OV_BV16QI_BV16QI_BV16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_s8_a, s390_vsel, 0, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_s8_b, s390_vsel, 0, 0, BT_OV_V16QI_V16QI_V16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_u8_a, s390_vsel, 0, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_u8_b, s390_vsel, 0, 0, BT_OV_UV16QI_UV16QI_UV16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_b16_a, s390_vsel, 0, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_b16_b, s390_vsel, 0, 0, BT_OV_BV8HI_BV8HI_BV8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_s16_a, s390_vsel, 0, 0, BT_OV_V8HI_V8HI_V8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_s16_b, s390_vsel, 0, 0, BT_OV_V8HI_V8HI_V8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_u16_a, s390_vsel, 0, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_u16_b, s390_vsel, 0, 0, BT_OV_UV8HI_UV8HI_UV8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_b32_a, s390_vsel, 0, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_b32_b, s390_vsel, 0, 0, BT_OV_BV4SI_BV4SI_BV4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_s32_a, s390_vsel, 0, 0, BT_OV_V4SI_V4SI_V4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_s32_b, s390_vsel, 0, 0, BT_OV_V4SI_V4SI_V4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_u32_a, s390_vsel, 0, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_u32_b, s390_vsel, 0, 0, BT_OV_UV4SI_UV4SI_UV4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_b64_a, s390_vsel, 0, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_b64_b, s390_vsel, 0, 0, BT_OV_BV2DI_BV2DI_BV2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_s64_a, s390_vsel, 0, 0, BT_OV_V2DI_V2DI_V2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_s64_b, s390_vsel, 0, 0, BT_OV_V2DI_V2DI_V2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_u64_a, s390_vsel, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_u64_b, s390_vsel, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_flt_a, s390_vsel, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF_UV4SI)
-OB_DEF_VAR (s390_vec_sel_flt_b, s390_vsel, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF_BV4SI)
-OB_DEF_VAR (s390_vec_sel_dbl_a, s390_vsel, 0, 0, BT_OV_V2DF_V2DF_V2DF_UV2DI)
-OB_DEF_VAR (s390_vec_sel_dbl_b, s390_vsel, 0, 0, BT_OV_V2DF_V2DF_V2DF_BV2DI)
-
-B_DEF (s390_vsel, vec_selv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_b8_a, s390_vselb, 0, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_b8_b, s390_vselb, 0, 0, BT_OV_BV16QI_BV16QI_BV16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_s8_a, s390_vselb, 0, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_s8_b, s390_vselb, 0, 0, BT_OV_V16QI_V16QI_V16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_u8_a, s390_vselb, 0, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_u8_b, s390_vselb, 0, 0, BT_OV_UV16QI_UV16QI_UV16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_b16_a, s390_vselh, 0, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_b16_b, s390_vselh, 0, 0, BT_OV_BV8HI_BV8HI_BV8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_s16_a, s390_vselh, 0, 0, BT_OV_V8HI_V8HI_V8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_s16_b, s390_vselh, 0, 0, BT_OV_V8HI_V8HI_V8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_u16_a, s390_vselh, 0, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_u16_b, s390_vselh, 0, 0, BT_OV_UV8HI_UV8HI_UV8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_b32_a, s390_vself, 0, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_b32_b, s390_vself, 0, 0, BT_OV_BV4SI_BV4SI_BV4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_s32_a, s390_vself, 0, 0, BT_OV_V4SI_V4SI_V4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_s32_b, s390_vself, 0, 0, BT_OV_V4SI_V4SI_V4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_u32_a, s390_vself, 0, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_u32_b, s390_vself, 0, 0, BT_OV_UV4SI_UV4SI_UV4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_b64_a, s390_vselg, 0, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_b64_b, s390_vselg, 0, 0, BT_OV_BV2DI_BV2DI_BV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_s64_a, s390_vselg, 0, 0, BT_OV_V2DI_V2DI_V2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_s64_b, s390_vselg, 0, 0, BT_OV_V2DI_V2DI_V2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_u64_a, s390_vselg, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_u64_b, s390_vselg, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_flt_a, s390_vself_flt, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF_UV4SI)
+OB_DEF_VAR (s390_vec_sel_flt_b, s390_vself_flt, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF_BV4SI)
+OB_DEF_VAR (s390_vec_sel_dbl_a, s390_vselg_dbl, 0, 0, BT_OV_V2DF_V2DF_V2DF_UV2DI)
+OB_DEF_VAR (s390_vec_sel_dbl_b, s390_vselg_dbl, 0, 0, BT_OV_V2DF_V2DF_V2DF_BV2DI)
+
+B_DEF (s390_vselb, vselv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
+B_DEF (s390_vselh, vselv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
+B_DEF (s390_vself, vselv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
+B_DEF (s390_vselg, vselv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI_UV2DI)
+B_DEF (s390_vself_flt, vselv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF_V4SF_UV4SI)
+B_DEF (s390_vselg_dbl, vselv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF_V2DF_UV2DI)
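+
+/* Illustrative example (not part of the original patch): with the
+   mapping above, vec_sel (a, b, c) on vector unsigned long long
+   operands resolves to s390_vec_sel_u64_a; after s390-c.c swaps the
+   first two operands, it expands through s390_vselg to the vselv2di
+   pattern, computing (b & c) | (a & ~c).  */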
OB_DEF (s390_vec_extend_s64, s390_vec_extend_s64_s8,s390_vec_extend_s64_s32,B_VX, BT_FN_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_extend_s64_s8, s390_vsegb, 0, 0, BT_OV_V2DI_V16QI)
; Vector select
-; Operand 3 selects bits from either OP1 (0) or OP2 (1)
-; Comparison operator should not matter as long as we always use the same ?!
-; Operands 1 and 2 are swapped in order to match the altivec builtin.
-; If operand 3 is a const_int bitmask this would be vec_merge
+; for all b in bits: op0[b] = op3[b] == 0 ? op2[b] : op1[b]
+; implemented as: op0 = (op1 & op3) | (op2 & ~op3)
+; Used to expand the vec_sel builtin.  Operands op1 and op2 have
+; already been swapped in s390-c.c by the time we get here.
-(define_expand "vec_sel<mode>"
- [(set (match_operand:V_HW 0 "register_operand" "")
- (if_then_else:V_HW
- (eq (match_operand:<tointvec> 3 "register_operand" "")
- (match_dup 4))
- (match_operand:V_HW 2 "register_operand" "")
- (match_operand:V_HW 1 "register_operand" "")))]
+(define_insn "vsel<mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "=v")
+ (ior:V_HW
+ (and:V_HW (match_operand:V_HW 1 "register_operand" "v")
+ (match_operand:V_HW 3 "register_operand" "v"))
+ (and:V_HW (not:V_HW (match_dup 3))
+ (match_operand:V_HW 2 "register_operand" "v"))))]
"TARGET_VX"
-{
- operands[4] = CONST0_RTX (<tointvec>mode);
-})
+ "vsel\t%v0,%1,%2,%3"
+ [(set_attr "op_type" "VRR")])
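+; Example for the bitops above (illustrative, not from the original
+; patch): op1 = 0b1100, op2 = 0b1010, op3 = 0b0110 gives
+; (op1 & op3) = 0b0100 and (op2 & ~op3) = 0b1000, hence op0 = 0b1100.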
; Vector sign extend to doubleword
+2020-04-20 Andreas Krebbel <krebbel@linux.ibm.com>
+
+ PR target/94613
+ * gcc.target/s390/zvector/pr94613.c: New test.
+ * gcc.target/s390/zvector/vec_sel-1.c: New test.
+
2020-04-20 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/cost_model_8.c: New test.
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/pr94613.c
+/* { dg-do run } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector -std=gnu99 --save-temps" } */
+
+#include <vecintrin.h>
+
+/* The initial implementation of vec_sel used an IF_THEN_ELSE rtx.
+ This did NOT match what the vsel instruction does. vsel is a
+ bit-wise operation. Using IF_THEN_ELSE allowed the + operation to be
+ simplified away in combine. A plus operation affects other bits in
+ the same element. Hence per-element simplifications are wrong for
+ vsel. */
+vector unsigned char __attribute__((noinline))
+foo (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+ return vec_sel (a + b, c, a);
+}
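+
+/* Worked example (values as used in main below): a = b = c = { 1, 0, ... }.
+   In element 0, a + b = 2 = 0b10 and the mask a = 0b01 selects bit 0
+   from c (1) and all other bits from a + b (0b10), so the bit-wise
+   result must be 0b11 = 3.  */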
+
+/* FIXME: The OR operation should still be optimized away in this
+   case: wherever a mask bit from A is zero, the corresponding bit of
+   A | B equals the bit of B, so vec_sel (a | b, c, a) is equivalent
+   to vec_sel (b, c, a).  */
+vector unsigned char __attribute__((noinline))
+bar (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+ return vec_sel (a | b, c, a);
+}
+
+int
+main ()
+{
+ vector unsigned char v = (vector unsigned char){ 1 };
+
+ if (foo (v, v, v)[0] != 3)
+ __builtin_abort ();
+
+ if (bar (v, v, v)[0] != 1)
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec_sel-1.c
+/* { dg-do run } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps -Wno-attributes" } */
+
+#include <string.h>
+#include <vecintrin.h>
+
+typedef vector signed char v16qi;
+typedef vector unsigned char uv16qi;
+typedef vector bool char bv16qi;
+
+typedef vector signed short int v8hi;
+typedef vector unsigned short int uv8hi;
+typedef vector bool short int bv8hi;
+
+typedef vector signed int v4si;
+typedef vector unsigned int uv4si;
+typedef vector bool int bv4si;
+
+typedef vector signed long long v2di;
+typedef vector unsigned long long uv2di;
+typedef vector bool long long bv2di;
+
+typedef vector float v4sf;
+typedef vector double v2df;
+
+#define NUM_CONSTS 8
+
+const v16qi v16qi_vals[NUM_CONSTS] =
+ { (v16qi){ 1 },
+ (v16qi){ 2 },
+ (v16qi){ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 },
+ (v16qi){ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 },
+ (v16qi){ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 },
+ (v16qi){ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+ (v16qi){ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 },
+ (v16qi){ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1 }
+ };
+
+const v8hi v8hi_vals[NUM_CONSTS] =
+ { (v8hi){ 1 },
+ (v8hi){ 2 },
+ (v8hi){ 1,1,1,1,1,1,1,1 },
+ (v8hi){ 2,2,2,2,2,2,2,2 },
+ (v8hi){ -1,-1,-1,-1,-1,-1,-1,-1 },
+ (v8hi){ 0,0,0,0,0,0,0,0 },
+ (v8hi){ 1,2,3,4,5,6,7,8 },
+ (v8hi){ 8,7,6,5,4,3,2,1 }
+ };
+
+const v4si v4si_vals[NUM_CONSTS] =
+ { (v4si){ 1 },
+ (v4si){ 2 },
+ (v4si){ 1,1,1,1 },
+ (v4si){ 2,2,2,2 },
+ (v4si){ -1,-1,-1,-1 },
+ (v4si){ 0,0,0,0 },
+ (v4si){ 1,2,3,4 },
+ (v4si){ 4,3,2,1 }
+ };
+
+const v2di v2di_vals[NUM_CONSTS] =
+ { (v2di){ 1 },
+ (v2di){ 2 },
+ (v2di){ 1,1 },
+ (v2di){ 2,2 },
+ (v2di){ -1,-1 },
+ (v2di){ 0,0 },
+ (v2di){ 1,2 },
+ (v2di){ 2,1 }
+ };
+
+const v4sf v4sf_vals[NUM_CONSTS] =
+ { (v4sf){ 1.0f },
+ (v4sf){ 2.0f },
+ (v4sf){ 1.0f,1.0f,1.0f,1.0f },
+ (v4sf){ 2.0f,2.0f,2.0f,2.0f },
+ (v4sf){ -1.0f,-1.0f,-1.0f,-1.0f },
+ (v4sf){ 0.0f,0.0f,0.0f,0.0f },
+ (v4sf){ 1.1f,2.1f,3.1f,4.1f },
+ (v4sf){ 4.1f,3.1f,2.1f,1.1f }
+ };
+
+const v2df v2df_vals[NUM_CONSTS] =
+ { (v2df){ 1.0 },
+ (v2df){ 2.0 },
+ (v2df){ 1.0,1.0 },
+ (v2df){ 2.0,2.0 },
+ (v2df){ -1.0,-1.0 },
+ (v2df){ 0.0,0.0 },
+ (v2df){ 1.1,2.1 },
+ (v2df){ 2.1,1.1 }
+ };
+
+/* Each bit of the result vector has the value of the corresponding
+ bit of A if the corresponding bit of C is 0, or the value of the
+ corresponding bit of B otherwise. */
+void __attribute__((noinline, noclone, target ("arch=zEC12")))
+emul (unsigned char *result, unsigned char *a,
+ unsigned char *b, unsigned char *c)
+{
+ for (int i = 0; i < 16; i++)
+ result[i] = (a[i] & ~c[i]) | (b[i] & c[i]);
+}
+
+#define GENFUNC(NAME, T1, T2) \
+ T1 __attribute__((noinline, noclone)) \
+ NAME##_reg (T1 a, T1 b, T2 c) { return vec_sel (a, b, c); } \
+ void __attribute__((noinline, noclone)) \
+ NAME##_mem (T1 *a, T1 *b, T2 *c, T1 *out) { *out = vec_sel (*a, *b, *c); } \
+ T1 __attribute__((always_inline)) \
+ NAME##_const (T1 a, T1 b, T2 c) { return vec_sel (a, b, c); }
+
+GENFUNC (vec_sel_b8_a, bv16qi, uv16qi)
+GENFUNC (vec_sel_b8_b, bv16qi, bv16qi)
+GENFUNC (vec_sel_s8_a, v16qi, uv16qi)
+GENFUNC (vec_sel_s8_b, v16qi, bv16qi)
+GENFUNC (vec_sel_u8_a, uv16qi, uv16qi)
+GENFUNC (vec_sel_u8_b, uv16qi, bv16qi)
+
+GENFUNC (vec_sel_b16_a, bv8hi, uv8hi)
+GENFUNC (vec_sel_b16_b, bv8hi, bv8hi)
+GENFUNC (vec_sel_s16_a, v8hi, uv8hi)
+GENFUNC (vec_sel_s16_b, v8hi, bv8hi)
+GENFUNC (vec_sel_u16_a, uv8hi, uv8hi)
+GENFUNC (vec_sel_u16_b, uv8hi, bv8hi)
+
+GENFUNC (vec_sel_b32_a, bv4si, uv4si)
+GENFUNC (vec_sel_b32_b, bv4si, bv4si)
+GENFUNC (vec_sel_s32_a, v4si, uv4si)
+GENFUNC (vec_sel_s32_b, v4si, bv4si)
+GENFUNC (vec_sel_u32_a, uv4si, uv4si)
+GENFUNC (vec_sel_u32_b, uv4si, bv4si)
+
+GENFUNC (vec_sel_b64_a, bv2di, uv2di)
+GENFUNC (vec_sel_b64_b, bv2di, bv2di)
+GENFUNC (vec_sel_s64_a, v2di, uv2di)
+GENFUNC (vec_sel_s64_b, v2di, bv2di)
+GENFUNC (vec_sel_u64_a, uv2di, uv2di)
+GENFUNC (vec_sel_u64_b, uv2di, bv2di)
+
+GENFUNC (vec_sel_flt_a, v4sf, uv4si)
+GENFUNC (vec_sel_flt_b, v4sf, bv4si)
+
+GENFUNC (vec_sel_dbl_a, v2df, uv2di)
+GENFUNC (vec_sel_dbl_b, v2df, bv2di)
+
+#define TESTFUNC(NAME, T1, T2, VAL_TYPE) \
+ for (int i = 0; i < NUM_CONSTS; i++) \
+ for (int j = 0; j < NUM_CONSTS; j++) \
+ for (int k = 0; k < NUM_CONSTS; k++) \
+ { \
+ unsigned char result[16]; \
+ T1 in1 = (T1)VAL_TYPE##_vals[i]; \
+ T1 in2 = (T1)VAL_TYPE##_vals[j]; \
+ T2 in3 = (T2)VAL_TYPE##_vals[k]; \
+ emul (result, (char*)&in1, (char*)&in2, (char*)&in3); \
+ \
+ T1 reg = NAME##_reg (in1, in2, in3); \
+ if (memcmp ((char*)&reg, result, 16) != 0) \
+ __builtin_abort (); \
+ \
+ T1 mem; \
+ NAME##_mem (&in1, &in2, &in3, &mem); \
+ if (memcmp ((char*)&mem, result, 16) != 0) \
+ __builtin_abort (); \
+ \
+ T1 cons = NAME##_const (in1, in2, in3); \
+ if (memcmp ((char*)&cons, result, 16) != 0) \
+ __builtin_abort (); \
+ }
+
+int
+main ()
+{
+ TESTFUNC (vec_sel_b8_a, bv16qi, uv16qi, v16qi);
+ TESTFUNC (vec_sel_b8_b, bv16qi, bv16qi, v16qi);
+ TESTFUNC (vec_sel_s8_a, v16qi, uv16qi, v16qi);
+ TESTFUNC (vec_sel_s8_b, v16qi, bv16qi, v16qi);
+ TESTFUNC (vec_sel_u8_a, uv16qi, uv16qi, v16qi);
+ TESTFUNC (vec_sel_u8_b, uv16qi, bv16qi, v16qi);
+
+ TESTFUNC (vec_sel_b16_a, bv8hi, uv8hi, v8hi);
+ TESTFUNC (vec_sel_b16_b, bv8hi, bv8hi, v8hi);
+ TESTFUNC (vec_sel_s16_a, v8hi, uv8hi, v8hi);
+ TESTFUNC (vec_sel_s16_b, v8hi, bv8hi, v8hi);
+ TESTFUNC (vec_sel_u16_a, uv8hi, uv8hi, v8hi);
+ TESTFUNC (vec_sel_u16_b, uv8hi, bv8hi, v8hi);
+
+ TESTFUNC (vec_sel_b32_a, bv4si, uv4si, v4si);
+ TESTFUNC (vec_sel_b32_b, bv4si, bv4si, v4si);
+ TESTFUNC (vec_sel_s32_a, v4si, uv4si, v4si);
+ TESTFUNC (vec_sel_s32_b, v4si, bv4si, v4si);
+ TESTFUNC (vec_sel_u32_a, uv4si, uv4si, v4si);
+ TESTFUNC (vec_sel_u32_b, uv4si, bv4si, v4si);
+
+ TESTFUNC (vec_sel_b64_a, bv2di, uv2di, v2di);
+ TESTFUNC (vec_sel_b64_b, bv2di, bv2di, v2di);
+ TESTFUNC (vec_sel_s64_a, v2di, uv2di, v2di);
+ TESTFUNC (vec_sel_s64_b, v2di, bv2di, v2di);
+ TESTFUNC (vec_sel_u64_a, uv2di, uv2di, v2di);
+ TESTFUNC (vec_sel_u64_b, uv2di, bv2di, v2di);
+
+ TESTFUNC (vec_sel_flt_a, v4sf, uv4si, v4sf);
+ TESTFUNC (vec_sel_flt_b, v4sf, bv4si, v4sf);
+
+ TESTFUNC (vec_sel_dbl_a, v2df, uv2di, v2df);
+ TESTFUNC (vec_sel_dbl_b, v2df, bv2di, v2df);
+}
+
+/* { dg-final { scan-assembler {\n\tvsel\t} } } */