--- /dev/null
+/* PR71488 */
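+/* Test vectorization of comparisons between boolean results of
+   comparisons on int, short and long long operands.  */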
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
+
+int i1, i2;
+
+void __attribute__((noclone,noinline))
+fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+int eq (int i1, int i2) { return i1 == i2; }
+int ne (int i1, int i2) { return i1 != i2; }
+int lt (int i1, int i2) { return i1 < i2; }
+int le (int i1, int i2) { return i1 <= i2; }
+int gt (int i1, int i2) { return i1 > i2; }
+int ge (int i1, int i2) { return i1 >= i2; }
+
+typedef int (*cmp_fn)(int, int);
+
+void
+check (int *p, cmp_fn fn)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ int t1 = ((i % 4) > 1) == 0;
+ int t2 = (i % 2) == 0;
+ int res = fn (t1, t2) + t1;
+ if (p[i] != res)
+ __builtin_abort ();
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ int i1[32], i2[32], res[32];
+ short s2[32];
+ long long l2[32];
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ l2[i] = i2[i] = s2[i] = i % 2;
+ i1[i] = (i % 4) > 1;
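+ /* The empty asm is an optimization barrier: it keeps this
+ initialization loop from being vectorized or folded away.  */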
+ asm ("":::"memory");
+ }
+
+ fn1 (res, i1, i2, 32);
+ check (res, gt);
+ fn2 (res, i1, s2, 32);
+ check (res, gt);
+ fn3 (res, i1, l2, 32);
+ check (res, gt);
+
+ fn4 (res, i1, i2, 32);
+ check (res, ge);
+ fn5 (res, i1, s2, 32);
+ check (res, ge);
+ fn6 (res, i1, l2, 32);
+ check (res, ge);
+
+ fn7 (res, i1, i2, 32);
+ check (res, lt);
+ fn8 (res, i1, s2, 32);
+ check (res, lt);
+ fn9 (res, i1, l2, 32);
+ check (res, lt);
+
+ fn10 (res, i1, i2, 32);
+ check (res, le);
+ fn11 (res, i1, s2, 32);
+ check (res, le);
+ fn12 (res, i1, l2, 32);
+ check (res, le);
+
+ fn13 (res, i1, i2, 32);
+ check (res, eq);
+ fn14 (res, i1, s2, 32);
+ check (res, eq);
+ fn15 (res, i1, l2, 32);
+ check (res, eq);
+
+ fn16 (res, i1, i2, 32);
+ check (res, ne);
+ fn17 (res, i1, s2, 32);
+ check (res, ne);
+ fn18 (res, i1, l2, 32);
+ check (res, ne);
+}
+
+/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target { i?86-*-* x86_64-*-* } } } } */
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
unsigned nunits;
int ncopies;
- enum tree_code code;
+ enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
stmt_vec_info prev_stmt_info = NULL;
int i, j;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
return false;
+ /* Can't compare mask and non-mask types. */
+ if (vectype1 && vectype2
+ && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
+ return false;
+
+ /* Boolean values may have another representation in vectors
+ and therefore we prefer bit operations over comparison for
+ them (which also works for scalar masks). We store opcodes
+ to use in bitop1 and bitop2. Statement is vectorized as
+ BITOP2 (rhs1 BITOP1 rhs2) or
+ rhs1 BITOP2 (BITOP1 rhs2)
+ depending on bitop1 and bitop2 arity. */
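+ /* For instance, GT_EXPR is emitted as rhs1 & ~rhs2, LE_EXPR as
+ ~rhs1 | rhs2 and EQ_EXPR as ~(rhs1 ^ rhs2).  */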
+ if (VECTOR_BOOLEAN_TYPE_P (vectype))
+ {
+ if (code == GT_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_AND_EXPR;
+ }
+ else if (code == GE_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_IOR_EXPR;
+ }
+ else if (code == LT_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_AND_EXPR;
+ std::swap (rhs1, rhs2);
+ }
+ else if (code == LE_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_IOR_EXPR;
+ std::swap (rhs1, rhs2);
+ }
+ else
+ {
+ bitop1 = BIT_XOR_EXPR;
+ if (code == EQ_EXPR)
+ bitop2 = BIT_NOT_EXPR;
+ }
+ }
+
if (!vec_stmt)
{
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
- return expand_vec_cmp_expr_p (vectype, mask_type);
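+ /* With a second bit operation each copy needs two statements.  */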
+ vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+ dts, NULL, NULL);
+ if (bitop1 == NOP_EXPR)
+ return expand_vec_cmp_expr_p (vectype, mask_type);
+ else
+ {
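+ /* Check that the target supports the bit operations used in place
+ of the vector comparison.  */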
+ machine_mode mode = TYPE_MODE (vectype);
+ optab optab;
+
+ optab = optab_for_tree_code (bitop1, vectype, optab_default);
+ if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
+ return false;
+
+ if (bitop2 != NOP_EXPR)
+ {
+ optab = optab_for_tree_code (bitop2, vectype, optab_default);
+ if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
+ return false;
+ }
+ return true;
+ }
}
/* Transform. */
vec_rhs2 = vec_oprnds1[i];
new_temp = make_ssa_name (mask);
- new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
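+ /* Emit the comparison itself, or the one or two bit operations
+ that replace it for boolean vectors.  */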
+ if (bitop1 == NOP_EXPR)
+ {
+ new_stmt = gimple_build_assign (new_temp, code,
+ vec_rhs1, vec_rhs2);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ else
+ {
+ if (bitop1 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
+ else
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
+ vec_rhs2);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (bitop2 != NOP_EXPR)
+ {
+ tree res = make_ssa_name (mask);
+ if (bitop2 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (res, bitop2, new_temp);
+ else
+ new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
+ new_temp);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ }
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}