+2019-10-18 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
+ Richard Sandiford <richard.sandiford@arm.com>
+
+ PR target/86753
+ * tree-vectorizer.h (scalar_cond_masked_key): New struct,
+ and define default hash traits for it.
+ (loop_vec_info::scalar_cond_masked_set): New member.
+ (vect_record_loop_mask): Adjust prototype.
+ * tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
+ Implement method.
+ * tree-vect-loop.c (vectorizable_reduction): Pass NULL as last arg to
+ vect_record_loop_mask.
+ (vectorizable_live_operation): Likewise.
+ (vect_record_loop_mask): New param scalar_mask.  Add the scalar
+ condition to scalar_cond_masked_set if scalar_mask is non-NULL.
+ * tree-vect-stmts.c (check_load_store_masking): New param scalar_mask.
+ Pass it as last arg to vect_record_loop_mask.
+ (vectorizable_call): Pass scalar_mask as last arg to
+ vect_record_loop_mask.
+ (vectorizable_store): Pass mask as last arg to
+ check_load_store_masking.
+ (vectorizable_load): Likewise.
+ (vectorizable_condition): Check if another part of the vectorized
+ code applies loop_mask to the condition or to its inverse, and if
+ so, apply loop_mask to the result of the vector comparison.
+
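For context, here is a minimal sketch of the kind of scalar loop this change targets, adapted from the GIMPLE example quoted in the vectorizable_condition comment below; the function and variable names are illustrative only:

/* Illustrative only: under fully-masked SVE vectorization, the condition
   y[i] != 0 is needed both for the masked load of z[i] and for the
   VEC_COND_EXPR that selects between z[i] and 10.  With this change the
   vectorizer reuses one loop-masked comparison for both, instead of
   emitting masked and unmasked forms of the same comparison.  */
void
f (int *__restrict x, int *__restrict y, int *__restrict z, int n)
{
  for (int i = 0; i < n; ++i)
    x[i] = y[i] ? z[i] : 10;
}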
2019-10-17 John David Anglin <danglin@gcc.gnu.org>
* config/pa/pa.c (pa_output_indirect_call): Fix typos in last change.
+2019-10-18 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
+ Richard Sandiford <richard.sandiford@arm.com>
+
+ PR target/86753
+ * gcc.target/aarch64/sve/cond_cnot_2.c: Remove XFAIL
+ from { scan-assembler-not {\tsel\t} }.
+ * gcc.target/aarch64/sve/cond_convert_1.c: Adjust to make
+ only one load conditional.
+ * gcc.target/aarch64/sve/cond_convert_4.c: Likewise.
+ * gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_4.c: Remove XFAILs.
+ * gcc.target/aarch64/sve/vcond_5.c: Likewise.
+
2019-10-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/92056
/* { dg-final { scan-assembler-not {\tmov\tz} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* Currently we canonicalize the ?: so that !b[i] is the "false" value. */
-/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
INT_TYPE *__restrict pred, int n) \
{ \
for (int i = 0; i < n; ++i) \
- r[i] = pred[i] ? (FLOAT_TYPE) a[i] : b[i]; \
+ { \
+ FLOAT_TYPE bi = b[i]; \
+ r[i] = pred[i] ? (FLOAT_TYPE) a[i] : bi; \
+ } \
}
#define TEST_ALL(T) \
INT_TYPE *__restrict pred, int n) \
{ \
for (int i = 0; i < n; ++i) \
- r[i] = pred[i] ? (INT_TYPE) a[i] : b[i]; \
+ { \
+ INT_TYPE bi = b[i]; \
+ r[i] = pred[i] ? (INT_TYPE) a[i] : bi; \
+ } \
}
#define TEST_ALL(T) \
TYPE *__restrict pred, int n) \
{ \
for (int i = 0; i < n; ++i) \
- r[i] = pred[i] ? OP (a[i]) : b[i]; \
+ { \
+ TYPE bi = b[i]; \
+ r[i] = pred[i] ? OP (a[i]) : bi; \
+ } \
}
#define TEST_INT_TYPE(T, TYPE) \
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
/* 5 for lt, 5 for ult and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for gt, 5 for ugt and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
/* 3 loops * 5 invocations for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
/* 3 loops * 5 invocations for ordered, unordered and ueq. */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
}
else
vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
- vectype_in);
+ vectype_in, NULL);
}
if (dump_enabled_p ()
&& reduction_type == FOLD_LEFT_REDUCTION)
gcc_assert (ncopies == 1 && !slp_node);
vect_record_loop_mask (loop_vinfo,
&LOOP_VINFO_MASKS (loop_vinfo),
- 1, vectype);
+ 1, vectype, NULL);
}
}
return true;
/* Record that a fully-masked version of LOOP_VINFO would need MASKS to
contain a sequence of NVECTORS masks that each control a vector of type
- VECTYPE. */
+ VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND
+ these vector masks with the vector version of SCALAR_MASK. */
void
vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
- unsigned int nvectors, tree vectype)
+ unsigned int nvectors, tree vectype, tree scalar_mask)
{
gcc_assert (nvectors != 0);
if (masks->length () < nvectors)
unsigned int nscalars_per_iter
= exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+
+ if (scalar_mask)
+ {
+ scalar_cond_masked_key cond (scalar_mask, nvectors);
+ loop_vinfo->scalar_cond_masked_set.add (cond);
+ }
+
if (rgm->max_nscalars_per_iter < nscalars_per_iter)
{
rgm->max_nscalars_per_iter = nscalars_per_iter;
says how the load or store is going to be implemented and GROUP_SIZE
is the number of load or store statements in the containing group.
If the access is a gather load or scatter store, GS_INFO describes
- its arguments.
+ its arguments. If the load or store is conditional, SCALAR_MASK is the
+ condition under which it occurs.
Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
supported, otherwise record the required mask types. */
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
vec_load_store_type vls_type, int group_size,
vect_memory_access_type memory_access_type,
- gather_scatter_info *gs_info)
+ gather_scatter_info *gs_info, tree scalar_mask)
{
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
return;
}
return;
}
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
return;
}
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned int nvectors;
if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
+ vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
else
gcc_unreachable ();
}
unsigned int nvectors = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
: ncopies);
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
+ tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+ vect_record_loop_mask (loop_vinfo, masks, nvectors,
+ vectype_out, scalar_mask);
}
return true;
}
if (loop_vinfo
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
- memory_access_type, &gs_info);
+ memory_access_type, &gs_info, mask);
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
if (loop_vinfo
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
- memory_access_type, &gs_info);
+ memory_access_type, &gs_info, mask);
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
/* Handle cond expr. */
for (j = 0; j < ncopies; j++)
{
+ tree loop_mask = NULL_TREE;
+ bool swap_cond_operands = false;
+
+ /* See whether another part of the vectorized code applies a loop
+ mask to the condition, or to its inverse. */
+
+ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ scalar_cond_masked_key cond (cond_expr, ncopies);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+ {
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
+ }
+ else
+ {
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+ cond.code = invert_tree_comparison (cond.code, honor_nans);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+ {
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
+ vectype, j);
+ cond_code = cond.code;
+ swap_cond_operands = true;
+ }
+ }
+ }
+
stmt_vec_info new_stmt_info = NULL;
if (j == 0)
{
vec_then_clause = vec_oprnds2[i];
vec_else_clause = vec_oprnds3[i];
+ if (swap_cond_operands)
+ std::swap (vec_then_clause, vec_else_clause);
+
if (masked)
vec_compare = vec_cond_lhs;
else
}
}
}
+
+ /* If we decided to apply a loop mask to the result of the vector
+ comparison, AND the comparison with the mask now. Later passes
+   should then be able to reuse the AND results between multiple
+ vector statements.
+
+ For example:
+ for (int i = 0; i < 100; ++i)
+ x[i] = y[i] ? z[i] : 10;
+
+ results in following optimized GIMPLE:
+
+ mask__35.8_43 = vect__4.7_41 != { 0, ... };
+ vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
+ _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
+ vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
+ vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
+ vect_iftmp.11_47, { 10, ... }>;
+
+      instead of using masked and unmasked forms of
+ vec != { 0, ... } (masked in the MASK_LOAD,
+ unmasked in the VEC_COND_EXPR). */
+
+ if (loop_mask)
+ {
+ if (COMPARISON_CLASS_P (vec_compare))
+ {
+ tree tmp = make_ssa_name (vec_cmp_type);
+ tree op0 = TREE_OPERAND (vec_compare, 0);
+ tree op1 = TREE_OPERAND (vec_compare, 1);
+ gassign *g = gimple_build_assign (tmp,
+ TREE_CODE (vec_compare),
+ op0, op1);
+ vect_finish_stmt_generation (stmt_info, g, gsi);
+ vec_compare = tmp;
+ }
+
+ tree tmp2 = make_ssa_name (vec_cmp_type);
+ gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
+ vec_compare, loop_mask);
+ vect_finish_stmt_generation (stmt_info, g, gsi);
+ vec_compare = tmp2;
+ }
+
if (reduction_type == EXTRACT_LAST_REDUCTION)
{
if (!is_gimple_val (vec_compare))
{
return new pass_ipa_increase_alignment (ctxt);
}
+
+/* If the condition represented by T is a comparison or the SSA name
+ result of a comparison, extract the comparison's operands. Represent
+ T as NE_EXPR <T, 0> otherwise. */
+
+void
+scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
+{
+ if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
+ {
+ this->code = TREE_CODE (t);
+ this->op0 = TREE_OPERAND (t, 0);
+ this->op1 = TREE_OPERAND (t, 1);
+ return;
+ }
+
+ if (TREE_CODE (t) == SSA_NAME)
+ if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
+ {
+ tree_code code = gimple_assign_rhs_code (stmt);
+ if (TREE_CODE_CLASS (code) == tcc_comparison)
+ {
+ this->code = code;
+ this->op0 = gimple_assign_rhs1 (stmt);
+ this->op1 = gimple_assign_rhs2 (stmt);
+ return;
+ }
+ }
+
+ this->code = NE_EXPR;
+ this->op0 = t;
+ this->op1 = build_zero_cst (TREE_TYPE (t));
+}
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
+/* Key for the hash set that records which scalar conditions have had
+   a loop mask applied to them; populated by vect_record_loop_mask.  */
+struct scalar_cond_masked_key
+{
+ scalar_cond_masked_key (tree t, unsigned ncopies_)
+ : ncopies (ncopies_)
+ {
+ get_cond_ops_from_tree (t);
+ }
+
+ void get_cond_ops_from_tree (tree);
+
+ unsigned ncopies;
+ tree_code code;
+ tree op0;
+ tree op1;
+};
+
+template<>
+struct default_hash_traits<scalar_cond_masked_key>
+{
+ typedef scalar_cond_masked_key compare_type;
+ typedef scalar_cond_masked_key value_type;
+
+ static inline hashval_t
+ hash (value_type v)
+ {
+ inchash::hash h;
+ h.add_int (v.code);
+ inchash::add_expr (v.op0, h, 0);
+ inchash::add_expr (v.op1, h, 0);
+ h.add_int (v.ncopies);
+ return h.end ();
+ }
+
+ static inline bool
+ equal (value_type existing, value_type candidate)
+ {
+ return (existing.ncopies == candidate.ncopies
+ && existing.code == candidate.code
+ && operand_equal_p (existing.op0, candidate.op0, 0)
+ && operand_equal_p (existing.op1, candidate.op1, 0));
+ }
+
+ static inline void
+ mark_empty (value_type &v)
+ {
+ v.ncopies = 0;
+ }
+
+ static inline bool
+ is_empty (value_type v)
+ {
+ return v.ncopies == 0;
+ }
+
+ static inline void mark_deleted (value_type &) {}
+
+ static inline bool is_deleted (const value_type &)
+ {
+ return false;
+ }
+
+ static inline void remove (value_type &) {}
+};
+
+typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
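To make the intended use of this set concrete, the following is a rough sketch (not part of the patch) of how the producer and consumer sides interact.  The helper names note_masked_cond and cond_is_masked_elsewhere are hypothetical; scalar_cond_masked_key, hash_set::add/contains, invert_tree_comparison and HONOR_NANS are the interfaces the patch itself uses:

/* Illustrative sketch only, mirroring the vect_record_loop_mask and
   vectorizable_condition changes in this patch.  */

/* Producer: a masked load/store/call records the scalar condition it is
   guarded by, keyed on the condition and the number of mask copies.  */
static void
note_masked_cond (loop_vec_info loop_vinfo, tree scalar_mask,
		  unsigned int nvectors)
{
  scalar_cond_masked_key cond (scalar_mask, nvectors);
  loop_vinfo->scalar_cond_masked_set.add (cond);
}

/* Consumer: vectorizable_condition asks whether the condition (or its
   inverse) already has a loop mask applied elsewhere, so that it can AND
   the vector comparison with that mask and let later passes CSE it.  */
static bool
cond_is_masked_elsewhere (loop_vec_info loop_vinfo, tree cond_expr,
			  unsigned int ncopies, bool *invert_p)
{
  scalar_cond_masked_key cond (cond_expr, ncopies);
  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
    {
      *invert_p = false;
      return true;
    }
  cond.code = invert_tree_comparison (cond.code,
				      HONOR_NANS (TREE_TYPE (cond.op0)));
  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
    {
      *invert_p = true;
      return true;
    }
  return false;
}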
/* Describes two objects whose addresses must be unequal for the vectorized
loop to be valid. */
on inactive scalars. */
vec_loop_masks masks;
+  /* Set of scalar conditions that have a loop mask applied.  */
+ scalar_cond_masked_set_type scalar_cond_masked_set;
+
/* If we are using a loop mask to align memory addresses, this variable
contains the number of vector elements that we should skip in the
first iteration of the vector loop (i.e. the number of leading
extern tree vect_halve_mask_nunits (tree);
extern tree vect_double_mask_nunits (tree);
extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
- unsigned int, tree);
+ unsigned int, tree, tree);
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
unsigned int, tree, unsigned int);
extern stmt_vec_info info_for_reduction (stmt_vec_info);