                                              vect_epilogue);
            }
        }
+      else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+        {
+          unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
+          /* Extraction of scalar elements.  */
+          epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
+                                          vec_to_scalar, stmt_info, 0,
+                                          vect_epilogue);
+          /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
+          epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
+                                          scalar_stmt, stmt_info, 0,
+                                          vect_epilogue);
+        }
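
(Worked example of the two counts above: with a four-lane vector, nunits == 4, the fallback epilogue added later in this patch extracts one index element and one data element per lane, i.e. 2 * 4 = 8 vec_to_scalar operations, and then combines the lanes with three COND_EXPR selects plus two MAX_EXPRs on the index values, i.e. 2 * 4 - 3 = 5 scalar statements; the MAX_EXPR for the last lane is never needed.)
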
      else
        {
          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          int element_bitsize = tree_to_uhwi (bitsize);
          int nelements = vec_size_in_bits / element_bitsize;
+          if (code == COND_EXPR)
+            code = MAX_EXPR;
+
          optab = optab_for_tree_code (code, vectype, optab_default);
          /* We have a whole vector shift available.  */
-          if (VECTOR_MODE_P (mode)
+          if (optab != unknown_optab
+              && VECTOR_MODE_P (mode)
              && optab_handler (optab, mode) != CODE_FOR_nothing
              && have_whole_vector_shift (mode))
            {
  else
    new_phi_result = PHI_RESULT (new_phis[0]);
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+      && reduc_code != ERROR_MARK)
    {
      /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
         various data values where the condition matched and another vector
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      scalar_results.safe_push (new_temp);
    }
+  else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+           && reduc_code == ERROR_MARK)
+    {
+      /* Condition reduction without supported REDUC_MAX_EXPR.  Generate
+         idx = 0;
+         idx_val = induction_index[0];
+         val = data_reduc[0];
+         for (idx = 0, val = init, i = 0; i < nelts; ++i)
+           if (induction_index[i] > idx_val)
+             val = data_reduc[i], idx_val = induction_index[i];
+         return val;  */
+
+      tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
+      tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
+      unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
+      unsigned HOST_WIDE_INT v_size
+        = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+      tree idx_val = NULL_TREE, val = NULL_TREE;
+      for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
+        {
+          tree old_idx_val = idx_val;
+          tree old_val = val;
+          idx_val = make_ssa_name (idx_eltype);
+          epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
+                                             build3 (BIT_FIELD_REF, idx_eltype,
+                                                     induction_index,
+                                                     bitsize_int (el_size),
+                                                     bitsize_int (off)));
+          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+          val = make_ssa_name (data_eltype);
+          epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
+                                             build3 (BIT_FIELD_REF,
+                                                     data_eltype,
+                                                     new_phi_result,
+                                                     bitsize_int (el_size),
+                                                     bitsize_int (off)));
+          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+          if (off != 0)
+            {
+              tree new_idx_val = idx_val;
+              tree new_val = val;
+              if (off != v_size - el_size)
+                {
+                  new_idx_val = make_ssa_name (idx_eltype);
+                  epilog_stmt = gimple_build_assign (new_idx_val,
+                                                     MAX_EXPR, idx_val,
+                                                     old_idx_val);
+                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+                }
+              new_val = make_ssa_name (data_eltype);
+              epilog_stmt = gimple_build_assign (new_val,
+                                                 COND_EXPR,
+                                                 build2 (GT_EXPR,
+                                                         boolean_type_node,
+                                                         idx_val,
+                                                         old_idx_val),
+                                                 val, old_val);
+              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+              idx_val = new_idx_val;
+              val = new_val;
+            }
+        }
+      scalar_results.safe_push (val);
+    }
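
Sketch only, assuming a four-lane vector and ignoring how the index vector encodes lanes where the condition never matched: the statement chain built by the loop above corresponds to the following plain C, with illustrative names.  The emitted GIMPLE is this loop fully unrolled, using SSA names in place of the mutable variables.

int
cond_reduc_scalar_epilogue (const int induction_index[4], const int data[4])
{
  /* Lane 0 contributes only the two BIT_FIELD_REF extractions.  */
  int idx_val = induction_index[0];
  int val = data[0];

  for (int i = 1; i < 4; i++)
    {
      /* COND_EXPR: take this lane's data if its index beats the running
         maximum index seen so far.  */
      if (induction_index[i] > idx_val)
        val = data[i];
      /* MAX_EXPR on the indices, skipped for the last lane because its
         result would never be read.  */
      if (i != 3)
        idx_val = induction_index[i] > idx_val ? induction_index[i] : idx_val;
    }
  return val;
}

Because every step is an ordinary scalar compare, select or max, this path needs only ordinary scalar operations, which is why the hard REDUC_MAX_EXPR requirement can be relaxed in the hunks below.
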
  /* 2.3 Create the reduction code, using one of the three schemes described
         above. In SLP we simply need to extract all the elements from the
      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
      tree vec_temp;
+      /* COND reductions all do the final reduction with MAX_EXPR.  */
+      if (code == COND_EXPR)
+        code = MAX_EXPR;
+
      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
          /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
          scalar_results.safe_push (new_temp);
        }
+
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+          == INTEGER_INDUC_COND_REDUCTION)
+        {
+          /* Earlier we set the initial value to be zero.  Check the result
+             and if it is zero then replace with the original initial
+             value.  */
+          tree zero = build_zero_cst (scalar_type);
+          tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+
+          tree tmp = make_ssa_name (new_scalar_dest);
+          epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
+                                             initial_def, new_temp);
+          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+          scalar_results[0] = tmp;
+        }
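
In scalar terms the fixup above does the following (sketch only; the helper name is illustrative): the reduction was started at zero rather than at the user's initial value, so if the final result is still zero the original initial value is substituted back.

int
restore_initial_if_zero (int reduction_result, int original_init)
{
  /* Mirrors the EQ_EXPR + COND_EXPR pair built above.  */
  return reduction_result == 0 ? original_init : reduction_result;
}
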
    }
vect_finalize_reduction:
          epilog_reduc_code = ERROR_MARK;
        }
-
-      /* When epilog_reduc_code is ERROR_MARK then a reduction will be
-         generated in the epilog using multiple expressions.  This does not
-         work for condition reductions.  */
-      if (epilog_reduc_code == ERROR_MARK
-          && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-              == INTEGER_INDUC_COND_REDUCTION
-              || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-                 == CONST_COND_REDUCTION))
-        {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "no reduc code for scalar code.\n");
-          return false;
-        }
    }
  else
    {
      cr_index_vector_type = build_vector_type
        (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
-      epilog_reduc_code = REDUC_MAX_EXPR;
      optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
                                   optab_default);
      if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
-          == CODE_FOR_nothing)
-        {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "reduc max op not supported by target.\n");
-          return false;
-        }
+          != CODE_FOR_nothing)
+        epilog_reduc_code = REDUC_MAX_EXPR;
    }
  if ((double_reduc