static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
-  if (!possible_vector_mask_operation_p (stmt_info)
-      || stmt_info->mask_precision)
+  if (!possible_vector_mask_operation_p (stmt_info))
    return;

-  auto_vec<stmt_vec_info, 32> worklist;
-  worklist.quick_push (stmt_info);
-  while (!worklist.is_empty ())
-    {
-      stmt_info = worklist.last ();
-      unsigned int orig_length = worklist.length ();
-
-      /* If at least one boolean input uses a vector mask type,
-         pick the mask type with the narrowest elements.
-
-         ??? This is the traditional behavior.  It should always produce
-         the smallest number of operations, but isn't necessarily the
-         optimal choice.  For example, if we have:
+  /* If at least one boolean input uses a vector mask type,
+     pick the mask type with the narrowest elements.

-           a = b & c
+     ??? This is the traditional behavior.  It should always produce
+     the smallest number of operations, but isn't necessarily the
+     optimal choice.  For example, if we have:

-         where:
+       a = b & c

-         - the user of a wants it to have a mask type for 16-bit elements (M16)
-         - b also uses M16
-         - c uses a mask type for 8-bit elements (M8)
+     where:

-         then picking M8 gives:
+     - the user of a wants it to have a mask type for 16-bit elements (M16)
+     - b also uses M16
+     - c uses a mask type for 8-bit elements (M8)

-         - 1 M16->M8 pack for b
-         - 1 M8 AND for a
-         - 2 M8->M16 unpacks for the user of a
+     then picking M8 gives:

-         whereas picking M16 would have given:
+     - 1 M16->M8 pack for b
+     - 1 M8 AND for a
+     - 2 M8->M16 unpacks for the user of a

-         - 2 M8->M16 unpacks for c
-         - 2 M16 ANDs for a
-
-         The number of operations are equal, but M16 would have given
-         a shorter dependency chain and allowed more ILP.  */
-      unsigned int precision = ~0U;
-      gassign *assign = as_a <gassign *> (stmt_info->stmt);
-      unsigned int nops = gimple_num_ops (assign);
-      for (unsigned int i = 1; i < nops; ++i)
-        {
-          tree rhs = gimple_op (assign, i);
-          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
-            continue;
+     whereas picking M16 would have given:

-          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
-          if (!def_stmt_info)
-            /* Don't let external or constant operands influence the choice.
-               We can convert them to whichever vector type we pick.  */
-            continue;
+     - 2 M8->M16 unpacks for c
+     - 2 M16 ANDs for a

-          if (def_stmt_info->mask_precision)
-            {
-              if (precision > def_stmt_info->mask_precision)
-                precision = def_stmt_info->mask_precision;
-            }
-          else if (possible_vector_mask_operation_p (def_stmt_info))
-            worklist.safe_push (def_stmt_info);
-        }
+     The number of operations is the same, but M16 would have given
+     a shorter dependency chain and allowed more ILP.  */
+  unsigned int precision = ~0U;
+  gassign *assign = as_a <gassign *> (stmt_info->stmt);
+  unsigned int nops = gimple_num_ops (assign);
+  for (unsigned int i = 1; i < nops; ++i)
+    {
+      tree rhs = gimple_op (assign, i);
+      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
+        continue;

-      /* Defer the choice if we need to visit operands first.  */
-      if (orig_length != worklist.length ())
+      stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
+      if (!def_stmt_info)
+        /* Don't let external or constant operands influence the choice.
+           We can convert them to whichever vector type we pick.  */
        continue;

-      /* If the statement compares two values that shouldn't use vector masks,
-         try comparing the values as normal scalars instead.  */
-      tree_code rhs_code = gimple_assign_rhs_code (assign);
-      if (precision == ~0U
-          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+      if (def_stmt_info->mask_precision)
        {
-          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
-          scalar_mode mode;
-          tree vectype, mask_type;
-          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
-              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
-              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
-              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
-            precision = GET_MODE_BITSIZE (mode);
+          if (precision > def_stmt_info->mask_precision)
+            precision = def_stmt_info->mask_precision;
        }
+    }

-      if (dump_enabled_p ())
-        {
-          if (precision == ~0U)
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using normal nonmask vectors for %G",
-                             stmt_info->stmt);
-          else
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using boolean precision %d for %G",
-                             precision, stmt_info->stmt);
-        }
+  /* If the statement compares two values that shouldn't use vector masks,
+     try comparing the values as normal scalars instead.  */
+  tree_code rhs_code = gimple_assign_rhs_code (assign);
+  if (precision == ~0U
+      && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+    {
+      tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+      scalar_mode mode;
+      tree vectype, mask_type;
+      if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
+          && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
+          && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
+          && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
+        precision = GET_MODE_BITSIZE (mode);
+    }

-      stmt_info->mask_precision = precision;
-      worklist.pop ();
+  if (dump_enabled_p ())
+    {
+      if (precision == ~0U)
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using normal nonmask vectors for %G",
+                         stmt_info->stmt);
+      else
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using boolean precision %d for %G",
+                         precision, stmt_info->stmt);
    }
+
+  stmt_info->mask_precision = precision;
}
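
With the worklist gone, correctness rests on the defs-before-uses order
that the new callers below establish: every boolean operand already has
its mask_precision set by the time its use is visited, so the choice
collapses to a plain minimum.  A minimal sketch of that fold, with a
hypothetical helper rather than the vectorizer's real types (0 stands
for "operand has no mask precision"):

  /* Hypothetical sketch, not the patch's code: with defs visited before
     uses, choosing a precision is a minimum over the operands' already
     computed precisions; ~0U means no boolean operand was seen.  */
  unsigned int
  choose_mask_precision (const unsigned int *ops, unsigned int n)
  {
    unsigned int precision = ~0U;
    for (unsigned int i = 0; i < n; ++i)
      if (ops[i] != 0 && ops[i] < precision)
        precision = ops[i];
    return precision;
  }

On the M8/M16 example from the comment, operand precisions {16, 8}
yield 8: the narrowest-element mask still wins, but now in one pass.
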
@@ … @@ /* Handle vect_determine_precisions for STMT_INFO, given that we
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
-  vect_determine_mask_precision (vinfo, stmt_info);
}
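
The call is dropped here because vect_determine_stmt_precisions runs in
a use-to-def (backward) walk, while mask precision propagates def-to-use.
Splitting the two gives each analysis the traversal direction it needs;
schematically, with stub stand-ins for the real types and analyses (not
the patch's code):

  #include <vector>

  struct vec_info {};
  struct stmt_vec_info_s {};
  typedef stmt_vec_info_s *stmt_vec_info;

  /* Stubs standing in for the real analyses.  */
  static void vect_determine_mask_precision (vec_info *, stmt_vec_info) {}
  static void vect_determine_stmt_precisions (vec_info *, stmt_vec_info) {}

  /* STMTS is assumed to list the region's statements with defs
     before uses.  */
  void
  determine_precisions (vec_info *vinfo,
                        const std::vector<stmt_vec_info> &stmts)
  {
    /* Forward: each use sees its operands' mask precisions.  */
    for (stmt_vec_info s : stmts)
      vect_determine_mask_precision (vinfo, s);

    /* Backward: each def sees its users' precision requirements.  */
    for (auto i = stmts.size (); i-- > 0;)
      vect_determine_stmt_precisions (vinfo, stmts[i]);
  }
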
@@ … @@ /* Walk backwards through the vectorizable region to determine the
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

+      for (unsigned int i = 0; i < nbbs; i++)
+        {
+          basic_block bb = bbs[i];
+          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+            if (!is_gimple_debug (gsi_stmt (si)))
+              vect_determine_mask_precision
+                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
+        }
      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[nbbs - i - 1];
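
The new walk runs over the same LOOP_VINFO_BBS array as the backward
walk that follows it, just front to back, so a statement's boolean
operands are handled before the statement itself.  On a hypothetical
input like the following (assuming the target can expand both
comparisons directly):

  _1 = x_4(D) < y_5(D);   /* int comparison: mask precision 32 */
  _2 = a_6(D) < b_7(D);   /* short comparison: mask precision 16 */
  _3 = _1 & _2;           /* both known here: min (32, 16) = 16 */

the old backward walk reached _3 first, found its operands unresolved
and had to defer it on the worklist; in the forward order both operand
precisions are already recorded.  Debug statements are skipped since
they never influence vectorization.
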
@@ … @@
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);

+      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
+        {
+          basic_block bb = bb_vinfo->bbs[i];
+          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_mask_precision (vinfo, stmt_info);
+            }
+        }
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
        for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
             !gsi_end_p (gsi); gsi_prev (&gsi))