}
+/* Attempt to reorder stmts in a reduction chain so that we don't
+   require any load permutation.  Return true if that was possible,
+   otherwise return false.  */
+
+static bool
+vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
+{
+  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+  unsigned int i, j;
+  sbitmap load_index;
+  unsigned int lidx;
+  slp_tree node, load;
+
+  /* Compare all the permutation sequences to the first one.  We know
+     that at least one load is permuted.  */
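+  /* A single rearrangement of the SLP tree can only undo one permutation,
+     so every load node has to use the same sequence; nodes with differing
+     permutations cannot be handled this way.  */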
+  node = SLP_INSTANCE_LOADS (slp_instn)[0];
+  if (!node->load_permutation.exists ())
+    return false;
+  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+    {
+      if (!load->load_permutation.exists ())
+        return false;
+      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+        if (lidx != node->load_permutation[j])
+          return false;
+    }
+
+  /* Check that the loads in the first sequence are different and there
+     are no gaps between them.  */
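+  /* I.e. the permutation must use every index 0 .. GROUP_SIZE - 1 exactly
+     once, e.g. { 1, 0, 3, 2 } for a group of four, whereas { 0, 0, 3, 2 }
+     or a sequence that skips an index is rejected.  */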
+  load_index = sbitmap_alloc (group_size);
+  bitmap_clear (load_index);
+  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+    {
+      if (bitmap_bit_p (load_index, lidx))
+        {
+          sbitmap_free (load_index);
+          return false;
+        }
+      bitmap_set_bit (load_index, lidx);
+    }
+  for (i = 0; i < group_size; i++)
+    if (!bitmap_bit_p (load_index, i))
+      {
+        sbitmap_free (load_index);
+        return false;
+      }
+  sbitmap_free (load_index);
+
+  /* This permutation is valid for reduction.  Since the order of the
+     statements in the nodes is not important unless they are memory
+     accesses, we can rearrange the statements in all the nodes
+     according to the order of the loads.  */
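+  /* E.g. with the permutation { 1, 0, 3, 2 } the scalar stmts of every
+     node are swapped pairwise, after which the loads access the group in
+     memory order and no permute statements have to be generated.  */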
+  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+                            node->load_permutation);
+
+  /* We are done, no actual permutations need to be generated.  */
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    SLP_TREE_LOAD_PERMUTATION (node).release ();
+  return true;
+}
+
/* Check if the required load permutations in the SLP instance
SLP_INSTN are supported. */
{
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j, k, next;
- sbitmap load_index;
slp_tree node;
gimple stmt, load, next_load, first_load;
struct data_reference *dr;
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* Reduction (there are no data-refs in the root).
- In reduction chain the order of the loads is important. */
+ In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
- slp_tree load;
- unsigned int lidx;
+ if (vect_attempt_slp_rearrange_stmts (slp_instn))
+   return true;
- /* Compare all the permutation sequences to the first one. We know
- that at least one load is permuted. */
- node = SLP_INSTANCE_LOADS (slp_instn)[0];
- if (!node->load_permutation.exists ())
- return false;
- for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
- {
- if (!load->load_permutation.exists ())
- return false;
- FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
- if (lidx != node->load_permutation[j])
- return false;
- }
-
- /* Check that the loads in the first sequence are different and there
- are no gaps between them. */
- load_index = sbitmap_alloc (group_size);
- bitmap_clear (load_index);
- FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
- {
- if (bitmap_bit_p (load_index, lidx))
- {
- sbitmap_free (load_index);
- return false;
- }
- bitmap_set_bit (load_index, lidx);
- }
- for (i = 0; i < group_size; i++)
- if (!bitmap_bit_p (load_index, i))
- {
- sbitmap_free (load_index);
- return false;
- }
- sbitmap_free (load_index);
-
- /* This permutation is valid for reduction. Since the order of the
- statements in the nodes is not important unless they are memory
- accesses, we can rearrange the statements in all the nodes
- according to the order of the loads. */
- vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
- node->load_permutation);
-
- /* We are done, no actual permutations need to be generated. */
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- SLP_TREE_LOAD_PERMUTATION (node).release ();
- return true;
+ /* Fallthru to general load permutation handling. */
}
/* In basic block vectorization we allow any subchain of an interleaving
if ((grouped_load
&& (slp || PURE_SLP_STMT (stmt_info)))
&& (group_size > nunits
- || nunits % group_size != 0
- /* We don't support load permutations. */
- || slp_perm))
+ || nunits % group_size != 0))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n");
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
int nloads = nunits;
tree ltype = TREE_TYPE (vectype);
+ auto_vec<tree> dr_chain;
if (slp)
{
nloads = nunits / group_size;
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (!slp_perm);
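+ /* For a permuted SLP load collect the vectorized defs of all copies so
+    that the required permutes can be generated once the loads have been
+    emitted.  */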
+ if (slp_perm)
+   dr_chain.create (ncopies);
}
for (j = 0; j < ncopies; j++)
{
}
if (slp)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ {
+   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+   if (slp_perm)
+     dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+ }
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
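+ /* All loads are emitted; now generate the permute statements that bring
+    the loaded vectors into the order required by the SLP node.  */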
+ if (slp_perm)
+   vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+                                 slp_node_instance, false);
return true;
}