+2016-07-06 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-vectorizer.h (vect_grouped_load_supported): Add a
+ single_element_p parameter.
+ * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise.
+ Check the PR65518 case here rather than in vectorizable_load.
+ * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly.
+ * tree-vect-stmts.c (vectorizable_load): Likewise.
+
2016-07-06 Richard Sandiford <richard.sandiford@arm.com>
* tree-vectorizer.h (gather_scatter_info): New structure.
/* Function vect_grouped_load_supported.
- Returns TRUE if even and odd permutations are supported,
- and FALSE otherwise. */
+ COUNT is the size of the load group (the number of statements plus the
+ number of gaps). SINGLE_ELEMENT_P is true if there is actually
+ only one statement, with a gap of COUNT - 1.
+
+ Returns true if a suitable permute exists. */
bool
-vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
+vect_grouped_load_supported (tree vectype, bool single_element_p,
+ unsigned HOST_WIDE_INT count)
{
machine_mode mode = TYPE_MODE (vectype);
+ /* If this is single-element interleaving with an element distance
+ that leaves unused vector loads around, punt - we would at least
+ create very sub-optimal code in that case (and blow up memory,
+ see PR65518). */
+ if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "single-element interleaving not supported "
+ "for not adjacent vector loads\n");
+ return false;
+ }
+
/* vect_permute_load_chain requires the group size to be equal to 3 or
be a power of two. */
if (count != 3 && exact_log2 (count) == -1)
{
vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
+ bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
size = STMT_VINFO_GROUP_SIZE (vinfo);
vectype = STMT_VINFO_VECTYPE (vinfo);
if (! vect_load_lanes_supported (vectype, size)
- && ! vect_grouped_load_supported (vectype, size))
+ && ! vect_grouped_load_supported (vectype, single_element_p,
+ size))
return false;
}
}
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+ bool single_element_p = (first_stmt == stmt
+ && !GROUP_NEXT_ELEMENT (stmt_info));
if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
{
if (vect_load_lanes_supported (vectype, group_size))
load_lanes_p = true;
- else if (!vect_grouped_load_supported (vectype, group_size))
+ else if (!vect_grouped_load_supported (vectype, single_element_p,
+ group_size))
return false;
}
- /* If this is single-element interleaving with an element distance
- that leaves unused vector loads around punt - we at least create
- very sub-optimal code in that case (and blow up memory,
- see PR65518). */
- if (first_stmt == stmt
- && !GROUP_NEXT_ELEMENT (stmt_info))
+ if (single_element_p)
{
- if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "single-element interleaving not supported "
- "for not adjacent vector loads\n");
- return false;
- }
-
/* Single-element interleaving requires peeling for gaps. */
gcc_assert (GROUP_GAP (stmt_info));
}
extern tree vect_create_destination_var (tree, tree);
extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
-extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *,
gimple_stmt_iterator *, vec<tree> *);