From 4fb8ba9d357297206678a3e3eacf9292148eafb5 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 6 Jul 2016 08:14:41 +0000 Subject: [PATCH] [5/7] Move the fix for PR65518 This patch moves the fix for PR65518 to the code that checks whether load-and-permute operations are supported. If the group size is greater than the vectorisation factor, it would still be possible to fall back to elementwise loads (as for strided groups) rather than fail vectorisation entirely. Tested on aarch64-linux-gnu and x86_64-linux-gnu. gcc/ * tree-vectorizer.h (vect_grouped_load_supported): Add a single_element_p parameter. * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise. Check the PR65518 case here rather than in vectorizable_load. * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly. * tree-vect-stmts.c (vectorizable_load): Likewise. From-SVN: r238037 --- gcc/ChangeLog | 9 +++++++++ gcc/tree-vect-data-refs.c | 23 ++++++++++++++++++++--- gcc/tree-vect-loop.c | 4 +++- gcc/tree-vect-stmts.c | 21 +++++---------------- gcc/tree-vectorizer.h | 2 +- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e911b16a628..91f76acb5e4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-07-06 Richard Sandiford + + * tree-vectorizer.h (vect_grouped_load_supported): Add a + single_element_p parameter. + * tree-vect-data-refs.c (vect_grouped_load_supported): Likewise. + Check the PR65518 case here rather than in vectorizable_load. + * tree-vect-loop.c (vect_analyze_loop_2): Update call accordingly. + * tree-vect-stmts.c (vectorizable_load): Likewise. + 2016-07-06 Richard Sandiford * tree-vectorizer.h (gather_scatter_info): New structure. 
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 71155c9228b..f2f0dc50bbd 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -5144,14 +5144,31 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi, /* Function vect_grouped_load_supported. - Returns TRUE if even and odd permutations are supported, - and FALSE otherwise. */ + COUNT is the size of the load group (the number of statements plus the + number of gaps). SINGLE_ELEMENT_P is true if there is actually + only one statement, with a gap of COUNT - 1. + + Returns true if a suitable permute exists. */ bool -vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count) +vect_grouped_load_supported (tree vectype, bool single_element_p, + unsigned HOST_WIDE_INT count) { machine_mode mode = TYPE_MODE (vectype); + /* If this is single-element interleaving with an element distance + that leaves unused vector loads around punt - we at least create + very sub-optimal code in that case (and blow up memory, + see PR65518). */ + if (single_element_p && count > TYPE_VECTOR_SUBPARTS (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "single-element interleaving not supported " + "for not adjacent vector loads\n"); + return false; + } + /* vect_permute_load_chain requires the group size to be equal to 3 or be a power of two. */ if (count != 3 && exact_log2 (count) == -1) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 5e4007642b7..63c002e737c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2152,10 +2152,12 @@ again: { vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); + bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo); size = STMT_VINFO_GROUP_SIZE (vinfo); vectype = STMT_VINFO_VECTYPE (vinfo); if (! vect_load_lanes_supported (vectype, size) - && ! 
vect_grouped_load_supported (vectype, size)) + && ! vect_grouped_load_supported (vectype, single_element_p, + size)) return false; } } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1a616b2428e..ab32481600b 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6315,31 +6315,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, first_stmt = GROUP_FIRST_ELEMENT (stmt_info); group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + bool single_element_p = (first_stmt == stmt + && !GROUP_NEXT_ELEMENT (stmt_info)); if (!slp && !STMT_VINFO_STRIDED_P (stmt_info)) { if (vect_load_lanes_supported (vectype, group_size)) load_lanes_p = true; - else if (!vect_grouped_load_supported (vectype, group_size)) + else if (!vect_grouped_load_supported (vectype, single_element_p, + group_size)) return false; } - /* If this is single-element interleaving with an element distance - that leaves unused vector loads around punt - we at least create - very sub-optimal code in that case (and blow up memory, - see PR65518). */ - if (first_stmt == stmt - && !GROUP_NEXT_ELEMENT (stmt_info)) + if (single_element_p) { - if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "single-element interleaving not supported " - "for not adjacent vector loads\n"); - return false; - } - /* Single-element interleaving requires peeling for gaps. 
*/ gcc_assert (GROUP_GAP (stmt_info)); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 180b3061c2c..08066306dab 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1065,7 +1065,7 @@ extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, extern tree vect_create_destination_var (tree, tree); extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); -extern bool vect_grouped_load_supported (tree, unsigned HOST_WIDE_INT); +extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, gimple_stmt_iterator *, vec<tree> *); -- 2.30.2