From b266b96856da702dbe957396adc6f242daed233b Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Wed, 10 Jun 2015 10:39:31 +0000
Subject: [PATCH] tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
 out from ...

2015-06-10  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
	out from ...
	(vect_supported_load_permutation_p): ... here.  Handle
	supportable permutations in reductions.
	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
	for vectorizing strided group loads.

From-SVN: r224324
---
 gcc/ChangeLog         |   9 ++++
 gcc/tree-vect-slp.c   | 115 ++++++++++++++++++++++++------------------
 gcc/tree-vect-stmts.c |  17 +++++--
 3 files changed, 86 insertions(+), 55 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4486b4e4660..c3854fa5c6e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-06-10  Richard Biener  <rguenther@suse.de>
+
+	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
+	out from ...
+	(vect_supported_load_permutation_p): ... here.  Handle
+	supportable permutations in reductions.
+	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
+	for vectorizing strided group loads.
+
 2015-06-10  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/66470
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 20e4aba9c71..880b245c927 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1299,6 +1299,67 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
 }
 
 
+/* Attempt to reorder stmts in a reduction chain so that we don't
+   require any load permutation.  Return true if that was possible,
+   otherwise return false.  */
+
+static bool
+vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
+{
+  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+  unsigned int i, j;
+  sbitmap load_index;
+  unsigned int lidx;
+  slp_tree node, load;
+
+  /* Compare all the permutation sequences to the first one.  We know
+     that at least one load is permuted.  */
+  node = SLP_INSTANCE_LOADS (slp_instn)[0];
+  if (!node->load_permutation.exists ())
+    return false;
+  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+    {
+      if (!load->load_permutation.exists ())
+        return false;
+      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+        if (lidx != node->load_permutation[j])
+          return false;
+    }
+
+  /* Check that the loads in the first sequence are different and there
+     are no gaps between them.  */
+  load_index = sbitmap_alloc (group_size);
+  bitmap_clear (load_index);
+  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+    {
+      if (bitmap_bit_p (load_index, lidx))
+        {
+          sbitmap_free (load_index);
+          return false;
+        }
+      bitmap_set_bit (load_index, lidx);
+    }
+  for (i = 0; i < group_size; i++)
+    if (!bitmap_bit_p (load_index, i))
+      {
+        sbitmap_free (load_index);
+        return false;
+      }
+  sbitmap_free (load_index);
+
+  /* This permutation is valid for reduction.  Since the order of the
+     statements in the nodes is not important unless they are memory
+     accesses, we can rearrange the statements in all the nodes
+     according to the order of the loads.  */
+  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+                            node->load_permutation);
+
+  /* We are done, no actual permutations need to be generated.  */
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    SLP_TREE_LOAD_PERMUTATION (node).release ();
+  return true;
+}
+
 /* Check if the required load permutations in the SLP instance
    SLP_INSTN are supported.  */
 
@@ -1307,7 +1368,6 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
 {
   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
   unsigned int i, j, k, next;
-  sbitmap load_index;
   slp_tree node;
   gimple stmt, load, next_load, first_load;
   struct data_reference *dr;
@@ -1342,59 +1402,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
 
   /* Reduction (there are no data-refs in the root).
-     In reduction chain the order of the loads is important.  */
+     In reduction chain the order of the loads is not important.  */
   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
-      slp_tree load;
-      unsigned int lidx;
+      if (vect_attempt_slp_rearrange_stmts (slp_instn))
+        return true;
 
-      /* Compare all the permutation sequences to the first one.  We know
-         that at least one load is permuted.  */
-      node = SLP_INSTANCE_LOADS (slp_instn)[0];
-      if (!node->load_permutation.exists ())
-        return false;
-      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
-        {
-          if (!load->load_permutation.exists ())
-            return false;
-          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
-            if (lidx != node->load_permutation[j])
-              return false;
-        }
-
-      /* Check that the loads in the first sequence are different and there
-         are no gaps between them.  */
-      load_index = sbitmap_alloc (group_size);
-      bitmap_clear (load_index);
-      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
-        {
-          if (bitmap_bit_p (load_index, lidx))
-            {
-              sbitmap_free (load_index);
-              return false;
-            }
-          bitmap_set_bit (load_index, lidx);
-        }
-      for (i = 0; i < group_size; i++)
-        if (!bitmap_bit_p (load_index, i))
-          {
-            sbitmap_free (load_index);
-            return false;
-          }
-      sbitmap_free (load_index);
-
-      /* This permutation is valid for reduction.  Since the order of the
-         statements in the nodes is not important unless they are memory
-         accesses, we can rearrange the statements in all the nodes
-         according to the order of the loads.  */
-      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
-                                node->load_permutation);
-
-      /* We are done, no actual permutations need to be generated.  */
-      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
-        SLP_TREE_LOAD_PERMUTATION (node).release ();
-      return true;
+      /* Fallthru to general load permutation handling.  */
     }
 
   /* In basic block vectorization we allow any subchain of an interleaving
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 06980618f12..2f77e8448ee 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5995,9 +5995,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if ((grouped_load
        && (slp || PURE_SLP_STMT (stmt_info)))
       && (group_size > nunits
-          || nunits % group_size != 0
-          /* We don't support load permutations.  */
-          || slp_perm))
+          || nunits % group_size != 0))
     {
       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                        "unhandled strided group load\n");
@@ -6294,6 +6292,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
       int nloads = nunits;
       tree ltype = TREE_TYPE (vectype);
+      auto_vec<tree> dr_chain;
       if (slp)
         {
           nloads = nunits / group_size;
@@ -6303,7 +6302,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             ltype = vectype;
           ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-          gcc_assert (!slp_perm);
+          if (slp_perm)
+            dr_chain.create (ncopies);
         }
       for (j = 0; j < ncopies; j++)
         {
@@ -6350,13 +6350,20 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             }
 
           if (slp)
-            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+            {
+              SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+              if (slp_perm)
+                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+            }
           if (j == 0)
             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
           else
             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
           prev_stmt_info = vinfo_for_stmt (new_stmt);
         }
+      if (slp_perm)
+        vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+                                      slp_node_instance, false);
       return true;
     }
-- 
2.30.2
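
For illustration: vect_attempt_slp_rearrange_stmts targets SLP reductions in
which every load node carries the same permutation, and that permutation is a
duplicate-free, gap-free cover of the group.  A reduction chain of roughly
this shape would qualify (hypothetical sketch, not a testcase from this
patch; all names are made up):

/* Both partial sums feed a single reduction, so the order of statements
   inside the SLP node does not matter.  The loads appear as a[i + 1]
   before a[i], i.e. with load permutation { 1, 0 }; rearranging the
   statements to match the load order removes the permutation, so no
   permute instructions need to be generated.  */
double
sum_pairs (const double *a, int n)
{
  double s0 = 0.0, s1 = 0.0;
  for (int i = 0; i < n; i += 2)
    {
      s1 += a[i + 1];	/* group element 1 loaded first in the IL */
      s0 += a[i];	/* group element 0 loaded second */
    }
  return s0 + s1;	/* reduction root: no data reference */
}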
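
The tree-vect-stmts.c hunks extend the strided-group-load path, which emits
element-wise scalar loads when the group's stride is not a compile-time
constant.  Previously slp_perm forced the "unhandled strided group load"
bail-out; now the per-copy vector results are collected in dr_chain and
shuffled afterwards by vect_transform_slp_perm_load.  A loop of roughly the
affected shape (again a hypothetical sketch, not from the patch):

/* in[] is accessed as a group of two with a runtime stride, and the two
   lanes are consumed in swapped order, i.e. through a load permutation.  */
void
copy_swapped (double *restrict out, const double *in, int stride, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[2 * i]     = in[i * stride + 1];
      out[2 * i + 1] = in[i * stride];
    }
}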