From f7300fff74becf365cdadd23c9447521da852e84 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 29 Nov 2017 14:38:06 +0000 Subject: [PATCH] re PR tree-optimization/83202 (Try joining operations on consecutive array elements during tree vectorization) 2017-11-29 Richard Biener PR tree-optimization/83202 * tree-vect-slp.c (scalar_stmts_set_t): New typedef. (bst_fail): Use it. (vect_analyze_slp_cost_1): Add visited set, do not account SLP nodes vectorized to the same stmts multiple times. (vect_analyze_slp_cost): Allocate a visited set and pass it down. (vect_analyze_slp_instance): Adjust. (scalar_stmts_to_slp_tree_map_t): New typedef. (vect_schedule_slp_instance): Add a map recording the SLP node representing the vectorized stmts for a set of scalar stmts. Avoid code-generating redundancies. (vect_schedule_slp): Allocate map and pass it down. * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase. From-SVN: r255233 --- gcc/ChangeLog | 15 ++++++ gcc/testsuite/ChangeLog | 5 ++ .../vect/costmodel/x86_64/costmodel-pr83202.c | 15 ++++++ gcc/tree-vect-slp.c | 46 +++++++++++++++---- 4 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2b93243477d..003b6b161e5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2017-11-29 Richard Biener + + PR tree-optimization/83202 + * tree-vect-slp.c (scalar_stmts_set_t): New typedef. + (bst_fail): Use it. + (vect_analyze_slp_cost_1): Add visited set, do not account SLP + nodes vectorized to the same stmts multiple times. + (vect_analyze_slp_cost): Allocate a visited set and pass it down. + (vect_analyze_slp_instance): Adjust. + (scalar_stmts_to_slp_tree_map_t): New typedef. + (vect_schedule_slp_instance): Add a map recording the SLP node + representing the vectorized stmts for a set of scalar stmts. + Avoid code-generating redundancies. + (vect_schedule_slp): Allocate map and pass it down. + 2017-11-29 Nathan Sidwell PR c++/83187 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3b90be1b7b2..bf1e37444ba 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-11-29 Richard Biener + + PR tree-optimization/83202 + * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase. + 2017-11-29 Nathan Sidwell PR c++/83187 diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c new file mode 100644 index 00000000000..bdec20fea2b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ + +void test(double data[4][2]) +{ + for (int i = 0; i < 4; i++) + { + data[i][0] = data[i][0] * data[i][0]; + data[i][1] = data[i][1] * data[i][1]; + } +} + +/* We should vectorize this with SLP and V2DF vectors. */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index bc81b3d4b04..19f2ac43fd6 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate) return true; } -static hash_set , bst_traits> *bst_fail; +typedef hash_set , bst_traits> scalar_stmts_set_t; +static scalar_stmts_set_t *bst_fail; static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, @@ -1674,7 +1675,8 @@ static void vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *body_cost_vec, - unsigned ncopies_for_cost) + unsigned ncopies_for_cost, + scalar_stmts_set_t* visited) { unsigned i, j; slp_tree child; @@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, stmt_vec_info stmt_info; tree lhs; + /* If we already costed the exact same set of scalar stmts we're done. + We share the generated vector stmts for those. */ + if (visited->contains (SLP_TREE_SCALAR_STMTS (node))) + return; + + visited->add (SLP_TREE_SCALAR_STMTS (node).copy ()); + /* Recurse down the SLP tree. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, - body_cost_vec, ncopies_for_cost); + body_cost_vec, ncopies_for_cost, visited); /* Look at the first scalar stmt to determine the cost. */ stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data) prologue_cost_vec.create (10); body_cost_vec.create (10); + scalar_stmts_set_t *visited = new scalar_stmts_set_t (); vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), &prologue_cost_vec, &body_cost_vec, - ncopies_for_cost); + ncopies_for_cost, visited); + delete visited; /* Record the prologue costs, which were delayed until we were sure that SLP was successful. */ @@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo, /* Build the tree for the SLP instance. */ bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; - bst_fail = new hash_set , bst_traits> (); + bst_fail = new scalar_stmts_set_t (); node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, &max_nunits, &loads, matches, &npermutes, NULL, max_tree_size); @@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, return true; } - +typedef hash_map , slp_tree, + simple_hashmap_traits > + scalar_stmts_to_slp_tree_map_t; /* Vectorize SLP instance tree in postorder. */ static bool -vect_schedule_slp_instance (slp_tree node, slp_instance instance) +vect_schedule_slp_instance (slp_tree node, slp_instance instance, + scalar_stmts_to_slp_tree_map_t *bst_map) { gimple *stmt; bool grouped_store, is_store; @@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance) if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) return false; + /* See if we have already vectorized the same set of stmts and reuse their + vectorized stmts. */ + slp_tree &leader + = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ()); + if (leader) + { + SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader)); + return false; + } + + leader = node; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_schedule_slp_instance (child, instance); + vect_schedule_slp_instance (child, instance, bst_map); /* Push SLP node def-type to stmts. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) @@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo) FOR_EACH_VEC_ELT (slp_instances, i, instance) { /* Schedule the tree of INSTANCE. */ + scalar_stmts_to_slp_tree_map_t *bst_map + = new scalar_stmts_to_slp_tree_map_t (); is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), - instance); + instance, bst_map); + delete bst_map; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizing stmts using SLP.\n"); -- 2.30.2