From a1f072e2441c58f6a486f90bb9a32bd5f6c51cb4 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 26 Oct 2018 07:38:59 +0000 Subject: [PATCH] re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision]) 2018-10-26 Richard Biener PR tree-optimization/87105 * tree-vectorizer.h (_slp_tree::refcnt): New member. * tree-vect-slp.c (vect_free_slp_tree): Decrement and honor refcnt. (vect_create_new_slp_node): Initialize refcnt to one. (bst_traits): Move. (scalar_stmts_set_t, bst_fail): Remove. (vect_build_slp_tree_2): Add bst_map argument and adjust calls. (vect_build_slp_tree): Add bst_map argument and lookup already created SLP nodes. (vect_print_slp_tree): Handle a SLP graph, print SLP node addresses. (vect_slp_rearrange_stmts): Handle a SLP graph. (vect_analyze_slp_instance): Adjust and free SLP nodes from the CSE map. Fix indenting. (vect_schedule_slp_instance): Add short-cut. * g++.dg/vect/slp-pr87105.cc: Adjust. * gcc.dg/torture/20181024-1.c: New testcase. * g++.dg/opt/20181025-1.C: Likewise. From-SVN: r265522 --- gcc/ChangeLog | 19 ++ gcc/testsuite/ChangeLog | 7 + gcc/testsuite/g++.dg/opt/20181025-1.C | 31 +++ gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 9 +- gcc/testsuite/gcc.dg/torture/20181024-1.c | 41 ++++ gcc/tree-vect-slp.c | 281 ++++++++++++---------- gcc/tree-vectorizer.h | 2 + 7 files changed, 263 insertions(+), 127 deletions(-) create mode 100644 gcc/testsuite/g++.dg/opt/20181025-1.C create mode 100644 gcc/testsuite/gcc.dg/torture/20181024-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b60988c910e..d46a3b47a55 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2018-10-26 Richard Biener + + PR tree-optimization/87105 + * tree-vectorizer.h (_slp_tree::refcnt): New member. + * tree-vect-slp.c (vect_free_slp_tree): Decrement and honor + refcnt. + (vect_create_new_slp_node): Initialize refcnt to one. + (bst_traits): Move. + (scalar_stmts_set_t, bst_fail): Remove. + (vect_build_slp_tree_2): Add bst_map argument and adjust calls. + (vect_build_slp_tree): Add bst_map argument and lookup + already created SLP nodes. + (vect_print_slp_tree): Handle a SLP graph, print SLP node + addresses. + (vect_slp_rearrange_stmts): Handle a SLP graph. + (vect_analyze_slp_instance): Adjust and free SLP nodes from + the CSE map. Fix indenting. + (vect_schedule_slp_instance): Add short-cut. + 2018-10-26 Martin Liska PR testsuite/86158 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 73739030993..36627afeaa3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-10-26 Richard Biener + + PR tree-optimization/87105 + * g++.dg/vect/slp-pr87105.cc: Adjust. + * gcc.dg/torture/20181024-1.c: New testcase. + * g++.dg/opt/20181025-1.C: Likewise. + 2018-10-26 Richard Biener PR testsuite/87754 diff --git a/gcc/testsuite/g++.dg/opt/20181025-1.C b/gcc/testsuite/g++.dg/opt/20181025-1.C new file mode 100644 index 00000000000..43d1614f023 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/20181025-1.C @@ -0,0 +1,31 @@ +// { dg-do compile } +// { dg-options "-Ofast" } + +template +class Vector { + typedef Number value_type; + typedef const value_type *const_iterator; + Number norm_sqr () const; + const_iterator begin () const; + unsigned int dim; +}; +template +static inline Number +local_sqr (const Number x) +{ + return x*x; +} +template +Number +Vector::norm_sqr () const +{ + Number sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0; + const_iterator ptr = begin(), eptr = ptr + (dim/4)*4; + while (ptr!=eptr) + { + sum0 += ::local_sqr(*ptr++); + sum1 += ::local_sqr(*ptr++); + } + return sum0+sum1+sum2+sum3; +} +template class Vector; diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc index 1023d915201..949b16c848f 100644 --- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc @@ -2,7 +2,7 @@ // { dg-require-effective-target c++11 } // { dg-require-effective-target vect_double } // For MIN/MAX recognition -// { dg-additional-options "-ffast-math -fvect-cost-model" } +// { dg-additional-options "-ffast-math" } #include #include @@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { // We should have if-converted everything down to straight-line code // { dg-final { scan-tree-dump-times "" 1 "slp2" } } -// We fail to elide an earlier store which makes us not handle a later -// duplicate one for vectorization. -// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } } +// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } } +// It's a bit awkward to detect that all stores were vectorized but the +// following more or less does the trick +// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" } } diff --git a/gcc/testsuite/gcc.dg/torture/20181024-1.c b/gcc/testsuite/gcc.dg/torture/20181024-1.c new file mode 100644 index 00000000000..f2cfe7f6d67 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/20181024-1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */ + +typedef enum { + C = 0, N, S, E, W, T, B, NE, NW, SE, SW, NT, NB, ST, SB, ET, EB, WT, WB, FLAGS, N_CELL_ENTRIES} CELL_ENTRIES; +typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES]; +void foo( LBM_Grid srcGrid ) +{ + double ux , uy , uz , rho , ux1, uy1, uz1, rho1, ux2, uy2, uz2, rho2, u2, px, py; + int i; + for( i = 0; + i < (N_CELL_ENTRIES*( 100*100)); + i += N_CELL_ENTRIES ) + { + rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)]) + + ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]); + rho = 2.0*rho1 - rho2; + px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0; + uz = 0.01 * (1.0-px*px) * (1.0-py*py); + u2 = 1.5 * (ux*ux + uy*uy + uz*uz); + (((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2); + (((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2); + } +} diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 3aae1776ef9..ab8504a10bd 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p) int i; slp_tree child; + if (--node->refcnt != 0) + return; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) vect_free_slp_tree (child, final_p); @@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p) free (node); } - /* Free the memory allocated for the SLP instance. FINAL_P is true if we have vectorized the instance or if we have made a final decision not to vectorize the statements in any way. */ @@ -126,6 +128,7 @@ vect_create_new_slp_node (vec scalar_stmts) SLP_TREE_LOAD_PERMUTATION (node) = vNULL; SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_DEF_TYPE (node) = vect_internal_def; + node->refcnt = 1; unsigned i; FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info) @@ -1021,9 +1024,6 @@ bst_traits::equal (value_type existing, value_type candidate) return true; } -typedef hash_set , bst_traits> scalar_stmts_set_t; -static scalar_stmts_set_t *bst_fail; - typedef hash_map , slp_tree, simple_hashmap_traits > scalar_stmts_to_slp_tree_map_t; @@ -1034,30 +1034,33 @@ vect_build_slp_tree_2 (vec_info *vinfo, poly_uint64 *max_nunits, vec *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, - unsigned max_tree_size); + unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map); static slp_tree vect_build_slp_tree (vec_info *vinfo, vec stmts, unsigned int group_size, poly_uint64 *max_nunits, vec *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, - unsigned max_tree_size) + unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map) { - if (bst_fail->contains (stmts)) - return NULL; - slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits, - loads, matches, npermutes, tree_size, - max_tree_size); - /* When SLP build fails for stmts record this, otherwise SLP build - can be exponential in time when we allow to construct parts from - scalars, see PR81723. */ - if (! res) + if (slp_tree *leader = bst_map->get (stmts)) { - vec x; - x.create (stmts.length ()); - x.splice (stmts); - bst_fail->add (x); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n", + *leader ? "" : "failed ", *leader); + if (*leader) + (*leader)->refcnt++; + return *leader; } + slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits, + loads, matches, npermutes, tree_size, + max_tree_size, bst_map); + /* Keep a reference for the bst_map use. */ + if (res) + res->refcnt++; + bst_map->put (stmts.copy (), res); return res; } @@ -1074,7 +1077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, poly_uint64 *max_nunits, vec *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, - unsigned max_tree_size) + unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map) { unsigned nops, i, this_tree_size = 0; poly_uint64 this_max_nunits = *max_nunits; @@ -1205,7 +1209,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, group_size, &this_max_nunits, &this_loads, matches, npermutes, &this_tree_size, - max_tree_size)) != NULL) + max_tree_size, bst_map)) != NULL) { /* If we have all children of child built up from scalars then just throw that away and build it up this node from scalars. */ @@ -1348,7 +1352,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, group_size, &this_max_nunits, &this_loads, tem, npermutes, &this_tree_size, - max_tree_size)) != NULL) + max_tree_size, bst_map)) != NULL) { /* ... so if successful we can apply the operand swapping to the GIMPLE IL. This is necessary because for example @@ -1441,21 +1445,37 @@ fail: static void vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, - slp_tree node) + slp_tree node, hash_set &visited) { int i; stmt_vec_info stmt_info; slp_tree child; - dump_printf_loc (dump_kind, loc, "node%s\n", + if (visited.add (node)) + return; + + dump_printf_loc (dump_kind, loc, "node%s %p\n", SLP_TREE_DEF_TYPE (node) != vect_internal_def - ? " (external)" : ""); + ? " (external)" : "", node); FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt); + if (SLP_TREE_CHILDREN (node).is_empty ()) + return; + dump_printf_loc (dump_kind, loc, "\tchildren"); FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_print_slp_tree (dump_kind, loc, child); + dump_printf (dump_kind, " %p", (void *)child); + dump_printf (dump_kind, "\n"); + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + vect_print_slp_tree (dump_kind, loc, child, visited); } +static void +vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, + slp_tree node) +{ + hash_set visited; + vect_print_slp_tree (dump_kind, loc, node, visited); +} /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index @@ -1509,15 +1529,19 @@ vect_mark_slp_stmts_relevant (slp_tree node) static void vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, - vec permutation) + vec permutation, + hash_set &visited) { stmt_vec_info stmt_info; vec tmp_stmts; unsigned int i; slp_tree child; + if (visited.add (node)) + return; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_slp_rearrange_stmts (child, group_size, permutation); + vect_slp_rearrange_stmts (child, group_size, permutation, visited); gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); tmp_stmts.create (group_size); @@ -1578,8 +1602,9 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) statements in the nodes is not important unless they are memory accesses, we can rearrange the statements in all the nodes according to the order of the loads. */ + hash_set visited; vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, - node->load_permutation); + node->load_permutation, visited); /* We are done, no actual permutations need to be generated. */ poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); @@ -1889,12 +1914,18 @@ vect_analyze_slp_instance (vec_info *vinfo, /* Build the tree for the SLP instance. */ bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; - bst_fail = new scalar_stmts_set_t (); + scalar_stmts_to_slp_tree_map_t *bst_map + = new scalar_stmts_to_slp_tree_map_t (); poly_uint64 max_nunits = nunits; node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, &max_nunits, &loads, matches, &npermutes, - NULL, max_tree_size); - delete bst_fail; + NULL, max_tree_size, bst_map); + /* The map keeps a reference on SLP nodes built, release that. */ + for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); + it != bst_map->end (); ++it) + if ((*it).second) + vect_free_slp_tree ((*it).second, false); + delete bst_map; if (node != NULL) { /* Calculate the unrolling factor based on the smallest type. */ @@ -1924,109 +1955,109 @@ vect_analyze_slp_instance (vec_info *vinfo, } else { - /* Create a new SLP instance. */ - new_instance = XNEW (struct _slp_instance); - SLP_INSTANCE_TREE (new_instance) = node; - SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; - SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; - SLP_INSTANCE_LOADS (new_instance) = loads; - - /* Compute the load permutation. */ - slp_tree load_node; - bool loads_permuted = false; - FOR_EACH_VEC_ELT (loads, i, load_node) - { - vec load_permutation; - int j; - stmt_vec_info load_info; - bool this_load_permuted = false; - load_permutation.create (group_size); - stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT - (SLP_TREE_SCALAR_STMTS (load_node)[0]); - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) + /* Create a new SLP instance. */ + new_instance = XNEW (struct _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_LOADS (new_instance) = loads; + + /* Compute the load permutation. */ + slp_tree load_node; + bool loads_permuted = false; + FOR_EACH_VEC_ELT (loads, i, load_node) { - int load_place = vect_get_place_in_interleaving_chain - (load_info, first_stmt_info); - gcc_assert (load_place != -1); - if (load_place != j) - this_load_permuted = true; - load_permutation.safe_push (load_place); + vec load_permutation; + int j; + stmt_vec_info load_info; + bool this_load_permuted = false; + load_permutation.create (group_size); + stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT + (SLP_TREE_SCALAR_STMTS (load_node)[0]); + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) + { + int load_place = vect_get_place_in_interleaving_chain + (load_info, first_stmt_info); + gcc_assert (load_place != -1); + if (load_place != j) + this_load_permuted = true; + load_permutation.safe_push (load_place); + } + if (!this_load_permuted + /* The load requires permutation when unrolling exposes + a gap either because the group is larger than the SLP + group-size or because there is a gap between the groups. */ + && (known_eq (unrolling_factor, 1U) + || (group_size == DR_GROUP_SIZE (first_stmt_info) + && DR_GROUP_GAP (first_stmt_info) == 0))) + { + load_permutation.release (); + continue; + } + SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; + loads_permuted = true; } - if (!this_load_permuted - /* The load requires permutation when unrolling exposes - a gap either because the group is larger than the SLP - group-size or because there is a gap between the groups. */ - && (known_eq (unrolling_factor, 1U) - || (group_size == DR_GROUP_SIZE (first_stmt_info) - && DR_GROUP_GAP (first_stmt_info) == 0))) + + if (loads_permuted) { - load_permutation.release (); - continue; + if (!vect_supported_load_permutation_p (new_instance)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: unsupported load " + "permutation %G", stmt_info->stmt); + vect_free_slp_instance (new_instance, false); + return false; + } } - SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; - loads_permuted = true; - } - - if (loads_permuted) - { - if (!vect_supported_load_permutation_p (new_instance)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: unsupported load " - "permutation %G", stmt_info->stmt); - vect_free_slp_instance (new_instance, false); - return false; - } - } /* If the loads and stores can be handled with load/store-lan - instructions do not generate this SLP instance. */ - if (is_a (vinfo) - && loads_permuted - && dr && vect_store_lanes_supported (vectype, group_size, false)) - { - slp_tree load_node; - FOR_EACH_VEC_ELT (loads, i, load_node) + instructions do not generate this SLP instance. */ + if (is_a (vinfo) + && loads_permuted + && dr && vect_store_lanes_supported (vectype, group_size, false)) { - stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT - (SLP_TREE_SCALAR_STMTS (load_node)[0]); - /* Use SLP for strided accesses (or if we can't load-lanes). */ - if (STMT_VINFO_STRIDED_P (stmt_vinfo) - || ! vect_load_lanes_supported - (STMT_VINFO_VECTYPE (stmt_vinfo), - DR_GROUP_SIZE (stmt_vinfo), false)) - break; + slp_tree load_node; + FOR_EACH_VEC_ELT (loads, i, load_node) + { + stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT + (SLP_TREE_SCALAR_STMTS (load_node)[0]); + /* Use SLP for strided accesses (or if we can't load-lanes). */ + if (STMT_VINFO_STRIDED_P (stmt_vinfo) + || ! vect_load_lanes_supported + (STMT_VINFO_VECTYPE (stmt_vinfo), + DR_GROUP_SIZE (stmt_vinfo), false)) + break; + } + if (i == loads.length ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Built SLP cancelled: can use " + "load/store-lanes\n"); + vect_free_slp_instance (new_instance, false); + return false; + } } - if (i == loads.length ()) + + vinfo->slp_instances.safe_push (new_instance); + + if (dump_enabled_p ()) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Built SLP cancelled: can use " - "load/store-lanes\n"); - vect_free_slp_instance (new_instance, false); - return false; + dump_printf_loc (MSG_NOTE, vect_location, + "Final SLP tree for instance:\n"); + vect_print_slp_tree (MSG_NOTE, vect_location, node); } - } - vinfo->slp_instances.safe_push (new_instance); - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "Final SLP tree for instance:\n"); - vect_print_slp_tree (MSG_NOTE, vect_location, node); + return true; } - - return true; - } } else { - /* Failed to SLP. */ - /* Free the allocated memory. */ - scalar_stmts.release (); - loads.release (); + /* Failed to SLP. */ + /* Free the allocated memory. */ + scalar_stmts.release (); + loads.release (); } /* For basic block SLP, try to break the group up into multiples of the @@ -3749,8 +3780,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) return; + /* See if we have already vectorized the node in the graph of the + SLP instance. */ + if (SLP_TREE_VEC_STMTS (node).exists ()) + return; + /* See if we have already vectorized the same set of stmts and reuse their - vectorized stmts. */ + vectorized stmts across instances. */ if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node))) { SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader)); @@ -3778,8 +3814,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, group_size = SLP_INSTANCE_GROUP_SIZE (instance); gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0); - if (!SLP_TREE_VEC_STMTS (node).exists ()) - SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); + SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 08d696a2f7c..e1292aa6eb6 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -130,6 +130,8 @@ struct _slp_tree { scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector size. */ unsigned int vec_stmts_size; + /* Reference count in the SLP graph. */ + unsigned int refcnt; /* Whether the scalar computations use two different operators. */ bool two_operators; /* The DEF type of this node. */ -- 2.30.2