From: Richard Biener Date: Wed, 24 Oct 2018 11:46:58 +0000 (+0000) Subject: re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision]) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=be43a8877e2f2f4590ba667b27a24a0cfdf8141d;p=gcc.git re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision]) 2018-10-24 Richard Biener PR tree-optimization/87105 * tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust dump classification. (vect_analyze_data_ref_accesses): Handle duplicate loads and stores by splitting the affected group after the fact. * tree-vect-slp.c (vect_build_slp_tree_2): Dump when we fail the SLP build because of size constraints. * gcc.dg/vect/bb-slp-39.c: New testcase. * gfortran.dg/vect/pr83232.f90: Un-XFAIL. From-SVN: r265457 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 55ac3b99e2c..bd70bab9265 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2018-10-24 Richard Biener + + PR tree-optimization/87105 + * tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust + dump classification. + (vect_analyze_data_ref_accesses): Handle duplicate loads and + stores by splitting the affected group after the fact. + * tree-vect-slp.c (vect_build_slp_tree_2): Dump when we + fail the SLP build because of size constraints. + 2018-10-24 Rainer Orth * configure.ac (gcc_cv_ld_aligned_shf_merge): New test. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c17488a87c0..df476ec2eb9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-10-24 Richard Biener + + PR tree-optimization/87105 + * gcc.dg/vect/bb-slp-39.c: New testcase. + * gfortran.dg/vect/pr83232.f90: Un-XFAIL. + 2018-10-24 Richard Biener PR tree-optimization/84013 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-39.c b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c new file mode 100644 index 00000000000..255bb1095dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-39.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ + +double x[1024]; + +void foo (double *p) +{ + x[0] = 1.; + x[1] = 2.; + *p = 7.; // aliasing store + x[0] = x[0] + 1; + x[1] = x[1] + 1; + *p = 8.; // aliasing store + x[1] = x[1] + 1; + x[0] = x[0] + 1; +} + +/* See that we vectorize three SLP instances. */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp2" } } */ diff --git a/gcc/testsuite/gfortran.dg/vect/pr83232.f90 b/gcc/testsuite/gfortran.dg/vect/pr83232.f90 index 8fd9f0373eb..a35357839fe 100644 --- a/gcc/testsuite/gfortran.dg/vect/pr83232.f90 +++ b/gcc/testsuite/gfortran.dg/vect/pr83232.f90 @@ -27,7 +27,5 @@ call foo (Einc) END SUBROUTINE -! We should vectorize (1) and (2) -! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } } -! We fail to vectorize at (3), this can be fixed in the future -! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } } +! We should vectorize (1), (2) and (3) +! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" } } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index a24e1853e03..9185b1bd1c0 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2472,7 +2472,7 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info) } if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + dump_printf_loc (MSG_NOTE, vect_location, "Two or more load stmts share the same dr.\n"); /* For load use the same data-ref load. */ @@ -2838,6 +2838,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) determining what dependencies are reversed. */ vec datarefs_copy = datarefs.copy (); datarefs_copy.qsort (dr_group_sort_cmp); + hash_set to_fixup; /* Build the interleaving chains. */ for (i = 0; i < datarefs_copy.length () - 1;) @@ -2920,36 +2921,32 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) { gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) < gimple_uid (DR_STMT (drb))); - /* ??? For now we simply "drop" the later reference which is - otherwise the same rather than finishing off this group. - In the end we'd want to re-process duplicates forming - multiple groups from the refs, likely by just collecting - all candidates (including duplicates and split points - below) in a vector and then process them together. */ - continue; + /* Simply link in duplicates and fix up the chain below. */ } - - /* If init_b == init_a + the size of the type * k, we have an - interleaving, and DRA is accessed before DRB. */ - HOST_WIDE_INT type_size_a = tree_to_uhwi (sza); - if (type_size_a == 0 - || (init_b - init_a) % type_size_a != 0) - break; - - /* If we have a store, the accesses are adjacent. This splits - groups into chunks we support (we don't support vectorization - of stores with gaps). */ - if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a) - break; - - /* If the step (if not zero or non-constant) is greater than the - difference between data-refs' inits this splits groups into - suitable sizes. */ - if (tree_fits_shwi_p (DR_STEP (dra))) + else { - HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra)); - if (step != 0 && step <= (init_b - init_a)) + /* If init_b == init_a + the size of the type * k, we have an + interleaving, and DRA is accessed before DRB. */ + HOST_WIDE_INT type_size_a = tree_to_uhwi (sza); + if (type_size_a == 0 + || (init_b - init_a) % type_size_a != 0) break; + + /* If we have a store, the accesses are adjacent. This splits + groups into chunks we support (we don't support vectorization + of stores with gaps). */ + if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a) + break; + + /* If the step (if not zero or non-constant) is greater than the + difference between data-refs' inits this splits groups into + suitable sizes. */ + if (tree_fits_shwi_p (DR_STEP (dra))) + { + HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra)); + if (step != 0 && step <= (init_b - init_a)) + break; + } } if (dump_enabled_p ()) @@ -2968,9 +2965,64 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; lastinfo = stmtinfo_b; + + if (init_b == init_prev + && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) + && dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Queuing group with duplicate access for fixup\n"); } } + /* Fixup groups with duplicate entries by splitting it. */ + while (1) + { + hash_set::iterator it = to_fixup.begin (); + if (!(it != to_fixup.end ())) + break; + stmt_vec_info grp = *it; + to_fixup.remove (grp); + + /* Find the earliest duplicate group member. */ + unsigned first_duplicate = -1u; + stmt_vec_info next, g = grp; + while ((next = DR_GROUP_NEXT_ELEMENT (g))) + { + if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr) + == DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) + && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) + first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); + g = next; + } + if (first_duplicate == -1U) + continue; + + /* Then move all stmts after the first duplicate to a new group. + Note this is a heuristic but one with the property that *it + is fixed up completely. */ + g = grp; + stmt_vec_info newgroup = NULL, ng; + while ((next = DR_GROUP_NEXT_ELEMENT (g))) + { + if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) + { + DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); + if (!newgroup) + newgroup = next; + else + DR_GROUP_NEXT_ELEMENT (ng) = next; + ng = next; + DR_GROUP_FIRST_ELEMENT (ng) = newgroup; + } + else + g = DR_GROUP_NEXT_ELEMENT (g); + } + DR_GROUP_NEXT_ELEMENT (ng) = NULL; + + /* Fixup the new group which still may contain duplicates. */ + to_fixup.add (newgroup); + } + FOR_EACH_VEC_ELT (datarefs_copy, i, dr) { dr_vec_info *dr_info = vinfo->lookup_dr (dr); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f60fea0a581..3aae1776ef9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1191,6 +1191,10 @@ vect_build_slp_tree_2 (vec_info *vinfo, if (++this_tree_size > max_tree_size) { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "Build SLP failed: SLP tree too large\n"); FOR_EACH_VEC_ELT (children, j, child) vect_free_slp_tree (child, false); vect_free_oprnd_info (oprnds_info);