From 90dd6e3df86c81bdc1380513c57cce64caf32f72 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 28 Apr 2015 08:30:44 +0000 Subject: [PATCH] re PR fortran/62283 (basic-block vectorization fails) 2015-04-28 Richard Biener PR tree-optimization/62283 * tree-vect-slp.c (vect_build_slp_tree): When the SLP build fails fatally and we are vectorizing a basic-block simply cause the child to be constructed piecewise. (vect_analyze_slp_cost_1): Adjust. (vect_detect_hybrid_slp_stmts): Likewise. (vect_bb_slp_scalar_cost): Likewise. (vect_get_constant_vectors): For piecewise constructed constants place them after the last def. (vect_get_slp_defs): Adjust. * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB externals for basic-block vectorization. * gfortran.dg/vect/pr62283-2.f: New testcase. * gcc.dg/vect/bb-slp-14.c: Adjust. From-SVN: r222514 --- gcc/ChangeLog | 15 ++++ gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.dg/vect/bb-slp-14.c | 5 +- gcc/testsuite/gfortran.dg/vect/pr62283-2.f | 13 ++++ gcc/tree-vect-slp.c | 91 +++++++++++++++++----- gcc/tree-vect-stmts.c | 5 +- 6 files changed, 111 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/vect/pr62283-2.f diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 070a150b31d..8efe3d87e44 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-04-28 Richard Biener + + PR tree-optimization/62283 + * tree-vect-slp.c (vect_build_slp_tree): When the SLP build + fails fatally and we are vectorizing a basic-block simply + cause the child to be constructed piecewise. + (vect_analyze_slp_cost_1): Adjust. + (vect_detect_hybrid_slp_stmts): Likewise. + (vect_bb_slp_scalar_cost): Likewise. + (vect_get_constant_vectors): For piecewise constructed + constants place them after the last def. + (vect_get_slp_defs): Adjust. + * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB + externals for basic-block vectorization. 
+ 2015-04-28 Thomas Preud'homme PR target/63503 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f9cce49d453..703bc3aa19e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-04-28 Richard Biener + + PR tree-optimization/62283 + * gfortran.dg/vect/pr62283-2.f: New testcase. + * gcc.dg/vect/bb-slp-14.c: Adjust. + 2015-04-28 Richard Biener PR tree-optimization/65851 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-14.c b/gcc/testsuite/gcc.dg/vect/bb-slp-14.c index a55c48eb09b..d8ba10b974f 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-14.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-14.c @@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y) int i; unsigned int a0, a1, a2, a3; - /* Not consecutive load with permutation - not supported. */ + /* Not consecutive load with permutation - supported with building up + the vector from scalars. */ a0 = in[0] + 23; a1 = in[1] + 142; a2 = in[1] + 2; @@ -47,6 +48,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */ +/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */ /* { dg-final { cleanup-tree-dump "slp2" } } */ diff --git a/gcc/testsuite/gfortran.dg/vect/pr62283-2.f b/gcc/testsuite/gfortran.dg/vect/pr62283-2.f new file mode 100644 index 00000000000..b71ac3eecef --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr62283-2.f @@ -0,0 +1,13 @@ +! { dg-do compile } +! { dg-require-effective-target vect_float } +! { dg-additional-options "-fdump-tree-slp2-details" } + subroutine saxpy(alpha,x,y) + real x(4),y(4),alpha + y(1)=y(1)+alpha*x(1) + y(2)=y(2)+alpha*x(2) + y(3)=y(3)+alpha*x(3) + y(4)=y(4)+alpha*x(4) + end +! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } +! { dg-final { cleanup-tree-dump "slp2" } } +! 
{ dg-final { cleanup-tree-dump "vect" } } diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index d82df3e5daa..b066763bec7 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, continue; } + /* If the SLP build failed fatally and we analyze a basic-block + simply treat nodes we fail to build as externally defined + (and thus build vectors from the scalar defs). + The cost model will reject outright expensive cases. + ??? This doesn't treat cases where permutation ultimately + fails (or we don't try permutation below). Ideally we'd + even compute a permutation that will end up with the maximum + SLP tree size... */ + if (bb_vinfo + && !matches[0] + /* ??? Rejecting patterns this way doesn't work. We'd have to + do extra work to cancel the pattern so the uses see the + scalar version. */ + && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Building vector operands from scalars\n"); + oprnd_info->def_stmts = vNULL; + vect_free_slp_tree (child); + SLP_TREE_CHILDREN (*node).quick_push (NULL); + continue; + } + /* If the SLP build for operand zero failed and operand zero and one can be commutated try that for the scalar stmts that failed the match. */ @@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, /* Recurse down the SLP tree. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, - instance, child, prologue_cost_vec, - ncopies_for_cost); + if (child) + vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, + instance, child, prologue_cost_vec, + ncopies_for_cost); /* Look at the first scalar stmt to determine the cost. 
*/ stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) STMT_SLP_TYPE (stmt_vinfo) = hybrid; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) - vect_detect_hybrid_slp_stmts (child, i, stype); + if (child) + vect_detect_hybrid_slp_stmts (child, i, stype); } /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ @@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb, } FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - scalar_cost += vect_bb_slp_scalar_cost (bb, child, life); + if (child) + scalar_cost += vect_bb_slp_scalar_cost (bb, child, life); return scalar_cost; } @@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, number_of_places_left_in_vector = nunits; elts = XALLOCAVEC (tree, nunits); + bool place_after_defs = false; for (j = 0; j < number_of_copies; j++) { for (i = group_size - 1; stmts.iterate (i, &stmt); i--) @@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, /* Create 'vect_ = {op0,op1,...,opn}'. 
*/ number_of_places_left_in_vector--; + tree orig_op = op; if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op))) { if (CONSTANT_CLASS_P (op)) @@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, elts[number_of_places_left_in_vector] = op; if (!CONSTANT_CLASS_P (op)) constant_p = false; + if (TREE_CODE (orig_op) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (orig_op) + && STMT_VINFO_BB_VINFO (stmt_vinfo) + && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb + == gimple_bb (SSA_NAME_DEF_STMT (orig_op)))) + place_after_defs = true; if (number_of_places_left_in_vector == 0) { @@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); vec_cst = build_constructor (vector_type, v); } - voprnds.quick_push (vect_init_vector (stmt, vec_cst, - vector_type, NULL)); + tree init; + gimple_stmt_iterator gsi; + if (place_after_defs) + { + gsi = gsi_for_stmt + (vect_find_last_scalar_stmt_in_slp (slp_node)); + init = vect_init_vector (stmt, vec_cst, vector_type, &gsi); + } + else + init = vect_init_vector (stmt, vec_cst, vector_type, NULL); if (ctor_seq != NULL) { - gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ()); - gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt); + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); gsi_insert_seq_before_without_update (&gsi, ctor_seq, GSI_SAME_STMT); ctor_seq = NULL; } + voprnds.quick_push (init); + place_after_defs = false; } } } @@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec ops, slp_tree slp_node, child = SLP_TREE_CHILDREN (slp_node)[child_index]; /* We have to check both pattern and original def, if available. 
*/ - gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0]; - gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); - - if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0) - || (related - && operand_equal_p (oprnd, gimple_get_lhs (related), 0))) + if (child) { - /* The number of vector defs is determined by the number of - vector statements in the node from which we get those - statements. */ - number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); - vectorized_defs = true; - child_index++; + gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0]; + gimple related + = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); + + if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0) + || (related + && operand_equal_p (oprnd, gimple_get_lhs (related), 0))) + { + /* The number of vector defs is determined by the number of + vector statements in the node from which we get those + statements. */ + number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); + vectorized_defs = true; + child_index++; + } } + else + child_index++; } if (!vectorized_defs) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 4496293fb46..2ce6d4d7c25 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo, else { stmt_vinfo = vinfo_for_stmt (*def_stmt); - *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); + if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo)) + *dt = vect_external_def; + else + *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); } if (dump_enabled_p ()) -- 2.30.2