From 1b4dbccc1f828fa00e6acc8b88d24301c65552df Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 26 Sep 2019 16:52:50 +0000 Subject: [PATCH] tree-vect-loop.c (vect_analyze_loop_operations): Analyze loop-closed PHIs that are vect_internal_def. 2019-09-26 Richard Biener * tree-vect-loop.c (vect_analyze_loop_operations): Analyze loop-closed PHIs that are vect_internal_def. (vect_create_epilog_for_reduction): Exit early for nested cycles. Simplify. (vectorizable_lc_phi): New. * tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi. (vect_transform_stmt): Likewise. * tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type. (vectorizable_lc_phi): Declare. From-SVN: r276157 --- gcc/ChangeLog | 12 ++++++ gcc/tree-vect-loop.c | 98 ++++++++++++++++++++++++++++++++++++------- gcc/tree-vect-stmts.c | 8 +++- gcc/tree-vectorizer.h | 2 + 4 files changed, 104 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 19892af07c0..66d7d86ba2f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2019-09-26 Richard Biener + + * tree-vect-loop.c (vect_analyze_loop_operations): Analyze + loop-closed PHIs that are vect_internal_def. + (vect_create_epilog_for_reduction): Exit early for nested cycles. + Simplify. + (vectorizable_lc_phi): New. + * tree-vect-stmts.c (vect_analyze_stmt): Call vectorizable_lc_phi. + (vect_transform_stmt): Likewise. + * tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type. + (vectorizable_lc_phi): Declare.
+ 2019-09-26 Richard Biener * tree-vect-loop.c (vect_analyze_loop_operations): Also call diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 1a561f9d16f..237d28b3ce3 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1519,12 +1519,16 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) phi_op = PHI_ARG_DEF (phi, 0); stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); if (!op_def_info) - return opt_result::failure_at (phi, "unsupported phi"); + return opt_result::failure_at (phi, "unsupported phi\n"); if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer && (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer_by_reduction)) - return opt_result::failure_at (phi, "unsupported phi"); + return opt_result::failure_at (phi, "unsupported phi\n"); + + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + && !vectorizable_lc_phi (stmt_info, NULL, NULL)) + return opt_result::failure_at (phi, "unsupported phi\n"); } continue; @@ -4396,6 +4400,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, } } + /* For vectorizing nested cycles the above is all we need to do. */ + if (nested_in_vect_loop && !double_reduc) + return; + /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) which is updated with the current index of the loop for every match of the original loop's cond_expr (VEC_STMT). This results in a vector @@ -4588,16 +4596,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); bitsize = TYPE_SIZE (scalar_type); - /* In case this is a reduction in an inner-loop while vectorizing an outer - loop - we don't need to extract a single scalar result at the end of the - inner-loop (unless it is double reduction, i.e., the use of reduction is - outside the outer-loop). The final vector of partial results will be used - in the vectorized outer-loop, or reduced to a scalar result at the end of - the outer-loop.
*/ - if (nested_in_vect_loop && !double_reduc) - ; - else - { /* SLP reduction without reduction chain, e.g., # a1 = phi <a2, a0> # b1 = phi <b2, b0> @@ -5313,7 +5311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, new_phis[0] = epilog_stmt; } - } if (double_reduc) loop = loop->inner; @@ -5473,7 +5470,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, if (double_reduc) loop = outer_loop; else - continue; + gcc_unreachable (); } phis.create (3); @@ -7167,6 +7164,76 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, return true; } +/* Vectorizes LC PHIs of nested cycles (so far). */ + +bool +vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt, + slp_tree slp_node) +{ + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + if (!loop_vinfo + || !is_a <gphi *> (stmt_info->stmt) + || gimple_phi_num_args (stmt_info->stmt) != 1) + return false; + + /* To handle the nested_cycle_def for double-reductions we have to + refactor epilogue generation more. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + /* && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def */) + return false; + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; + return true; + } + + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree scalar_dest = gimple_phi_result (stmt_info->stmt); + basic_block bb = gimple_bb (stmt_info->stmt); + edge e = single_pred_edge (bb); + tree vec_dest = vect_create_destination_var (scalar_dest, vectype); + vec<tree> vec_oprnds = vNULL; + vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE, + stmt_info, &vec_oprnds, NULL, slp_node); + if (slp_node) + { + unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert (vec_oprnds.length () == vec_num); + for (unsigned i = 0; i < vec_num; i++) + { + /* Create the vectorized LC PHI node.
*/ + gphi *new_phi = create_phi_node (vec_dest, bb); + add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION); + stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); + } + } + else + { + unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype); + stmt_vec_info prev_phi_info = NULL; + for (unsigned i = 0; i < ncopies; i++) + { + if (i != 0) + vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL); + /* Create the vectorized LC PHI node. */ + gphi *new_phi = create_phi_node (vec_dest, bb); + add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION); + stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); + if (i == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info; + else + STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; + prev_phi_info = new_phi_info; + } + } + vec_oprnds.release (); + + return true; +} + + /* Function vect_min_worthwhile_factor. For a loop where we could vectorize the operation indicated by CODE, @@ -8399,7 +8466,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) && ! 
PURE_SLP_STMT (stmt_info)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b1e97f85d96..5734068eabc 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10671,7 +10671,8 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, || vectorizable_condition (stmt_info, NULL, NULL, false, -1, node, cost_vec) || vectorizable_comparison (stmt_info, NULL, NULL, node, - cost_vec)); + cost_vec) + || vectorizable_lc_phi (stmt_info, NULL, node)); else { if (bb_vinfo) @@ -10820,6 +10821,11 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, gcc_assert (done); break; + case lc_phi_info_type: + done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node); + gcc_assert (done); + break; + default: if (!STMT_VINFO_LIVE_P (stmt_info)) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f140405bbd6..1ab4af7236f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -694,6 +694,7 @@ enum stmt_vec_info_type { type_promotion_vec_info_type, type_demotion_vec_info_type, type_conversion_vec_info_type, + lc_phi_info_type, loop_exit_ctrl_vec_info_type }; @@ -1653,6 +1654,7 @@ extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, stmt_vec_info *, slp_tree, stmt_vector_for_cost *); +extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree); extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, stmt_vector_for_cost *, -- 2.30.2