From: Richard Biener Date: Thu, 4 Jun 2020 12:17:21 +0000 (+0200) Subject: Simplify SLP code wrt SLP_TREE_DEF_TYPE X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3a70bf6690e310fb1d4afa4b3dd1b19ddd0e2f4;p=gcc.git Simplify SLP code wrt SLP_TREE_DEF_TYPE The following removes the ugly pushing of SLP_TREE_DEF_TYPE to stmt_infos and instead makes sure to handle invariants fully in vect_is_simple_use plus adjusting a few places I refrained from touching when enforcing vector types for them. It also simplifies building SLP nodes with all external operands from scalars by not doing that in the parent but instead not building those from the start. That also gets rid of vect_update_all_shared_vectypes. 2020-06-04 Richard Biener * tree-vect-slp.c (vect_update_all_shared_vectypes): Remove. (vect_build_slp_tree_2): Simplify building all external op nodes from scalars. (vect_slp_analyze_node_operations): Remove push/pop of STMT_VINFO_DEF_TYPE. (vect_schedule_slp_instance): Likewise. * tree-vect-stmts.c (vect_check_store_rhs): Pass in the stmt_info, use the vect_is_simple_use overload combining SLP and stmt_info analysis. (vect_is_simple_cond): Likewise. (vectorizable_store): Adjust. (vectorizable_condition): Likewise. (vect_is_simple_use): Fully handle invariant SLP nodes here. Amend stmt_info operand extraction with COND_EXPR and masked stores. * tree-vect-loop.c (vectorizable_reduction): Deal with COND_EXPR representation ugliness. --- diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3c5c0ea9ebc..ad26663595c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6197,6 +6197,12 @@ vectorizable_reduction (loop_vec_info loop_vinfo, gcc_assert (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) == stmt_info); } slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type); + /* We need to skip an extra operand for COND_EXPRs with embedded + comparison. 
*/ + unsigned opno_adjust = 0; + if (code == COND_EXPR + && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt))) + opno_adjust = 1; for (i = 0; i < op_type; i++) { /* The condition of COND_EXPR is checked in vectorizable_condition(). */ @@ -6207,7 +6213,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, enum vect_def_type dt; tree op; if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, - i, &op, &slp_op[i], &dt, &tem, + i + opno_adjust, &op, &slp_op[i], &dt, &tem, &def_stmt_info)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index cc33b64454c..f8b12f0dae9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -616,26 +616,6 @@ vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype) return false; } -/* Try to infer and assign a vector type to all the statements in STMTS. - Used only for BB vectorization. */ - -static bool -vect_update_all_shared_vectypes (vec_info *vinfo, vec<stmt_vec_info> stmts) -{ - tree vectype, nunits_vectype; - if (!vect_get_vector_types_for_stmt (vinfo, stmts[0], &vectype, - &nunits_vectype, stmts.length ())) - return false; - - stmt_vec_info stmt_info; - unsigned int i; - FOR_EACH_VEC_ELT (stmts, i, stmt_info) - if (!vect_update_shared_vectype (stmt_info, vectype)) - return false; - - return true; -} - /* Return true if call statements CALL1 and CALL2 are similar enough to be combined into the same SLP group. */ @@ -1349,7 +1329,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) { slp_tree child; - unsigned old_tree_size = this_tree_size; unsigned int j; if (oprnd_info->first_dt == vect_uninitialized_def) @@ -1376,45 +1355,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, matches, npermutes, &this_tree_size, bst_map)) != NULL) { - /* If we have all children of a non-unary child built up from - scalars then just throw that away and build it up this node - from scalars. */ - if (is_a <bb_vec_info> (vinfo) - && SLP_TREE_CHILDREN (child).length () > 1 - /* ??? 
Rejecting patterns this way doesn't work. We'd have to - do extra work to cancel the pattern so the uses see the - scalar version. */ - && !oprnd_info->any_pattern) - { - slp_tree grandchild; - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; - if (!grandchild - && vect_update_all_shared_vectypes (vinfo, - oprnd_info->def_stmts)) - { - /* Roll back. */ - this_tree_size = old_tree_size; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - vect_free_slp_tree (grandchild, false); - SLP_TREE_CHILDREN (child).truncate (0); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Building parent vector operands from " - "scalars instead\n"); - oprnd_info->def_stmts = vNULL; - SLP_TREE_DEF_TYPE (child) = vect_external_def; - SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; - oprnd_info->ops = vNULL; - ++this_tree_size; - children.safe_push (child); - continue; - } - } - oprnd_info->def_stmts = vNULL; children.safe_push (child); continue; @@ -1434,16 +1374,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, do extra work to cancel the pattern so the uses see the scalar version. 
*/ && !is_pattern_stmt_p (stmt_info) - && !oprnd_info->any_pattern - && vect_update_all_shared_vectypes (vinfo, oprnd_info->def_stmts)) + && !oprnd_info->any_pattern) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "Building vector operands from scalars\n"); this_tree_size++; - child = vect_create_new_slp_node (oprnd_info->def_stmts, 0); - SLP_TREE_DEF_TYPE (child) = vect_external_def; - SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; + child = vect_create_new_slp_node (oprnd_info->ops); children.safe_push (child); oprnd_info->ops = vNULL; oprnd_info->def_stmts = vNULL; @@ -1517,46 +1454,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, tem, npermutes, &this_tree_size, bst_map)) != NULL) { - /* If we have all children of a non-unary child built up from - scalars then just throw that away and build it up this node - from scalars. */ - if (is_a <bb_vec_info> (vinfo) - && SLP_TREE_CHILDREN (child).length () > 1 - /* ??? Rejecting patterns this way doesn't work. We'd have - to do extra work to cancel the pattern so the uses see the - scalar version. */ - && !oprnd_info->any_pattern) - { - unsigned int j; - slp_tree grandchild; - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; - if (!grandchild - && (vect_update_all_shared_vectypes - (vinfo, oprnd_info->def_stmts))) - { - /* Roll back. 
*/ - this_tree_size = old_tree_size; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - vect_free_slp_tree (grandchild, false); - SLP_TREE_CHILDREN (child).truncate (0); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Building parent vector operands from " - "scalars instead\n"); - oprnd_info->def_stmts = vNULL; - SLP_TREE_DEF_TYPE (child) = vect_external_def; - SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; - oprnd_info->ops = vNULL; - ++this_tree_size; - children.safe_push (child); - continue; - } - } - oprnd_info->def_stmts = vNULL; children.safe_push (child); continue; @@ -1575,6 +1472,35 @@ fail: vect_free_oprnd_info (oprnds_info); + /* If we have all children of a non-unary child built up from + scalars then just throw that away, causing it built up + from scalars. */ + if (nops > 1 + && is_a (vinfo) + /* ??? Rejecting patterns this way doesn't work. We'd have to + do extra work to cancel the pattern so the uses see the + scalar version. */ + && !is_pattern_stmt_p (stmt_info)) + { + slp_tree child; + unsigned j; + FOR_EACH_VEC_ELT (children, j, child) + if (SLP_TREE_DEF_TYPE (child) != vect_external_def) + break; + if (!child) + { + /* Roll back. */ + FOR_EACH_VEC_ELT (children, j, child) + vect_free_slp_tree (child, false); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Building parent vector operands from " + "scalars instead\n"); + return NULL; + } + } + *tree_size += this_tree_size + 1; *max_nunits = this_max_nunits; @@ -2817,51 +2743,8 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, visited, lvisited, cost_vec)) return false; - /* ??? We have to catch the case late where two first scalar stmts appear - in multiple SLP children with different def type and fail. Remember - original def types first since SLP_TREE_DEF_TYPE doesn't necessarily - match it when that is vect_internal_def. 
*/ - auto_vec dt; - dt.safe_grow (SLP_TREE_CHILDREN (node).length ()); - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) - if (SLP_TREE_SCALAR_STMTS (child).length () != 0) - dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]); - - /* Push SLP node def-type to stmt operands. */ - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) - if (SLP_TREE_DEF_TYPE (child) != vect_internal_def - && SLP_TREE_SCALAR_STMTS (child).length () != 0) - STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) - = SLP_TREE_DEF_TYPE (child); - - /* Check everything worked out. */ - bool res = true; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) - if (SLP_TREE_SCALAR_STMTS (child).length () != 0) - { - if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) - { - if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) - != SLP_TREE_DEF_TYPE (child)) - res = false; - } - else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) - != dt[j]) - res = false; - } - if (!res && dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: same operand with different " - "def type in stmt.\n"); - - if (res) - res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance, - cost_vec); - - /* Restore def-types. */ - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) - if (SLP_TREE_SCALAR_STMTS (child).length () != 0) - STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j]; + bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance, + cost_vec); /* When the node can be vectorized cost invariant nodes it references. 
This is not done in DFS order to allow the refering node @@ -4038,7 +3921,7 @@ vect_schedule_slp_instance (vec_info *vinfo, slp_tree node, slp_instance instance) { gimple_stmt_iterator si; - int i, j; + int i; slp_tree child; /* See if we have already vectorized the node in the graph of the @@ -4065,15 +3948,6 @@ vect_schedule_slp_instance (vec_info *vinfo, FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) vect_schedule_slp_instance (vinfo, child, instance); - /* Push SLP node def-type to stmts. */ - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) - { - stmt_vec_info child_stmt_info; - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info) - STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child); - } - stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); /* VECTYPE is the type of the destination. */ @@ -4172,15 +4046,6 @@ vect_schedule_slp_instance (vec_info *vinfo, } if (!done_p) vect_transform_stmt (vinfo, stmt_info, &si, node, instance); - - /* Restore stmt def-types. */ - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) - { - stmt_vec_info child_stmt_info; - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info) - STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def; - } } /* Replace scalar calls from SLP node NODE with setting of their lhs to zero. diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 5548f0d987f..4cca06ee96e 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2573,7 +2573,8 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask, *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. 
*/ static bool -vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs, +vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, + slp_tree slp_node, tree rhs, vect_def_type *rhs_dt_out, tree *rhs_vectype_out, vec_load_store_type *vls_type_out) { @@ -2589,7 +2590,9 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs, enum vect_def_type rhs_dt; tree rhs_vectype; - if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype)) + slp_tree slp_op; + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, + &rhs, &slp_op, &rhs_dt, &rhs_vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7487,7 +7490,7 @@ vectorizable_store (vec_info *vinfo, return false; } - if (!vect_check_store_rhs (vinfo, stmt_info, + if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, op, &rhs_dt, &rhs_vectype, &vls_type)) return false; @@ -9977,18 +9980,20 @@ vectorizable_load (vec_info *vinfo, condition operands are supportable using vec_is_simple_use. */ static bool -vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, - tree *comp_vectype, enum vect_def_type *dts, - tree vectype) +vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info, + slp_tree slp_node, tree *comp_vectype, + enum vect_def_type *dts, tree vectype) { tree lhs, rhs; tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; + slp_tree slp_op; /* Mask case. 
*/ if (TREE_CODE (cond) == SSA_NAME && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond))) { - if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond, + &slp_op, &dts[0], comp_vectype) || !*comp_vectype || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype)) return false; @@ -10003,7 +10008,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, if (TREE_CODE (lhs) == SSA_NAME) { - if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, + &lhs, &slp_op, &dts[0], &vectype1)) return false; } else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST @@ -10014,7 +10020,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, if (TREE_CODE (rhs) == SSA_NAME) { - if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2)) + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, + &rhs, &slp_op, &dts[1], &vectype2)) return false; } else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST @@ -10158,21 +10165,17 @@ vectorizable_condition (vec_info *vinfo, cond_expr = gimple_assign_rhs1 (stmt); - if (!vect_is_simple_cond (cond_expr, vinfo, slp_node, + if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node, &comp_vectype, &dts[0], vectype) || !comp_vectype) return false; - unsigned slp_adjust = 0; - if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4) - /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor - things more... */ - slp_adjust = 1; + unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 
1 : 0; slp_tree then_slp_node, else_slp_node; - if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust, + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust, &then_clause, &then_slp_node, &dts[2], &vectype1)) return false; - if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust, + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust, &else_clause, &else_slp_node, &dts[3], &vectype2)) return false; @@ -10301,7 +10304,7 @@ vectorizable_condition (vec_info *vinfo, if (slp_node && (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype) - || (slp_adjust == 1 + || (op_adjust == 1 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype)) || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype) @@ -11853,19 +11856,40 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt); else - *op = SLP_TREE_SCALAR_OPS (child)[0]; + { + if (def_stmt_info_out) + *def_stmt_info_out = NULL; + *op = SLP_TREE_SCALAR_OPS (child)[0]; + *dt = SLP_TREE_DEF_TYPE (child); + *vectype = SLP_TREE_VECTYPE (child); + return true; + } } else { if (gassign *ass = dyn_cast (stmt->stmt)) { - *op = gimple_op (ass, operand + 1); - /* ??? Ick. But it will vanish with SLP only. 
*/ - if (TREE_CODE (*op) == VIEW_CONVERT_EXPR) - *op = TREE_OPERAND (*op, 0); + if (gimple_assign_rhs_code (ass) == COND_EXPR + && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass))) + { + if (operand < 2) + *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand); + else + *op = gimple_op (ass, operand); + } + else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR) + *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0); + else + *op = gimple_op (ass, operand + 1); } else if (gcall *call = dyn_cast (stmt->stmt)) - *op = gimple_call_arg (call, operand); + { + if (gimple_call_internal_p (call) + && internal_store_fn_p (gimple_call_internal_fn (call))) + operand = internal_fn_stored_value_index (gimple_call_internal_fn + (call)); + *op = gimple_call_arg (call, operand); + } else gcc_unreachable (); }