From: Richard Biener Date: Tue, 23 Jun 2020 12:47:47 +0000 (+0200) Subject: emit SLP vectorized loads earlier X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6924b5e6bd3c89e229c52eafb1353bcbe17ab405;p=gcc.git emit SLP vectorized loads earlier This makes sure to emit SLP vectorized loads where the first scalar load is. This makes SLP dependence checking more powerful because hoisting loads can use TBAA and it increases the freedom for vector placement when there are constraints from live lanes. Vectorized shifts block inserting vectorized stmts always after vectorized defs because it ends up using the original scalar operand even when the SLP graph indicates the shift operand is vectorized (and we actually emit and cost those stmts). vect_slp_analyze_and_verify_node_alignment shows we need alignment for too many places, this is a temporary solution and my plan is to have a single meta-info for a dataref group instead (also getting rid of DR_GROUP_FIRST/NEXT_ELEMENT). 2020-06-24 Richard Biener * tree-vectorizer.h (vect_find_first_scalar_stmt_in_slp): Declare. * tree-vect-data-refs.c (vect_preserves_scalar_order_p): Simplify for new position of vectorized SLP loads. (vect_slp_analyze_node_dependences): Adjust for it. (vect_slp_analyze_and_verify_node_alignment): Compute alignment for the first stmts dataref. * tree-vect-slp.c (vect_find_first_scalar_stmt_in_slp): New. (vect_schedule_slp_instance): Emit loads before the first scalar stmt. * tree-vect-stmts.c (vectorizable_load): Do what the comment says and use vect_find_first_scalar_stmt_in_slp. --- diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 2365a3925bb..eb8288e7a85 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -232,61 +232,43 @@ vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b) && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) return true; - /* STMT_A and STMT_B belong to overlapping groups. All loads in a - SLP group are emitted at the position of the last scalar load and - all loads in an interleaving group are emitted at the position - of the first scalar load. + /* STMT_A and STMT_B belong to overlapping groups. All loads are + emitted at the position of the first scalar load. Stores in a group are emitted at the position of the last scalar store. Compute that position and check whether the resulting order matches - the current one. - We have not yet decided between SLP and interleaving so we have - to conservatively assume both. */ - stmt_vec_info il_a; - stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); - if (last_a) + the current one. */ + stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); + if (il_a) { - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - last_a = get_later_stmt (last_a, s); - if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a))) - { - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - if (get_later_stmt (il_a, s) == il_a) - il_a = s; - } - else - il_a = last_a; + if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a))) + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + il_a = get_later_stmt (il_a, s); + else /* DR_IS_READ */ + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + if (get_later_stmt (il_a, s) == il_a) + il_a = s; } else - last_a = il_a = stmtinfo_a; - stmt_vec_info il_b; - stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); - if (last_b) + il_a = stmtinfo_a; + stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); + if (il_b) { - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - last_b = get_later_stmt (last_b, s); - if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b))) - { - for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; - s = DR_GROUP_NEXT_ELEMENT (s)) - if (get_later_stmt (il_b, s) == il_b) - il_b = s; - } - else - il_b = last_b; + if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b))) + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + il_b = get_later_stmt (il_b, s); + else /* DR_IS_READ */ + for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; + s = DR_GROUP_NEXT_ELEMENT (s)) + if (get_later_stmt (il_b, s) == il_b) + il_b = s; } else - last_b = il_b = stmtinfo_b; + il_b = stmtinfo_b; bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a); - return (/* SLP */ - (get_later_stmt (last_a, last_b) == last_a) == a_after_b - /* Interleaving */ - && (get_later_stmt (il_a, il_b) == il_a) == a_after_b - /* Mixed */ - && (get_later_stmt (il_a, last_b) == il_a) == a_after_b - && (get_later_stmt (last_a, il_b) == last_a) == a_after_b); + return (get_later_stmt (il_a, il_b) == il_a) == a_after_b; } /* A subroutine of vect_analyze_data_ref_dependence. Handle @@ -702,71 +684,144 @@ vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node, /* This walks over all stmts involved in the SLP load/store done in NODE verifying we can sink them up to the last stmt in the group. */ - stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node); - for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) + if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node)))) { - stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k]; - if (access_info == last_access_info) - continue; - data_reference *dr_a = STMT_VINFO_DATA_REF (access_info); - ao_ref ref; - bool ref_initialized_p = false; - for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt); - gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi)) + stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node); + for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) { - gimple *stmt = gsi_stmt (gsi); - if (! gimple_vuse (stmt) - || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt))) + stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k]; + if (access_info == last_access_info) continue; - - /* If we couldn't record a (single) data reference for this - stmt we have to resort to the alias oracle. */ - stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); - data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info); - if (!dr_b) + data_reference *dr_a = STMT_VINFO_DATA_REF (access_info); + ao_ref ref; + bool ref_initialized_p = false; + for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt); + gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi)) { - /* We are moving a store or sinking a load - this means - we cannot use TBAA for disambiguation. */ - if (!ref_initialized_p) - ao_ref_init (&ref, DR_REF (dr_a)); - if (stmt_may_clobber_ref_p_1 (stmt, &ref, false) - || ref_maybe_used_by_stmt_p (stmt, &ref, false)) + gimple *stmt = gsi_stmt (gsi); + if (! gimple_vuse (stmt)) + continue; + + /* If we couldn't record a (single) data reference for this + stmt we have to resort to the alias oracle. */ + stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); + data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info); + if (!dr_b) + { + /* We are moving a store - this means + we cannot use TBAA for disambiguation. */ + if (!ref_initialized_p) + ao_ref_init (&ref, DR_REF (dr_a)); + if (stmt_may_clobber_ref_p_1 (stmt, &ref, false) + || ref_maybe_used_by_stmt_p (stmt, &ref, false)) + return false; + continue; + } + + bool dependent = false; + /* If we run into a store of this same instance (we've just + marked those) then delay dependence checking until we run + into the last store because this is where it will have + been sunk to (and we verify if we can do that as well). */ + if (gimple_visited_p (stmt)) + { + if (stmt_info != last_store_info) + continue; + unsigned i; + stmt_vec_info store_info; + FOR_EACH_VEC_ELT (stores, i, store_info) + { + data_reference *store_dr + = STMT_VINFO_DATA_REF (store_info); + ddr_p ddr = initialize_data_dependence_relation + (dr_a, store_dr, vNULL); + dependent + = vect_slp_analyze_data_ref_dependence (vinfo, ddr); + free_dependence_relation (ddr); + if (dependent) + break; + } + } + else + { + ddr_p ddr = initialize_data_dependence_relation (dr_a, + dr_b, vNULL); + dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr); + free_dependence_relation (ddr); + } + if (dependent) return false; - continue; } - - bool dependent = false; - /* If we run into a store of this same instance (we've just - marked those) then delay dependence checking until we run - into the last store because this is where it will have - been sunk to (and we verify if we can do that as well). */ - if (gimple_visited_p (stmt)) + } + } + else /* DR_IS_READ */ + { + stmt_vec_info first_access_info + = vect_find_first_scalar_stmt_in_slp (node); + for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) + { + stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k]; + if (access_info == first_access_info) + continue; + data_reference *dr_a = STMT_VINFO_DATA_REF (access_info); + ao_ref ref; + bool ref_initialized_p = false; + for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt); + gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi)) { - if (stmt_info != last_store_info) + gimple *stmt = gsi_stmt (gsi); + if (! gimple_vdef (stmt)) continue; - unsigned i; - stmt_vec_info store_info; - FOR_EACH_VEC_ELT (stores, i, store_info) + + /* If we couldn't record a (single) data reference for this + stmt we have to resort to the alias oracle. */ + stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); + data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info); + if (!dr_b) + { + /* We are hoisting a load - this means we can use + TBAA for disambiguation. */ + if (!ref_initialized_p) + ao_ref_init (&ref, DR_REF (dr_a)); + if (stmt_may_clobber_ref_p_1 (stmt, &ref, true)) + return false; + continue; + } + + bool dependent = false; + /* If we run into a store of this same instance (we've just + marked those) then delay dependence checking until we run + into the last store because this is where it will have + been sunk to (and we verify if we can do that as well). */ + if (gimple_visited_p (stmt)) + { + if (stmt_info != last_store_info) + continue; + unsigned i; + stmt_vec_info store_info; + FOR_EACH_VEC_ELT (stores, i, store_info) + { + data_reference *store_dr + = STMT_VINFO_DATA_REF (store_info); + ddr_p ddr = initialize_data_dependence_relation + (dr_a, store_dr, vNULL); + dependent + = vect_slp_analyze_data_ref_dependence (vinfo, ddr); + free_dependence_relation (ddr); + if (dependent) + break; + } + } + else { - data_reference *store_dr = STMT_VINFO_DATA_REF (store_info); - ddr_p ddr = initialize_data_dependence_relation - (dr_a, store_dr, vNULL); - dependent - = vect_slp_analyze_data_ref_dependence (vinfo, ddr); + ddr_p ddr = initialize_data_dependence_relation (dr_a, + dr_b, vNULL); + dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr); free_dependence_relation (ddr); - if (dependent) - break; } + if (dependent) + return false; } - else - { - ddr_p ddr = initialize_data_dependence_relation (dr_a, - dr_b, vNULL); - dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr); - free_dependence_relation (ddr); - } - if (dependent) - return false; } } return true; @@ -2399,10 +2454,20 @@ vect_slp_analyze_and_verify_node_alignment (vec_info *vinfo, slp_tree node) dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info); vect_compute_data_ref_alignment (vinfo, dr_info); - /* For creating the data-ref pointer we need alignment of the - first element anyway. */ + /* In several places we need alignment of the first element anyway. */ if (dr_info != first_dr_info) vect_compute_data_ref_alignment (vinfo, first_dr_info); + + /* For creating the data-ref pointer we need alignment of the + first element as well. */ + first_stmt_info = vect_find_first_scalar_stmt_in_slp (node); + if (first_stmt_info != SLP_TREE_SCALAR_STMTS (node)[0]) + { + first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); + if (dr_info != first_dr_info) + vect_compute_data_ref_alignment (vinfo, first_dr_info); + } + if (! verify_data_ref_alignment (vinfo, dr_info)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 4031db4a9c6..e7a260877a9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1915,6 +1915,25 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node) return last; } +/* Find the first stmt in NODE. */ + +stmt_vec_info +vect_find_first_scalar_stmt_in_slp (slp_tree node) +{ + stmt_vec_info first = NULL; + stmt_vec_info stmt_vinfo; + + for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++) + { + stmt_vinfo = vect_orig_stmt (stmt_vinfo); + if (!first + || get_later_stmt (stmt_vinfo, first) == first) + first = stmt_vinfo; + } + + return first; +} + /* Splits a group of stores, currently beginning at FIRST_VINFO, into two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE (also containing the first GROUP1_SIZE stmts, since stores are @@ -4205,15 +4224,33 @@ vect_schedule_slp_instance (vec_info *vinfo, "------>vectorizing SLP node starting from: %G", stmt_info->stmt); - /* Vectorized stmts go before the last scalar stmt which is where - all uses are ready. */ - stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); - if (last_stmt_info) - si = gsi_for_stmt (last_stmt_info->stmt); + if (STMT_VINFO_DATA_REF (stmt_info) + && SLP_TREE_CODE (node) != VEC_PERM_EXPR) + { + /* Vectorized loads go before the first scalar load to make it + ready early, vectorized stores go before the last scalar + stmt which is where all uses are ready. */ + stmt_vec_info last_stmt_info = NULL; + if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) + last_stmt_info = vect_find_first_scalar_stmt_in_slp (node); + else /* DR_IS_WRITE */ + last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); + si = gsi_for_stmt (last_stmt_info->stmt); + } + else if (SLP_TREE_CHILDREN (node).is_empty ()) + /* This happens for reduction PHIs. */ + si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)->stmt); + else if (stmt_vec_info first_stmt_info + = vect_find_last_scalar_stmt_in_slp (node)) + /* ??? Shifts by scalars hit us here again, we end up vectorizing + the shift operand but end up using the scalar operand anyway. + This needs to be better reflected in the SLP tree. For now + use the last position if available. */ + si = gsi_for_stmt (first_stmt_info->stmt); else { - /* Or if we do not have 1:1 matching scalar stmts emit after the - children vectorized defs. */ + /* Emit other stmts after the children vectorized defs which is + earliest possible. */ gimple *last_stmt = NULL; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 23799f0ac18..de7d77f3872 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -8728,7 +8728,8 @@ vectorizable_load (vec_info *vinfo, /* For BB vectorization always use the first stmt to base the data ref pointer on. */ if (bb_vinfo) - first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0]; + first_stmt_info_for_drptr + = vect_find_first_scalar_stmt_in_slp (slp_node); /* Check if the chain of loads is already vectorized. */ if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 32feec3e24b..e4d132493ca 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2001,6 +2001,7 @@ extern void vect_get_slp_defs (vec_info *, slp_tree, vec > *, unsigned n = -1U); extern bool vect_slp_bb (basic_block); extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); +extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree); extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, unsigned int * = NULL,