From cdb2e365fc0dba2ee052827e5ca65234ca82d605 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 31 Jul 2020 10:53:39 +0200 Subject: [PATCH] SLP: support entire BB. gcc/ChangeLog: * tree-vect-data-refs.c (dr_group_sort_cmp): Work on data_ref_pair. (vect_analyze_data_ref_accesses): Work on groups. (vect_find_stmt_data_reference): Add group_id argument and fill up dataref_groups vector. * tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new arguments. (vect_analyze_loop_2): Likewise. * tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument. (vect_slp_bb_region): Likewise. (vect_slp_region): Likewise. (vect_slp_bb):Work on the entire BB. * tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new argument. (vect_find_stmt_data_reference): Likewise. gcc/testsuite/ChangeLog: * gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process a single vectorization and now 2 partial. * gcc.dg/vect/bb-slp-45.c: New test. --- gcc/testsuite/gcc.dg/vect/bb-slp-38.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++ gcc/tree-vect-data-refs.c | 66 ++++++++++++++------- gcc/tree-vect-loop.c | 5 +- gcc/tree-vect-slp.c | 82 ++++++++++----------------- gcc/tree-vectorizer.h | 5 +- 6 files changed, 120 insertions(+), 76 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c index 59aec54fffd..9c57ea3c2c1 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c @@ -41,4 +41,4 @@ int main() } /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c new file mode 100644 index 00000000000..4107a34cb93 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target vect_double } */ + +#include "tree-vect.h" + +extern void abort (void); + +double a[8], b[8]; +int x; + +void __attribute__((noinline,noclone)) +bar (void) +{ + x = 1; +} + +void __attribute__((noinline,noclone)) +foo(int i) +{ + double tem1 = a[2*i]; + double tem2 = 2*a[2*i+1]; + bar (); + b[2*i] = 2*tem1; + b[2*i+1] = tem2; +} + +int main() +{ + int i; + check_vect (); + for (i = 0; i < 8; ++i) + b[i] = i; + foo (2); + return 0; +} + +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index a78ae61d1b0..88c5faed20d 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2757,25 +2757,33 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info) return vect_analyze_group_access (vinfo, dr_info); } +typedef std::pair data_ref_pair; + /* Compare two data-references DRA and DRB to group them into chunks suitable for grouping. */ static int dr_group_sort_cmp (const void *dra_, const void *drb_) { - data_reference_p dra = *(data_reference_p *)const_cast(dra_); - data_reference_p drb = *(data_reference_p *)const_cast(drb_); + data_ref_pair dra_pair = *(data_ref_pair *)const_cast(dra_); + data_ref_pair drb_pair = *(data_ref_pair *)const_cast(drb_); + data_reference_p dra = dra_pair.first; + data_reference_p drb = drb_pair.first; int cmp; /* Stabilize sort. */ if (dra == drb) return 0; - /* DRs in different loops never belong to the same group. */ - loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father; - loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father; - if (loopa != loopb) - return loopa->num < loopb->num ? -1 : 1; + /* DRs in different basic-blocks never belong to the same group. */ + int bb_index1 = gimple_bb (DR_STMT (dra))->index; + int bb_index2 = gimple_bb (DR_STMT (drb))->index; + if (bb_index1 != bb_index2) + return bb_index1 < bb_index2 ? -1 : 1; + + /* Different group IDs lead never belong to the same group. */ + if (dra_pair.second != drb_pair.second) + return dra_pair.second < drb_pair.second ? -1 : 1; /* Ordering of DRs according to base. */ cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), @@ -2881,11 +2889,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, FORNOW: handle only arrays and pointer accesses. */ opt_result -vect_analyze_data_ref_accesses (vec_info *vinfo) +vect_analyze_data_ref_accesses (vec_info *vinfo, + vec *dataref_groups) { unsigned int i; vec datarefs = vinfo->shared->datarefs; - struct data_reference *dr; DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses"); @@ -2895,14 +2903,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Sort the array of datarefs to make building the interleaving chains linear. Don't modify the original vector's order, it is needed for determining what dependencies are reversed. */ - vec datarefs_copy = datarefs.copy (); + vec datarefs_copy; + datarefs_copy.create (datarefs.length ()); + for (unsigned i = 0; i < datarefs.length (); i++) + { + int group_id = dataref_groups ? (*dataref_groups)[i] : 0; + datarefs_copy.quick_push (data_ref_pair (datarefs[i], group_id)); + } datarefs_copy.qsort (dr_group_sort_cmp); hash_set to_fixup; /* Build the interleaving chains. */ for (i = 0; i < datarefs_copy.length () - 1;) { - data_reference_p dra = datarefs_copy[i]; + data_reference_p dra = datarefs_copy[i].first; + int dra_group_id = datarefs_copy[i].second; dr_vec_info *dr_info_a = vinfo->lookup_dr (dra); stmt_vec_info stmtinfo_a = dr_info_a->stmt; stmt_vec_info lastinfo = NULL; @@ -2914,7 +2929,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) } for (i = i + 1; i < datarefs_copy.length (); ++i) { - data_reference_p drb = datarefs_copy[i]; + data_reference_p drb = datarefs_copy[i].first; + int drb_group_id = datarefs_copy[i].second; dr_vec_info *dr_info_b = vinfo->lookup_dr (drb); stmt_vec_info stmtinfo_b = dr_info_b->stmt; if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) @@ -2927,10 +2943,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) matters we can push those to a worklist and re-iterate over them. The we can just skip ahead to the next DR here. */ - /* DRs in a different loop should not be put into the same + /* DRs in a different BBs should not be put into the same interleaving group. */ - if (gimple_bb (DR_STMT (dra))->loop_father - != gimple_bb (DR_STMT (drb))->loop_father) + int bb_index1 = gimple_bb (DR_STMT (dra))->index; + int bb_index2 = gimple_bb (DR_STMT (drb))->index; + if (bb_index1 != bb_index2) + break; + + if (dra_group_id != drb_group_id) break; /* Check that the data-refs have same first location (except init) @@ -2977,7 +2997,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); HOST_WIDE_INT init_prev - = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1])); + = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first)); gcc_assert (init_a <= init_b && init_a <= init_prev && init_prev <= init_b); @@ -2985,7 +3005,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Do not place the same access in the interleaving chain twice. */ if (init_b == init_prev) { - gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) + gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first)) < gimple_uid (DR_STMT (drb))); /* Simply link in duplicates and fix up the chain below. */ } @@ -3098,9 +3118,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) to_fixup.add (newgroup); } - FOR_EACH_VEC_ELT (datarefs_copy, i, dr) + data_ref_pair *dr_pair; + FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair) { - dr_vec_info *dr_info = vinfo->lookup_dr (dr); + dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first); if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) && !vect_analyze_data_ref_access (vinfo, dr_info)) { @@ -3991,7 +4012,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, opt_result vect_find_stmt_data_reference (loop_p loop, gimple *stmt, - vec *datarefs) + vec *datarefs, + vec *dataref_groups, int group_id) { /* We can ignore clobbers for dataref analysis - they are removed during loop vectorization and BB vectorization checks dependences with a @@ -4118,6 +4140,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); free_data_ref (dr); datarefs->safe_push (newdr); + if (dataref_groups) + dataref_groups->safe_push (group_id); return opt_result::success (); } } @@ -4127,6 +4151,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, } datarefs->safe_push (dr); + if (dataref_groups) + dataref_groups->safe_push (group_id); return opt_result::success (); } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index dba230f6320..a92813eb2ac 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, if (is_gimple_debug (stmt)) continue; ++(*n_stmts); - opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs); + opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, + NULL, 0); if (!res) { if (is_gimple_call (stmt) && loop->safelen) @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) /* Analyze the access patterns of the data-refs in the loop (consecutive, complex, etc.). FORNOW: Only handle consecutive access pattern. */ - ok = vect_analyze_data_ref_accesses (loop_vinfo); + ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); if (!ok) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 72192b5a813..167db076454 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo) region. */ static bool -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + vec *dataref_groups) { DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) return false; } - if (!vect_analyze_data_ref_accesses (bb_vinfo)) + if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) given by DATAREFS. */ static bool -vect_slp_bb_region (gimple_stmt_iterator region_begin, - gimple_stmt_iterator region_end, - vec datarefs, - unsigned int n_stmts) +vect_slp_region (gimple_stmt_iterator region_begin, + gimple_stmt_iterator region_end, + vec datarefs, + vec *dataref_groups, + unsigned int n_stmts) { bb_vec_info bb_vinfo; auto_vector_modes vector_modes; @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, bb_vinfo->shared->check_datarefs (); bb_vinfo->vector_mode = next_vector_mode; - if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) + if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups) && dbg_cnt (vect_slp)) { if (dump_enabled_p ()) @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, bool vect_slp_bb (basic_block bb) { - gimple_stmt_iterator gsi; - bool any_vectorized = false; - - gsi = gsi_after_labels (bb); - while (!gsi_end_p (gsi)) + vec datarefs = vNULL; + vec dataref_groups = vNULL; + int insns = 0; + int current_group = 0; + gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb); + gimple_stmt_iterator region_end = gsi_last_bb (bb); + if (!gsi_end_p (region_end)) + gsi_next (®ion_end); + + for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) { - gimple_stmt_iterator region_begin = gsi; - vec datarefs = vNULL; - int insns = 0; - - for (; !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (is_gimple_debug (stmt)) - { - /* Skip leading debug stmts. */ - if (gsi_stmt (region_begin) == stmt) - gsi_next (®ion_begin); - continue; - } - insns++; - - if (gimple_location (stmt) != UNKNOWN_LOCATION) - vect_location = stmt; + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_debug (stmt)) + continue; - if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) - break; - } - if (gsi_end_p (region_begin)) - break; + insns++; - /* Skip leading unhandled stmts. */ - if (gsi_stmt (region_begin) == gsi_stmt (gsi)) - { - gsi_next (&gsi); - continue; - } + if (gimple_location (stmt) != UNKNOWN_LOCATION) + vect_location = stmt; - gimple_stmt_iterator region_end = gsi; + if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs, + &dataref_groups, current_group)) + ++current_group; if (insns > param_slp_max_insns_in_bb) { @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb) "not vectorized: too many instructions in " "basic block.\n"); } - else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns)) - any_vectorized = true; - - if (gsi_end_p (region_end)) - break; - - /* Skip the unhandled stmt. */ - gsi_next (&gsi); } - return any_vectorized; + return vect_slp_region (region_begin, region_end, datarefs, + &dataref_groups, insns); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 7ab6d8234ac..8551b686613 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance); extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); -extern opt_result vect_analyze_data_ref_accesses (vec_info *); +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec *); extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, tree, int, internal_fn *, tree *); extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, gather_scatter_info *); extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, - vec *); + vec *, + vec *, int); extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); extern void vect_record_base_alignments (vec_info *); extern tree vect_create_data_ref_ptr (vec_info *, -- 2.30.2