From: Richard Biener Date: Fri, 24 Jul 2020 11:44:09 +0000 (+0200) Subject: Improve var-tracking dataflow iteration order X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1212cfad09378bc85860a7de22dde0cf7a19fd01;p=gcc.git Improve var-tracking dataflow iteration order This builds upon the rev_post_order_and_mark_dfs_back_seme improvements and makes vt_find_locations iterate over the dataflow problems for each toplevel SCC separately, improving memory locality and avoiding to process nodes after the SCC before the SCC itself stabilized. On the asan_interceptors.cc testcase this for example reduces the number of visited blocks from 3751 to 2867. For stage3-gcc this reduces the number of visited blocks by ~4%. 2020-07-28 Richard Biener PR debug/78288 * var-tracking.c (vt_find_locations): Use rev_post_order_and_mark_dfs_back_seme and separately iterate over toplevel SCCs. --- diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c index a345fb47eb7..743f5dcecf6 100644 --- a/gcc/var-tracking.c +++ b/gcc/var-tracking.c @@ -7084,164 +7084,191 @@ vt_find_locations (void) so that the data-flow runs faster. */ rc_order = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); bb_order = XNEWVEC (int, last_basic_block_for_fn (cfun)); - pre_and_rev_post_order_compute (NULL, rc_order, false); - for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) + auto_bitmap exit_bbs; + bitmap_set_bit (exit_bbs, EXIT_BLOCK); + auto_vec > toplevel_scc_extents; + int n = rev_post_order_and_mark_dfs_back_seme + (cfun, single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), exit_bbs, true, + rc_order, &toplevel_scc_extents); + for (i = 0; i < n; i++) bb_order[rc_order[i]] = i; - free (rc_order); in_worklist = sbitmap_alloc (last_basic_block_for_fn (cfun)); in_pending = sbitmap_alloc (last_basic_block_for_fn (cfun)); bitmap_clear (in_worklist); - FOR_EACH_BB_FN (bb, cfun) - pending->insert (bb_order[bb->index], bb); - bitmap_ones (in_pending); - - while (success && !pending->empty ()) - { - std::swap (worklist, pending); - std::swap (in_worklist, in_pending); + /* We're performing the dataflow iteration independently over the + toplevel SCCs plus leading non-cyclic entry blocks and separately + over the tail. That ensures best memory locality and the least + number of visited blocks. 
*/ + unsigned extent = 0; + int curr_start = -1; + int curr_end = -1; + do + { + curr_start = curr_end + 1; + if (toplevel_scc_extents.length () <= extent) + curr_end = n - 1; + else + curr_end = toplevel_scc_extents[extent++].second; - while (!worklist->empty ()) + for (int i = curr_start; i <= curr_end; ++i) { - bool changed; - edge_iterator ei; - int oldinsz, oldoutsz; + pending->insert (i, BASIC_BLOCK_FOR_FN (cfun, rc_order[i])); + bitmap_set_bit (in_pending, rc_order[i]); + } - bb = worklist->extract_min (); - bitmap_clear_bit (in_worklist, bb->index); + while (success && !pending->empty ()) + { + std::swap (worklist, pending); + std::swap (in_worklist, in_pending); - if (VTI (bb)->in.vars) + while (!worklist->empty ()) { - htabsz -= (shared_hash_htab (VTI (bb)->in.vars)->size () - + shared_hash_htab (VTI (bb)->out.vars)->size ()); - oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements (); - oldoutsz = shared_hash_htab (VTI (bb)->out.vars)->elements (); - } - else - oldinsz = oldoutsz = 0; + bool changed; + edge_iterator ei; + int oldinsz, oldoutsz; - if (MAY_HAVE_DEBUG_BIND_INSNS) - { - dataflow_set *in = &VTI (bb)->in, *first_out = NULL; - bool first = true, adjust = false; + bb = worklist->extract_min (); + bitmap_clear_bit (in_worklist, bb->index); - /* Calculate the IN set as the intersection of - predecessor OUT sets. */ + if (VTI (bb)->in.vars) + { + htabsz -= (shared_hash_htab (VTI (bb)->in.vars)->size () + + shared_hash_htab (VTI (bb)->out.vars)->size ()); + oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements (); + oldoutsz = shared_hash_htab (VTI (bb)->out.vars)->elements (); + } + else + oldinsz = oldoutsz = 0; - dataflow_set_clear (in); - dst_can_be_shared = true; + if (MAY_HAVE_DEBUG_BIND_INSNS) + { + dataflow_set *in = &VTI (bb)->in, *first_out = NULL; + bool first = true, adjust = false; - FOR_EACH_EDGE (e, ei, bb->preds) - if (!VTI (e->src)->flooded) - gcc_assert (bb_order[bb->index] - <= bb_order[e->src->index]); - else if (first) - { - dataflow_set_copy (in, &VTI (e->src)->out); - first_out = &VTI (e->src)->out; - first = false; - } - else - { - dataflow_set_merge (in, &VTI (e->src)->out); - adjust = true; - } + /* Calculate the IN set as the intersection of + predecessor OUT sets. */ - if (adjust) - { - dataflow_post_merge_adjust (in, &VTI (bb)->permp); + dataflow_set_clear (in); + dst_can_be_shared = true; - if (flag_checking) - /* Merge and merge_adjust should keep entries in - canonical order. */ - shared_hash_htab (in->vars) - ->traverse (in); + FOR_EACH_EDGE (e, ei, bb->preds) + if (!VTI (e->src)->flooded) + gcc_assert (bb_order[bb->index] + <= bb_order[e->src->index]); + else if (first) + { + dataflow_set_copy (in, &VTI (e->src)->out); + first_out = &VTI (e->src)->out; + first = false; + } + else + { + dataflow_set_merge (in, &VTI (e->src)->out); + adjust = true; + } - if (dst_can_be_shared) + if (adjust) { - shared_hash_destroy (in->vars); - in->vars = shared_hash_copy (first_out->vars); - } - } + dataflow_post_merge_adjust (in, &VTI (bb)->permp); - VTI (bb)->flooded = true; - } - else - { - /* Calculate the IN set as union of predecessor OUT sets. */ - dataflow_set_clear (&VTI (bb)->in); - FOR_EACH_EDGE (e, ei, bb->preds) - dataflow_set_union (&VTI (bb)->in, &VTI (e->src)->out); - } + if (flag_checking) + /* Merge and merge_adjust should keep entries in + canonical order. 
*/ + shared_hash_htab (in->vars) + ->traverse (in); - changed = compute_bb_dataflow (bb); - n_blocks_processed++; - htabsz += (shared_hash_htab (VTI (bb)->in.vars)->size () - + shared_hash_htab (VTI (bb)->out.vars)->size ()); + if (dst_can_be_shared) + { + shared_hash_destroy (in->vars); + in->vars = shared_hash_copy (first_out->vars); + } + } - if (htabmax && htabsz > htabmax) - { - if (MAY_HAVE_DEBUG_BIND_INSNS) - inform (DECL_SOURCE_LOCATION (cfun->decl), - "variable tracking size limit exceeded with " - "%<-fvar-tracking-assignments%>, retrying without"); + VTI (bb)->flooded = true; + } else - inform (DECL_SOURCE_LOCATION (cfun->decl), - "variable tracking size limit exceeded"); - success = false; - break; - } + { + /* Calculate the IN set as union of predecessor OUT sets. */ + dataflow_set_clear (&VTI (bb)->in); + FOR_EACH_EDGE (e, ei, bb->preds) + dataflow_set_union (&VTI (bb)->in, &VTI (e->src)->out); + } - if (changed) - { - FOR_EACH_EDGE (e, ei, bb->succs) + changed = compute_bb_dataflow (bb); + n_blocks_processed++; + htabsz += (shared_hash_htab (VTI (bb)->in.vars)->size () + + shared_hash_htab (VTI (bb)->out.vars)->size ()); + + if (htabmax && htabsz > htabmax) { - if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) - continue; + if (MAY_HAVE_DEBUG_BIND_INSNS) + inform (DECL_SOURCE_LOCATION (cfun->decl), + "variable tracking size limit exceeded with " + "%<-fvar-tracking-assignments%>, retrying without"); + else + inform (DECL_SOURCE_LOCATION (cfun->decl), + "variable tracking size limit exceeded"); + success = false; + break; + } - /* Iterate to an earlier block in RPO in the next - round, iterate to the same block immediately. */ - if (bb_order[e->dest->index] < bb_order[bb->index]) + if (changed) + { + FOR_EACH_EDGE (e, ei, bb->succs) { - if (!bitmap_bit_p (in_pending, e->dest->index)) + if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) + continue; + + /* Iterate to an earlier block in RPO in the next + round, iterate to the same block immediately. */ + if (bb_order[e->dest->index] < bb_order[bb->index]) { - /* Send E->DEST to next round. */ - bitmap_set_bit (in_pending, e->dest->index); - pending->insert (bb_order[e->dest->index], - e->dest); + gcc_assert (bb_order[e->dest->index] >= curr_start); + if (!bitmap_bit_p (in_pending, e->dest->index)) + { + /* Send E->DEST to next round. */ + bitmap_set_bit (in_pending, e->dest->index); + pending->insert (bb_order[e->dest->index], + e->dest); + } + } + else if (bb_order[e->dest->index] <= curr_end + && !bitmap_bit_p (in_worklist, e->dest->index)) + { + /* Add E->DEST to current round or delay + processing if it is in the next SCC. */ + bitmap_set_bit (in_worklist, e->dest->index); + worklist->insert (bb_order[e->dest->index], + e->dest); } - } - else if (!bitmap_bit_p (in_worklist, e->dest->index)) - { - /* Add E->DEST to current round. 
*/ - bitmap_set_bit (in_worklist, e->dest->index); - worklist->insert (bb_order[e->dest->index], - e->dest); } } - } - if (dump_file) - fprintf (dump_file, - "BB %i: in %i (was %i), out %i (was %i), rem %i + %i, tsz %i\n", - bb->index, - (int)shared_hash_htab (VTI (bb)->in.vars)->size (), - oldinsz, - (int)shared_hash_htab (VTI (bb)->out.vars)->size (), - oldoutsz, - (int)worklist->nodes (), (int)pending->nodes (), htabsz); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "BB %i IN:\n", bb->index); - dump_dataflow_set (&VTI (bb)->in); - fprintf (dump_file, "BB %i OUT:\n", bb->index); - dump_dataflow_set (&VTI (bb)->out); + if (dump_file) + fprintf (dump_file, + "BB %i: in %i (was %i), out %i (was %i), rem %i + %i, " + "tsz %i\n", bb->index, + (int)shared_hash_htab (VTI (bb)->in.vars)->size (), + oldinsz, + (int)shared_hash_htab (VTI (bb)->out.vars)->size (), + oldoutsz, + (int)worklist->nodes (), (int)pending->nodes (), + htabsz); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "BB %i IN:\n", bb->index); + dump_dataflow_set (&VTI (bb)->in); + fprintf (dump_file, "BB %i OUT:\n", bb->index); + dump_dataflow_set (&VTI (bb)->out); + } } } } + while (curr_end != n - 1); statistics_counter_event (cfun, "compute_bb_dataflow times", n_blocks_processed); @@ -7250,6 +7277,7 @@ vt_find_locations (void) FOR_EACH_BB_FN (bb, cfun) gcc_assert (VTI (bb)->flooded); + free (rc_order); free (bb_order); delete worklist; delete pending;
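
For illustration, the following is a minimal standalone sketch of the iteration scheme the patch introduces in vt_find_locations: run the worklist to a fixed point over each toplevel SCC (together with the acyclic blocks leading up to it) before moving on, and finally over the acyclic tail.  This is not GCC code; the block representation, the RPO numbering, the SCC extents and the transfer callback are stand-ins for what rev_post_order_and_mark_dfs_back_seme and compute_bb_dataflow provide, and the real implementation additionally keeps separate current-round (worklist) and next-round (pending) queues plus in_worklist/in_pending bitmaps to avoid duplicate queue entries.

    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    /* Stand-in for a CFG node; succs holds successor positions in the
       reverse post-order (RPO) numbering.  */
    struct block
    {
      std::vector<int> succs;
    };

    /* Iterate a dataflow problem region by region: each region is one
       toplevel SCC together with the acyclic blocks preceding it, and the
       last region is the acyclic tail.  TRANSFER (pos) recomputes the OUT
       set of the block at RPO position POS and returns true if it changed.  */
    static void
    iterate_per_scc (const std::vector<block> &blocks_in_rpo,
                     const std::vector<std::pair<int, int> > &scc_extents,
                     const std::function<bool (int)> &transfer)
    {
      int n = blocks_in_rpo.size ();
      unsigned extent = 0;
      int curr_end = -1;
      do
        {
          int curr_start = curr_end + 1;
          curr_end = (extent < scc_extents.size ()
                      ? scc_extents[extent++].second : n - 1);

          /* Seed the worklist with the whole region, in RPO.  */
          std::priority_queue<int, std::vector<int>,
                              std::greater<int> > worklist;
          for (int i = curr_start; i <= curr_end; ++i)
            worklist.push (i);

          /* Run this region to a fixed point before touching later
             regions; successors beyond CURR_END belong to a later region
             and are only visited once their own region is seeded.  */
          while (!worklist.empty ())
            {
              int pos = worklist.top ();
              worklist.pop ();
              if (transfer (pos))
                for (int succ : blocks_in_rpo[pos].succs)
                  if (succ >= curr_start && succ <= curr_end)
                    worklist.push (succ);
            }
        }
      while (curr_end != n - 1);
    }

The intent matches the new loop in vt_find_locations above: blocks of one SCC stay hot in the caches while that SCC iterates, and blocks after an SCC are never processed before the SCC itself has stabilized, which is where the reduction in visited blocks reported in the commit message comes from.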