From b28491dc2d79763ecbff4f0b9f1f3e48a443be1d Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Tue, 18 Aug 2020 18:52:17 -0400
Subject: [PATCH] analyzer: bulk merger/processing of runs of nodes at CFG
 join points

Prior to this patch the analyzer worklist considered only one node or
two nodes at a time, processing and/or merging state individually or
pairwise.  This could lead to explosions of merger nodes at CFG join
points, especially after switch statements, which can have large
numbers of in-edges: large numbers of merger exploded_nodes could be
created, exceeding the per-point limit and stopping analysis with
-Wanalyzer-too-complex.

This patch special-cases the handling of runs of consecutive nodes in
the worklist at a CFG join point, processing and merging them all
together.

The patch fixes a state explosion seen in bzip2.c when attempting to
reproduce PR analyzer/95188, in a switch statement within a loop for
argument parsing.  With this patch, the analyzer successfully
consolidates the state after the argument parsing into a single
exploded node.

In gcc.dg/analyzer/pr96653.c there is a switch statement with over 300
cases, which leads to hitting the per-point limit.  With this patch the
consolidation code doesn't manage to merge all of them, due to other
worklist-ordering bugs, and it still hits the per-point limit, but it
does manage some very long consolidations:
  merged 2 in-enodes into 2 out-enode(s) at SN: 403
  merged 2 in-enodes into 2 out-enode(s) at SN: 403
  merged 2 in-enodes into 1 out-enode(s) at SN: 11
  merged 29 in-enodes into 1 out-enode(s) at SN: 35
  merged 6 in-enodes into 1 out-enode(s) at SN: 41
  merged 31 in-enodes into 1 out-enode(s) at SN: 35
and with a followup patch to fix an SCC issue it manages:
  merged 358 in-enodes into 2 out-enode(s) at SN: 402

The patch appears to fix the failure on non-x86_64 of:
  gcc.dg/analyzer/pr93032-mztools.c (test for excess errors)
which is PR analyzer/96616.

Unfortunately, the patch introduces a memory leak false positive in
gcc.dg/analyzer/pr94851-1.c, but this appears to be a pre-existing bug
that was hidden by state-merging failures.

gcc/analyzer/ChangeLog:
	* engine.cc (exploded_node::dump_dot): Show STATUS_BULK_MERGED.
	(exploded_graph::process_worklist): Call
	maybe_process_run_of_before_supernode_enodes.
	(exploded_graph::maybe_process_run_of_before_supernode_enodes):
	New.
	(exploded_graph_annotator::print_enode): Show STATUS_BULK_MERGED.
	* exploded-graph.h (enum exploded_node::status): Add
	STATUS_BULK_MERGED.

gcc/testsuite/ChangeLog:
	* gcc.dg/analyzer/bzip2-arg-parse-1.c: New test.
	* gcc.dg/analyzer/loop-n-down-to-1-by-1.c: Remove xfail.
	* gcc.dg/analyzer/pr94851-1.c: Add xfail.
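For illustration, the kind of control flow involved is a switch inside
a loop, as in the simplified sketch below (hypothetical code written
for this description, not taken from bzip2; the names parse_args,
flag_a, flag_b and usage are invented - see the new
bzip2-arg-parse-1.c test for the real pattern):

  static int flag_a, flag_b;
  static void usage (void) {}

  void parse_args (const char *arg)
  {
    int i;
    for (i = 0; arg[i]; i++)
      switch (arg[i])
        {
        case 'a': flag_a = 1; break;
        case 'b': flag_b = 1; break;
        /* ...many more cases... */
        default: usage (); break;
        }
    /* The CFG join point after the switch has one in-edge per case;
       previously each in-edge could give rise to its own merger enode,
       whereas the run of before-supernode enodes is now merged in
       bulk.  */
  }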
---
 gcc/analyzer/engine.cc                        | 207 ++++++++++++++++++
 gcc/analyzer/exploded-graph.h                 |   4 +
 .../gcc.dg/analyzer/bzip2-arg-parse-1.c       |  95 ++++++++
 .../gcc.dg/analyzer/loop-n-down-to-1-by-1.c   |   4 +-
 gcc/testsuite/gcc.dg/analyzer/pr94851-1.c     |   3 +-
 5 files changed, 310 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 5903b19b774..53fafb58633 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -848,6 +848,8 @@ exploded_node::dump_dot (graphviz_out *gv, const dump_args_t &args) const
   pp_printf (pp, "EN: %i", m_index);
   if (m_status == STATUS_MERGER)
     pp_string (pp, " (merger)");
+  else if (m_status == STATUS_BULK_MERGED)
+    pp_string (pp, " (bulk merged)");
   pp_newline (pp);
 
   if (args.show_enode_details_p (*this))
@@ -2211,6 +2213,12 @@ exploded_graph::process_worklist ()
       if (logger)
         logger->log ("next to process: EN: %i", node->m_index);
 
+      /* If we have a run of nodes that are before-supernode, try merging and
+         processing them together, rather than pairwise or individually.  */
+      if (flag_analyzer_state_merge && node != m_origin)
+        if (maybe_process_run_of_before_supernode_enodes (node))
+          goto handle_limit;
+
       /* Avoid exponential explosions of nodes by attempting to merge
          nodes that are at the same program point and which have
         sufficiently similar state.  */
@@ -2340,6 +2348,7 @@ exploded_graph::process_worklist ()
 
       process_node (node);
 
+    handle_limit:
       /* Impose a hard limit on the number of exploded nodes, to ensure
          that the analysis terminates in the face of pathological state
          explosion (or bugs).
@@ -2367,6 +2376,201 @@ exploded_graph::process_worklist ()
     }
 }
 
+/* Attempt to process a consecutive run of sufficiently-similar nodes in
+   the worklist at a CFG join-point (having already popped ENODE from the
+   head of the worklist).
+
+   If ENODE's point is of the form (before-supernode, SNODE) and the next
+   nodes in the worklist are a consecutive run of enodes of the same form,
+   for the same supernode as ENODE (but potentially from different
+   in-edges), process them all together, setting their status to
+   STATUS_BULK_MERGED, and return true.
+   Otherwise, return false, in which case ENODE must be processed in the
+   normal way.
+
+   When processing them all together, generate successor states based
+   on phi nodes for the appropriate CFG edges, and then attempt to merge
+   these states into a minimal set of merged successor states, partitioning
+   the inputs by merged successor state.
+
+   Create new exploded nodes for all of the merged states, and add edges
+   connecting the input enodes to the corresponding merger exploded nodes.
+
+   We hope we have a much smaller number of merged successor states
+   compared to the number of input enodes - ideally just one,
+   if all successor states can be merged.
+
+   Processing and merging many together as one operation rather than as
+   pairs avoids scaling issues where per-pair mergers could bloat the
+   graph with merger nodes (especially so after switch statements).  */
+
+bool
+exploded_graph::
+maybe_process_run_of_before_supernode_enodes (exploded_node *enode)
+{
+  /* A struct for tracking per-input state.  */
+  struct item
+  {
+    item (exploded_node *input_enode)
+    : m_input_enode (input_enode),
+      m_processed_state (input_enode->get_state ()),
+      m_merger_idx (-1)
+    {}
+
+    exploded_node *m_input_enode;
+    program_state m_processed_state;
+    int m_merger_idx;
+  };
+
+  gcc_assert (enode->get_status () == exploded_node::STATUS_WORKLIST);
+  gcc_assert (enode->m_succs.length () == 0);
+
+  const program_point &point = enode->get_point ();
+
+  if (point.get_kind () != PK_BEFORE_SUPERNODE)
+    return false;
+
+  const supernode *snode = point.get_supernode ();
+
+  logger * const logger = get_logger ();
+  LOG_SCOPE (logger);
+
+  /* Find a run of enodes in the worklist that are before the same supernode,
+     but potentially from different in-edges.  */
+  auto_vec<exploded_node *> enodes;
+  enodes.safe_push (enode);
+  while (exploded_node *enode_2 = m_worklist.peek_next ())
+    {
+      gcc_assert (enode_2->get_status ()
+                  == exploded_node::STATUS_WORKLIST);
+      gcc_assert (enode_2->m_succs.length () == 0);
+
+      const program_point &point_2 = enode_2->get_point ();
+
+      if (point_2.get_kind () == PK_BEFORE_SUPERNODE
+          && point_2.get_supernode () == snode
+          && point_2.get_call_string () == point.get_call_string ())
+        {
+          enodes.safe_push (enode_2);
+          m_worklist.take_next ();
+        }
+      else
+        break;
+    }
+
+  /* If the only node is ENODE, then give up.  */
+  if (enodes.length () == 1)
+    return false;
+
+  if (logger)
+    logger->log ("got run of %i enodes for SN: %i",
+                 enodes.length (), snode->m_index);
+
+  /* All of these enodes have a shared successor point (even if they
+     were for different in-edges).  */
+  program_point next_point (point.get_next ());
+
+  /* Calculate the successor state for each enode in enodes.  */
+  auto_delete_vec<item> items (enodes.length ());
+  unsigned i;
+  exploded_node *iter_enode;
+  FOR_EACH_VEC_ELT (enodes, i, iter_enode)
+    {
+      item *it = new item (iter_enode);
+      items.quick_push (it);
+      const program_state &state = iter_enode->get_state ();
+      program_state *next_state = &it->m_processed_state;
+      const program_point &iter_point = iter_enode->get_point ();
+      if (const superedge *iter_sedge = iter_point.get_from_edge ())
+        {
+          impl_region_model_context ctxt (*this, iter_enode,
+                                          &state, next_state, NULL);
+          const cfg_superedge *last_cfg_superedge
+            = iter_sedge->dyn_cast_cfg_superedge ();
+          if (last_cfg_superedge)
+            next_state->m_region_model->update_for_phis
+              (snode, last_cfg_superedge, &ctxt);
+        }
+    }
+
+  /* Attempt to partition the items into a set of merged states.
+     We hope we have a much smaller number of merged states
+     compared to the number of input enodes - ideally just one,
+     if all can be merged.  */
+  auto_delete_vec<program_state> merged_states;
+  auto_vec<item *> first_item_for_each_merged_state;
+  item *it;
+  FOR_EACH_VEC_ELT (items, i, it)
+    {
+      const program_state &it_state = it->m_processed_state;
+      program_state *merged_state;
+      unsigned iter_merger_idx;
+      FOR_EACH_VEC_ELT (merged_states, iter_merger_idx, merged_state)
+        {
+          program_state merge (m_ext_state);
+          if (it_state.can_merge_with_p (*merged_state, next_point, &merge))
+            {
+              *merged_state = merge;
+              it->m_merger_idx = iter_merger_idx;
+              if (logger)
+                logger->log ("reusing merger state %i for item %i (EN: %i)",
+                             it->m_merger_idx, i, it->m_input_enode->m_index);
+              goto got_merger;
+            }
+        }
+      /* If it couldn't be merged with any existing merged_states,
+         create a new one.  */
+      if (it->m_merger_idx == -1)
+        {
+          it->m_merger_idx = merged_states.length ();
+          merged_states.safe_push (new program_state (it_state));
+          first_item_for_each_merged_state.safe_push (it);
+          if (logger)
+            logger->log ("using new merger state %i for item %i (EN: %i)",
+                         it->m_merger_idx, i, it->m_input_enode->m_index);
+        }
+    got_merger:
+      gcc_assert (it->m_merger_idx >= 0);
+      gcc_assert (it->m_merger_idx < merged_states.length ());
+    }
+
+  /* Create merger nodes.  */
+  auto_vec<exploded_node *> next_enodes (merged_states.length ());
+  program_state *merged_state;
+  FOR_EACH_VEC_ELT (merged_states, i, merged_state)
+    {
+      exploded_node *src_enode
+        = first_item_for_each_merged_state[i]->m_input_enode;
+      exploded_node *next
+        = get_or_create_node (next_point, *merged_state, src_enode);
+      /* "next" could be NULL; we handle that when adding the edges below.  */
+      next_enodes.quick_push (next);
+      if (logger)
+        {
+          if (next)
+            logger->log ("using EN: %i for merger state %i", next->m_index, i);
+          else
+            logger->log ("using NULL enode for merger state %i", i);
+        }
+    }
+
+  /* Create edges from each input enode to the appropriate successor enode.
+     Update the status of the now-processed input enodes.  */
+  FOR_EACH_VEC_ELT (items, i, it)
+    {
+      exploded_node *next = next_enodes[it->m_merger_idx];
+      if (next)
+        add_edge (it->m_input_enode, next, NULL);
+      it->m_input_enode->set_status (exploded_node::STATUS_BULK_MERGED);
+    }
+
+  if (logger)
+    logger->log ("merged %i in-enodes into %i out-enode(s) at SN: %i",
+                 items.length (), merged_states.length (), snode->m_index);
+
+  return true;
+}
+
 /* Return true if STMT must appear at the start of its exploded node,
    and thus we can't consolidate its effects within a run of other statements,
    where PREV_STMT was the previous statement.  */
@@ -3944,6 +4148,9 @@ private:
         case exploded_node::STATUS_MERGER:
           pp_string (pp, "(M)");
           break;
+        case exploded_node::STATUS_BULK_MERGED:
+          pp_string (pp, "(BM)");
+          break;
         }
       gv->end_tdtr ();
       /* Dump any saved_diagnostics at this enode.  */
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index e36f94ec125..5d4c3190283 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -161,6 +161,9 @@ class exploded_node : public dnode<eg_traits>
     /* Node was left unprocessed due to merger; it won't have had
        exploded_graph::process_node called on it.  */
     STATUS_MERGER,
+
+    /* Node was processed by maybe_process_run_of_before_supernode_enodes.  */
+    STATUS_BULK_MERGED
   };
 
   exploded_node (const point_and_state &ps, int index);
@@ -730,6 +733,7 @@ public:
 
   void build_initial_worklist ();
   void process_worklist ();
+  bool maybe_process_run_of_before_supernode_enodes (exploded_node *node);
   void process_node (exploded_node *node);
 
   exploded_node *get_or_create_node (const program_point &point,
diff --git a/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c b/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
new file mode 100644
index 00000000000..1f1d8294c33
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
@@ -0,0 +1,95 @@
+/* Integration test to verify that we don't explode in this
+   argument-parsing logic.
+   Adapted from part of bzip2-1.0.8: bzip2.c: main.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "analyzer-decls.h"
+
+/* This test file has been heavily modified from the bzip2.c original,
+   which has the following license boilerplate.  */
+/* ------------------------------------------------------------------
+   This file is part of bzip2/libbzip2, a program and library for
+   lossless, block-sorting data compression.
+
+   bzip2/libbzip2 version 1.0.8 of 13 July 2019
+   Copyright (C) 1996-2019 Julian Seward
+
+   Please read the WARNING, DISCLAIMER and PATENTS sections in the
+   README file.
+
+   This program is released under the terms of the license contained
+   in the file LICENSE.
+   ------------------------------------------------------------------ */
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef int Int32;
+
+#define True  ((Bool)1)
+#define False ((Bool)0)
+
+typedef
+   struct zzzz {
+      Char *name;
+      struct zzzz *link;
+   }
+   Cell;
+
+Int32 verbosity;
+Bool keepInputFiles, smallMode;
+Bool forceOverwrite, noisy;
+Int32 blockSize100k;
+Int32 opMode;
+Int32 srcMode;
+Char *progName;
+
+extern void license ( void );
+extern void usage ( Char *fullProgName );
+
+void test (Cell *argList)
+{
+  Cell *aa;
+  Int32 i, j;
+
+  for (aa = argList; aa != NULL; aa = aa->link) {
+    if (aa->name[0] == '-' && aa->name[1] != '-') {
+      for (j = 1; aa->name[j] != '\0'; j++) {
+        switch (aa->name[j]) {
+        case 'c': srcMode = 2; break;
+        case 'd': opMode = 2; break;
+        case 'z': opMode = 1; break;
+        case 'f': forceOverwrite = True; break;
+        case 't': opMode = 3; break;
+        case 'k': keepInputFiles = True; break;
+        case 's': smallMode = True; break;
+        case 'q': noisy = False; break;
+        case '1': blockSize100k = 1; break;
+        case '2': blockSize100k = 2; break;
+        case '3': blockSize100k = 3; break;
+        case '4': blockSize100k = 4; break;
+        case '5': blockSize100k = 5; break;
+        case '6': blockSize100k = 6; break;
+        case '7': blockSize100k = 7; break;
+        case '8': blockSize100k = 8; break;
+        case '9': blockSize100k = 9; break;
+        case 'V':
+        case 'L': license(); break;
+        case 'v': verbosity++; break;
+        case 'h': usage ( progName );
+          exit ( 0 );
+          break;
+        default: fprintf ( stderr, "%s: Bad flag `%s'\n",
+                           progName, aa->name );
+          usage ( progName );
+          exit ( 1 );
+          break;
+        }
+      }
+    }
+  }
+
+  /* The analyzer ought to be able to successfully merge all of the
+     above changes that can reach here into a single state.  */
+  __analyzer_dump_exploded_nodes (0); /* { dg-warning "1 processed enode" } */
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c b/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
index e02a849d4bb..553cd2f15dc 100644
--- a/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
@@ -24,8 +24,8 @@ void test(int n)
       __analyzer_dump_exploded_nodes (0); /* { dg-warning "2 processed enodes" } */
     }
 
-  __analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
-  /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
+  __analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" } */
+
   __analyzer_eval (i == 0); /* { dg-warning "TRUE" "desired" { xfail *-*-* } } */
   /* { dg-warning "UNKNOWN" "status quo" { target *-*-* } .-1 } */
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c b/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
index 56571318f91..da79652c570 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
@@ -40,7 +40,8 @@ int pamark(void) {
     last->m_next = p;
   }
 
-  p->m_name = (char)c;
+  p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" { xfail *-*-* } } */
+  // TODO(xfail): related to PR analyzer/97072 and PR analyzer/97074
   return 1;
 }
 
-- 
2.30.2