analyzer: consolidate conditionals in paths
authorDavid Malcolm <dmalcolm@redhat.com>
Fri, 29 Jan 2021 20:12:24 +0000 (15:12 -0500)
committerDavid Malcolm <dmalcolm@redhat.com>
Fri, 29 Jan 2021 20:12:24 +0000 (15:12 -0500)
This patch adds a simplification to analyzer paths for
repeated CFG edges generated from compound conditionals.
For example, it simplifies:

    |    5 |   if (a && b && c)
    |      |      ^~~~~~~~~~~~
    |      |      |  |    |
    |      |      |  |    (4) ...to here
    |      |      |  |    (5) following ‘true’ branch (when ‘c != 0’)...
    |      |      |  (2) ...to here
    |      |      |  (3) following ‘true’ branch (when ‘b != 0’)...
    |      |      (1) following ‘true’ branch (when ‘a != 0’)...
    |    6 |     __analyzer_dump_path ();
    |      |     ~~~~~~~~~~~~~~~~~~~~~~~
    |      |     |
    |      |     (6) ...to here

to:

    |    5 |   if (a && b && c)
    |      |      ^
    |      |      |
    |      |      (1) following ‘true’ branch...
    |    6 |     __analyzer_dump_path ();
    |      |     ~~~~~~~~~~~~~~~~~~~~~~~
    |      |     |
    |      |     (2) ...to here

gcc/analyzer/ChangeLog:
* checker-path.cc (event_kind_to_string): Handle
EK_START_CONSOLIDATED_CFG_EDGES and
EK_END_CONSOLIDATED_CFG_EDGES.
(start_consolidated_cfg_edges_event::get_desc): New.
(checker_path::cfg_edge_pair_at_p): New.
* checker-path.h (enum event_kind): Add
EK_START_CONSOLIDATED_CFG_EDGES and
EK_END_CONSOLIDATED_CFG_EDGES.
(class start_consolidated_cfg_edges_event): New class.
(class end_consolidated_cfg_edges_event): New class.
(checker_path::delete_events): New.
(checker_path::replace_event): New.
(checker_path::cfg_edge_pair_at_p): New decl.
* diagnostic-manager.cc (diagnostic_manager::prune_path): Call
consolidate_conditions.
(same_line_as_p): New.
(diagnostic_manager::consolidate_conditions): New.
* diagnostic-manager.h
(diagnostic_manager::consolidate_conditions): New decl.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/combined-conditionals-1.c: New test.

gcc/analyzer/checker-path.cc
gcc/analyzer/checker-path.h
gcc/analyzer/diagnostic-manager.cc
gcc/analyzer/diagnostic-manager.h
gcc/testsuite/gcc.dg/analyzer/combined-conditionals-1.c [new file with mode: 0644]

index 9417b86e7dced4df9a281f2c10e0308791efcb65..e6e3ec18688ca0b4f5879049f1d2860bd88916a8 100644 (file)
@@ -93,6 +93,10 @@ event_kind_to_string (enum event_kind ek)
       return "EK_CALL_EDGE";
     case EK_RETURN_EDGE:
       return "EK_RETURN_EDGE";
+    case EK_START_CONSOLIDATED_CFG_EDGES:
+      return "EK_START_CONSOLIDATED_CFG_EDGES";
+    case EK_END_CONSOLIDATED_CFG_EDGES:
+      return "EK_END_CONSOLIDATED_CFG_EDGES";
     case EK_SETJMP:
       return "EK_SETJMP";
     case EK_REWIND_FROM_LONGJMP:
@@ -709,6 +713,16 @@ return_event::is_return_p () const
   return true;
 }
 
+/* class start_consolidated_cfg_edges_event : public checker_event.  */
+
+label_text
+start_consolidated_cfg_edges_event::get_desc (bool can_colorize) const
+{
+  return make_label_text (can_colorize,
+                         "following %qs branch...",
+                         m_edge_sense ? "true" : "false");
+}
+
 /* class setjmp_event : public checker_event.  */
 
 /* Implementation of diagnostic_event::get_desc vfunc for
@@ -991,6 +1005,18 @@ checker_path::fixup_locations (pending_diagnostic *pd)
     e->set_location (pd->fixup_location (e->get_location ()));
 }
 
+/* Return true if there is a (start_cfg_edge_event, end_cfg_edge_event) pair
+   at (IDX, IDX + 1).  */
+
+bool
+checker_path::cfg_edge_pair_at_p (unsigned idx) const
+{
+  if (m_events.length () < idx + 1)
+    return false;
+  return (m_events[idx]->m_kind == EK_START_CFG_EDGE
+         && m_events[idx + 1]->m_kind == EK_END_CFG_EDGE);
+}
+
 } // namespace ana
 
 #endif /* #if ENABLE_ANALYZER */
index 078e9e6ba5a9df8ab470c40fe9ee4f66ad825c7e..f76bb94e7cccb3a635d7cc35e9fd94ef8baf1490 100644 (file)
@@ -37,6 +37,8 @@ enum event_kind
   EK_END_CFG_EDGE,
   EK_CALL_EDGE,
   EK_RETURN_EDGE,
+  EK_START_CONSOLIDATED_CFG_EDGES,
+  EK_END_CONSOLIDATED_CFG_EDGES,
   EK_SETJMP,
   EK_REWIND_FROM_LONGJMP,
   EK_REWIND_TO_SETJMP,
@@ -63,6 +65,8 @@ extern const char *event_kind_to_string (enum event_kind ek);
           end_cfg_edge_event (EK_END_CFG_EDGE)
          call_event (EK_CALL_EDGE)
          return_edge (EK_RETURN_EDGE)
+       start_consolidated_cfg_edges_event (EK_START_CONSOLIDATED_CFG_EDGES)
+       end_consolidated_cfg_edges_event (EK_END_CONSOLIDATED_CFG_EDGES)
        setjmp_event (EK_SETJMP)
        rewind_event
          rewind_from_longjmp_event (EK_REWIND_FROM_LONGJMP)
@@ -337,6 +341,42 @@ public:
   bool is_return_p () const FINAL OVERRIDE;
 };
 
+/* A concrete event subclass for the start of a consolidated run of CFG
+   edges all either TRUE or FALSE e.g. "following 'false' branch...'.  */
+
+class start_consolidated_cfg_edges_event : public checker_event
+{
+public:
+  start_consolidated_cfg_edges_event (location_t loc, tree fndecl, int depth,
+                                     bool edge_sense)
+  : checker_event (EK_START_CONSOLIDATED_CFG_EDGES, loc, fndecl, depth),
+    m_edge_sense (edge_sense)
+  {
+  }
+
+  label_text get_desc (bool can_colorize) const FINAL OVERRIDE;
+
+ private:
+  bool m_edge_sense;
+};
+
+/* A concrete event subclass for the end of a consolidated run of
+   CFG edges e.g. "...to here'.  */
+
+class end_consolidated_cfg_edges_event : public checker_event
+{
+public:
+  end_consolidated_cfg_edges_event (location_t loc, tree fndecl, int depth)
+  : checker_event (EK_END_CONSOLIDATED_CFG_EDGES, loc, fndecl, depth)
+  {
+  }
+
+  label_text get_desc (bool /*can_colorize*/) const FINAL OVERRIDE
+  {
+    return label_text::borrow ("...to here");
+  }
+};
+
 /* A concrete event subclass for a setjmp or sigsetjmp call.  */
 
 class setjmp_event : public checker_event
@@ -490,6 +530,19 @@ public:
     delete event;
   }
 
+  void delete_events (unsigned start_idx, unsigned len)
+  {
+    for (unsigned i = start_idx; i < start_idx + len; i++)
+      delete m_events[i];
+    m_events.block_remove (start_idx, len);
+  }
+
+  void replace_event (unsigned idx, checker_event *new_event)
+  {
+    delete m_events[idx];
+    m_events[idx] = new_event;
+  }
+
   void add_final_event (const state_machine *sm,
                        const exploded_node *enode, const gimple *stmt,
                        tree var, state_machine::state_t state);
@@ -525,6 +578,8 @@ public:
     return false;
   }
 
+  bool cfg_edge_pair_at_p (unsigned idx) const;
+
 private:
   DISABLE_COPY_AND_ASSIGN(checker_path);
 
index 22ca024f28e95823a7a3d8c3902fbba80e0e22b6..cbb76d89e0d19b3cb09459fdf9ac5bb2972fda4d 100644 (file)
@@ -1301,6 +1301,7 @@ diagnostic_manager::prune_path (checker_path *path,
   path->maybe_log (get_logger (), "path");
   prune_for_sm_diagnostic (path, sm, sval, state);
   prune_interproc_events (path);
+  consolidate_conditions (path);
   finish_pruning (path);
   path->maybe_log (get_logger (), "pruned");
 }
@@ -1656,6 +1657,151 @@ diagnostic_manager::prune_interproc_events (checker_path *path) const
   while (changed);
 }
 
+/* Return true iff event IDX within PATH is on the same line as REF_EXP_LOC.  */
+
+static bool
+same_line_as_p (const expanded_location &ref_exp_loc,
+               checker_path *path, unsigned idx)
+{
+  const checker_event *ev = path->get_checker_event (idx);
+  expanded_location idx_exp_loc = expand_location (ev->get_location ());
+  gcc_assert (ref_exp_loc.file);
+  if (idx_exp_loc.file == NULL)
+    return false;
+  if (strcmp (ref_exp_loc.file, idx_exp_loc.file))
+    return false;
+  return ref_exp_loc.line == idx_exp_loc.line;
+}
+
+/* This path-readability optimization reduces the verbosity of compound
+   conditional statements (without needing to reconstruct the AST, which
+   has already been lost).
+
+   For example, it converts:
+
+    |   61 |   if (cp[0] != '\0' && cp[0] != '#')
+    |      |      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    |      |      |              |    |
+    |      |      |              |    (6) ...to here
+    |      |      |              (7) following ‘true’ branch...
+    |      |      (5) following ‘true’ branch...
+    |   62 |     {
+    |   63 |       alias = cp++;
+    |      |               ~~~~
+    |      |                 |
+    |      |                 (8) ...to here
+
+   into:
+
+    |   61 |   if (cp[0] != '\0' && cp[0] != '#')
+    |      |      ~
+    |      |      |
+    |      |      (5) following ‘true’ branch...
+    |   62 |     {
+    |   63 |       alias = cp++;
+    |      |               ~~~~
+    |      |                 |
+    |      |                 (6) ...to here
+
+   by combining events 5-8 into new events 5-6.
+
+   Find runs of consecutive (start_cfg_edge_event, end_cfg_edge_event) pairs
+   in which all events apart from the final end_cfg_edge_event are on the same
+   line, and for which either all the CFG edges are TRUE edges, or all are
+   FALSE edges.
+
+   Consolidate each such run into a
+     (start_consolidated_cfg_edges_event, end_consolidated_cfg_edges_event)
+   pair.  */
+
+void
+diagnostic_manager::consolidate_conditions (checker_path *path) const
+{
+  /* Don't simplify edges if we're debugging them.  */
+  if (flag_analyzer_verbose_edges)
+    return;
+
+  for (unsigned start_idx = 0; start_idx < path->num_events () - 1; start_idx++)
+    {
+      if (path->cfg_edge_pair_at_p (start_idx))
+       {
+         const checker_event *old_start_ev
+           = path->get_checker_event (start_idx);
+         expanded_location start_exp_loc
+           = expand_location (old_start_ev->get_location ());
+         if (start_exp_loc.file == NULL)
+           continue;
+         if (!same_line_as_p (start_exp_loc, path, start_idx + 1))
+           continue;
+
+         /* Are we looking for a run of all TRUE edges, or all FALSE edges?  */
+         gcc_assert (old_start_ev->m_kind == EK_START_CFG_EDGE);
+         const start_cfg_edge_event *old_start_cfg_ev
+           = (const start_cfg_edge_event *)old_start_ev;
+         const cfg_superedge& first_cfg_sedge
+           = old_start_cfg_ev->get_cfg_superedge ();
+         bool edge_sense;
+         if (first_cfg_sedge.true_value_p ())
+           edge_sense = true;
+         else if (first_cfg_sedge.false_value_p ())
+           edge_sense = false;
+         else
+           continue;
+
+         /* Find a run of CFG start/end event pairs from
+              [start_idx, next_idx)
+            where all apart from the final event are on the same line,
+            and all are either TRUE or FALSE edges, matching the initial.  */
+         unsigned next_idx = start_idx + 2;
+         while (path->cfg_edge_pair_at_p (next_idx)
+                && same_line_as_p (start_exp_loc, path, next_idx))
+           {
+             const checker_event *iter_ev
+               = path->get_checker_event (next_idx);
+             gcc_assert (iter_ev->m_kind == EK_START_CFG_EDGE);
+             const start_cfg_edge_event *iter_cfg_ev
+               = (const start_cfg_edge_event *)iter_ev;
+             const cfg_superedge& iter_cfg_sedge
+               = iter_cfg_ev->get_cfg_superedge ();
+             if (edge_sense)
+               {
+                 if (!iter_cfg_sedge.true_value_p ())
+                   break;
+               }
+             else
+               {
+                 if (!iter_cfg_sedge.false_value_p ())
+                   break;
+               }
+             next_idx += 2;
+           }
+
+         /* If we have more than one pair in the run, consolidate.  */
+         if (next_idx > start_idx + 2)
+           {
+             const checker_event *old_end_ev
+               = path->get_checker_event (next_idx - 1);
+             log ("consolidating CFG edge events %i-%i into %i-%i",
+                  start_idx, next_idx - 1, start_idx, start_idx +1);
+             start_consolidated_cfg_edges_event *new_start_ev
+               = new start_consolidated_cfg_edges_event
+               (old_start_ev->get_location (),
+                old_start_ev->get_fndecl (),
+                old_start_ev->get_stack_depth (),
+                edge_sense);
+             checker_event *new_end_ev
+               = new end_consolidated_cfg_edges_event
+               (old_end_ev->get_location (),
+                old_end_ev->get_fndecl (),
+                old_end_ev->get_stack_depth ());
+             path->replace_event (start_idx, new_start_ev);
+             path->replace_event (start_idx + 1, new_end_ev);
+             path->delete_events (start_idx + 2, next_idx - (start_idx + 2));
+           }
+       }
+    }
+}
+
 /* Final pass of diagnostic_manager::prune_path.
 
    If all we're left with is in one function, then filter function entry
index 17f6ed99fc477eff62420100288725e513d48781..9fb952b422848d1f527c7dbcb6eb0863a6984d85 100644 (file)
@@ -175,6 +175,7 @@ private:
                                state_machine::state_t state) const;
   void update_for_unsuitable_sm_exprs (tree *expr) const;
   void prune_interproc_events (checker_path *path) const;
+  void consolidate_conditions (checker_path *path) const;
   void finish_pruning (checker_path *path) const;
 
   engine *m_eng;
diff --git a/gcc/testsuite/gcc.dg/analyzer/combined-conditionals-1.c b/gcc/testsuite/gcc.dg/analyzer/combined-conditionals-1.c
new file mode 100644 (file)
index 0000000..caac267
--- /dev/null
@@ -0,0 +1,55 @@
+/* Verify that we correctly consolidate conditionals in paths.  */
+
+#include "analyzer-decls.h"
+
+extern int foo ();
+extern int bar ();
+extern int baz ();
+
+void test_1 (int a, int b, int c)
+{
+  if (a && b && c) /* { dg-message "\\(1\\) following 'true' branch\\.\\.\\." } */
+    __analyzer_dump_path (); /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+}
+
+void test_2 (int a, int b, int c)
+{
+  if (a && b) /* { dg-message "\\(1\\) following 'true' branch\\.\\.\\." } */
+    if (c) /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+      __analyzer_dump_path ();
+}
+
+void test_3 (int a, int b, int c)
+{
+  if (a) /* { dg-message "\\(1\\) following 'true' branch" } */
+    if (b && c) /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+      __analyzer_dump_path ();
+}
+
+void test_4 (void)
+{
+  while (foo () && bar ()) /* { dg-message "\\(1\\) following 'true' branch\\.\\.\\." } */
+    __analyzer_dump_path (); /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+}
+
+void test_5 (int a, int b, int c)
+{
+  if (a || b || c) /* { dg-message "\\(1\\) following 'false' branch\\.\\.\\." } */
+    {
+    }
+  else
+    __analyzer_dump_path (); /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+}
+
+void test_6 (void)
+{
+  int i;
+  for (i = 0; i < 10 && foo (); i++) /* { dg-message "\\(1\\) following 'true' branch\\.\\.\\." } */
+    __analyzer_dump_path (); /* { dg-message "\\(2\\) \\.\\.\\.to here" } */
+}
+
+int test_7 (void)
+{
+  if (foo () ? bar () ? baz () : 0 : 0) /* { dg-message "\\(1\\) following 'true' branch\\.\\.\\." } */
+    __analyzer_dump_path (); /* { dg-message "\\(2\\) \\.\\.\\.to here" } */    
+}