Add analyzer plugin support and CPython GIL example
authorDavid Malcolm <dmalcolm@redhat.com>
Mon, 30 Nov 2020 19:32:28 +0000 (14:32 -0500)
committerDavid Malcolm <dmalcolm@redhat.com>
Mon, 30 Nov 2020 19:32:28 +0000 (14:32 -0500)
This patch adds a new GCC plugin event: PLUGIN_ANALYZER_INIT, called
when -fanalyzer is starting, allowing for GCC plugins to register
additional state-machine-based checks within -fanalyzer.  The idea
is that 3rd-party code might want to add domain-specific checks for
its own APIs - with the caveat that the analyzer is itself still
rather experimental.

As an example, the patch adds a proof-of-concept plugin to the testsuite
for checking CPython code: verifying that code that relinquishes
CPython's global interpreter lock doesn't attempt to do anything with
PyObjects in the sections where the lock isn't held.  It also adds a
warning about nested releases of the lock, which is forbidden.
For example:

demo.c: In function 'foo':
demo.c:11:3: warning: use of PyObject '*(obj)' without the GIL
   11 |   Py_INCREF (obj);
      |   ^~~~~~~~~
  'test': events 1-3
    |
    |   15 | void test (PyObject *obj)
    |      |      ^~~~
    |      |      |
    |      |      (1) entry to 'test'
    |   16 | {
    |   17 |   Py_BEGIN_ALLOW_THREADS
    |      |   ~~~~~~~~~~~~~~~~~~~~~~
    |      |   |
    |      |   (2) releasing the GIL here
    |   18 |   foo (obj);
    |      |   ~~~~~~~~~
    |      |   |
    |      |   (3) calling 'foo' from 'test'
    |
    +--> 'foo': events 4-5
           |
           |    9 | foo (PyObject *obj)
           |      | ^~~
           |      | |
           |      | (4) entry to 'foo'
           |   10 | {
           |   11 |   Py_INCREF (obj);
           |      |   ~~~~~~~~~
           |      |   |
           |      |   (5) PyObject '*(obj)' used here without the GIL
           |

Doing so requires adding some logic for ignoring macro expansions in
analyzer diagnostics, since the insides of Py_INCREF and
Py_BEGIN_ALLOW_THREADS are not of interest to the user for these cases.

gcc/analyzer/ChangeLog:
* analyzer-pass.cc (pass_analyzer::execute): Move sorry call to...
(sorry_no_analyzer): New.
* analyzer.h (class state_machine): New forward decl.
(class logger): New forward decl.
(class plugin_analyzer_init_iface): New.
(sorry_no_analyzer): New decl.
* checker-path.cc (checker_path::fixup_locations): New.
* checker-path.h (checker_event::set_location): New.
(checker_path::fixup_locations): New decl.
* diagnostic-manager.cc
(diagnostic_manager::emit_saved_diagnostic): Call
checker_path::fixup_locations, and call fixup_location
on the primary location.
* engine.cc: Include "plugin.h".
(class plugin_analyzer_init_impl): New.
(impl_run_checkers): Invoke PLUGIN_ANALYZER_INIT callbacks.
* pending-diagnostic.h (pending_diagnostic::fixup_location): New
vfunc.

gcc/ChangeLog:
* doc/plugins.texi (Plugin callbacks): Add PLUGIN_ANALYZER_INIT.
* plugin.c (register_callback): Likewise.
(invoke_plugin_callbacks_full): Likewise.
* plugin.def (PLUGIN_ANALYZER_INIT): New event.

gcc/testsuite/ChangeLog:
* gcc.dg/plugin/analyzer_gil_plugin.c: New test.
* gcc.dg/plugin/gil-1.c: New test.
* gcc.dg/plugin/gil.h: New header.
* gcc.dg/plugin/plugin.exp (plugin_test_list): Add the new plugin
and test.

14 files changed:
gcc/analyzer/analyzer-pass.cc
gcc/analyzer/analyzer.h
gcc/analyzer/checker-path.cc
gcc/analyzer/checker-path.h
gcc/analyzer/diagnostic-manager.cc
gcc/analyzer/engine.cc
gcc/analyzer/pending-diagnostic.h
gcc/doc/plugins.texi
gcc/plugin.c
gcc/plugin.def
gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/plugin/gil-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/plugin/gil.h [new file with mode: 0644]
gcc/testsuite/gcc.dg/plugin/plugin.exp

index a27421e46d427a9ed19e4250ad338d2b3fd1c7d6..1f65bf8b154753635e17e97eceb409de0ede42d5 100644 (file)
@@ -83,9 +83,7 @@ pass_analyzer::execute (function *)
 #if ENABLE_ANALYZER
   ana::run_checkers ();
 #else
-  sorry ("%qs was not enabled in this build of GCC"
-        " (missing configure-time option %qs)",
-        "-fanalyzer", "--enable-analyzer");
+  sorry_no_analyzer ();
 #endif
 
   return 0;
@@ -100,3 +98,17 @@ make_pass_analyzer (gcc::context *ctxt)
 {
   return new pass_analyzer (ctxt);
 }
+
+#if !ENABLE_ANALYZER
+
+/* Issue a "sorry" diagnostic that the analyzer was not enabled.  */
+
+void
+sorry_no_analyzer ()
+{
+  sorry ("%qs was not enabled in this build of GCC"
+        " (missing configure-time option %qs)",
+        "-fanalyzer", "--enable-analyzer");
+}
+
+#endif /* #if !ENABLE_ANALYZER */
index c84d7fdcaab92f001e421ab96c99de84a15b313b..cd46bfc0d0d6707d1e32dce14bd39efb907487af 100644 (file)
@@ -189,6 +189,15 @@ private:
 
 extern location_t get_stmt_location (const gimple *stmt, function *fun);
 
+/* Passed by pointer to PLUGIN_ANALYZER_INIT callbacks.  */
+
+class plugin_analyzer_init_iface
+{
+public:
+  virtual void register_state_machine (state_machine *) = 0;
+  virtual logger *get_logger () const = 0;
+};
+
 } // namespace ana
 
 extern bool is_special_named_call_p (const gcall *call, const char *funcname,
@@ -309,4 +318,8 @@ private:
 #pragma GCC diagnostic ignored "-Wformat-diag"
 #endif
 
+#if !ENABLE_ANALYZER
+extern void sorry_no_analyzer ();
+#endif /* #if !ENABLE_ANALYZER */
+
 #endif /* GCC_ANALYZER_ANALYZER_H */
index 1f6d6a86001ab2cebbcb6584df6520914e179f88..062730cc789f0dfbfb75cdca76180cebd14c823b 100644 (file)
@@ -982,6 +982,15 @@ checker_path::add_final_event (const state_machine *sm,
   add_event (end_of_path);
 }
 
+void
+checker_path::fixup_locations (pending_diagnostic *pd)
+{
+  checker_event *e;
+  int i;
+  FOR_EACH_VEC_ELT (m_events, i, e)
+    e->set_location (pd->fixup_location (e->get_location ()));
+}
+
 } // namespace ana
 
 #endif /* #if ENABLE_ANALYZER */
index a00b3cf09eaf0e06e44e815d35a7fc7436eafd31..79553baec97261c3070731f5587e39be06f1bdfa 100644 (file)
@@ -105,6 +105,8 @@ public:
 
   void dump (pretty_printer *pp) const;
 
+  void set_location (location_t loc) { m_loc = loc; }
+
  public:
   const enum event_kind m_kind;
  protected:
@@ -504,6 +506,8 @@ public:
       e->prepare_for_emission (this, pd, diagnostic_event_id_t (i));
   }
 
+  void fixup_locations (pending_diagnostic *pd);
+
   void record_setjmp_event (const exploded_node *enode,
                            diagnostic_event_id_t setjmp_emission_id)
   {
index 13f6dd2f341e54bb46663e84e8a7307a7d0563b8..9739ca6ee449b05f183d1dd08b4d4103056e0501 100644 (file)
@@ -665,7 +665,14 @@ diagnostic_manager::emit_saved_diagnostic (const exploded_graph &eg,
 
   emission_path.prepare_for_emission (sd.m_d);
 
-  gcc_rich_location rich_loc (get_stmt_location (stmt, sd.m_snode->m_fun));
+  location_t loc = get_stmt_location (stmt, sd.m_snode->m_fun);
+
+  /* Allow the pending_diagnostic to fix up the primary location
+     and any locations for events.  */
+  loc = sd.m_d->fixup_location (loc);
+  emission_path.fixup_locations (sd.m_d);
+
+  gcc_rich_location rich_loc (loc);
   rich_loc.set_path (&emission_path);
 
   auto_diagnostic_group d;
index 0a8e5b87e01a3d97987563b28a51f6ce07c92bb9..d022b3a8edb9a642159ed67d9f457f2948290f5d 100644 (file)
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "analyzer/state-purge.h"
 #include "analyzer/bar-chart.h"
 #include <zlib.h>
+#include "plugin.h"
 
 /* For an overview, see gcc/doc/analyzer.texi.  */
 
@@ -4638,6 +4639,33 @@ dump_analyzer_json (const supergraph &sg,
   free (filename);
 }
 
+/* Concrete subclass of plugin_analyzer_init_iface, allowing plugins
+   to register new state machines.  */
+
+class plugin_analyzer_init_impl : public plugin_analyzer_init_iface
+{
+public:
+  plugin_analyzer_init_impl (auto_delete_vec <state_machine> *checkers,
+                            logger *logger)
+  : m_checkers (checkers),
+    m_logger (logger)
+  {}
+
+  void register_state_machine (state_machine *sm) FINAL OVERRIDE
+  {
+    m_checkers->safe_push (sm);
+  }
+
+  logger *get_logger () const FINAL OVERRIDE
+  {
+    return m_logger;
+  }
+
+private:
+  auto_delete_vec <state_machine> *m_checkers;
+  logger *m_logger;
+};
+
 /* Run the analysis "engine".  */
 
 void
@@ -4683,6 +4711,9 @@ impl_run_checkers (logger *logger)
   auto_delete_vec <state_machine> checkers;
   make_checkers (checkers, logger);
 
+  plugin_analyzer_init_impl data (&checkers, logger);
+  invoke_plugin_callbacks (PLUGIN_ANALYZER_INIT, &data);
+
   if (logger)
     {
       int i;
index ce626f678711fe269802ab9dd26d96694756f2c2..0cec27a66e52ed64b3565677ec9552ec1844efa1 100644 (file)
@@ -175,6 +175,14 @@ class pending_diagnostic
      diagnostic deduplication.  */
   static bool same_tree_p (tree t1, tree t2);
 
+  /* A vfunc for fixing up locations (both the primary location for the
+     diagnostic, and for events in their paths), e.g. to avoid unwinding
+     inside specific macros.  */
+  virtual location_t fixup_location (location_t loc) const
+  {
+    return loc;
+  }
+
   /* For greatest precision-of-wording, the various following "describe_*"
      virtual functions give the pending diagnostic a way to describe events
      in a diagnostic_path in terms that make sense for that diagnostic.
index 23a8b410ebb0be8107f3a7eb18d6047754192f91..1c211bbacf1c121563a222e7ffb64459484b279e 100644 (file)
@@ -218,6 +218,10 @@ enum plugin_event
    as a const char* pointer.  */
   PLUGIN_INCLUDE_FILE,
 
+  /* Called when -fanalyzer starts. The event data is an
+     ana::plugin_analyzer_init_iface *.  */
+  PLUGIN_ANALYZER_INIT,
+
   PLUGIN_EVENT_FIRST_DYNAMIC    /* Dummy event used for indexing callback
                                    array.  */
 @};
index 69b6f5b8b6a567e45e1eeb8c820ffdc823ca21a8..76069e647982fcf3a5010f0a66b2df77d604e6be 100644 (file)
@@ -497,6 +497,7 @@ register_callback (const char *plugin_name,
       case PLUGIN_EARLY_GIMPLE_PASSES_END:
       case PLUGIN_NEW_PASS:
       case PLUGIN_INCLUDE_FILE:
+      case PLUGIN_ANALYZER_INIT:
         {
           struct callback_info *new_callback;
           if (!callback)
@@ -577,6 +578,7 @@ invoke_plugin_callbacks_full (int event, void *gcc_data)
       case PLUGIN_EARLY_GIMPLE_PASSES_END:
       case PLUGIN_NEW_PASS:
       case PLUGIN_INCLUDE_FILE:
+      case PLUGIN_ANALYZER_INIT:
         {
           /* Iterate over every callback registered with this event and
              call it.  */
index 3482ed71906c033f35a2dd992419d2efce763487..b8110190d3aa143a5dacd91e0fb470a7ee07418c 100644 (file)
@@ -99,6 +99,10 @@ DEFEVENT (PLUGIN_NEW_PASS)
    as a const char* pointer.  */
 DEFEVENT (PLUGIN_INCLUDE_FILE)
 
+/* Called when -fanalyzer starts. The event data is an
+   ana::plugin_analyzer_init_iface *.  */
+DEFEVENT (PLUGIN_ANALYZER_INIT)
+
 /* When adding a new hard-coded plugin event, don't forget to edit in
    file plugin.c the functions register_callback and
    invoke_plugin_callbacks_full accordingly!  */
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c
new file mode 100644 (file)
index 0000000..05133d5
--- /dev/null
@@ -0,0 +1,436 @@
+/* Proof-of-concept of a -fanalyzer plugin.
+   Detect (some) uses of CPython API outside of the Global Interpreter Lock.
+   https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
+*/
+/* { dg-options "-g" } */
+
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "diagnostic-event-id.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/analyzer-logging.h"
+#include "json.h"
+#include "analyzer/sm.h"
+#include "analyzer/pending-diagnostic.h"
+
+int plugin_is_GPL_compatible;
+
+#if ENABLE_ANALYZER
+
+namespace ana {
+
+static bool
+type_based_on_pyobject_p (tree type)
+{
+  /* Ideally we'd also check for "subclasses" here by iterating up the
+     first field of each struct.  */
+  if (TREE_CODE (type) != RECORD_TYPE)
+    return false;
+  tree name = TYPE_IDENTIFIER (type);
+  if (!name)
+    return false;
+  return id_equal (name, "PyObject");
+}
+
+/* An experimental state machine, for tracking whether the GIL is held,
+   as global state..  */
+
+class gil_state_machine : public state_machine
+{
+public:
+  gil_state_machine (logger *logger);
+
+  bool inherited_state_p () const FINAL OVERRIDE { return false; }
+
+  bool on_stmt (sm_context *sm_ctxt,
+               const supernode *node,
+               const gimple *stmt) const FINAL OVERRIDE;
+
+  void on_condition (sm_context *sm_ctxt,
+                    const supernode *node,
+                    const gimple *stmt,
+                    tree lhs,
+                    enum tree_code op,
+                    tree rhs) const FINAL OVERRIDE;
+
+  bool can_purge_p (state_t s) const FINAL OVERRIDE;
+
+  void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
+                                            const supernode *node,
+                                            const gimple *stmt,
+                                            tree op) const;
+
+ private:
+  void check_for_pyobject_in_call (sm_context *sm_ctxt,
+                                  const supernode *node,
+                                  const gcall *call,
+                                  tree callee_fndecl) const;
+
+ public:
+  /* These states are "global", rather than per-expression.  */
+
+  /* State for when we've released the GIL.  */
+  state_t m_released_gil;
+
+  /* Stop state.  */
+  state_t m_stop;
+};
+
+/* Subclass for diagnostics involving the GIL.  */
+
+class gil_diagnostic : public pending_diagnostic
+{
+public:
+  location_t fixup_location (location_t loc) const FINAL OVERRIDE
+  {
+    /* Ideally we'd check for specific macros here, and only
+       resolve certain macros.  */
+    if (linemap_location_from_macro_expansion_p (line_table, loc))
+      loc = linemap_resolve_location (line_table, loc,
+                                     LRK_MACRO_EXPANSION_POINT, NULL);
+    return loc;
+  }
+
+  label_text describe_state_change (const evdesc::state_change &change)
+    FINAL OVERRIDE
+  {
+    if (change.is_global_p ()
+       && change.m_new_state == m_sm.m_released_gil)
+      return change.formatted_print ("releasing the GIL here");
+    if (change.is_global_p ()
+       && change.m_new_state == m_sm.get_start_state ())
+      return change.formatted_print ("acquiring the GIL here");
+    return label_text ();
+  }
+
+ protected:
+  gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
+  {
+  }
+
+ private:
+  const gil_state_machine &m_sm;
+};
+
+class double_save_thread : public gil_diagnostic
+{
+ public:
+  double_save_thread (const gil_state_machine &sm, const gcall *call)
+  : gil_diagnostic (sm), m_call (call)
+  {}
+
+  const char *get_kind () const FINAL OVERRIDE
+  {
+    return "double_save_thread";
+  }
+
+  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+  {
+    const double_save_thread &sub_other
+      = (const double_save_thread &)base_other;
+    return m_call == sub_other.m_call;
+  }
+
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    return warning_at (rich_loc, 0,
+                      "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
+  }
+
+  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+  {
+    return ev.formatted_print ("nested usage of %qs here",
+                              "Py_BEGIN_ALLOW_THREADS");
+  }
+
+ private:
+  const gcall *m_call;
+};
+
+class fncall_without_gil : public gil_diagnostic
+{
+ public:
+  fncall_without_gil (const gil_state_machine &sm, const gcall *call,
+                     tree callee_fndecl, unsigned arg_idx)
+  : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
+    m_arg_idx (arg_idx)
+  {}
+
+  const char *get_kind () const FINAL OVERRIDE
+  {
+    return "fncall_without_gil";
+  }
+
+  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+  {
+    const fncall_without_gil &sub_other
+      = (const fncall_without_gil &)base_other;
+    return (m_call == sub_other.m_call
+           && m_callee_fndecl == sub_other.m_callee_fndecl
+           && m_arg_idx == sub_other.m_arg_idx);
+  }
+
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    auto_diagnostic_group d;
+    /* There isn't a warning ID for use to use.  */
+    if (m_callee_fndecl)
+      return warning_at (rich_loc, 0,
+                        "use of PyObject as argument %i of %qE"
+                        " without the GIL",
+                        m_arg_idx + 1, m_callee_fndecl);
+    else
+      return warning_at (rich_loc, 0,
+                        "use of PyObject as argument %i of call"
+                        " without the GIL",
+                        m_arg_idx + 1, m_callee_fndecl);
+  }
+
+  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+  {
+    if (m_callee_fndecl)
+      return ev.formatted_print ("use of PyObject as argument %i of %qE here"
+                                " without the GIL",
+                                m_arg_idx + 1, m_callee_fndecl);
+    else
+      return ev.formatted_print ("use of PyObject as argument %i of call here"
+                                " without the GIL",
+                                m_arg_idx + 1, m_callee_fndecl);
+  }
+
+ private:
+  const gcall *m_call;
+  tree m_callee_fndecl;
+  unsigned m_arg_idx;
+};
+
+class pyobject_usage_without_gil : public gil_diagnostic
+{
+ public:
+  pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
+  : gil_diagnostic (sm), m_expr (expr)
+  {}
+
+  const char *get_kind () const FINAL OVERRIDE
+  {
+    return "pyobject_usage_without_gil";
+  }
+
+  bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+  {
+    return same_tree_p (m_expr,
+                       ((const pyobject_usage_without_gil&)base_other).m_expr);
+  }
+
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    auto_diagnostic_group d;
+    /* There isn't a warning ID for use to use.  */
+    return warning_at (rich_loc, 0,
+                      "use of PyObject %qE without the GIL", m_expr);
+  }
+
+  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+  {
+    return ev.formatted_print ("PyObject %qE used here without the GIL",
+                              m_expr);
+  }
+
+ private:
+  tree m_expr;
+};
+
+/* gil_state_machine's ctor.  */
+
+gil_state_machine::gil_state_machine (logger *logger)
+: state_machine ("gil", logger)
+{
+  m_released_gil = add_state ("released_gil");
+  m_stop = add_state ("stop");
+}
+
+struct cb_data
+{
+  cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
+          const supernode *snode, const gimple *stmt)
+  : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
+  {
+  }
+
+  const gil_state_machine &m_sm;
+  sm_context *m_sm_ctxt;
+  const supernode *m_snode;
+  const gimple *m_stmt;
+};
+
+static bool
+check_for_pyobject (gimple *, tree op, tree, void *data)
+{
+  cb_data *d = (cb_data *)data;
+  d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
+                                               d->m_stmt, op);
+  return true;
+}
+
+/* Assuming that the GIL has been released, complain about any
+   PyObject * arguments passed to CALL.  */
+
+void
+gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
+                                              const supernode *node,
+                                              const gcall *call,
+                                              tree callee_fndecl) const
+{
+  for (unsigned i = 0; i < gimple_call_num_args (call); i++)
+    {
+      tree arg = gimple_call_arg (call, i);
+      if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
+       continue;
+      tree type = TREE_TYPE (TREE_TYPE (arg));
+      if (type_based_on_pyobject_p (type))
+       {
+         sm_ctxt->warn (node, call, NULL_TREE,
+                        new fncall_without_gil (*this, call,
+                                                callee_fndecl,
+                                                i));
+         sm_ctxt->set_global_state (m_stop);
+       }
+    }
+}
+
+/* Implementation of state_machine::on_stmt vfunc for gil_state_machine.  */
+
+bool
+gil_state_machine::on_stmt (sm_context *sm_ctxt,
+                           const supernode *node,
+                           const gimple *stmt) const
+{
+  const state_t global_state = sm_ctxt->get_global_state ();
+  if (const gcall *call = dyn_cast <const gcall *> (stmt))
+    {
+      if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
+       {
+         if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
+           {
+             if (0)
+               inform (input_location, "found call to %qs",
+                       "PyEval_SaveThread");
+             if (global_state == m_released_gil)
+               {
+                 sm_ctxt->warn (node, stmt, NULL_TREE,
+                                new double_save_thread (*this, call));
+                 sm_ctxt->set_global_state (m_stop);
+               }
+             else
+               sm_ctxt->set_global_state (m_released_gil);
+             return true;
+           }
+         else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
+                                   call, 1))
+           {
+             if (0)
+               inform (input_location, "found call to %qs",
+                       "PyEval_SaveThread");
+             if (global_state == m_released_gil)
+               sm_ctxt->set_global_state (m_start);
+             return true;
+           }
+         else if (global_state == m_released_gil)
+           {
+             /* Find PyObject * args of calls to fns with unknown bodies.  */
+             if (!fndecl_has_gimple_body_p (callee_fndecl))
+               check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
+           }
+       }
+      else if (global_state == m_released_gil)
+       check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
+    }
+  else
+    if (global_state == m_released_gil)
+      {
+       /* Walk the stmt, finding uses of PyObject (or "subclasses").  */
+       cb_data d (*this, sm_ctxt, node, stmt);
+       walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
+                                      check_for_pyobject,
+                                      check_for_pyobject,
+                                      check_for_pyobject);
+    }
+  return false;
+}
+
+/* Implementation of state_machine::on_condition vfunc for
+   gil_state_machine.  */
+
+void
+gil_state_machine::on_condition (sm_context *sm_ctxt ATTRIBUTE_UNUSED,
+                                const supernode *node ATTRIBUTE_UNUSED,
+                                const gimple *stmt ATTRIBUTE_UNUSED,
+                                tree lhs ATTRIBUTE_UNUSED,
+                                enum tree_code op ATTRIBUTE_UNUSED,
+                                tree rhs ATTRIBUTE_UNUSED) const
+{
+  // Empty
+}
+
+bool
+gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
+{
+  return true;
+}
+
+void
+gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
+                                                        const supernode *node,
+                                                        const gimple *stmt,
+                                                        tree op) const
+{
+  tree type = TREE_TYPE (op);
+  if (type_based_on_pyobject_p (type))
+    {
+      sm_ctxt->warn (node, stmt, NULL_TREE,
+                    new pyobject_usage_without_gil (*this, op));
+      sm_ctxt->set_global_state (m_stop);
+    }
+}
+
+/* Callback handler for the PLUGIN_ANALYZER_INIT event.  */
+
+static void
+gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
+{
+  ana::plugin_analyzer_init_iface *iface
+    = (ana::plugin_analyzer_init_iface *)gcc_data;
+  LOG_SCOPE (iface->get_logger ());
+  if (0)
+    inform (input_location, "got here: gil_analyzer_init_cb");
+  iface->register_state_machine (new gil_state_machine (iface->get_logger ()));
+}
+
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+            struct plugin_gcc_version *version)
+{
+#if ENABLE_ANALYZER
+  const char *plugin_name = plugin_info->base_name;
+  if (0)
+    inform (input_location, "got here; %qs", plugin_name);
+  register_callback (plugin_info->base_name,
+                    PLUGIN_ANALYZER_INIT,
+                    ana::gil_analyzer_init_cb,
+                    NULL); /* void *user_data */
+#else
+  sorry_no_analyzer ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/gil-1.c b/gcc/testsuite/gcc.dg/plugin/gil-1.c
new file mode 100644 (file)
index 0000000..4e8f535
--- /dev/null
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-fanalyzer" } */
+
+#include "gil.h"
+
+void test_1 (void)
+{
+  Py_BEGIN_ALLOW_THREADS
+  Py_END_ALLOW_THREADS
+}
+
+void test_2 (PyObject *obj)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+
+  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*\\(obj\\)' without the GIL" } */
+  Py_DECREF (obj);
+
+  Py_END_ALLOW_THREADS
+}
+
+void test_3 (PyObject *obj)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+
+  Py_BEGIN_ALLOW_THREADS /* { dg-warning "nested usage of 'Py_BEGIN_ALLOW_THREADS'" } */
+  Py_END_ALLOW_THREADS
+
+  Py_END_ALLOW_THREADS
+}
+
+void test_4 (PyObject *obj)
+{
+  /* These aren't nested, so should be OK.  */
+  Py_BEGIN_ALLOW_THREADS
+  Py_END_ALLOW_THREADS
+
+  Py_BEGIN_ALLOW_THREADS
+  Py_END_ALLOW_THREADS
+}
+
+/* Interprocedural example of erroneously nested usage.  */
+
+static void  __attribute__((noinline))
+called_by_test_5 (void)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-warning "nested usage of 'Py_BEGIN_ALLOW_THREADS'" } */
+  Py_END_ALLOW_THREADS
+}
+
+void test_5 (PyObject *obj)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+  called_by_test_5 ();
+  Py_END_ALLOW_THREADS
+}
+
+/* Interprocedural example of bogusly using a PyObject outside of GIL.  */
+
+static void  __attribute__((noinline))
+called_by_test_6 (PyObject *obj)
+{
+  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*\\(obj\\)' without the GIL" } */
+  Py_DECREF (obj);
+}
+
+void test_6 (PyObject *obj)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+  called_by_test_6 (obj);
+  Py_END_ALLOW_THREADS
+}
+
+extern void called_by_test_7 (PyObject *obj);
+
+void test_7 (PyObject *obj)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+  called_by_test_7 (obj); /* { dg-warning "use of PyObject as argument 1 of 'called_by_test_7' without the GIL" } */
+  Py_END_ALLOW_THREADS
+}
+
+typedef void (*callback_t) (PyObject *);
+
+void test_8 (PyObject *obj, callback_t cb)
+{
+  Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
+  cb (obj); /* { dg-warning "use of PyObject as argument 1 of call without the GIL" } */
+  Py_END_ALLOW_THREADS
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/gil.h b/gcc/testsuite/gcc.dg/plugin/gil.h
new file mode 100644 (file)
index 0000000..b0610cd
--- /dev/null
@@ -0,0 +1,32 @@
+/* Adapted from CPython 3.8's ceval.h.  */
+typedef struct PyThreadState PyThreadState;
+extern PyThreadState * PyEval_SaveThread(void);
+extern void PyEval_RestoreThread(PyThreadState *);
+
+#define Py_BEGIN_ALLOW_THREADS { \
+                        PyThreadState *_save; \
+                        _save = PyEval_SaveThread();
+#define Py_BLOCK_THREADS        PyEval_RestoreThread(_save);
+#define Py_UNBLOCK_THREADS      _save = PyEval_SaveThread();
+#define Py_END_ALLOW_THREADS    PyEval_RestoreThread(_save); \
+                 }
+
+/* Adapted/hacked up from CPython 3.8's object.h.  */
+
+typedef struct _object {
+    int ob_refcnt;
+} PyObject;
+
+#define _PyObject_CAST(op) ((PyObject*)(op))
+
+extern void _Py_Dealloc(PyObject *);
+
+#define _Py_INCREF(OP) do { (OP)->ob_refcnt++; } while (0);
+#define _Py_DECREF(OP) do {      \
+    if (--(OP)->ob_refcnt == 0) { \
+      _Py_Dealloc(OP);           \
+    }                            \
+  } while (0)
+
+#define Py_INCREF(op) _Py_INCREF(_PyObject_CAST(op))
+#define Py_DECREF(op) _Py_DECREF(_PyObject_CAST(op))
index 5dd102ae05c535c8b2e2396d50a579e25e961a02..7f0ffd68f8b7298c279b2ad01a22b713ada22031 100644 (file)
@@ -118,6 +118,8 @@ set plugin_test_list [list \
     { dump_plugin.c \
          dump-1.c \
          dump-2.c } \
+    { analyzer_gil_plugin.c \
+         gil-1.c } \
 ]
 
 foreach plugin_test $plugin_test_list {