--- /dev/null
+/* Proof-of-concept of a -fanalyzer plugin.
+ Detect (some) uses of CPython API outside of the Global Interpreter Lock.
+ https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
+*/
+/* { dg-options "-g" } */
+
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "diagnostic-event-id.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/analyzer-logging.h"
+#include "json.h"
+#include "analyzer/sm.h"
+#include "analyzer/pending-diagnostic.h"
+
+int plugin_is_GPL_compatible;
+
+#if ENABLE_ANALYZER
+
+namespace ana {
+
+static bool
+type_based_on_pyobject_p (tree type)
+{
+ /* Ideally we'd also check for "subclasses" here by iterating up the
+ first field of each struct. */
+ if (TREE_CODE (type) != RECORD_TYPE)
+ return false;
+ tree name = TYPE_IDENTIFIER (type);
+ if (!name)
+ return false;
+ return id_equal (name, "PyObject");
+}
+
+/* An experimental state machine, for tracking whether the GIL is held,
+ as global state.. */
+
+class gil_state_machine : public state_machine
+{
+public:
+ gil_state_machine (logger *logger);
+
+ bool inherited_state_p () const FINAL OVERRIDE { return false; }
+
+ bool on_stmt (sm_context *sm_ctxt,
+ const supernode *node,
+ const gimple *stmt) const FINAL OVERRIDE;
+
+ void on_condition (sm_context *sm_ctxt,
+ const supernode *node,
+ const gimple *stmt,
+ tree lhs,
+ enum tree_code op,
+ tree rhs) const FINAL OVERRIDE;
+
+ bool can_purge_p (state_t s) const FINAL OVERRIDE;
+
+ void check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
+ const supernode *node,
+ const gimple *stmt,
+ tree op) const;
+
+ private:
+ void check_for_pyobject_in_call (sm_context *sm_ctxt,
+ const supernode *node,
+ const gcall *call,
+ tree callee_fndecl) const;
+
+ public:
+ /* These states are "global", rather than per-expression. */
+
+ /* State for when we've released the GIL. */
+ state_t m_released_gil;
+
+ /* Stop state. */
+ state_t m_stop;
+};
+
+/* Subclass for diagnostics involving the GIL. */
+
+class gil_diagnostic : public pending_diagnostic
+{
+public:
+ location_t fixup_location (location_t loc) const FINAL OVERRIDE
+ {
+ /* Ideally we'd check for specific macros here, and only
+ resolve certain macros. */
+ if (linemap_location_from_macro_expansion_p (line_table, loc))
+ loc = linemap_resolve_location (line_table, loc,
+ LRK_MACRO_EXPANSION_POINT, NULL);
+ return loc;
+ }
+
+ label_text describe_state_change (const evdesc::state_change &change)
+ FINAL OVERRIDE
+ {
+ if (change.is_global_p ()
+ && change.m_new_state == m_sm.m_released_gil)
+ return change.formatted_print ("releasing the GIL here");
+ if (change.is_global_p ()
+ && change.m_new_state == m_sm.get_start_state ())
+ return change.formatted_print ("acquiring the GIL here");
+ return label_text ();
+ }
+
+ protected:
+ gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
+ {
+ }
+
+ private:
+ const gil_state_machine &m_sm;
+};
+
+class double_save_thread : public gil_diagnostic
+{
+ public:
+ double_save_thread (const gil_state_machine &sm, const gcall *call)
+ : gil_diagnostic (sm), m_call (call)
+ {}
+
+ const char *get_kind () const FINAL OVERRIDE
+ {
+ return "double_save_thread";
+ }
+
+ bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+ {
+ const double_save_thread &sub_other
+ = (const double_save_thread &)base_other;
+ return m_call == sub_other.m_call;
+ }
+
+ bool emit (rich_location *rich_loc) FINAL OVERRIDE
+ {
+ return warning_at (rich_loc, 0,
+ "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
+ }
+
+ label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+ {
+ return ev.formatted_print ("nested usage of %qs here",
+ "Py_BEGIN_ALLOW_THREADS");
+ }
+
+ private:
+ const gcall *m_call;
+};
+
+class fncall_without_gil : public gil_diagnostic
+{
+ public:
+ fncall_without_gil (const gil_state_machine &sm, const gcall *call,
+ tree callee_fndecl, unsigned arg_idx)
+ : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
+ m_arg_idx (arg_idx)
+ {}
+
+ const char *get_kind () const FINAL OVERRIDE
+ {
+ return "fncall_without_gil";
+ }
+
+ bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+ {
+ const fncall_without_gil &sub_other
+ = (const fncall_without_gil &)base_other;
+ return (m_call == sub_other.m_call
+ && m_callee_fndecl == sub_other.m_callee_fndecl
+ && m_arg_idx == sub_other.m_arg_idx);
+ }
+
+ bool emit (rich_location *rich_loc) FINAL OVERRIDE
+ {
+ auto_diagnostic_group d;
+ /* There isn't a warning ID for use to use. */
+ if (m_callee_fndecl)
+ return warning_at (rich_loc, 0,
+ "use of PyObject as argument %i of %qE"
+ " without the GIL",
+ m_arg_idx + 1, m_callee_fndecl);
+ else
+ return warning_at (rich_loc, 0,
+ "use of PyObject as argument %i of call"
+ " without the GIL",
+ m_arg_idx + 1, m_callee_fndecl);
+ }
+
+ label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+ {
+ if (m_callee_fndecl)
+ return ev.formatted_print ("use of PyObject as argument %i of %qE here"
+ " without the GIL",
+ m_arg_idx + 1, m_callee_fndecl);
+ else
+ return ev.formatted_print ("use of PyObject as argument %i of call here"
+ " without the GIL",
+ m_arg_idx + 1, m_callee_fndecl);
+ }
+
+ private:
+ const gcall *m_call;
+ tree m_callee_fndecl;
+ unsigned m_arg_idx;
+};
+
+class pyobject_usage_without_gil : public gil_diagnostic
+{
+ public:
+ pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
+ : gil_diagnostic (sm), m_expr (expr)
+ {}
+
+ const char *get_kind () const FINAL OVERRIDE
+ {
+ return "pyobject_usage_without_gil";
+ }
+
+ bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
+ {
+ return same_tree_p (m_expr,
+ ((const pyobject_usage_without_gil&)base_other).m_expr);
+ }
+
+ bool emit (rich_location *rich_loc) FINAL OVERRIDE
+ {
+ auto_diagnostic_group d;
+ /* There isn't a warning ID for use to use. */
+ return warning_at (rich_loc, 0,
+ "use of PyObject %qE without the GIL", m_expr);
+ }
+
+ label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
+ {
+ return ev.formatted_print ("PyObject %qE used here without the GIL",
+ m_expr);
+ }
+
+ private:
+ tree m_expr;
+};
+
+/* gil_state_machine's ctor. */
+
+gil_state_machine::gil_state_machine (logger *logger)
+: state_machine ("gil", logger)
+{
+ m_released_gil = add_state ("released_gil");
+ m_stop = add_state ("stop");
+}
+
+struct cb_data
+{
+ cb_data (const gil_state_machine &sm, sm_context *sm_ctxt,
+ const supernode *snode, const gimple *stmt)
+ : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt)
+ {
+ }
+
+ const gil_state_machine &m_sm;
+ sm_context *m_sm_ctxt;
+ const supernode *m_snode;
+ const gimple *m_stmt;
+};
+
+static bool
+check_for_pyobject (gimple *, tree op, tree, void *data)
+{
+ cb_data *d = (cb_data *)data;
+ d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode,
+ d->m_stmt, op);
+ return true;
+}
+
+/* Assuming that the GIL has been released, complain about any
+ PyObject * arguments passed to CALL. */
+
+void
+gil_state_machine::check_for_pyobject_in_call (sm_context *sm_ctxt,
+ const supernode *node,
+ const gcall *call,
+ tree callee_fndecl) const
+{
+ for (unsigned i = 0; i < gimple_call_num_args (call); i++)
+ {
+ tree arg = gimple_call_arg (call, i);
+ if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
+ continue;
+ tree type = TREE_TYPE (TREE_TYPE (arg));
+ if (type_based_on_pyobject_p (type))
+ {
+ sm_ctxt->warn (node, call, NULL_TREE,
+ new fncall_without_gil (*this, call,
+ callee_fndecl,
+ i));
+ sm_ctxt->set_global_state (m_stop);
+ }
+ }
+}
+
+/* Implementation of state_machine::on_stmt vfunc for gil_state_machine. */
+
+bool
+gil_state_machine::on_stmt (sm_context *sm_ctxt,
+ const supernode *node,
+ const gimple *stmt) const
+{
+ const state_t global_state = sm_ctxt->get_global_state ();
+ if (const gcall *call = dyn_cast <const gcall *> (stmt))
+ {
+ if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
+ {
+ if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
+ {
+ if (0)
+ inform (input_location, "found call to %qs",
+ "PyEval_SaveThread");
+ if (global_state == m_released_gil)
+ {
+ sm_ctxt->warn (node, stmt, NULL_TREE,
+ new double_save_thread (*this, call));
+ sm_ctxt->set_global_state (m_stop);
+ }
+ else
+ sm_ctxt->set_global_state (m_released_gil);
+ return true;
+ }
+ else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
+ call, 1))
+ {
+ if (0)
+ inform (input_location, "found call to %qs",
+ "PyEval_SaveThread");
+ if (global_state == m_released_gil)
+ sm_ctxt->set_global_state (m_start);
+ return true;
+ }
+ else if (global_state == m_released_gil)
+ {
+ /* Find PyObject * args of calls to fns with unknown bodies. */
+ if (!fndecl_has_gimple_body_p (callee_fndecl))
+ check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl);
+ }
+ }
+ else if (global_state == m_released_gil)
+ check_for_pyobject_in_call (sm_ctxt, node, call, NULL);
+ }
+ else
+ if (global_state == m_released_gil)
+ {
+ /* Walk the stmt, finding uses of PyObject (or "subclasses"). */
+ cb_data d (*this, sm_ctxt, node, stmt);
+ walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
+ check_for_pyobject,
+ check_for_pyobject,
+ check_for_pyobject);
+ }
+ return false;
+}
+
+/* Implementation of state_machine::on_condition vfunc for
+ gil_state_machine. */
+
+void
+gil_state_machine::on_condition (sm_context *sm_ctxt ATTRIBUTE_UNUSED,
+ const supernode *node ATTRIBUTE_UNUSED,
+ const gimple *stmt ATTRIBUTE_UNUSED,
+ tree lhs ATTRIBUTE_UNUSED,
+ enum tree_code op ATTRIBUTE_UNUSED,
+ tree rhs ATTRIBUTE_UNUSED) const
+{
+ // Empty
+}
+
+bool
+gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
+{
+ return true;
+}
+
+void
+gil_state_machine::check_for_pyobject_usage_without_gil (sm_context *sm_ctxt,
+ const supernode *node,
+ const gimple *stmt,
+ tree op) const
+{
+ tree type = TREE_TYPE (op);
+ if (type_based_on_pyobject_p (type))
+ {
+ sm_ctxt->warn (node, stmt, NULL_TREE,
+ new pyobject_usage_without_gil (*this, op));
+ sm_ctxt->set_global_state (m_stop);
+ }
+}
+
+/* Callback handler for the PLUGIN_ANALYZER_INIT event. */
+
+static void
+gil_analyzer_init_cb (void *gcc_data, void */*user_data*/)
+{
+ ana::plugin_analyzer_init_iface *iface
+ = (ana::plugin_analyzer_init_iface *)gcc_data;
+ LOG_SCOPE (iface->get_logger ());
+ if (0)
+ inform (input_location, "got here: gil_analyzer_init_cb");
+ iface->register_state_machine (new gil_state_machine (iface->get_logger ()));
+}
+
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+#if ENABLE_ANALYZER
+ const char *plugin_name = plugin_info->base_name;
+ if (0)
+ inform (input_location, "got here; %qs", plugin_name);
+ register_callback (plugin_info->base_name,
+ PLUGIN_ANALYZER_INIT,
+ ana::gil_analyzer_init_cb,
+ NULL); /* void *user_data */
+#else
+ sorry_no_analyzer ();
+#endif
+ return 0;
+}