From c460fd4ba1cdacf04305475e605071889ed0e92f Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Tue, 18 Aug 2020 11:17:26 -0500
Subject: [PATCH] Add the relevance manager module (#4894)

This PR adds the relevance manager module, which will be used in forthcoming PRs to query assignments for whether a literal is "relevant" (e.g. critical for satisfying the input formula). Leveraging this technique has led to noticeable improvements for non-linear arithmetic (https://github.com/ajreynol/CVC4/tree/relManager).

This PR does not enable it, but adds the module only.
---
 src/CMakeLists.txt               |   2 +
 src/theory/relevance_manager.cpp | 315 +++++++++++++++++++++++++++++++
 src/theory/relevance_manager.h   | 154 +++++++++++++++
 3 files changed, 471 insertions(+)
 create mode 100644 src/theory/relevance_manager.cpp
 create mode 100644 src/theory/relevance_manager.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1d54573e9..62a9b35d0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -695,6 +695,8 @@ libcvc4_add_sources(
   theory/quantifiers/theory_quantifiers_type_rules.h
   theory/quantifiers_engine.cpp
   theory/quantifiers_engine.h
+  theory/relevance_manager.cpp
+  theory/relevance_manager.h
   theory/rep_set.cpp
   theory/rep_set.h
   theory/rewriter.cpp
diff --git a/src/theory/relevance_manager.cpp b/src/theory/relevance_manager.cpp
new file mode 100644
index 000000000..71962ee07
--- /dev/null
+++ b/src/theory/relevance_manager.cpp
@@ -0,0 +1,315 @@
+/*********************                                                        */
+/*! \file relevance_manager.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Implementation of relevance manager.
+ **/
+
+#include "theory/relevance_manager.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+
+RelevanceManager::RelevanceManager(context::UserContext* userContext,
+                                   Valuation val)
+    : d_val(val), d_input(userContext), d_computed(false), d_success(false)
+{
+}
+
+void RelevanceManager::notifyPreprocessedAssertions(
+    const std::vector<Node>& assertions)
+{
+  // add to input list, which is user-context dependent
+  std::vector<Node> toProcess;
+  for (const Node& a : assertions)
+  {
+    if (a.getKind() == AND)
+    {
+      // split top-level AND
+      for (const Node& ac : a)
+      {
+        toProcess.push_back(ac);
+      }
+    }
+    else
+    {
+      d_input.push_back(a);
+    }
+  }
+  addAssertionsInternal(toProcess);
+}
+
+void RelevanceManager::notifyPreprocessedAssertion(Node n)
+{
+  std::vector<Node> toProcess;
+  toProcess.push_back(n);
+  addAssertionsInternal(toProcess);
+}
+
+void RelevanceManager::addAssertionsInternal(std::vector<Node>& toProcess)
+{
+  size_t i = 0;
+  while (i < toProcess.size())
+  {
+    Node a = toProcess[i];
+    if (a.getKind() == AND)
+    {
+      // split AND
+      for (const Node& ac : a)
+      {
+        toProcess.push_back(ac);
+      }
+    }
+    else
+    {
+      // note that a could be a literal, in which case we could add it to
+      // an "always relevant" set here.
+      d_input.push_back(a);
+    }
+    i++;
+  }
+}
+
+void RelevanceManager::resetRound()
+{
+  d_computed = false;
+  d_rset.clear();
+}
+
+void RelevanceManager::computeRelevance()
+{
+  d_computed = true;
+  Trace("rel-manager") << "RelevanceManager::computeRelevance..." << std::endl;
+  std::unordered_map<TNode, int, TNodeHashFunction> cache;
+  for (const Node& node: d_input)
+  {
+    TNode n = node;
+    int val = justify(n, cache);
+    if (val != 1)
+    {
+      std::stringstream serr;
+      serr << "RelevanceManager::computeRelevance: WARNING: failed to justify "
+           << n;
+      Trace("rel-manager") << serr.str() << std::endl;
+      Assert(false) << serr.str();
+      d_success = false;
+      return;
+    }
+  }
+  Trace("rel-manager") << "...success, size = " << d_rset.size() << std::endl;
+  d_success = true;
+}
+
+bool RelevanceManager::isBooleanConnective(TNode cur)
+{
+  Kind k = cur.getKind();
+  return k == NOT || k == IMPLIES || k == AND || k == OR || k == ITE || k == XOR
+         || (k == EQUAL && cur[0].getType().isBoolean());
+}
+
+bool RelevanceManager::updateJustifyLastChild(
+    TNode cur,
+    std::vector<int>& childrenJustify,
+    std::unordered_map<TNode, int, TNodeHashFunction>& cache)
+{
+  // This method is run when we are informed that child index of cur
+  // has justify status lastChildJustify. We return true if we would like to
+  // compute the next child, in this case we push the status of the current
+  // child to childrenJustify.
+  size_t nchildren = cur.getNumChildren();
+  Assert(isBooleanConnective(cur));
+  size_t index = childrenJustify.size();
+  Assert(index < nchildren);
+  Assert(cache.find(cur[index]) != cache.end());
+  Kind k = cur.getKind();
+  // Lookup the last child's value in the overall cache, we may choose to
+  // add this to childrenJustify if we return true.
+  int lastChildJustify = cache[cur[index]];
+  if (k == NOT)
+  {
+    cache[cur] = -lastChildJustify;
+  }
+  else if (k == IMPLIES || k == AND || k == OR)
+  {
+    if (lastChildJustify != 0)
+    {
+      // See if we short circuited? The value for short circuiting is false if
+      // we are AND or the first child of IMPLIES.
+      if (lastChildJustify
+          == ((k == AND || (k == IMPLIES && index == 0)) ? -1 : 1))
+      {
+        cache[cur] = k == AND ? -1 : 1;
+        return false;
+      }
+    }
+    if (index + 1 == nchildren)
+    {
+      // finished all children, compute the overall value
+      int ret = k == AND ? 1 : -1;
+      for (int cv : childrenJustify)
+      {
+        if (cv == 0)
+        {
+          ret = 0;
+          break;
+        }
+      }
+      cache[cur] = ret;
+    }
+    else
+    {
+      // continue
+      childrenJustify.push_back(lastChildJustify);
+      return true;
+    }
+  }
+  else if (lastChildJustify == 0)
+  {
+    // all other cases, an unknown child implies we are unknown
+    cache[cur] = 0;
+  }
+  else if (k == ITE)
+  {
+    if (index == 0)
+    {
+      Assert(lastChildJustify != 0);
+      // continue with branch
+      childrenJustify.push_back(lastChildJustify);
+      if (lastChildJustify == -1)
+      {
+        // also mark first branch as don't care
+        childrenJustify.push_back(0);
+      }
+      return true;
+    }
+    else
+    {
+      // should be in proper branch
+      Assert(childrenJustify[0] == (index == 1 ? 1 : -1));
+      // we are the value of the branch
+      cache[cur] = lastChildJustify;
+    }
+  }
+  else
+  {
+    Assert(k == XOR || k == EQUAL);
+    Assert(nchildren == 2);
+    Assert(lastChildJustify != 0);
+    if (index == 0)
+    {
+      // must compute the other child
+      childrenJustify.push_back(lastChildJustify);
+      return true;
+    }
+    else
+    {
+      // both children known, compute value
+      Assert(childrenJustify.size() == 1 && childrenJustify[0] != 0);
+      cache[cur] =
+          ((k == XOR ? -1 : 1) * lastChildJustify == childrenJustify[0]) ? 1
+                                                                         : -1;
+    }
+  }
+  return false;
+}
+
+int RelevanceManager::justify(
+    TNode n, std::unordered_map<TNode, int, TNodeHashFunction>& cache)
+{
+  // the vector of values of children
+  std::unordered_map<TNode, std::vector<int>, TNodeHashFunction> childJustify;
+  std::unordered_map<TNode, int, TNodeHashFunction>::iterator it;
+  std::unordered_map<TNode, std::vector<int>, TNodeHashFunction>::iterator itc;
+  std::vector<TNode> visit;
+  TNode cur;
+  visit.push_back(n);
+  do
+  {
+    cur = visit.back();
+    // should always have Boolean type
+    Assert(cur.getType().isBoolean());
+    it = cache.find(cur);
+    if (it != cache.end())
+    {
+      visit.pop_back();
+      // already computed value
+      continue;
+    }
+    itc = childJustify.find(cur);
+    // have we traversed to children yet?
+    if (itc == childJustify.end())
+    {
+      // are we not a Boolean connective (including NOT)?
+      if (isBooleanConnective(cur))
+      {
+        // initialize its children justify vector as empty
+        childJustify[cur].clear();
+        // start with the first child
+        visit.push_back(cur[0]);
+      }
+      else
+      {
+        visit.pop_back();
+        // The atom case, lookup the value in the valuation class to
+        // see its current value in the SAT solver, if it has one.
+        int ret = 0;
+        // otherwise we look up the value
+        bool value;
+        if (d_val.hasSatValue(cur, value))
+        {
+          ret = value ? 1 : -1;
+          d_rset.insert(cur);
+        }
+        cache[cur] = ret;
+      }
+    }
+    else
+    {
+      // this processes the impact of the current child on the value of cur,
+      // and possibly requests that a new child is computed.
+      if (updateJustifyLastChild(cur, itc->second, cache))
+      {
+        Assert(itc->second.size() < cur.getNumChildren());
+        TNode nextChild = cur[itc->second.size()];
+        visit.push_back(nextChild);
+      }
+      else
+      {
+        visit.pop_back();
+      }
+    }
+  } while (!visit.empty());
+  Assert(cache.find(n) != cache.end());
+  return cache[n];
+}
+
+bool RelevanceManager::isRelevant(Node lit)
+{
+  if (!d_computed)
+  {
+    computeRelevance();
+  }
+  if (!d_success)
+  {
+    // always relevant if we failed to compute
+    return true;
+  }
+  // agnostic to negation
+  while (lit.getKind() == NOT)
+  {
+    lit = lit[0];
+  }
+  return d_rset.find(lit) != d_rset.end();
+}
+
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/relevance_manager.h b/src/theory/relevance_manager.h
new file mode 100644
index 000000000..bbb094fc0
--- /dev/null
+++ b/src/theory/relevance_manager.h
@@ -0,0 +1,154 @@
+/*********************                                                        */
+/*! \file relevance_manager.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Relevance manager.
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__RELEVANCE_MANAGER__H
+#define CVC4__THEORY__RELEVANCE_MANAGER__H
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include "context/cdlist.h"
+#include "expr/node.h"
+#include "theory/valuation.h"
+
+namespace CVC4 {
+namespace theory {
+
+/**
+ * This class manages queries related to relevance of asserted literals.
+ * In particular, note the following definition:
+ *
+ * Let F be a formula, and let L = { l_1, ..., l_n } be a set of
+ * literals that propositionally entail it. A "relevant selection of L with
+ * respect to F" is a subset of L that also propositionally entails F.
+ *
+ * This class computes a relevant selection of the current assertion stack
+ * at FULL effort with respect to the input formula + theory lemmas that are
+ * critical to justify (see LemmaProperty::NEEDS_JUSTIFY). By default, theory
+ * lemmas are not critical to justify; in fact, all T-valid theory lemmas
+ * are not critical to justify, since they are guaranteed to be satisfied in
+ * all inputs. However, some theory lemmas that introduce skolems need
+ * justification.
+ *
+ * As an example of such a lemma, take the example input formula:
+ *   (and (exists ((x Int)) (P x)) (not (P 0)))
+ * A skolemization lemma like the following needs justification:
+ *   (=> (exists ((x Int)) (P x)) (P k))
+ * Intuitively, this is because the satisfiability of the existential above is
+ * being deferred to the satisfiability of (P k) where k is fresh. Thus,
+ * a relevant selection must include both (exists ((x Int)) (P x)) and (P k)
+ * in this example.
+ *
+ * Theories are responsible for marking such lemmas using the NEEDS_JUSTIFY
+ * property when calling OutputChannel::lemma.
+ *
+ * Notice that this class has some relation to the justification decision
+ * heuristic (--decision=justification), which constructs a relevant selection
+ * of the input formula by construction. This class is orthogonal to this
+ * method, since it computes relevant selection *after* a full assignment. Thus
+ * its main advantage with respect to decision=justification is that it can be
+ * used in combination with any SAT decision heuristic.
+ *
+ * Internally, this class stores the input assertions and can be asked if an
+ * asserted literal is part of the current relevant selection. The relevant
+ * selection is computed lazily, i.e. only when someone asks if a literal is
+ * relevant, and only at most once per FULL effort check.
+ */
+class RelevanceManager
+{
+  typedef context::CDList<Node> NodeList;
+
+ public:
+  RelevanceManager(context::UserContext* userContext, Valuation val);
+  /**
+   * Notify (preprocessed) assertions. This is called for input formulas or
+   * lemmas that need justification that have been fully processed, just before
+   * adding them to the PropEngine.
+   */
+  void notifyPreprocessedAssertions(const std::vector<Node>& assertions);
+  /** Singleton version of above */
+  void notifyPreprocessedAssertion(Node n);
+  /**
+   * Reset round, called at the beginning of a full effort check in
+   * TheoryEngine.
+   */
+  void resetRound();
+  /**
+   * Is lit part of the current relevant selection? This call is valid during
+   * full effort check in TheoryEngine. This means that theories can query this
+   * during FULL or LAST_CALL efforts, through the Valuation class.
+   */
+  bool isRelevant(Node lit);
+
+ private:
+  /**
+   * Add the set of assertions to the formulas known to this class. This
+   * method handles optimizations such as breaking apart top-level applications
+   * of and.
+   */
+  void addAssertionsInternal(std::vector<Node>& toProcess);
+  /** compute the relevant selection */
+  void computeRelevance();
+  /**
+   * Justify formula n. To "justify" means we have added literals to our
+   * relevant selection set (d_rset) whose current values ensure that n
+   * evaluates to true or false.
+   *
+   * This method returns 1 if we justified n to be true, -1 means
+   * justified n to be false, 0 means n could not be justified.
+   */
+  int justify(TNode n,
+              std::unordered_map<TNode, int, TNodeHashFunction>& cache);
+  /** Is the top symbol of cur a Boolean connective? */
+  bool isBooleanConnective(TNode cur);
+  /**
+   * Update justify last child. This method is a helper function for justify,
+   * which is called at the moment that Boolean connective formula cur
+   * has a new child that has been computed in the justify cache.
+   *
+   * @param cur The Boolean connective formula
+   * @param childrenJustify The values of the previous children (not including
+   * the current one)
+   * @param cache The justify cache
+   * @return True if we wish to visit the next child. If this is the case, then
+   * the justify value of the current child is added to childrenJustify.
+   */
+  bool updateJustifyLastChild(
+      TNode cur,
+      std::vector<int>& childrenJustify,
+      std::unordered_map<TNode, int, TNodeHashFunction>& cache);
+  /** The valuation object, used to query current value of theory literals */
+  Valuation d_val;
+  /** The input assertions */
+  NodeList d_input;
+  /** The current relevant selection. */
+  std::unordered_set<TNode, TNodeHashFunction> d_rset;
+  /** Have we computed the relevant selection this round? */
+  bool d_computed;
+  /**
+   * Did we succeed in computing the relevant selection? If this is false, there
+   * was a syncronization issue between the input formula and the satisfying
+   * assignment since this class found that the input formula was not satisfied
+   * by the assignment. This should never happen, but if it does, this class
+   * aborts and indicates that all literals are relevant.
+   */
+  bool d_success;
+};
+
+}  // namespace theory
+}  // namespace CVC4
+
+#endif /* CVC4__THEORY__RELEVANCE_MANAGER__H */
-- 
2.30.2