From 86ca9f1aafb83202508221efdd8d3b7b772c4b70 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 5 Jan 2022 12:01:25 -0600
Subject: [PATCH] Track input list for atoms in difficulty manager (#7851)

This makes our support for get-difficulty faster by tracking the set of input formulas that atoms occur in, which is highly important for benchmarks with many assertions and where many lemmas are added at standard effort.

This is work towards making get-difficulty scale on a large benchmark from Certora.
---
 src/smt/proof_manager.cpp         |   6 +
 src/theory/difficulty_manager.cpp |  19 ++--
 src/theory/difficulty_manager.h   |   7 +-
 src/theory/relevance_manager.cpp  | 175 +++++++++++++++++++++++++-----
 src/theory/relevance_manager.h    |  29 ++++-
 5 files changed, 198 insertions(+), 38 deletions(-)
diff --git a/src/smt/proof_manager.cpp b/src/smt/proof_manager.cpp
index 5f628219b..9a459a0d7 100644
--- a/src/smt/proof_manager.cpp
+++ b/src/smt/proof_manager.cpp
@@ -224,6 +224,7 @@ void PfManager::checkProof(std::shared_ptr<ProofNode> pfn, Assertions& as)
 void PfManager::translateDifficultyMap(std::map<Node, Node>& dmap,
                                        Assertions& as)
 {
+  Trace("difficulty-proc") << "Translate difficulty start" << std::endl;
   Trace("difficulty") << "PfManager::translateDifficultyMap" << std::endl;
   if (dmap.empty())
   {
@@ -231,6 +232,7 @@ void PfManager::translateDifficultyMap(std::map<Node, Node>& dmap,
   }
   std::map<Node, Node> dmapp = dmap;
   dmap.clear();
+  Trace("difficulty-proc") << "Get ppAsserts" << std::endl;
   std::vector<Node> ppAsserts;
   for (const std::pair<const Node, Node>& ppa : dmapp)
   {
@@ -241,12 +243,14 @@ void PfManager::translateDifficultyMap(std::map<Node, Node>& dmap,
     // internally by the difficuly manager.
     ppAsserts.push_back(ppa.first);
   }
+  Trace("difficulty-proc") << "Make SAT refutation" << std::endl;
   // assume a SAT refutation from all input assertions that were marked
   // as having a difficulty
   CDProof cdp(d_pnm.get());
   Node fnode = NodeManager::currentNM()->mkConst(false);
   cdp.addStep(fnode, PfRule::SAT_REFUTATION, ppAsserts, {});
   std::shared_ptr<ProofNode> pf = cdp.getProofFor(fnode);
+  Trace("difficulty-proc") << "Get final proof" << std::endl;
   std::shared_ptr<ProofNode> fpf = getFinalProof(pf, as);
   Trace("difficulty-debug") << "Final proof is " << *fpf.get() << std::endl;
   Assert(fpf->getRule() == PfRule::SCOPE);
@@ -256,6 +260,7 @@ void PfManager::translateDifficultyMap(std::map<Node, Node>& dmap,
   const std::vector<std::shared_ptr<ProofNode>>& children = fpf->getChildren();
   DifficultyPostprocessCallback dpc;
   ProofNodeUpdater dpnu(d_pnm.get(), dpc);
+  Trace("difficulty-proc") << "Compute accumulated difficulty" << std::endl;
   // For each child of SAT_REFUTATION, we increment the difficulty on all
   // "source" free assumptions (see DifficultyPostprocessCallback) by the
   // difficulty of the preprocessed assertion.
@@ -274,6 +279,7 @@ void PfManager::translateDifficultyMap(std::map<Node, Node>& dmap,
   }
   // get the accumulated difficulty map from the callback
   dpc.getDifficultyMap(dmap);
+  Trace("difficulty-proc") << "Translate difficulty end" << std::endl;
 }
 
 ProofChecker* PfManager::getProofChecker() const { return d_pchecker.get(); }
diff --git a/src/theory/difficulty_manager.cpp b/src/theory/difficulty_manager.cpp
index 7714abfa9..b66b56e56 100644
--- a/src/theory/difficulty_manager.cpp
+++ b/src/theory/difficulty_manager.cpp
@@ -17,6 +17,7 @@
 
 #include "options/smt_options.h"
 #include "smt/env.h"
+#include "theory/relevance_manager.h"
 #include "theory/theory_model.h"
 #include "util/rational.h"
 
@@ -25,8 +26,10 @@ using namespace cvc5::kind;
 namespace cvc5 {
 namespace theory {
 
-DifficultyManager::DifficultyManager(context::Context* c, Valuation val)
-    : d_input(c), d_val(val), d_dfmap(c)
+DifficultyManager::DifficultyManager(RelevanceManager* rlv,
+                                     context::Context* c,
+                                     Valuation val)
+    : d_rlv(rlv), d_input(c), d_val(val), d_dfmap(c)
 {
 }
 
@@ -48,8 +51,7 @@ void DifficultyManager::getDifficultyMap(std::map<Node, Node>& dmap)
   }
 }
 
-void DifficultyManager::notifyLemma(const context::CDHashMap<Node, Node>& rse,
-                                    Node n)
+void DifficultyManager::notifyLemma(Node n)
 {
   if (options::difficultyMode() != options::DifficultyMode::LEMMA_LITERAL)
   {
@@ -73,19 +75,18 @@ void DifficultyManager::notifyLemma(const context::CDHashMap<Node, Node>& rse,
   {
     litsToCheck.push_back(n);
   }
-  context::CDHashMap<Node, Node>::const_iterator it;
   for (TNode nc : litsToCheck)
   {
     bool pol = nc.getKind() != kind::NOT;
     TNode atom = pol ? nc : nc[0];
-    it = rse.find(atom);
+    TNode exp = d_rlv->getExplanationForRelevant(atom);
     Trace("diff-man-debug")
-        << "Check literal: " << atom << ", has reason = " << (it != rse.end())
+        << "Check literal: " << atom << ", has reason = " << (!exp.isNull())
         << std::endl;
     // must be an input assertion
-    if (it != rse.end() && d_input.find(it->second) != d_input.end())
+    if (!exp.isNull() && d_input.find(exp) != d_input.end())
     {
-      incrementDifficulty(it->second);
+      incrementDifficulty(exp);
     }
   }
 }
diff --git a/src/theory/difficulty_manager.h b/src/theory/difficulty_manager.h
index 1437965fe..5e6e038de 100644
--- a/src/theory/difficulty_manager.h
+++ b/src/theory/difficulty_manager.h
@@ -27,6 +27,7 @@ namespace cvc5 {
 namespace theory {
 
 class TheoryModel;
+class RelevanceManager;
 
 /**
  * Difficulty manager, which tracks an estimate of the difficulty of each
@@ -38,7 +39,7 @@ class DifficultyManager
   typedef context::CDHashMap<Node, uint64_t> NodeUIntMap;
 
  public:
-  DifficultyManager(context::Context* c, Valuation val);
+  DifficultyManager(RelevanceManager* rlv, context::Context* c, Valuation val);
   /** Notify input assertions */
   void notifyInputAssertions(const std::vector<Node>& assertions);
   /**
@@ -58,7 +59,7 @@ class DifficultyManager
    * the reason why that literal was relevant in the current context
    * @param lem The lemma
    */
-  void notifyLemma(const context::CDHashMap<Node, Node>& rse, Node lem);
+  void notifyLemma(Node lem);
   /**
    * Notify that `m` is a (candidate) model. This increments the difficulty
    * of assertions that are not satisfied by that model.
@@ -70,6 +71,8 @@ class DifficultyManager
  private:
   /** Increment difficulty on assertion a */
   void incrementDifficulty(TNode a, uint64_t amount = 1);
+  /** Pointer to the parent relevance manager */
+  RelevanceManager* d_rlv;
   /**
    * The input assertions, tracked to ensure we do not increment difficulty
    * on lemmas.
diff --git a/src/theory/relevance_manager.cpp b/src/theory/relevance_manager.cpp
index 87945aa06..39e1eb2e5 100644
--- a/src/theory/relevance_manager.cpp
+++ b/src/theory/relevance_manager.cpp
@@ -21,6 +21,7 @@
 #include "expr/term_context_stack.h"
 #include "options/smt_options.h"
 #include "smt/env.h"
+#include "theory/relevance_manager.h"
 
 using namespace cvc5::kind;
 
@@ -31,8 +32,10 @@ RelevanceManager::RelevanceManager(Env& env, Valuation val)
     : EnvObj(env),
       d_val(val),
       d_input(userContext()),
+      d_atomMap(userContext()),
       d_rset(context()),
       d_inFullEffortCheck(false),
+      d_fullEffortCheckFail(false),
       d_success(false),
       d_trackRSetExp(false),
       d_miniscopeTopLevel(true),
@@ -41,7 +44,7 @@ RelevanceManager::RelevanceManager(Env& env, Valuation val)
 {
   if (options().smt.produceDifficulty)
   {
-    d_dman.reset(new DifficultyManager(userContext(), val));
+    d_dman = std::make_unique<DifficultyManager>(this, userContext(), val);
     d_trackRSetExp = true;
     // we cannot miniscope AND at the top level, since we need to
     // preserve the exact form of preprocessed assertions so the dependencies
@@ -68,6 +71,8 @@ void RelevanceManager::notifyPreprocessedAssertions(
     else
     {
       d_input.push_back(a);
+      // add to atoms map
+      addInputToAtomsMap(a);
     }
   }
   addAssertionsInternal(toProcess);
@@ -106,12 +111,42 @@ void RelevanceManager::addAssertionsInternal(std::vector<Node>& toProcess)
       // note that a could be a literal, in which case we could add it to
       // an "always relevant" set here.
       d_input.push_back(a);
+      // add to atoms map
+      addInputToAtomsMap(a);
     }
     i++;
   }
 }
 
-void RelevanceManager::beginRound() { d_inFullEffortCheck = true; }
+void RelevanceManager::addInputToAtomsMap(TNode input)
+{
+  std::unordered_set<TNode> visited;
+  std::vector<TNode> visit;
+  TNode cur;
+  visit.push_back(input);
+  do
+  {
+    cur = visit.back();
+    visit.pop_back();
+    if (visited.find(cur) == visited.end())
+    {
+      visited.insert(cur);
+      if (expr::isBooleanConnective(cur))
+      {
+        visit.insert(visit.end(), cur.begin(), cur.end());
+        continue;
+      }
+      NodeList* ilist = getInputListFor(cur);
+      ilist->push_back(input);
+    }
+  } while (!visit.empty());
+}
+
+void RelevanceManager::beginRound()
+{
+  d_inFullEffortCheck = true;
+  d_fullEffortCheckFail = false;
+}
 
 void RelevanceManager::endRound() { d_inFullEffortCheck = false; }
 
@@ -122,26 +157,18 @@ void RelevanceManager::computeRelevance()
   Assert(d_inFullEffortCheck || d_trackRSetExp);
   Trace("rel-manager") << "RelevanceManager::computeRelevance, full effort = "
                        << d_inFullEffortCheck << "..." << std::endl;
+  // if we already failed
+  if (d_fullEffortCheckFail)
+  {
+    d_success = false;
+    return;
+  }
   for (const Node& node: d_input)
   {
-    TNode n = node;
-    int32_t val = justify(n);
-    if (val != 1)
+    if (!computeRelevanceFor(node))
     {
-      // if we are in full effort check and fail to justify, then we should
-      // give a failure and set success to false, or otherwise calls to
-      // isRelevant cannot be trusted.
-      if (d_inFullEffortCheck)
-      {
-        std::stringstream serr;
-        serr
-            << "RelevanceManager::computeRelevance: WARNING: failed to justify "
-            << n;
-        Trace("rel-manager") << serr.str() << std::endl;
-        Assert(false) << serr.str();
-        d_success = false;
-        return;
-      }
+      d_success = false;
+      return;
     }
   }
   if (Trace.isOn("rel-manager"))
@@ -157,7 +184,29 @@ void RelevanceManager::computeRelevance()
                            << std::endl;
     }
   }
-  d_success = true;
+  d_success = !d_fullEffortCheckFail;
+}
+
+bool RelevanceManager::computeRelevanceFor(TNode input)
+{
+  int32_t val = justify(input);
+  if (val != 1)
+  {
+    // if we are in full effort check and fail to justify, then we should
+    // give a failure and set success to false, or otherwise calls to
+    // isRelevant cannot be trusted.
+    if (d_inFullEffortCheck)
+    {
+      std::stringstream serr;
+      serr << "RelevanceManager::computeRelevance: WARNING: failed to justify "
+           << input;
+      Trace("rel-manager") << serr.str() << std::endl;
+      Assert(false) << serr.str();
+      d_fullEffortCheckFail = true;
+      return false;
+    }
+  }
+  return true;
 }
 
 bool RelevanceManager::updateJustifyLastChild(const RlvPair& cur,
@@ -366,9 +415,11 @@ int32_t RelevanceManager::justify(TNode n)
   return d_jcache[ci];
 }
 
-bool RelevanceManager::isRelevant(Node lit)
+bool RelevanceManager::isRelevant(TNode lit)
 {
   Assert(d_inFullEffortCheck);
+  // since this is used in full effort, and typically for all asserted literals,
+  // we just ensure relevance is fully computed here
   computeRelevance();
   if (!d_success)
   {
@@ -383,6 +434,80 @@ bool RelevanceManager::isRelevant(Node lit)
   return d_rset.find(lit) != d_rset.end();
 }
 
+TNode RelevanceManager::getExplanationForRelevant(TNode lit)
+{
+  // agnostic to negation
+  while (lit.getKind() == NOT)
+  {
+    lit = lit[0];
+  }
+  NodeList* ilist = nullptr;
+  TNode nextInput;
+  size_t ninputs = 0;
+  size_t index = 0;
+  do
+  {
+    // check if it has an explanation yet
+    TNode exp = getExplanationForRelevantInternal(lit);
+    if (!exp.isNull())
+    {
+      return exp;
+    }
+    // if the first time, we get the list of input formulas the atom occurs in
+    if (index == 0)
+    {
+      ilist = getInputListFor(lit, false);
+      if (ilist != nullptr)
+      {
+        ninputs = ilist->size();
+      }
+      Trace("rel-manager-exp-debug")
+          << "Atom " << lit << " occurs in " << ninputs << " assertions..."
+          << std::endl;
+    }
+    if (index < ninputs)
+    {
+      // justify the next
+      nextInput = (*ilist)[index];
+      index++;
+      // justify the next input that the atom occurs in
+      computeRelevanceFor(nextInput);
+    }
+    else
+    {
+      nextInput = TNode::null();
+    }
+  } while (!nextInput.isNull());
+
+  return TNode::null();
+}
+
+TNode RelevanceManager::getExplanationForRelevantInternal(TNode atom) const
+{
+  NodeMap::const_iterator it = d_rsetExp.find(atom);
+  if (it != d_rsetExp.end())
+  {
+    return it->second;
+  }
+  return TNode::null();
+}
+
+RelevanceManager::NodeList* RelevanceManager::getInputListFor(TNode atom,
+                                                              bool doMake)
+{
+  NodeListMap::const_iterator it = d_atomMap.find(atom);
+  if (it == d_atomMap.end())
+  {
+    if (!doMake)
+    {
+      return nullptr;
+    }
+    d_atomMap[atom] = std::make_shared<NodeList>(userContext());
+    it = d_atomMap.find(atom);
+  }
+  return it->second.get();
+}
+
 std::unordered_set<TNode> RelevanceManager::getRelevantAssertions(bool& success)
 {
   computeRelevance();
@@ -399,14 +524,14 @@ std::unordered_set<TNode> RelevanceManager::getRelevantAssertions(bool& success)
   return rset;
 }
 
-void RelevanceManager::notifyLemma(Node n)
+void RelevanceManager::notifyLemma(TNode n)
 {
   // notice that we may be in FULL or STANDARD effort here.
   if (d_dman != nullptr)
   {
-    // ensure we know which literals are relevant, and why
-    computeRelevance();
-    d_dman->notifyLemma(d_rsetExp, n);
+    // notice that we don't compute relevance here, instead it is computed
+    // on demand based on the literals in n.
+    d_dman->notifyLemma(n);
   }
 }
 
diff --git a/src/theory/relevance_manager.h b/src/theory/relevance_manager.h
index 284adc855..be4afe47f 100644
--- a/src/theory/relevance_manager.h
+++ b/src/theory/relevance_manager.h
@@ -81,6 +81,7 @@ class RelevanceManager : protected EnvObj
   using RlvPairHashFunction = PairHashFunction<Node, uint32_t, std::hash<Node>>;
   using NodeList = context::CDList<Node>;
   using NodeMap = context::CDHashMap<Node, Node>;
+  using NodeListMap = context::CDHashMap<Node, std::shared_ptr<NodeList>>;
   using NodeSet = context::CDHashSet<Node>;
   using RlvPairIntMap =
       context::CDHashMap<RlvPair, int32_t, RlvPairHashFunction>;
@@ -118,7 +119,13 @@ class RelevanceManager : protected EnvObj
    * with "sat". This means that theories can query this during FULL or
    * LAST_CALL efforts, through the Valuation class.
    */
-  bool isRelevant(Node lit);
+  bool isRelevant(TNode lit);
+  /**
+   * Get the explanation for literal lit is relevant. This returns the
+   * preprocessed assertion that was the reason why the literal was relevant
+   * in the current context. It returns null if the literal is not relevant.
+   */
+  TNode getExplanationForRelevant(TNode lit);
   /**
    * Get the current relevant selection (see above). This computes this set
    * if not already done so. This call is valid during a full effort check in
@@ -134,7 +141,7 @@ class RelevanceManager : protected EnvObj
    */
   std::unordered_set<TNode> getRelevantAssertions(bool& success);
   /** Notify lemma, for difficulty measurements */
-  void notifyLemma(Node n);
+  void notifyLemma(TNode n);
   /** Notify that m is a (candidate) model, for difficulty measurements */
   void notifyCandidateModel(TheoryModel* m);
   /**
@@ -143,6 +150,16 @@ class RelevanceManager : protected EnvObj
   void getDifficultyMap(std::map<Node, Node>& dmap);
 
  private:
+  /**
+   * Called when an input assertion is added, this populates d_atomMap.
+   */
+  void addInputToAtomsMap(TNode input);
+  /**
+   * Compute relevance for input assertion input. This returns false and
+   * sets d_fullEffortCheckFail to true if we are at full effort and input
+   * fails to be computed.
+   */
+  bool computeRelevanceFor(TNode input);
   /**
    * Add the set of assertions to the formulas known to this class. This
    * method handles optimizations such as breaking apart top-level applications
@@ -174,10 +191,16 @@ class RelevanceManager : protected EnvObj
    */
   bool updateJustifyLastChild(const RlvPair& cur,
                               std::vector<int32_t>& childrenJustify);
+  /** Return the explanation for why atom is relevant, if it exists */
+  TNode getExplanationForRelevantInternal(TNode atom) const;
+  /** Get the list of assertions that contain atom */
+  NodeList* getInputListFor(TNode atom, bool doMake = true);
   /** The valuation object, used to query current value of theory literals */
   Valuation d_val;
   /** The input assertions */
   NodeList d_input;
+  /** Map from atoms to the list of input assertions that are contained in */
+  NodeListMap d_atomMap;
   /**
    * The current relevant selection, SAT-context dependent, includes
    * literals that are definitely relevant in this context.
@@ -185,6 +208,8 @@ class RelevanceManager : protected EnvObj
   NodeSet d_rset;
   /** Are we in a full effort check? */
   bool d_inFullEffortCheck;
+  /** Have we failed to justify a formula in a full effort check? */
+  bool d_fullEffortCheckFail;
   /**
    * Did we succeed in computing the relevant selection? If this is false, there
    * was a syncronization issue between the input formula and the satisfying
-- 
2.30.2