From 9b7f2b6b541f192acf2dc525076a4aa0e995be14 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds
Date: Mon, 31 Aug 2020 23:35:57 -0500
Subject: [PATCH] Add the inference manager for datatypes (#4968)

This is in preparation for converting datatypes to the new standard. It adds a
specialized version of the buffered inference manager (InferenceManagerBuffered)
for use by datatypes. This required adding several utility methods to its base
classes. A follow-up PR will connect this to TheoryDatatypes.
---
 src/CMakeLists.txt                         |   2 +
 src/theory/datatypes/inference_manager.cpp | 128 +++++++++++++++++++++
 src/theory/datatypes/inference_manager.h   |  77 +++++++++++++
 src/theory/inference_manager_buffered.cpp  |   6 +
 src/theory/inference_manager_buffered.h    |   6 +
 src/theory/theory_inference_manager.cpp    |  68 ++++++++++-
 src/theory/theory_inference_manager.h      |  75 ++++++++++--
 7 files changed, 349 insertions(+), 13 deletions(-)
 create mode 100644 src/theory/datatypes/inference_manager.cpp
 create mode 100644 src/theory/datatypes/inference_manager.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 06bc9917f..2a7fd11a8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -471,6 +471,8 @@ libcvc4_add_sources(
   theory/combination_engine.h
   theory/datatypes/datatypes_rewriter.cpp
   theory/datatypes/datatypes_rewriter.h
+  theory/datatypes/inference_manager.cpp
+  theory/datatypes/inference_manager.h
   theory/datatypes/sygus_datatype_utils.cpp
   theory/datatypes/sygus_datatype_utils.h
   theory/datatypes/sygus_extension.cpp
diff --git a/src/theory/datatypes/inference_manager.cpp b/src/theory/datatypes/inference_manager.cpp
new file mode 100644
index 000000000..42cad0b65
--- /dev/null
+++ b/src/theory/datatypes/inference_manager.cpp
@@ -0,0 +1,128 @@
+/*********************                                                        */
+/*! \file inference_manager.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Datatypes inference manager
+ **/
+
+#include "theory/datatypes/inference_manager.h"
+
+#include "expr/dtype.h"
+#include "options/datatypes_options.h"
+#include "theory/theory.h"
+
+using namespace CVC4::kind;
+
+namespace CVC4 {
+namespace theory {
+namespace datatypes {
+
+InferenceManager::InferenceManager(Theory& t,
+                                   TheoryState& state,
+                                   ProofNodeManager* pnm)
+    : InferenceManagerBuffered(t, state, pnm)
+{
+  d_true = NodeManager::currentNM()->mkConst(true);
+}
+
+bool InferenceManager::mustCommunicateFact(Node n, Node exp) const
+{
+  Trace("dt-lemma-debug") << "Compute for " << exp << " => " << n << std::endl;
+  bool addLemma = false;
+  if (options::dtInferAsLemmas() && exp != d_true)
+  {
+    addLemma = true;
+  }
+  else if (n.getKind() == EQUAL)
+  {
+    TypeNode tn = n[0].getType();
+    if (!tn.isDatatype())
+    {
+      addLemma = true;
+    }
+    else
+    {
+      const DType& dt = tn.getDType();
+      addLemma = dt.involvesExternalType();
+    }
+  }
+  else if (n.getKind() == LEQ || n.getKind() == OR)
+  {
+    addLemma = true;
+  }
+  if (addLemma)
+  {
+    Trace("dt-lemma-debug") << "Communicate " << n << std::endl;
+    return true;
+  }
+  Trace("dt-lemma-debug") << "Do not need to communicate " << n << std::endl;
+  return false;
+}
+
+void InferenceManager::process()
+{
+  // process pending lemmas, used infrequently, only for definitional lemmas
+  doPendingLemmas();
+  // now process the pending facts
+  size_t i = 0;
+  NodeManager* nm = NodeManager::currentNM();
+  while (!d_theoryState.isInConflict() && i < d_pendingFact.size())
+  {
+    std::pair<Node, Node>& pfact = d_pendingFact[i];
+    const Node& fact = pfact.first;
+    const Node& exp = pfact.second;
+    Trace("datatypes-debug")
+        << "Assert fact (#" << (i + 1) << "/" << d_pendingFact.size() << ") "
+        << fact << " with explanation " << exp << std::endl;
+    // check to see if we have to communicate it to the rest of the system
+    if (mustCommunicateFact(fact, exp))
+    {
+      Node lem = fact;
+      if (exp.isNull() || exp == d_true)
+      {
+        Trace("dt-lemma-debug") << "Trivial explanation." << std::endl;
+      }
+      else
+      {
+        Trace("dt-lemma-debug") << "Get explanation..." << std::endl;
+        std::vector<TNode> assumptions;
+        explain(exp, assumptions);
+        if (!assumptions.empty())
+        {
+          std::vector<Node> children;
+          for (const TNode& assumption : assumptions)
+          {
+            children.push_back(assumption.negate());
+          }
+          children.push_back(fact);
+          lem = nm->mkNode(OR, children);
+        }
+      }
+      Trace("dt-lemma") << "Datatypes lemma : " << lem << std::endl;
+      lemma(lem);
+    }
+    else
+    {
+      // assert the internal fact
+      bool polarity = fact.getKind() != NOT;
+      TNode atom = polarity ? fact : fact[0];
+      assertInternalFact(atom, polarity, exp);
+    }
+    Trace("datatypes-debug") << "Finished fact " << fact
+                             << ", now = " << d_theoryState.isInConflict()
+                             << " " << d_pendingFact.size() << std::endl;
+    i++;
+  }
+  d_pendingFact.clear();
+}
+
+}  // namespace datatypes
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/datatypes/inference_manager.h b/src/theory/datatypes/inference_manager.h
new file mode 100644
index 000000000..786cd8129
--- /dev/null
+++ b/src/theory/datatypes/inference_manager.h
@@ -0,0 +1,77 @@
+/*********************                                                        */
+/*! \file inference_manager.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Andrew Reynolds
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Datatypes inference manager
+ **/
+
+#include "cvc4_private.h"
+
+#ifndef CVC4__THEORY__DATATYPES__INFERENCE_MANAGER_H
+#define CVC4__THEORY__DATATYPES__INFERENCE_MANAGER_H
+
+#include "context/cdhashmap.h"
+#include "expr/node.h"
+#include "theory/inference_manager_buffered.h"
+
+namespace CVC4 {
+namespace theory {
+namespace datatypes {
+
+/**
+ * The datatypes inference manager. The main unique features of this inference
+ * manager are:
+ * (1) Explicit caching of lemmas,
+ * (2) A custom process() method which relies on a policy determining which
+ * facts must be sent as lemmas (mustCommunicateFact).
+ * (3) Methods for tracking when lemmas and facts have been processed.
+ */
+class InferenceManager : public InferenceManagerBuffered
+{
+  typedef context::CDHashSet<Node, NodeHashFunction> NodeSet;
+
+ public:
+  InferenceManager(Theory& t, TheoryState& state, ProofNodeManager* pnm);
+  ~InferenceManager() {}
+  /**
+   * Process the current lemmas and facts. This is a custom method that can
+   * be seen as overriding the behavior of calling both doPendingLemmas and
+   * doPendingFacts. It determines whether facts should be sent as lemmas
+   * or processed internally.
+   */
+  void process();
+
+ protected:
+  /**
+   * Must communicate fact method.
+   * The datatypes decision procedure makes "internal" inferences:
+   * (1) Unification : C( t1...tn ) = C( s1...sn ) => ti = si
+   * (2) Label : ~is_C1(t) ... ~is_C{i-1}(t) ~is_C{i+1}(t) ... ~is_Cn(t) =>
+   * is_Ci( t )
+   * (3) Instantiate : is_C( t ) => t = C( sel_1( t ) ... sel_n( t ) )
+   * (4) collapse selector : S( C( t1...tn ) ) = t'
+   * (5) collapse term size : size( C( t1...tn ) ) = 1 + size( t1 ) + ... +
+   * size( tn )
+   * (6) non-negative size : 0 <= size(t)
+   * This method returns true if the fact must be sent out as a lemma. If it
+   * returns false, then we assert the fact internally. We may need to
+   * communicate outwards if the conclusions involve other theories. Also
+   * communicate (6) and OR conclusions.
+   */
+  bool mustCommunicateFact(Node n, Node exp) const;
+  /** Common node */
+  Node d_true;
+};
+
+}  // namespace datatypes
+}  // namespace theory
+}  // namespace CVC4
+
+#endif
diff --git a/src/theory/inference_manager_buffered.cpp b/src/theory/inference_manager_buffered.cpp
index adbcc3033..14f6c4f4a 100644
--- a/src/theory/inference_manager_buffered.cpp
+++ b/src/theory/inference_manager_buffered.cpp
@@ -117,6 +117,12 @@ void InferenceManagerBuffered::doPendingPhaseRequirements()
   }
   d_pendingReqPhase.clear();
 }
+void InferenceManagerBuffered::clearPendingFacts() { d_pendingFact.clear(); }
+void InferenceManagerBuffered::clearPendingLemmas() { d_pendingLem.clear(); }
+void InferenceManagerBuffered::clearPendingPhaseRequirements()
+{
+  d_pendingReqPhase.clear();
+}
 
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/inference_manager_buffered.h b/src/theory/inference_manager_buffered.h
index bb46ef566..596a346de 100644
--- a/src/theory/inference_manager_buffered.h
+++ b/src/theory/inference_manager_buffered.h
@@ -145,6 +145,12 @@ class InferenceManagerBuffered : public TheoryInferenceManager
    * phase requirements and clears d_pendingReqPhase.
    */
   void doPendingPhaseRequirements();
+  /** Clear pending facts, without processing */
+  void clearPendingFacts();
+  /** Clear pending lemmas, without processing */
+  void clearPendingLemmas();
+  /** Clear pending phase requirements, without processing */
+  void clearPendingPhaseRequirements();
 
  protected:
   /** A set of pending lemmas */
diff --git a/src/theory/theory_inference_manager.cpp b/src/theory/theory_inference_manager.cpp
index a42c33814..20c1fbf37 100644
--- a/src/theory/theory_inference_manager.cpp
+++ b/src/theory/theory_inference_manager.cpp
@@ -30,7 +30,10 @@ TheoryInferenceManager::TheoryInferenceManager(Theory& t,
       d_out(t.getOutputChannel()),
       d_ee(nullptr),
       d_pnm(pnm),
-      d_keep(t.getSatContext())
+      d_keep(t.getSatContext()),
+      d_lemmasSent(t.getUserContext()),
+      d_numCurrentLemmas(0),
+      d_numCurrentFacts(0)
 {
 }
 
@@ -47,6 +50,12 @@ void TheoryInferenceManager::setEqualityEngine(eq::EqualityEngine* ee)
   }
 }
 
+void TheoryInferenceManager::reset()
+{
+  d_numCurrentLemmas = 0;
+  d_numCurrentFacts = 0;
+}
+
 void TheoryInferenceManager::conflictEqConstantMerge(TNode a, TNode b)
 {
   if (!d_theoryState.isInConflict())
@@ -125,15 +134,41 @@ TrustNode TheoryInferenceManager::explainConflictEqConstantMerge(TNode a,
         << " mkTrustedConflictEqConstantMerge";
 }
 
-LemmaStatus TheoryInferenceManager::lemma(TNode lem, LemmaProperty p)
+bool TheoryInferenceManager::lemma(TNode lem, LemmaProperty p, bool doCache)
+{
+  TrustNode tlem = TrustNode::mkTrustLemma(lem, nullptr);
+  return trustedLemma(tlem, p, doCache);
+}
+
+bool TheoryInferenceManager::trustedLemma(const TrustNode& tlem,
+                                          LemmaProperty p,
+                                          bool doCache)
+{
+  if (doCache)
+  {
+    if (!cacheLemma(tlem.getNode(), p))
+    {
+      return false;
+    }
+  }
+  d_numCurrentLemmas++;
+  d_out.trustedLemma(tlem, p);
+  return true;
+}
+
+bool TheoryInferenceManager::hasCachedLemma(TNode lem, LemmaProperty p)
+{
+  return d_lemmasSent.find(lem) != d_lemmasSent.end();
+}
+
+uint32_t TheoryInferenceManager::numAddedLemmas() const
 {
-  return d_out.lemma(lem, p);
+  return d_numCurrentLemmas;
 }
 
-LemmaStatus TheoryInferenceManager::trustedLemma(const TrustNode& tlem,
-                                                 LemmaProperty p)
+bool TheoryInferenceManager::hasAddedLemma() const
 {
-  return d_out.trustedLemma(tlem, p);
+  return d_numCurrentLemmas != 0;
 }
 
 void TheoryInferenceManager::assertInternalFact(TNode atom, bool pol, TNode exp)
@@ -178,6 +213,7 @@ void TheoryInferenceManager::processInternalFact(TNode atom,
   Assert(d_ee != nullptr);
   Trace("infer-manager") << "TheoryInferenceManager::assertInternalFact: "
                          << expn << std::endl;
+  d_numCurrentFacts++;
   // Now, assert the fact. How to do so depends on whether proofs are enabled.
   // If no proof production, or no proof rule was given
   if (d_pfee == nullptr || id == PfRule::UNKNOWN)
   {
@@ -244,5 +280,25 @@ Node TheoryInferenceManager::mkExplain(TNode n)
   return NodeManager::currentNM()->mkAnd(assumptions);
 }
 
+uint32_t TheoryInferenceManager::numAddedFacts() const
+{
+  return d_numCurrentFacts;
+}
+
+bool TheoryInferenceManager::hasAddedFact() const
+{
+  return d_numCurrentFacts != 0;
+}
+
+bool TheoryInferenceManager::cacheLemma(TNode lem, LemmaProperty p)
+{
+  if (d_lemmasSent.find(lem) != d_lemmasSent.end())
+  {
+    return false;
+  }
+  d_lemmasSent.insert(lem);
+  return true;
+}
+
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/theory_inference_manager.h b/src/theory/theory_inference_manager.h
index af8e817b4..d5c0fe1b2 100644
--- a/src/theory/theory_inference_manager.h
+++ b/src/theory/theory_inference_manager.h
@@ -71,13 +71,23 @@ class TheoryInferenceManager
    */
   TheoryInferenceManager(Theory& t, TheoryState& state, ProofNodeManager* pnm);
   virtual ~TheoryInferenceManager() {}
-  //--------------------------------------- initialization
   /**
    * Set equality engine, ee is a pointer to the official equality engine
    * of theory.
   */
   void setEqualityEngine(eq::EqualityEngine* ee);
-  //--------------------------------------- end initialization
+  /**
+   * Reset, which resets counters regarding the number of added lemmas and
+   * internal facts. This method should be manually called by the theory at
+   * the appropriate time for the purpose of tracking the usage of this
+   * inference manager.
+   *
+   * For example, some theories implement an internal checking loop that
+   * repeats while new facts are added. The theory should call reset at the
+   * beginning of this loop and repeat its strategy while hasAddedFact is true.
+   */
+  void reset();
+  //--------------------------------------- propagations
   /**
    * T-propagate literal lit, possibly encountered by equality engine,
    * returns false if we are in conflict.
@@ -94,6 +104,7 @@ class TheoryInferenceManager
    * Theory, if it exists.
    */
   virtual TrustNode explainLit(TNode lit);
+  //--------------------------------------- conflicts
   /**
    * Raise conflict, called when constants a and b merge. Sends the conflict
    * on the output channel corresponding to the equality engine's explanation
@@ -114,11 +125,39 @@ class TheoryInferenceManager
    * been provided in a custom way.
    */
   void trustedConflict(TrustNode tconf);
-  /** Send lemma lem with property p on the output channel. */
-  LemmaStatus lemma(TNode lem, LemmaProperty p = LemmaProperty::NONE);
-  /** Send (trusted) lemma lem with property p on the output channel. */
-  LemmaStatus trustedLemma(const TrustNode& tlem,
-                           LemmaProperty p = LemmaProperty::NONE);
+  //--------------------------------------- lemmas
+  /**
+   * Send (trusted) lemma lem with property p on the output channel.
+   *
+   * @param tlem The trust node containing the lemma and its proof generator.
+   * @param p The property of the lemma
+   * @param doCache If true, we send the lemma only if it has not already been
+   * cached (see cacheLemma), and add it to the cache during this call.
+   * @return true if the lemma was sent on the output channel.
+   */
+  bool trustedLemma(const TrustNode& tlem,
+                    LemmaProperty p = LemmaProperty::NONE,
+                    bool doCache = true);
+  /**
+   * Send lemma lem with property p on the output channel. Same as above, with
+   * a node instead of a trust node.
+   */
+  bool lemma(TNode lem,
+             LemmaProperty p = LemmaProperty::NONE,
+             bool doCache = true);
+  /**
+   * Has this inference manager cached and sent the given lemma (in this user
+   * context)? This method can be overridden by the particular manager. If not,
+   * this returns true if lem is in the cache d_lemmasSent maintained by this
+   * class. Notice that the default cache in this base class is not dependent
+   * on the lemma property.
+   */
+  virtual bool hasCachedLemma(TNode lem, LemmaProperty p);
+  /** The number of lemmas we have sent since the last call to reset */
+  uint32_t numAddedLemmas() const;
+  /** Have we added a lemma since the last call to reset? */
+  bool hasAddedLemma() const;
+  //--------------------------------------- internal facts
   /**
    * Assert internal fact. This is recommended method for asserting "internal"
    * facts into the equality engine of the theory. In particular, this method
@@ -163,6 +202,10 @@ class TheoryInferenceManager
                           bool pol,
                           const std::vector<Node>& exp,
                           ProofGenerator* pg);
+  /** The number of internal facts we have added since the last call to reset */
+  uint32_t numAddedFacts() const;
+  /** Have we added an internal fact since the last call to reset? */
+  bool hasAddedFact() const;
 
  protected:
   /**
@@ -192,6 +235,15 @@ class TheoryInferenceManager
    * conjunctions), return the explanation as a conjunction.
    */
   Node mkExplain(TNode n);
+  /**
+   * Cache that lemma lem is being sent with property p. Return true if it
+   * did not already exist in the cache maintained by this class. If this
+   * method is not overridden, then we use the d_lemmasSent cache maintained
+   * by this class, which is not dependent on the lemma property. If the
+   * lemma property should be taken into account, the manager should
+   * override this method as needed.
+   */
+  virtual bool cacheLemma(TNode lem, LemmaProperty p);
   /** The theory object */
   Theory& d_theory;
   /** Reference to the state of theory */
@@ -211,6 +263,15 @@ class TheoryInferenceManager
    * SAT-context-dependent.
    */
   NodeSet d_keep;
+  /**
+   * A cache of all lemmas sent, which is a user-context-dependent set of
+   * nodes. Notice that this cache does not depend on the lemma property.
+   */
+  NodeSet d_lemmasSent;
+  /** The number of lemmas sent since the last call to reset. */
+  uint32_t d_numCurrentLemmas;
+  /** The number of internal facts added since the last call to reset. */
+  uint32_t d_numCurrentFacts;
 };
 
 }  // namespace theory
-- 
2.30.2