From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Tue, 2 Feb 2021 20:48:59 +0000 (-0600)
Subject: (proof-new) Refactor theory preprocessing (#5835)
X-Git-Tag: cvc5-1.0.0~2328
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3393c0c828b44f88c92e52a2b49d8a572b2a9b93;p=cvc5.git

(proof-new) Refactor theory preprocessing (#5835)

This simplifies our algorithm for theory preprocessing. The motivation is two-fold:
(1) Proofs are currently incorrect in rare cases due to incorrectly tracking pre vs post rewrites.
(2) We can avoid traversing Boolean connectives with term formula removal. Note that this PR relies on the observation that term formula removal leaves Boolean structure unchanged and can apply locally to theory atoms.

Furthermore, notice that our handling of lemmas generated by term formula removal is now more uniform. In the second algorithm, term formula removal is not applied to fixed point anymore. In other words, we do not remove skolems from lemmas generated by term formula removal recursively. Instead, since lemmas are theory-preprocessed recursively, we use the fixed point that runs outside the above algorithm instead of relying on the inner fixed point in term formula removal.

This PR resolves several open issues with proof production on proof-new.

I will performance test this change on SMT-LIB.
---

diff --git a/src/theory/theory_preprocessor.cpp b/src/theory/theory_preprocessor.cpp
index 3b5e9cee4..0719d4130 100644
--- a/src/theory/theory_preprocessor.cpp
+++ b/src/theory/theory_preprocessor.cpp
@@ -31,6 +31,7 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
     : d_engine(engine),
       d_logicInfo(engine.getLogicInfo()),
       d_ppCache(userContext),
+      d_rtfCache(userContext),
       d_tfr(userContext, pnm),
       d_tpg(pnm ? new TConvProofGenerator(
                       pnm,
@@ -40,6 +41,19 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
                       "TheoryPreprocessor::preprocess_rewrite",
                       &d_iqtc)
                 : nullptr),
+      d_tpgRtf(pnm ? new TConvProofGenerator(pnm,
+                                             userContext,
+                                             TConvPolicy::FIXPOINT,
+                                             TConvCachePolicy::NEVER,
+                                             "TheoryPreprocessor::rtf",
+                                             &d_iqtc)
+                   : nullptr),
+      d_tpgRew(pnm ? new TConvProofGenerator(pnm,
+                                             userContext,
+                                             TConvPolicy::ONCE,
+                                             TConvCachePolicy::NEVER,
+                                             "TheoryPreprocessor::pprew")
+                   : nullptr),
       d_tspg(nullptr),
       d_lp(pnm ? new LazyCDProof(pnm,
                                  nullptr,
@@ -51,14 +65,12 @@ TheoryPreprocessor::TheoryPreprocessor(TheoryEngine& engine,
   {
     // Make the main term conversion sequence generator, which tracks up to
     // three conversions made in succession:
-    // (1) theory preprocessing+rewriting
-    // (2) term formula removal
-    // (3) rewriting
-    // Steps (1) and (3) use a common term conversion generator.
+    // (1) rewriting
+    // (2) (theory preprocessing+rewriting until fixed point)+term formula
+    // removal+rewriting.
     std::vector<ProofGenerator*> ts;
-    ts.push_back(d_tpg.get());
-    ts.push_back(d_tfr.getTConvProofGenerator());
-    ts.push_back(d_tpg.get());
+    ts.push_back(d_tpgRew.get());
+    ts.push_back(d_tpgRtf.get());
     d_tspg.reset(new TConvSeqProofGenerator(
         pnm, ts, userContext, "TheoryPreprocessor::sequence"));
   }
@@ -83,46 +95,33 @@ TrustNode TheoryPreprocessor::preprocessInternal(
 
   Trace("tpp-debug") << "TheoryPreprocessor::preprocess: start " << node
                      << std::endl;
-  // run theory preprocessing
-  TrustNode tpp = theoryPreprocess(node);
-  Node ppNode = tpp.getNode();
 
-  // Remove the ITEs, fixed point
-  TrustNode ttfr = d_tfr.run(ppNode, newLemmas, newSkolems, true);
-  Node rtfNode = ttfr.isNull() ? ppNode : ttfr.getNode();
+  // We must rewrite before preprocessing, because some terms when rewritten
+  // may introduce new terms that are not top-level and require preprocessing.
+  // An example of this is (forall ((x Int)) (and (tail L) (P x))) which
+  // rewrites to (and (tail L) (forall ((x Int)) (P x))). The subterm (tail L)
+  // must be preprocessed as a child here.
+  Node irNode = rewriteWithProof(node, d_tpgRew.get(), true);
 
-  if (Debug.isOn("lemma-ites"))
-  {
-    Debug("lemma-ites") << "removed ITEs from lemma: " << rtfNode << endl;
-    Debug("lemma-ites") << " + now have the following " << newLemmas.size()
-                        << " lemma(s):" << endl;
-    for (size_t i = 0, lsize = newLemmas.size(); i <= lsize; ++i)
-    {
-      Debug("lemma-ites") << " + " << newLemmas[i].getNode() << endl;
-    }
-    Debug("lemma-ites") << endl;
-  }
-  // Rewrite the main node, we rewrite and store the proof step, if necessary,
-  // in d_tpg, which maintains the fact that d_tpg can prove the rewrite.
-  Node retNode = rewriteWithProof(rtfNode);
+  // run theory preprocessing
+  TrustNode tpp = theoryPreprocess(irNode, newLemmas, newSkolems);
+  Node ppNode = tpp.getNode();
 
   if (Trace.isOn("tpp-debug"))
   {
-    if (node != ppNode)
-    {
-      Trace("tpp-debug") << "after preprocessing : " << ppNode << std::endl;
-    }
-    if (rtfNode != ppNode)
+    if (node != irNode)
     {
-      Trace("tpp-debug") << "after rtf : " << rtfNode << std::endl;
+      Trace("tpp-debug") << "after initial rewriting : " << irNode << std::endl;
     }
-    if (retNode != rtfNode)
+    if (irNode != ppNode)
     {
-      Trace("tpp-debug") << "after rewriting : " << retNode << std::endl;
+      Trace("tpp-debug")
+          << "after preprocessing + rewriting and term formula removal : "
+          << ppNode << std::endl;
     }
     Trace("tpp-debug") << "TheoryPreprocessor::preprocess: finish" << std::endl;
   }
-  if (node == retNode)
+  if (node == ppNode)
   {
     Trace("tpp-debug") << "...TheoryPreprocessor::preprocess returned no change"
                        << std::endl;
@@ -136,19 +135,17 @@ TrustNode TheoryPreprocessor::preprocessInternal(
   {
     std::vector<Node> cterms;
     cterms.push_back(node);
+    cterms.push_back(irNode);
     cterms.push_back(ppNode);
-    cterms.push_back(rtfNode);
-    cterms.push_back(retNode);
     // We have that:
-    // node -> ppNode via preprocessing + rewriting
-    // ppNode -> rtfNode via term formula removal
-    // rtfNode -> retNode via rewriting
+    // node -> irNode via rewriting
+    // irNode -> ppNode via theory-preprocessing + rewriting + tf removal
     tret = d_tspg->mkTrustRewriteSequence(cterms);
     tret.debugCheckClosed("tpp-debug", "TheoryPreprocessor::lemma_ret");
   }
   else
   {
-    tret = TrustNode::mkTrustRewrite(node, retNode, nullptr);
+    tret = TrustNode::mkTrustRewrite(node, ppNode, nullptr);
   }
 
   // now, rewrite the lemmas
@@ -270,7 +267,10 @@ struct preprocess_stack_element
   preprocess_stack_element(TNode n) : node(n), children_added(false) {}
 };
 
-TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
+TrustNode TheoryPreprocessor::theoryPreprocess(
+    TNode assertion,
+    std::vector<TrustNode>& newLemmas,
+    std::vector<Node>& newSkolems)
 {
   Trace("theory::preprocess")
       << "TheoryPreprocessor::theoryPreprocess(" << assertion << ")" << endl;
@@ -287,36 +287,54 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
     TNode current = stackHead.node;
 
     Trace("theory::preprocess-debug")
-        << "TheoryPreprocessor::theoryPreprocess(" << assertion
-        << "): processing " << current << endl;
+        << "TheoryPreprocessor::theoryPreprocess processing " << current
+        << endl;
 
     // If node already in the cache we're done, pop from the stack
-    NodeMap::iterator find = d_ppCache.find(current);
-    if (find != d_ppCache.end())
+    if (d_rtfCache.find(current) != d_rtfCache.end())
     {
       toVisit.pop_back();
       continue;
     }
 
-    if (!d_logicInfo.isTheoryEnabled(Theory::theoryOf(current))
-        && Theory::theoryOf(current) != THEORY_SAT_SOLVER)
+    TheoryId tid = Theory::theoryOf(current);
+
+    if (!d_logicInfo.isTheoryEnabled(tid) && tid != THEORY_SAT_SOLVER)
     {
       stringstream ss;
       ss << "The logic was specified as " << d_logicInfo.getLogicString()
-         << ", which doesn't include " << Theory::theoryOf(current)
+         << ", which doesn't include " << tid
          << ", but got a preprocessing-time fact for that theory." << endl
          << "The fact:" << endl
          << current;
       throw LogicException(ss.str());
     }
-
     // If this is an atom, we preprocess its terms with the theory ppRewriter
-    if (Theory::theoryOf(current) != THEORY_BOOL)
+    if (tid != THEORY_BOOL)
     {
       Node ppRewritten = ppTheoryRewrite(current);
-      d_ppCache[current] = ppRewritten;
-      Assert(Rewriter::rewrite(d_ppCache[current].get())
-             == d_ppCache[current].get());
+      Assert(Rewriter::rewrite(ppRewritten) == ppRewritten);
+      if (isProofEnabled() && ppRewritten != current)
+      {
+        TrustNode trn =
+            TrustNode::mkTrustRewrite(current, ppRewritten, d_tpg.get());
+        registerTrustedRewrite(trn, d_tpgRtf.get(), true);
+      }
+
+      // Term formula removal without fixed point. We do not need to do fixed
+      // point since newLemmas are theory-preprocessed until fixed point in
+      // preprocessInternal (at top-level, when procLemmas=true).
+      TrustNode ttfr = d_tfr.run(ppRewritten, newLemmas, newSkolems, false);
+      Node rtfNode = ppRewritten;
+      if (!ttfr.isNull())
+      {
+        rtfNode = ttfr.getNode();
+        registerTrustedRewrite(ttfr, d_tpgRtf.get(), true);
+      }
+      // Finish the conversion by rewriting. This is registered as a
+      // post-rewrite, since it is the last step applied for theory atoms.
+      Node retNode = rewriteWithProof(rtfNode, d_tpgRtf.get(), false);
+      d_rtfCache[current] = retNode;
       continue;
     }
 
@@ -331,17 +349,18 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
       }
       for (unsigned i = 0; i < current.getNumChildren(); ++i)
       {
-        Assert(d_ppCache.find(current[i]) != d_ppCache.end());
-        builder << d_ppCache[current[i]].get();
+        Assert(d_rtfCache.find(current[i]) != d_rtfCache.end());
+        builder << d_rtfCache[current[i]].get();
       }
       // Mark the substitution and continue
       Node result = builder;
-      // always rewrite here, since current may not be in rewritten form
-      result = rewriteWithProof(result);
+      // always rewrite here, since current may not be in rewritten form after
+      // reconstruction
+      result = rewriteWithProof(result, d_tpgRtf.get(), false);
       Trace("theory::preprocess-debug")
-          << "TheoryPreprocessor::theoryPreprocess(" << assertion
-          << "): setting " << current << " -> " << result << endl;
-      d_ppCache[current] = result;
+          << "TheoryPreprocessor::theoryPreprocess setting " << current
+          << " -> " << result << endl;
+      d_rtfCache[current] = result;
       toVisit.pop_back();
     }
     else
@@ -356,8 +375,7 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
              ++child_it)
         {
           TNode childNode = *child_it;
-          NodeMap::iterator childFind = d_ppCache.find(childNode);
-          if (childFind == d_ppCache.end())
+          if (d_rtfCache.find(childNode) == d_rtfCache.end())
           {
             toVisit.push_back(childNode);
           }
@@ -367,15 +385,16 @@ TrustNode TheoryPreprocessor::theoryPreprocess(TNode assertion)
       {
         // No children, so we're done
         Trace("theory::preprocess-debug")
-            << "SubstitutionMap::internalSubstitute(" << assertion
-            << "): setting " << current << " -> " << current << endl;
-        d_ppCache[current] = current;
+            << "SubstitutionMap::internalSubstitute setting " << current
+            << " -> " << current << endl;
+        d_rtfCache[current] = current;
         toVisit.pop_back();
       }
     }
   }
+  Assert(d_rtfCache.find(assertion) != d_rtfCache.end());
   // Return the substituted version
-  Node res = d_ppCache[assertion];
+  Node res = d_rtfCache[assertion];
   return TrustNode::mkTrustRewrite(assertion, res, d_tpg.get());
 }
 
@@ -391,27 +410,24 @@ Node TheoryPreprocessor::ppTheoryRewrite(TNode term)
   {
     return preprocessWithProof(term);
   }
+  // should be in rewritten form here
+  Assert(term == Rewriter::rewrite(term));
   Trace("theory-pp") << "ppTheoryRewrite { " << term << endl;
-  // We must rewrite before preprocessing, because some terms when rewritten
-  // may introduce new terms that are not top-level and require preprocessing.
-  // An example of this is (forall ((x Int)) (and (tail L) (P x))) which
-  // rewrites to (and (tail L) (forall ((x Int)) (P x))). The subterm (tail L)
-  // must be preprocessed as a child here.
-  Node newTerm = rewriteWithProof(term);
   // do not rewrite inside quantifiers
-  if (newTerm.getNumChildren() > 0 && !newTerm.isClosure())
+  Node newTerm = term;
+  if (!term.isClosure())
   {
-    NodeBuilder<> newNode(newTerm.getKind());
-    if (newTerm.getMetaKind() == kind::metakind::PARAMETERIZED)
+    NodeBuilder<> newNode(term.getKind());
+    if (term.getMetaKind() == kind::metakind::PARAMETERIZED)
     {
-      newNode << newTerm.getOperator();
+      newNode << term.getOperator();
     }
-    for (const Node& nt : newTerm)
+    for (const Node& nt : term)
     {
       newNode << ppTheoryRewrite(nt);
     }
     newTerm = Node(newNode);
-    newTerm = rewriteWithProof(newTerm);
+    newTerm = rewriteWithProof(newTerm, d_tpg.get(), false);
   }
   newTerm = preprocessWithProof(newTerm);
   d_ppCache[term] = newTerm;
@@ -419,9 +435,10 @@ Node TheoryPreprocessor::ppTheoryRewrite(TNode term)
   return newTerm;
 }
 
-Node TheoryPreprocessor::rewriteWithProof(Node term)
+Node TheoryPreprocessor::rewriteWithProof(Node term,
+                                          TConvProofGenerator* pg,
+                                          bool isPre)
 {
-  // FIXME (project #37): should properly distinguish pre vs post rewrite
   Node termr = Rewriter::rewrite(term);
   // store rewrite step if tracking proofs and it rewrites
   if (isProofEnabled())
@@ -432,7 +449,7 @@ Node TheoryPreprocessor::rewriteWithProof(Node term)
       Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (rewriting) "
                          << term << " -> " << termr << std::endl;
       // always use term context hash 0 (default)
-      d_tpg->addRewriteStep(term, termr, PfRule::REWRITE, {}, {term}, false);
+      pg->addRewriteStep(term, termr, PfRule::REWRITE, {}, {term}, isPre);
     }
   }
   return termr;
@@ -471,37 +488,52 @@ Node TheoryPreprocessor::preprocessWithProof(Node term)
   }
   Node termr = trn.getNode();
   Assert(term != termr);
-  // FIXME (project #37): should properly distinguish pre vs post rewrite
   if (isProofEnabled())
   {
-    if (trn.getGenerator() != nullptr)
-    {
-      Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (generator) "
-                         << term << " -> " << termr << std::endl;
-      trn.debugCheckClosed("tpp-debug",
-                           "TheoryPreprocessor::preprocessWithProof");
-      // always use term context hash 0 (default)
-      d_tpg->addRewriteStep(
-          term, termr, trn.getGenerator(), false, PfRule::ASSUME, true);
-    }
-    else
-    {
-      Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (trusted) "
-                         << term << " -> " << termr << std::endl;
-      // small step trust
-      d_tpg->addRewriteStep(term,
-                            termr,
-                            PfRule::THEORY_PREPROCESS,
-                            {},
-                            {term.eqNode(termr)},
-                            false);
-    }
+    registerTrustedRewrite(trn, d_tpg.get(), false);
   }
-  termr = rewriteWithProof(termr);
+  // Rewrite again here, which notice is a *pre* rewrite.
+  termr = rewriteWithProof(termr, d_tpg.get(), true);
   return ppTheoryRewrite(termr);
 }
 
 bool TheoryPreprocessor::isProofEnabled() const { return d_tpg != nullptr; }
 
+void TheoryPreprocessor::registerTrustedRewrite(TrustNode trn,
+                                                TConvProofGenerator* pg,
+                                                bool isPre)
+{
+  if (!isProofEnabled() || trn.isNull())
+  {
+    return;
+  }
+  Assert(trn.getKind() == TrustNodeKind::REWRITE);
+  Node eq = trn.getProven();
+  Node term = eq[0];
+  Node termr = eq[1];
+  if (trn.getGenerator() != nullptr)
+  {
+    Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (generator) "
+                       << term << " -> " << termr << std::endl;
+    trn.debugCheckClosed("tpp-debug",
+                         "TheoryPreprocessor::preprocessWithProof");
+    // always use term context hash 0 (default)
+    pg->addRewriteStep(
+        term, termr, trn.getGenerator(), isPre, PfRule::ASSUME, true);
+  }
+  else
+  {
+    Trace("tpp-debug") << "TheoryPreprocessor: addRewriteStep (trusted) "
+                       << term << " -> " << termr << std::endl;
+    // small step trust
+    pg->addRewriteStep(term,
+                       termr,
+                       PfRule::THEORY_PREPROCESS,
+                       {},
+                       {term.eqNode(termr)},
+                       isPre);
+  }
+}
+
 }  // namespace theory
 }  // namespace CVC4
diff --git a/src/theory/theory_preprocessor.h b/src/theory/theory_preprocessor.h
index 6a1f6b3ef..a23abed63 100644
--- a/src/theory/theory_preprocessor.h
+++ b/src/theory/theory_preprocessor.h
@@ -38,6 +38,35 @@ namespace theory {
 
 /**
  * The preprocessor used in TheoryEngine.
+ *
+ * A summary of the steps taken by the method preprocess:
+ *
+ * [1]
+ * apply rewriter
+ * [2]
+ * TRAVERSE(
+ *   prerewrite:
+ *    if theory atom {
+ *      TRAVERSE(
+ *        prerewrite:
+ *          // nothing
+ *        postrewrite:
+ *          apply rewriter
+ *          apply ppRewrite
+ *            if changed
+ *              apply rewriter
+ *              REPEAT traversal
+ *      )
+ *      apply term formula removal
+ *      apply rewriter
+ *    }
+ *  postrewrite: // for Boolean connectives
+ *    apply rewriter
+ * )
+ *
+ * Note that the rewriter must be applied beforehand, since the rewriter may
+ * rewrite a theory atom into a formula, e.g. quantifiers miniscoping. This
+ * impacts what the inner traversal is applied to.
  */
 class TheoryPreprocessor
 {
@@ -89,7 +118,9 @@ class TheoryPreprocessor
    * Runs theory specific preprocessing (Theory::ppRewrite) on the non-Boolean
    * parts of the node.
    */
-  TrustNode theoryPreprocess(TNode node);
+  TrustNode theoryPreprocess(TNode node,
+                             std::vector<TrustNode>& newLemmas,
+                             std::vector<Node>& newSkolems);
   /**
    * Internal helper for preprocess, which also optionally preprocesses the
    * new lemmas generated until a fixed point is reached based on argument
@@ -112,38 +143,61 @@ class TheoryPreprocessor
   TheoryEngine& d_engine;
   /** Logic info of theory engine */
   const LogicInfo& d_logicInfo;
-  /** Cache for theory-preprocessing of assertions */
+  /**
+   * Cache for theory-preprocessing of theory atoms. The domain of this map
+   * are terms that appear within theory atoms given to this class.
+   */
   NodeMap d_ppCache;
+  /**
+   * Cache for theory-preprocessing + term formula removal of the Boolean
+   * structure of assertions. The domain of this map are the Boolean
+   * connectives and theory atoms given to this class.
+   */
+  NodeMap d_rtfCache;
   /** The term formula remover */
   RemoveTermFormulas d_tfr;
   /** The term context, which computes hash values for term contexts. */
   InQuantTermContext d_iqtc;
   /**
    * A term conversion proof generator storing preprocessing and rewriting
-   * steps.
+   * steps, which is done until fixed point in the inner traversal of this
+   * class for theory atoms in step [2] above.
    */
   std::unique_ptr<TConvProofGenerator> d_tpg;
   /**
-   * A term conversion sequence generator, which applies theory preprocessing,
-   * term formula removal, and rewriting in sequence.
+   * A term conversion proof generator storing large steps from d_tpg (results
+   * of its fixed point) and term formula removal steps for the outer traversal
+   * of this class for theory atoms in step [2] above.
    */
-  std::unique_ptr<TConvSeqProofGenerator> d_tspg;
+  std::unique_ptr<TConvProofGenerator> d_tpgRtf;
   /**
    * A term conversion proof generator storing rewriting steps, which is used
-   * for calls to preprocess when doTheoryPreprocess is false. We store
-   * (top-level) rewrite steps only. Notice this is intentionally separate
-   * from d_tpg, which interleaves both preprocessing and rewriting.
+   * for top-level rewriting before the preprocessing pass, step [1] above.
    */
   std::unique_ptr<TConvProofGenerator> d_tpgRew;
+  /**
+   * A term conversion sequence generator, which applies rewriting,
+   * (theory-preprocessing + rewriting + term formula removal), rewriting again
+   * in sequence, given by d_tpgRew and d_tpgRtf.
+   */
+  std::unique_ptr<TConvSeqProofGenerator> d_tspg;
   /** A lazy proof, for additional lemmas. */
   std::unique_ptr<LazyCDProof> d_lp;
-  /** Helper for theoryPreprocess */
+  /**
+   * Helper for theoryPreprocess, which traverses the provided term and
+   * applies ppRewrite and rewriting until fixed point on term using
+   * the method preprocessWithProof helper below.
+   */
   Node ppTheoryRewrite(TNode term);
   /**
-   * Rewrite with proof, which stores a REWRITE step in d_tpg if necessary
+   * Rewrite with proof, which stores a REWRITE step in pg if necessary
    * and returns the rewritten form of term.
+   *
+   * @param term The term to rewrite
+   * @param pg The proof generator to register to
+   * @param isPre whether the rewrite is a pre-rewrite.
    */
-  Node rewriteWithProof(Node term);
+  Node rewriteWithProof(Node term, TConvProofGenerator* pg, bool isPre);
   /**
    * Preprocess with proof, which calls the appropriate ppRewrite method,
    * stores the corresponding rewrite step in d_tpg if necessary and returns
@@ -151,6 +205,18 @@ class TheoryPreprocessor
    * term is already in rewritten form.
    */
   Node preprocessWithProof(Node term);
+  /**
+   * Register rewrite trn based on trust node into term conversion generator
+   * pg, which uses THEORY_PREPROCESS as a step if no proof generator is
+   * provided in trn.
+   *
+   * @param trn The REWRITE trust node
+   * @param pg The proof generator to register to
+   * @param isPre whether the rewrite is a pre-rewrite.
+   */
+  void registerTrustedRewrite(TrustNode trn,
+                              TConvProofGenerator* pg,
+                              bool isPre);
   /** Proofs enabled */
   bool isProofEnabled() const;
 };