From 5e969241cd3bdec90138edb5d1c88d3e1797cb43 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Tue, 31 Aug 2021 19:05:29 -0500
Subject: [PATCH] Lazy proof reconstruction for strings theory solver (#7096)

This makes it so that we call the strings::InferProofCons::convert lazily, instead deferring to a temporary, trusted STRING_INFERENCE rule.

This decreases our average proof overhead on commonly solved instances from 64% to 58% on QF_S + QF_SLIA. It also reduces timeouts significantly, from 2010 to 1460. (Note these numbers were with a slightly extended version of this branch that includes other pending PRs for proofs).

Also fixes one subtle issue where proof reconstruction was misusing sequential substitutions (leading to warnings during proof post-processing).
---
 src/proof/proof_rule.cpp                |   1 +
 src/proof/proof_rule.h                  |   9 ++
 src/smt/proof_manager.cpp               |   2 +
 src/smt/proof_post_processor.cpp        |  16 ++++
 src/theory/strings/infer_proof_cons.cpp | 107 +++++++++++++++++-------
 src/theory/strings/infer_proof_cons.h   |  74 +++++++++++-----
 src/theory/strings/proof_checker.cpp    |   7 ++
 src/theory/strings/sequences_stats.cpp  |   2 -
 src/theory/strings/sequences_stats.h    |   7 --
 9 files changed, 168 insertions(+), 57 deletions(-)
diff --git a/src/proof/proof_rule.cpp b/src/proof/proof_rule.cpp
index 286396523..acb49843f 100644
--- a/src/proof/proof_rule.cpp
+++ b/src/proof/proof_rule.cpp
@@ -157,6 +157,7 @@ const char* toString(PfRule id)
     case PfRule::RE_ELIM: return "RE_ELIM";
     case PfRule::STRING_CODE_INJ: return "STRING_CODE_INJ";
     case PfRule::STRING_SEQ_UNIT_INJ: return "STRING_SEQ_UNIT_INJ";
+    case PfRule::STRING_INFERENCE: return "STRING_INFERENCE";
     //================================================= Arith rules
     case PfRule::MACRO_ARITH_SCALE_SUM_UB:
       return "ARITH_SCALE_SUM_UPPER_BOUNDS";
diff --git a/src/proof/proof_rule.h b/src/proof/proof_rule.h
index 7a3ce6882..bc2b6437b 100644
--- a/src/proof/proof_rule.h
+++ b/src/proof/proof_rule.h
@@ -1070,6 +1070,15 @@ enum class PfRule : uint32_t
   // Also applies to the case where (seq.unit y) is a constant sequence
   // of length one.
   STRING_SEQ_UNIT_INJ,
+  //======================== Trusted
+  // ======== String inference
+  // Children: ?
+  // Arguments: (F id isRev exp)
+  // ---------------------
+  // Conclusion: F
+  // used to bookkeep an inference that has not yet been converted via
+  // strings::InferProofCons::convert.
+  STRING_INFERENCE,
 
   //================================================= Arithmetic rules
   // ======== Adding Inequalities
diff --git a/src/smt/proof_manager.cpp b/src/smt/proof_manager.cpp
index 829d6f2f5..49f67e94c 100644
--- a/src/smt/proof_manager.cpp
+++ b/src/smt/proof_manager.cpp
@@ -82,6 +82,8 @@ PfManager::PfManager(Env& env, SmtEngine* smte)
         d_pfpp->setEliminateRule(PfRule::THEORY_REWRITE);
       }
     }
+    // theory-specific lazy proof reconstruction
+    d_pfpp->setEliminateRule(PfRule::STRING_INFERENCE);
     d_pfpp->setEliminateRule(PfRule::BV_BITBLAST);
   }
   d_false = NodeManager::currentNM()->mkConst(false);
diff --git a/src/smt/proof_post_processor.cpp b/src/smt/proof_post_processor.cpp
index 98ec30093..5ad311f8d 100644
--- a/src/smt/proof_post_processor.cpp
+++ b/src/smt/proof_post_processor.cpp
@@ -24,6 +24,7 @@
 #include "theory/builtin/proof_checker.h"
 #include "theory/bv/bitblast/proof_bitblaster.h"
 #include "theory/rewriter.h"
+#include "theory/strings/infer_proof_cons.h"
 #include "theory/theory.h"
 #include "util/rational.h"
 
@@ -1080,6 +1081,21 @@ Node ProofPostprocessCallback::expandMacros(PfRule id,
                           << std::endl;
     return sumBounds;
   }
+  else if (id == PfRule::STRING_INFERENCE)
+  {
+    // get the arguments
+    Node conc;
+    InferenceId iid;
+    bool isRev;
+    std::vector<Node> exp;
+    if (strings::InferProofCons::unpackArgs(args, conc, iid, isRev, exp))
+    {
+      if (strings::InferProofCons::addProofTo(cdp, conc, iid, isRev, exp))
+      {
+        return conc;
+      }
+    }
+  }
   else if (id == PfRule::BV_BITBLAST)
   {
     bv::BBProof bb(nullptr, d_pnm, true);
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp
index f48d29416..e9a6da104 100644
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -59,6 +59,69 @@ void InferProofCons::notifyFact(const InferInfo& ii)
   d_lazyFactMap.insert(ii.d_conc, iic);
 }
 
+bool InferProofCons::addProofTo(CDProof* pf,
+                                Node conc,
+                                InferenceId infer,
+                                bool isRev,
+                                const std::vector<Node>& exp)
+{
+  // now go back and convert it to proof steps and add to proof
+  bool useBuffer = false;
+  ProofStep ps;
+  TheoryProofStepBuffer psb(pf->getManager()->getChecker());
+  // run the conversion
+  convert(infer, isRev, conc, exp, ps, psb, useBuffer);
+  // make the proof based on the step or the buffer
+  if (useBuffer)
+  {
+    if (!pf->addSteps(psb))
+    {
+      return false;
+    }
+  }
+  else
+  {
+    if (!pf->addStep(conc, ps))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+void InferProofCons::packArgs(Node conc,
+                              InferenceId infer,
+                              bool isRev,
+                              const std::vector<Node>& exp,
+                              std::vector<Node>& args)
+{
+  args.push_back(conc);
+  args.push_back(mkInferenceIdNode(infer));
+  args.push_back(NodeManager::currentNM()->mkConst(isRev));
+  // The vector exp is stored as arguments; its flatten form are premises. We
+  // need both since the grouping of exp is important, e.g. { (and a b), c }
+  // is different from { a, b, c } in the convert routine, since positions
+  // of formulas in exp have special meaning.
+  args.insert(args.end(), exp.begin(), exp.end());
+}
+
+bool InferProofCons::unpackArgs(const std::vector<Node>& args,
+                                Node& conc,
+                                InferenceId& infer,
+                                bool& isRev,
+                                std::vector<Node>& exp)
+{
+  Assert(args.size() >= 3);
+  conc = args[0];
+  if (!getInferenceId(args[1], infer))
+  {
+    return false;
+  }
+  isRev = args[2].getConst<bool>();
+  exp.insert(exp.end(), args.begin() + 3, args.end());
+  return true;
+}
+
 void InferProofCons::convert(InferenceId infer,
                              bool isRev,
                              Node conc,
@@ -134,7 +197,7 @@ void InferProofCons::convert(InferenceId infer,
           useBuffer = true;
         }
       }
-      if (!useBuffer)
+      else
       {
         // use the predicate version?
         ps.d_args.push_back(conc);
@@ -933,8 +996,6 @@ void InferProofCons::convert(InferenceId infer,
     ps.d_args.push_back(t);
     // use the trust rule
     ps.d_rule = PfRule::THEORY_INFERENCE;
-    // add to stats
-    d_statistics.d_inferencesNoPf << infer;
   }
   if (Trace.isOn("strings-ipc-debug"))
   {
@@ -1023,8 +1084,6 @@ Node InferProofCons::convertTrans(Node eqa,
 
 std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
 {
-  // temporary proof
-  CDProof pf(d_pnm);
   // get the inference
   NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact);
   if (it == d_lazyFactMap.end())
@@ -1038,28 +1097,20 @@ std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
     }
   }
   AlwaysAssert(it != d_lazyFactMap.end());
-  // now go back and convert it to proof steps and add to proof
-  bool useBuffer = false;
-  ProofStep ps;
-  TheoryProofStepBuffer psb(d_pnm->getChecker());
   std::shared_ptr<InferInfo> ii = (*it).second;
-  // run the conversion
-  convert(ii->getId(), ii->d_idRev, ii->d_conc, ii->d_premises, ps, psb, useBuffer);
-  // make the proof based on the step or the buffer
-  if (useBuffer)
-  {
-    if (!pf.addSteps(psb))
-    {
-      return nullptr;
-    }
-  }
-  else
+  Assert(ii->d_conc == fact);
+  // make a placeholder proof using STRINGS_INFERENCE, which is reconstructed
+  // during post-process
+  CDProof pf(d_pnm);
+  std::vector<Node> args;
+  packArgs(ii->d_conc, ii->getId(), ii->d_idRev, ii->d_premises, args);
+  // must flatten
+  std::vector<Node> exp;
+  for (const Node& ec : ii->d_premises)
   {
-    if (!pf.addStep(fact, ps))
-    {
-      return nullptr;
-    }
+    utils::flattenOp(AND, ec, exp);
   }
+  pf.addStep(fact, PfRule::STRING_INFERENCE, exp, args);
   return pf.getProofFor(fact);
 }
 
@@ -1071,7 +1122,7 @@ std::string InferProofCons::identify() const
 bool InferProofCons::purifyCoreSubstitution(Node& tgt,
                                             std::vector<Node>& children,
                                             TheoryProofStepBuffer& psb,
-                                            bool concludeTgtNew) const
+                                            bool concludeTgtNew)
 {
   // collect the terms to purify, which are the LHS of the substitution
   std::unordered_set<Node> termsToPurify;
@@ -1106,7 +1157,7 @@ Node InferProofCons::purifyCorePredicate(
     Node lit,
     bool concludeNew,
     TheoryProofStepBuffer& psb,
-    std::unordered_set<Node>& termsToPurify) const
+    std::unordered_set<Node>& termsToPurify)
 {
   bool pol = lit.getKind() != NOT;
   Node atom = pol ? lit : lit[0];
@@ -1145,8 +1196,8 @@ Node InferProofCons::purifyCorePredicate(
   return newLit;
 }
 
-Node InferProofCons::purifyCoreTerm(
-    Node n, std::unordered_set<Node>& termsToPurify) const
+Node InferProofCons::purifyCoreTerm(Node n,
+                                    std::unordered_set<Node>& termsToPurify)
 {
   Assert(n.getType().isStringLike());
   if (n.getNumChildren() == 0)
diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h
index 6bc64a085..110d231cf 100644
--- a/src/theory/strings/infer_proof_cons.h
+++ b/src/theory/strings/infer_proof_cons.h
@@ -42,6 +42,10 @@ namespace strings {
  *
  * The main (private) method of this class is convert below, which is
  * called when we need to construct a proof node from an InferInfo.
+ *
+ * This class uses lazy proof reconstruction. Namely, the getProofFor method
+ * returns applications of the rule STRING_INFERENCE, which store the arguments
+ * to the proof conversion routine "convert" below.
  */
 class InferProofCons : public ProofGenerator
 {
@@ -72,10 +76,40 @@ class InferProofCons : public ProofGenerator
    *
    * It should be the case that a call was made to notifyFact(ii) where
    * ii.d_conc is fact in this SAT context.
+   *
+   * This returns the appropriate application of STRING_INFERENCE so that it
+   * can be reconstructed if necessary during proof post-processing.
    */
   std::shared_ptr<ProofNode> getProofFor(Node fact) override;
   /** Identify this generator (for debugging, etc..) */
   virtual std::string identify() const override;
+  /**
+   * Add proof of running convert on the given arguments to CDProof pf. This is
+   * called lazily during proof post-processing.
+   */
+  static bool addProofTo(CDProof* pf,
+                         Node conc,
+                         InferenceId infer,
+                         bool isRev,
+                         const std::vector<Node>& exp);
+  /**
+   * Pack arguments of a STRING_INFERENCE rule application in args. This proof
+   * rule stores the arguments to the convert method of this class below.
+   */
+  static void packArgs(Node conc,
+                       InferenceId infer,
+                       bool isRev,
+                       const std::vector<Node>& exp,
+                       std::vector<Node>& args);
+  /**
+   * Inverse of the above method, which recovers the arguments that were packed
+   * into the vector args.
+   */
+  static bool unpackArgs(const std::vector<Node>& args,
+                         Node& conc,
+                         InferenceId& infer,
+                         bool& isRev,
+                         std::vector<Node>& exp);
 
  private:
   /** convert
@@ -100,26 +134,26 @@ class InferProofCons : public ProofGenerator
    * In particular, psb will contain a set of steps that form a proof
    * whose conclusion is ii.d_conc and whose free assumptions are ii.d_ant.
    */
-  void convert(InferenceId infer,
-               bool isRev,
-               Node conc,
-               const std::vector<Node>& exp,
-               ProofStep& ps,
-               TheoryProofStepBuffer& psb,
-               bool& useBuffer);
+  static void convert(InferenceId infer,
+                      bool isRev,
+                      Node conc,
+                      const std::vector<Node>& exp,
+                      ProofStep& ps,
+                      TheoryProofStepBuffer& psb,
+                      bool& useBuffer);
   /**
    * Convert length proof. If this method returns true, it adds proof step(s)
    * to the buffer psb that conclude lenReq from premises lenExp.
    */
-  bool convertLengthPf(Node lenReq,
-                       const std::vector<Node>& lenExp,
-                       TheoryProofStepBuffer& psb);
+  static bool convertLengthPf(Node lenReq,
+                              const std::vector<Node>& lenExp,
+                              TheoryProofStepBuffer& psb);
   /**
    * Helper method, adds the proof of (TRANS eqa eqb) into the proof step
    * buffer psb, where eqa and eqb are flipped as needed. Returns the
    * conclusion, or null if we were not able to construct a TRANS step.
    */
-  Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb);
+  static Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb);
   /**
    * Purify core substitution.
    *
@@ -159,10 +193,10 @@ class InferProofCons : public ProofGenerator
    * children'[i] from children[i] for all i, and tgt' from tgt (or vice versa
    * based on concludeTgtNew).
    */
-  bool purifyCoreSubstitution(Node& tgt,
-                              std::vector<Node>& children,
-                              TheoryProofStepBuffer& psb,
-                              bool concludeTgtNew = false) const;
+  static bool purifyCoreSubstitution(Node& tgt,
+                                     std::vector<Node>& children,
+                                     TheoryProofStepBuffer& psb,
+                                     bool concludeTgtNew = false);
   /**
    * Return the purified form of the predicate lit with respect to a set of
    * terms to purify, call the returned literal lit'.
@@ -171,17 +205,17 @@ class InferProofCons : public ProofGenerator
    * Note that string predicates that require purification are string
    * (dis)equalities only.
    */
-  Node purifyCorePredicate(Node lit,
-                           bool concludeNew,
-                           TheoryProofStepBuffer& psb,
-                           std::unordered_set<Node>& termsToPurify) const;
+  static Node purifyCorePredicate(Node lit,
+                                  bool concludeNew,
+                                  TheoryProofStepBuffer& psb,
+                                  std::unordered_set<Node>& termsToPurify);
   /**
    * Purify term with respect to a set of terms to purify. This replaces
    * all terms to purify with their purification variables that occur in
    * positions that are relevant for the core calculus of strings (direct
    * children of concat or equal).
    */
-  Node purifyCoreTerm(Node n, std::unordered_set<Node>& termsToPurify) const;
+  static Node purifyCoreTerm(Node n, std::unordered_set<Node>& termsToPurify);
   /** the proof node manager */
   ProofNodeManager* d_pnm;
   /** The lazy fact map */
diff --git a/src/theory/strings/proof_checker.cpp b/src/theory/strings/proof_checker.cpp
index 5a4008724..f566a4f39 100644
--- a/src/theory/strings/proof_checker.cpp
+++ b/src/theory/strings/proof_checker.cpp
@@ -53,6 +53,8 @@ void StringProofRuleChecker::registerTo(ProofChecker* pc)
   pc->registerChecker(PfRule::RE_ELIM, this);
   pc->registerChecker(PfRule::STRING_CODE_INJ, this);
   pc->registerChecker(PfRule::STRING_SEQ_UNIT_INJ, this);
+  // trusted rule
+  pc->registerTrustedChecker(PfRule::STRING_INFERENCE, this, 2);
 }
 
 Node StringProofRuleChecker::checkInternal(PfRule id,
@@ -504,6 +506,11 @@ Node StringProofRuleChecker::checkInternal(PfRule id,
     AlwaysAssert(t[0].getType() == t[1].getType());
     return t[0].eqNode(t[1]);
   }
+  else if (id == PfRule::STRING_INFERENCE)
+  {
+    Assert(args.size() >= 3);
+    return args[0];
+  }
   return Node::null();
 }
 
diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp
index 2c4834de2..85939d75f 100644
--- a/src/theory/strings/sequences_stats.cpp
+++ b/src/theory/strings/sequences_stats.cpp
@@ -26,8 +26,6 @@ SequencesStatistics::SequencesStatistics()
         smtStatisticsRegistry().registerInt("theory::strings::checkRuns")),
       d_strategyRuns(
           smtStatisticsRegistry().registerInt("theory::strings::strategyRuns")),
-      d_inferencesNoPf(smtStatisticsRegistry().registerHistogram<InferenceId>(
-          "theory::strings::inferencesNoPf")),
       d_cdSimplifications(smtStatisticsRegistry().registerHistogram<Kind>(
           "theory::strings::cdSimplifications")),
       d_reductions(smtStatisticsRegistry().registerHistogram<Kind>(
diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h
index 398b8894d..9ec9d3434 100644
--- a/src/theory/strings/sequences_stats.h
+++ b/src/theory/strings/sequences_stats.h
@@ -57,13 +57,6 @@ class SequencesStatistics
   /** Number of calls to run the strategy */
   IntStat d_strategyRuns;
   //--------------- inferences
-  /**
-   * Counts the number of applications of each type of inference that were not
-   * processed as a proof step. This is a subset of the statistics in
-   * TheoryInferenceManager, i.e.
-   * (theory::strings::inferences{Facts,Lemmas,Conflicts}).
-   */
-  HistogramStat<InferenceId> d_inferencesNoPf;
   /**
    * Counts the number of applications of each type of context-dependent
    * simplification. The sum of this map is equal to the number of EXTF or
-- 
2.30.2