Lazy proof reconstruction for strings theory solver (#7096)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 1 Sep 2021 00:05:29 +0000 (19:05 -0500)
committerGitHub <noreply@github.com>
Wed, 1 Sep 2021 00:05:29 +0000 (00:05 +0000)
This makes it so that we call the strings::InferProofCons::convert lazily, instead deferring to a temporary, trusted STRING_INFERENCE rule.

This decreases our average proof overhead on commonly solved instances from 64% to 58% on QF_S + QF_SLIA. It also reduces timeouts significantly, from 2010 to 1460. (Note these numbers were with a slightly extended version of this branch that includes other pending PRs for proofs).

Also fixes one subtle issue where proof reconstruction was misusing sequential substitutions (leading to warnings during proof post-processing).

src/proof/proof_rule.cpp
src/proof/proof_rule.h
src/smt/proof_manager.cpp
src/smt/proof_post_processor.cpp
src/theory/strings/infer_proof_cons.cpp
src/theory/strings/infer_proof_cons.h
src/theory/strings/proof_checker.cpp
src/theory/strings/sequences_stats.cpp
src/theory/strings/sequences_stats.h

index 286396523db4aad3dd8d8c0c2e3d1a17f6f7e9c3..acb49843f446667ce03028cee7b5d2cbaa6306f7 100644 (file)
@@ -157,6 +157,7 @@ const char* toString(PfRule id)
     case PfRule::RE_ELIM: return "RE_ELIM";
     case PfRule::STRING_CODE_INJ: return "STRING_CODE_INJ";
     case PfRule::STRING_SEQ_UNIT_INJ: return "STRING_SEQ_UNIT_INJ";
+    case PfRule::STRING_INFERENCE: return "STRING_INFERENCE";
     //================================================= Arith rules
     case PfRule::MACRO_ARITH_SCALE_SUM_UB:
       return "ARITH_SCALE_SUM_UPPER_BOUNDS";
index 7a3ce6882df4e78147bb3da309d36b3dba208e77..bc2b6437bf42266015c0efe31b25cb7e911f58df 100644 (file)
@@ -1070,6 +1070,15 @@ enum class PfRule : uint32_t
   // Also applies to the case where (seq.unit y) is a constant sequence
   // of length one.
   STRING_SEQ_UNIT_INJ,
+  //======================== Trusted
+  // ======== String inference
+  // Children: ?
+  // Arguments: (F id isRev exp)
+  // ---------------------
+  // Conclusion: F
+  // used to bookkeep an inference that has not yet been converted via
+  // strings::InferProofCons::convert.
+  STRING_INFERENCE,
 
   //================================================= Arithmetic rules
   // ======== Adding Inequalities
index 829d6f2f5f358480f4d110f0f767d0cdd57f182d..49f67e94c3eda1898896003eed4281fa8744c7d4 100644 (file)
@@ -82,6 +82,8 @@ PfManager::PfManager(Env& env, SmtEngine* smte)
         d_pfpp->setEliminateRule(PfRule::THEORY_REWRITE);
       }
     }
+    // theory-specific lazy proof reconstruction
+    d_pfpp->setEliminateRule(PfRule::STRING_INFERENCE);
     d_pfpp->setEliminateRule(PfRule::BV_BITBLAST);
   }
   d_false = NodeManager::currentNM()->mkConst(false);
index 98ec300935892aaa79b03c1cbbc74555b9a47dab..5ad311f8d9e427ba9e77adb0e6f3e7400c427481 100644 (file)
@@ -24,6 +24,7 @@
 #include "theory/builtin/proof_checker.h"
 #include "theory/bv/bitblast/proof_bitblaster.h"
 #include "theory/rewriter.h"
+#include "theory/strings/infer_proof_cons.h"
 #include "theory/theory.h"
 #include "util/rational.h"
 
@@ -1080,6 +1081,21 @@ Node ProofPostprocessCallback::expandMacros(PfRule id,
                           << std::endl;
     return sumBounds;
   }
+  else if (id == PfRule::STRING_INFERENCE)
+  {
+    // get the arguments
+    Node conc;
+    InferenceId iid;
+    bool isRev;
+    std::vector<Node> exp;
+    if (strings::InferProofCons::unpackArgs(args, conc, iid, isRev, exp))
+    {
+      if (strings::InferProofCons::addProofTo(cdp, conc, iid, isRev, exp))
+      {
+        return conc;
+      }
+    }
+  }
   else if (id == PfRule::BV_BITBLAST)
   {
     bv::BBProof bb(nullptr, d_pnm, true);
index f48d2941628b966bf95ac8b53ed2e44f72fe1a3c..e9a6da10452fd5300ab9118c08f777ae66eaf694 100644 (file)
@@ -59,6 +59,69 @@ void InferProofCons::notifyFact(const InferInfo& ii)
   d_lazyFactMap.insert(ii.d_conc, iic);
 }
 
+bool InferProofCons::addProofTo(CDProof* pf,
+                                Node conc,
+                                InferenceId infer,
+                                bool isRev,
+                                const std::vector<Node>& exp)
+{
+  // now go back and convert it to proof steps and add to proof
+  bool useBuffer = false;
+  ProofStep ps;
+  TheoryProofStepBuffer psb(pf->getManager()->getChecker());
+  // run the conversion
+  convert(infer, isRev, conc, exp, ps, psb, useBuffer);
+  // make the proof based on the step or the buffer
+  if (useBuffer)
+  {
+    if (!pf->addSteps(psb))
+    {
+      return false;
+    }
+  }
+  else
+  {
+    if (!pf->addStep(conc, ps))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+void InferProofCons::packArgs(Node conc,
+                              InferenceId infer,
+                              bool isRev,
+                              const std::vector<Node>& exp,
+                              std::vector<Node>& args)
+{
+  args.push_back(conc);
+  args.push_back(mkInferenceIdNode(infer));
+  args.push_back(NodeManager::currentNM()->mkConst(isRev));
+  // The vector exp is stored as arguments; its flatten form are premises. We
+  // need both since the grouping of exp is important, e.g. { (and a b), c }
+  // is different from { a, b, c } in the convert routine, since positions
+  // of formulas in exp have special meaning.
+  args.insert(args.end(), exp.begin(), exp.end());
+}
+
+bool InferProofCons::unpackArgs(const std::vector<Node>& args,
+                                Node& conc,
+                                InferenceId& infer,
+                                bool& isRev,
+                                std::vector<Node>& exp)
+{
+  Assert(args.size() >= 3);
+  conc = args[0];
+  if (!getInferenceId(args[1], infer))
+  {
+    return false;
+  }
+  isRev = args[2].getConst<bool>();
+  exp.insert(exp.end(), args.begin() + 3, args.end());
+  return true;
+}
+
 void InferProofCons::convert(InferenceId infer,
                              bool isRev,
                              Node conc,
@@ -134,7 +197,7 @@ void InferProofCons::convert(InferenceId infer,
           useBuffer = true;
         }
       }
-      if (!useBuffer)
+      else
       {
         // use the predicate version?
         ps.d_args.push_back(conc);
@@ -933,8 +996,6 @@ void InferProofCons::convert(InferenceId infer,
     ps.d_args.push_back(t);
     // use the trust rule
     ps.d_rule = PfRule::THEORY_INFERENCE;
-    // add to stats
-    d_statistics.d_inferencesNoPf << infer;
   }
   if (Trace.isOn("strings-ipc-debug"))
   {
@@ -1023,8 +1084,6 @@ Node InferProofCons::convertTrans(Node eqa,
 
 std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
 {
-  // temporary proof
-  CDProof pf(d_pnm);
   // get the inference
   NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact);
   if (it == d_lazyFactMap.end())
@@ -1038,28 +1097,20 @@ std::shared_ptr<ProofNode> InferProofCons::getProofFor(Node fact)
     }
   }
   AlwaysAssert(it != d_lazyFactMap.end());
-  // now go back and convert it to proof steps and add to proof
-  bool useBuffer = false;
-  ProofStep ps;
-  TheoryProofStepBuffer psb(d_pnm->getChecker());
   std::shared_ptr<InferInfo> ii = (*it).second;
-  // run the conversion
-  convert(ii->getId(), ii->d_idRev, ii->d_conc, ii->d_premises, ps, psb, useBuffer);
-  // make the proof based on the step or the buffer
-  if (useBuffer)
-  {
-    if (!pf.addSteps(psb))
-    {
-      return nullptr;
-    }
-  }
-  else
+  Assert(ii->d_conc == fact);
+  // make a placeholder proof using STRINGS_INFERENCE, which is reconstructed
+  // during post-process
+  CDProof pf(d_pnm);
+  std::vector<Node> args;
+  packArgs(ii->d_conc, ii->getId(), ii->d_idRev, ii->d_premises, args);
+  // must flatten
+  std::vector<Node> exp;
+  for (const Node& ec : ii->d_premises)
   {
-    if (!pf.addStep(fact, ps))
-    {
-      return nullptr;
-    }
+    utils::flattenOp(AND, ec, exp);
   }
+  pf.addStep(fact, PfRule::STRING_INFERENCE, exp, args);
   return pf.getProofFor(fact);
 }
 
@@ -1071,7 +1122,7 @@ std::string InferProofCons::identify() const
 bool InferProofCons::purifyCoreSubstitution(Node& tgt,
                                             std::vector<Node>& children,
                                             TheoryProofStepBuffer& psb,
-                                            bool concludeTgtNew) const
+                                            bool concludeTgtNew)
 {
   // collect the terms to purify, which are the LHS of the substitution
   std::unordered_set<Node> termsToPurify;
@@ -1106,7 +1157,7 @@ Node InferProofCons::purifyCorePredicate(
     Node lit,
     bool concludeNew,
     TheoryProofStepBuffer& psb,
-    std::unordered_set<Node>& termsToPurify) const
+    std::unordered_set<Node>& termsToPurify)
 {
   bool pol = lit.getKind() != NOT;
   Node atom = pol ? lit : lit[0];
@@ -1145,8 +1196,8 @@ Node InferProofCons::purifyCorePredicate(
   return newLit;
 }
 
-Node InferProofCons::purifyCoreTerm(
-    Node n, std::unordered_set<Node>& termsToPurify) const
+Node InferProofCons::purifyCoreTerm(Node n,
+                                    std::unordered_set<Node>& termsToPurify)
 {
   Assert(n.getType().isStringLike());
   if (n.getNumChildren() == 0)
index 6bc64a085dc22449018faa637adaa761c39f9ebf..110d231cf177ba1d8fab64ae3f266dd2096f3b05 100644 (file)
@@ -42,6 +42,10 @@ namespace strings {
  *
  * The main (private) method of this class is convert below, which is
  * called when we need to construct a proof node from an InferInfo.
+ *
+ * This class uses lazy proof reconstruction. Namely, the getProofFor method
+ * returns applications of the rule STRING_INFERENCE, which store the arguments
+ * to the proof conversion routine "convert" below.
  */
 class InferProofCons : public ProofGenerator
 {
@@ -72,10 +76,40 @@ class InferProofCons : public ProofGenerator
    *
    * It should be the case that a call was made to notifyFact(ii) where
    * ii.d_conc is fact in this SAT context.
+   *
+   * This returns the appropriate application of STRING_INFERENCE so that it
+   * can be reconstructed if necessary during proof post-processing.
    */
   std::shared_ptr<ProofNode> getProofFor(Node fact) override;
   /** Identify this generator (for debugging, etc..) */
   virtual std::string identify() const override;
+  /**
+   * Add proof of running convert on the given arguments to CDProof pf. This is
+   * called lazily during proof post-processing.
+   */
+  static bool addProofTo(CDProof* pf,
+                         Node conc,
+                         InferenceId infer,
+                         bool isRev,
+                         const std::vector<Node>& exp);
+  /**
+   * Pack arguments of a STRING_INFERENCE rule application in args. This proof
+   * rule stores the arguments to the convert method of this class below.
+   */
+  static void packArgs(Node conc,
+                       InferenceId infer,
+                       bool isRev,
+                       const std::vector<Node>& exp,
+                       std::vector<Node>& args);
+  /**
+   * Inverse of the above method, which recovers the arguments that were packed
+   * into the vector args.
+   */
+  static bool unpackArgs(const std::vector<Node>& args,
+                         Node& conc,
+                         InferenceId& infer,
+                         bool& isRev,
+                         std::vector<Node>& exp);
 
  private:
   /** convert
@@ -100,26 +134,26 @@ class InferProofCons : public ProofGenerator
    * In particular, psb will contain a set of steps that form a proof
    * whose conclusion is ii.d_conc and whose free assumptions are ii.d_ant.
    */
-  void convert(InferenceId infer,
-               bool isRev,
-               Node conc,
-               const std::vector<Node>& exp,
-               ProofStep& ps,
-               TheoryProofStepBuffer& psb,
-               bool& useBuffer);
+  static void convert(InferenceId infer,
+                      bool isRev,
+                      Node conc,
+                      const std::vector<Node>& exp,
+                      ProofStep& ps,
+                      TheoryProofStepBuffer& psb,
+                      bool& useBuffer);
   /**
    * Convert length proof. If this method returns true, it adds proof step(s)
    * to the buffer psb that conclude lenReq from premises lenExp.
    */
-  bool convertLengthPf(Node lenReq,
-                       const std::vector<Node>& lenExp,
-                       TheoryProofStepBuffer& psb);
+  static bool convertLengthPf(Node lenReq,
+                              const std::vector<Node>& lenExp,
+                              TheoryProofStepBuffer& psb);
   /**
    * Helper method, adds the proof of (TRANS eqa eqb) into the proof step
    * buffer psb, where eqa and eqb are flipped as needed. Returns the
    * conclusion, or null if we were not able to construct a TRANS step.
    */
-  Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb);
+  static Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb);
   /**
    * Purify core substitution.
    *
@@ -159,10 +193,10 @@ class InferProofCons : public ProofGenerator
    * children'[i] from children[i] for all i, and tgt' from tgt (or vice versa
    * based on concludeTgtNew).
    */
-  bool purifyCoreSubstitution(Node& tgt,
-                              std::vector<Node>& children,
-                              TheoryProofStepBuffer& psb,
-                              bool concludeTgtNew = false) const;
+  static bool purifyCoreSubstitution(Node& tgt,
+                                     std::vector<Node>& children,
+                                     TheoryProofStepBuffer& psb,
+                                     bool concludeTgtNew = false);
   /**
    * Return the purified form of the predicate lit with respect to a set of
    * terms to purify, call the returned literal lit'.
@@ -171,17 +205,17 @@ class InferProofCons : public ProofGenerator
    * Note that string predicates that require purification are string
    * (dis)equalities only.
    */
-  Node purifyCorePredicate(Node lit,
-                           bool concludeNew,
-                           TheoryProofStepBuffer& psb,
-                           std::unordered_set<Node>& termsToPurify) const;
+  static Node purifyCorePredicate(Node lit,
+                                  bool concludeNew,
+                                  TheoryProofStepBuffer& psb,
+                                  std::unordered_set<Node>& termsToPurify);
   /**
    * Purify term with respect to a set of terms to purify. This replaces
    * all terms to purify with their purification variables that occur in
    * positions that are relevant for the core calculus of strings (direct
    * children of concat or equal).
    */
-  Node purifyCoreTerm(Node n, std::unordered_set<Node>& termsToPurify) const;
+  static Node purifyCoreTerm(Node n, std::unordered_set<Node>& termsToPurify);
   /** the proof node manager */
   ProofNodeManager* d_pnm;
   /** The lazy fact map */
index 5a4008724f1794ff7519d1a787dbf7d81107160a..f566a4f390119017523a6d47689c0a408d74669c 100644 (file)
@@ -53,6 +53,8 @@ void StringProofRuleChecker::registerTo(ProofChecker* pc)
   pc->registerChecker(PfRule::RE_ELIM, this);
   pc->registerChecker(PfRule::STRING_CODE_INJ, this);
   pc->registerChecker(PfRule::STRING_SEQ_UNIT_INJ, this);
+  // trusted rule
+  pc->registerTrustedChecker(PfRule::STRING_INFERENCE, this, 2);
 }
 
 Node StringProofRuleChecker::checkInternal(PfRule id,
@@ -504,6 +506,11 @@ Node StringProofRuleChecker::checkInternal(PfRule id,
     AlwaysAssert(t[0].getType() == t[1].getType());
     return t[0].eqNode(t[1]);
   }
+  else if (id == PfRule::STRING_INFERENCE)
+  {
+    Assert(args.size() >= 3);
+    return args[0];
+  }
   return Node::null();
 }
 
index 2c4834de2a2c1acc487af4a73b4f9f17f0a52d8e..85939d75fc717347f21a692d385f09e82acec718 100644 (file)
@@ -26,8 +26,6 @@ SequencesStatistics::SequencesStatistics()
         smtStatisticsRegistry().registerInt("theory::strings::checkRuns")),
       d_strategyRuns(
           smtStatisticsRegistry().registerInt("theory::strings::strategyRuns")),
-      d_inferencesNoPf(smtStatisticsRegistry().registerHistogram<InferenceId>(
-          "theory::strings::inferencesNoPf")),
       d_cdSimplifications(smtStatisticsRegistry().registerHistogram<Kind>(
           "theory::strings::cdSimplifications")),
       d_reductions(smtStatisticsRegistry().registerHistogram<Kind>(
index 398b8894d45b8b9ba64d4560bff45ef41acdb370..9ec9d3434350c25c28a82382a134a197e2a023b9 100644 (file)
@@ -57,13 +57,6 @@ class SequencesStatistics
   /** Number of calls to run the strategy */
   IntStat d_strategyRuns;
   //--------------- inferences
-  /**
-   * Counts the number of applications of each type of inference that were not
-   * processed as a proof step. This is a subset of the statistics in
-   * TheoryInferenceManager, i.e.
-   * (theory::strings::inferences{Facts,Lemmas,Conflicts}).
-   */
-  HistogramStat<InferenceId> d_inferencesNoPf;
   /**
    * Counts the number of applications of each type of context-dependent
    * simplification. The sum of this map is equal to the number of EXTF or