From 30aca1c6289f57ec8178c20736e9a8420fd6f9f1 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Tue, 18 Jan 2022 15:03:26 -0600
Subject: [PATCH] Distinguish purification types for strings proof
 reconstruction (#7521)

This improves the success rate for strings proof reconstruction for context-dependent simplifications.

It also makes a minor modification to how EXTF inferences work. As a special case, A => B becomes A ^ ~B => false when B is already equal to false. This PR removes this behavior for the sake of consistency. (This could be explored as a more general strategy for turning facts to conflicts from any source if necessary).

The strings solver applies context-dependent simplification in various ways. This ensures that the proof reconstruction is faithful for cases of congruence + rewriting instead of substitution+rewriting, which the inferences STRINGS_EXTF and STRINGS_EXTF_N rely on.

This fixes 11 cases where proof reconstruction fails for string theory lemmas on Amazon benchmarks.
---
 src/theory/strings/extf_solver.cpp      |  10 +-
 src/theory/strings/infer_proof_cons.cpp | 182 ++++++++++++++++++------
 src/theory/strings/infer_proof_cons.h   |  68 +++++++--
 3 files changed, 197 insertions(+), 63 deletions(-)
diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp
index 07105a311..0b789c1a9 100644
--- a/src/theory/strings/extf_solver.cpp
+++ b/src/theory/strings/extf_solver.cpp
@@ -392,15 +392,7 @@ void ExtfSolver::checkExtfEval(int effort)
             {
               if (n.getType().isBoolean())
               {
-                if (d_state.areEqual(n, nrc == d_true ? d_false : d_true))
-                {
-                  einfo.d_exp.push_back(nrc == d_true ? n.negate() : n);
-                  conc = d_false;
-                }
-                else
-                {
-                  conc = nrc == d_true ? n : n.negate();
-                }
+                conc = nrc == d_true ? n : n.negate();
               }
               else
               {
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp
index 81da50062..1bed68dc7 100644
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -164,6 +164,8 @@ void InferProofCons::convert(InferenceId infer,
   switch (infer)
   {
     // ========================== equal by substitution+rewriting
+    case InferenceId::STRINGS_EXTF:
+    case InferenceId::STRINGS_EXTF_N:
     case InferenceId::STRINGS_I_NORM_S:
     case InferenceId::STRINGS_I_CONST_MERGE:
     case InferenceId::STRINGS_I_NORM:
@@ -171,23 +173,37 @@ void InferProofCons::convert(InferenceId infer,
     case InferenceId::STRINGS_NORMAL_FORM:
     case InferenceId::STRINGS_CODE_PROXY:
     {
+      PurifyType pt = PurifyType::CORE_EQ;
+      if (infer == InferenceId::STRINGS_EXTF
+          || infer == InferenceId::STRINGS_EXTF_N)
+      {
+        // since we use congruence+rewriting, and not substitution+rewriting,
+        // these must purify a substitution over arguments to the left hand
+        // side of what we are proving.
+        pt = PurifyType::EXTF;
+      }
       std::vector<Node> pcs = ps.d_children;
       Node pconc = conc;
       // purify core substitution proves conc from pconc if necessary,
       // we apply MACRO_SR_PRED_INTRO to prove pconc
-      if (purifyCoreSubstitution(pconc, pcs, psb, false))
+      if (purifyCoreSubstitutionAndTarget(pt, pconc, pcs, psb, false))
       {
+        Trace("strings-ipc-core") << "...purified substitution to " << pcs
+                                  << ", now apply to " << pconc << std::endl;
         if (psb.applyPredIntro(pconc, pcs))
         {
           useBuffer = true;
         }
       }
+      else
+      {
+        Trace("strings-ipc-core")
+            << "...failed to purify substitution" << std::endl;
+      }
     }
     break;
     // ========================== substitution + rewriting
     case InferenceId::STRINGS_RE_NF_CONFLICT:
-    case InferenceId::STRINGS_EXTF:
-    case InferenceId::STRINGS_EXTF_N:
     case InferenceId::STRINGS_EXTF_D:
     case InferenceId::STRINGS_EXTF_D_N:
     case InferenceId::STRINGS_I_CONST_CONFLICT:
@@ -196,8 +212,10 @@ void InferProofCons::convert(InferenceId infer,
       if (!ps.d_children.empty())
       {
         std::vector<Node> exps(ps.d_children.begin(), ps.d_children.end() - 1);
-        Node src = ps.d_children[ps.d_children.size() - 1];
-        if (psb.applyPredTransform(src, conc, exps))
+        Node psrc = ps.d_children[ps.d_children.size() - 1];
+        // we apply the substitution on the purified form to get the
+        // original conclusion
+        if (psb.applyPredTransform(psrc, conc, exps))
         {
           useBuffer = true;
         }
@@ -331,7 +349,8 @@ void InferProofCons::convert(InferenceId infer,
       Node pmainEq = mainEq;
       // we transform mainEq to pmainEq and then use this as the first
       // argument to MACRO_SR_PRED_ELIM.
-      if (!purifyCoreSubstitution(pmainEq, pcsr, psb, true))
+      if (!purifyCoreSubstitutionAndTarget(
+              PurifyType::CORE_EQ, pmainEq, pcsr, psb, true))
       {
         break;
       }
@@ -1164,24 +1183,57 @@ std::string InferProofCons::identify() const
   return "strings::InferProofCons";
 }
 
-bool InferProofCons::purifyCoreSubstitution(Node& tgt,
-                                            std::vector<Node>& children,
-                                            TheoryProofStepBuffer& psb,
-                                            bool concludeTgtNew)
+bool InferProofCons::purifyCoreSubstitutionAndTarget(
+    PurifyType pt,
+    Node& tgt,
+    std::vector<Node>& children,
+    TheoryProofStepBuffer& psb,
+    bool concludeTgtNew)
 {
   // collect the terms to purify, which are the LHS of the substitution
   std::unordered_set<Node> termsToPurify;
+  if (!purifyCoreSubstitution(children, psb, termsToPurify))
+  {
+    return false;
+  }
+  // no need to purify, e.g. if all LHS of substituion are variables
+  if (termsToPurify.empty())
+  {
+    return true;
+  }
+  // now, purify the target predicate
+  tgt = purifyPredicate(pt, tgt, concludeTgtNew, psb, termsToPurify);
+  if (tgt.isNull())
+  {
+    Trace("strings-ipc-pure-subs")
+        << "...failed to purify target " << tgt << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool InferProofCons::purifyCoreSubstitution(
+    std::vector<Node>& children,
+    TheoryProofStepBuffer& psb,
+    std::unordered_set<Node>& termsToPurify)
+{
   for (const Node& nc : children)
   {
-    Assert(nc.getKind() == EQUAL && nc[0].getType().isStringLike());
-    termsToPurify.insert(nc[0]);
+    Assert(nc.getKind() == EQUAL);
+    if (!nc[0].isVar())
+    {
+      termsToPurify.insert(nc[0]);
+    }
   }
   // now, purify each of the children of the substitution
   for (size_t i = 0, nchild = children.size(); i < nchild; i++)
   {
-    Node pnc = purifyCorePredicate(children[i], true, psb, termsToPurify);
+    Node pnc = purifyPredicate(
+        PurifyType::SUBS_EQ, children[i], true, psb, termsToPurify);
     if (pnc.isNull())
     {
+      Trace("strings-ipc-pure-subs")
+          << "...failed to purify " << children[i] << std::endl;
       return false;
     }
     if (children[i] != pnc)
@@ -1193,44 +1245,72 @@ bool InferProofCons::purifyCoreSubstitution(Node& tgt,
     // we now should have a substitution with only atomic terms
     Assert(children[i][0].getNumChildren() == 0);
   }
-  // now, purify the target predicate
-  tgt = purifyCorePredicate(tgt, concludeTgtNew, psb, termsToPurify);
-  return !tgt.isNull();
+  return true;
 }
 
-Node InferProofCons::purifyCorePredicate(
-    Node lit,
-    bool concludeNew,
-    TheoryProofStepBuffer& psb,
-    std::unordered_set<Node>& termsToPurify)
+Node InferProofCons::purifyPredicate(PurifyType pt,
+                                     Node lit,
+                                     bool concludeNew,
+                                     TheoryProofStepBuffer& psb,
+                                     std::unordered_set<Node>& termsToPurify)
 {
   bool pol = lit.getKind() != NOT;
   Node atom = pol ? lit : lit[0];
-  if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike())
+  NodeManager* nm = NodeManager::currentNM();
+  Node newLit;
+  if (pt == PurifyType::SUBS_EQ)
   {
-    // we only purify string (dis)equalities
-    return lit;
+    if (atom.getKind() != EQUAL)
+    {
+      Assert(false) << "Expected equality";
+      return lit;
+    }
+    // purify the LHS directly, purify the RHS as a core term
+    newLit = nm->mkNode(EQUAL,
+                        maybePurifyTerm(atom[0], termsToPurify),
+                        purifyCoreTerm(atom[1], termsToPurify));
   }
-  // purify both sides of the equality
-  std::vector<Node> pcs;
-  bool childChanged = false;
-  for (const Node& lc : atom)
+  else if (pt == PurifyType::CORE_EQ)
   {
-    Node plc = purifyCoreTerm(lc, termsToPurify);
-    childChanged = childChanged || plc != lc;
-    pcs.push_back(plc);
+    if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike())
+    {
+      // we only purify string (dis)equalities
+      return lit;
+    }
+    // purify both sides of the equality
+    std::vector<Node> pcs;
+    for (const Node& lc : atom)
+    {
+      pcs.push_back(purifyCoreTerm(lc, termsToPurify));
+    }
+    newLit = nm->mkNode(EQUAL, pcs);
   }
-  if (!childChanged)
+  else if (pt == PurifyType::EXTF)
   {
-    return lit;
+    if (atom.getKind() == EQUAL)
+    {
+      // purify the left hand side, which should be an extended function
+      newLit = nm->mkNode(EQUAL, purifyApp(atom[0], termsToPurify), atom[1]);
+    }
+    else
+    {
+      // predicate case, e.g. for inferring contains
+      newLit = purifyApp(atom, termsToPurify);
+    }
+  }
+  else
+  {
+    Assert(false) << "Unknown purify type in InferProofCons::purifyPredicate";
   }
-  NodeManager* nm = NodeManager::currentNM();
-  Node newLit = nm->mkNode(EQUAL, pcs);
   if (!pol)
   {
     newLit = newLit.notNode();
   }
-  Assert(lit != newLit);
+  if (lit == newLit)
+  {
+    // no change
+    return lit;
+  }
   // prove by transformation, should always succeed
   if (!psb.applyPredTransform(
           concludeNew ? lit : newLit, concludeNew ? newLit : lit, {}))
@@ -1244,12 +1324,6 @@ Node InferProofCons::purifyCorePredicate(
 Node InferProofCons::purifyCoreTerm(Node n,
                                     std::unordered_set<Node>& termsToPurify)
 {
-  Assert(n.getType().isStringLike());
-  if (n.getNumChildren() == 0)
-  {
-    return n;
-  }
-  NodeManager* nm = NodeManager::currentNM();
   if (n.getKind() == STRING_CONCAT)
   {
     std::vector<Node> pcs;
@@ -1257,14 +1331,34 @@ Node InferProofCons::purifyCoreTerm(Node n,
     {
       pcs.push_back(purifyCoreTerm(nc, termsToPurify));
     }
-    return nm->mkNode(STRING_CONCAT, pcs);
+    return NodeManager::currentNM()->mkNode(STRING_CONCAT, pcs);
+  }
+  return maybePurifyTerm(n, termsToPurify);
+}
+
+Node InferProofCons::purifyApp(Node n, std::unordered_set<Node>& termsToPurify)
+{
+  if (n.getNumChildren() == 0)
+  {
+    return n;
   }
+  std::vector<Node> pcs;
+  for (const Node& nc : n)
+  {
+    pcs.push_back(maybePurifyTerm(nc, termsToPurify));
+  }
+  return NodeManager::currentNM()->mkNode(n.getKind(), pcs);
+}
+
+Node InferProofCons::maybePurifyTerm(Node n,
+                                     std::unordered_set<Node>& termsToPurify)
+{
   if (termsToPurify.find(n) == termsToPurify.end())
   {
     // did not need to purify
     return n;
   }
-  SkolemManager* sm = nm->getSkolemManager();
+  SkolemManager* sm = NodeManager::currentNM()->getSkolemManager();
   Node k = sm->mkPurifySkolem(n, "k");
   return k;
 }
diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h
index 02b266fd7..6022f5443 100644
--- a/src/theory/strings/infer_proof_cons.h
+++ b/src/theory/strings/infer_proof_cons.h
@@ -159,6 +159,15 @@ class InferProofCons : public ProofGenerator
    * conclusion, or null if we were not able to construct a TRANS step.
    */
   static Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb);
+  enum class PurifyType
+  {
+    // we are purifying an equality corresponding to a substitution
+    SUBS_EQ,
+    // we are purifying a (dis)equality in the core calculus
+    CORE_EQ,
+    // we are purifying a equality or predicate for extended function rewriting
+    EXTF
+  };
   /**
    * Purify core substitution.
    *
@@ -188,6 +197,26 @@ class InferProofCons : public ProofGenerator
    * in psb ensure that a proof step involving the purified substitution will
    * have the same net effect as a proof step using the original substitution.
    *
+   * The argument pt determines which arguments are relevant for purification.
+   * For core calculus (CORE_EQ) rules, examples of relevant positions are
+   * non-concatenation terms in positions like:
+   *   (= * (str.++ * (str.++ * *) * *))
+   *   (not (= (str.++ * *) *))
+   * For extended function simplification, examples of relevant positions are:
+   *   (= (str.replace * * *) "")
+   *   (str.contains * *)
+   * If we are purifying a substitution with equality, the LHS is a relevant
+   * position to purify, and the RHS is treated like CORE_EQ.
+   *
+   * For example, given substitution (= t ""), an example
+   * inference in the core calculus is:
+   *   (= (str.++ t "A") (str.++ "B" u)) => false
+   * An example of an extended function inference is:
+   *   (= (str.replace (str.substr t 0 2) t "C") (str.++ "C" f[t]))
+   * Note for the latter, we do not apply the substitution to
+   * (str.substr t 0 2).
+   *
+   * @param pt Determines the positions that are relevant for purification.
    * @param tgt The term we were originally going to apply the substitution to.
    * @param children The premises corresponding to the substitution.
    * @param psb The proof step buffer
@@ -198,22 +227,31 @@ class InferProofCons : public ProofGenerator
    * children'[i] from children[i] for all i, and tgt' from tgt (or vice versa
    * based on concludeTgtNew).
    */
-  static bool purifyCoreSubstitution(Node& tgt,
-                                     std::vector<Node>& children,
+  static bool purifyCoreSubstitutionAndTarget(PurifyType pt,
+                                              Node& tgt,
+                                              std::vector<Node>& children,
+                                              TheoryProofStepBuffer& psb,
+                                              bool concludeTgtNew = false);
+  /**
+   * Same as above, without a target. This updates termsToPurify with the
+   * set of LHS of the substitutions, which are terms that must be purified
+   * when applying the resulting substitution to a target.
+   */
+  static bool purifyCoreSubstitution(std::vector<Node>& children,
                                      TheoryProofStepBuffer& psb,
-                                     bool concludeTgtNew = false);
+                                     std::unordered_set<Node>& termsToPurify);
   /**
    * Return the purified form of the predicate lit with respect to a set of
    * terms to purify, call the returned literal lit'.
    * If concludeNew is true, then we add a proof of lit' from lit in psb;
-   * otherwise we add a proof of lit from lit'.
-   * Note that string predicates that require purification are string
-   * (dis)equalities only.
+   * otherwise we add a proof of lit from lit'. The positions which are purified
+   * are configurable based on the argument pt.
    */
-  static Node purifyCorePredicate(Node lit,
-                                  bool concludeNew,
-                                  TheoryProofStepBuffer& psb,
-                                  std::unordered_set<Node>& termsToPurify);
+  static Node purifyPredicate(PurifyType pt,
+                              Node lit,
+                              bool concludeNew,
+                              TheoryProofStepBuffer& psb,
+                              std::unordered_set<Node>& termsToPurify);
   /**
    * Purify term with respect to a set of terms to purify. This replaces
    * all terms to purify with their purification variables that occur in
@@ -221,6 +259,16 @@ class InferProofCons : public ProofGenerator
    * children of concat or equal).
    */
   static Node purifyCoreTerm(Node n, std::unordered_set<Node>& termsToPurify);
+  /**
+   * Purify application, which replaces each direct child nc of n with
+   * maybePurifyTerm(nc, termsToPurify).
+   */
+  static Node purifyApp(Node n, std::unordered_set<Node>& termsToPurify);
+  /**
+   * Maybe purify term, which returns the skolem variable for n if it occurs
+   * in termsToPurify.
+   */
+  static Node maybePurifyTerm(Node n, std::unordered_set<Node>& termsToPurify);
   /** the proof node manager */
   ProofNodeManager* d_pnm;
   /** The lazy fact map */
-- 
2.30.2