From ac6150d7992b5dd1cace8d8d6e0d308e4a741c12 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Mon, 30 Nov 2020 09:41:37 -0600
Subject: [PATCH] (proof-new) Proofs for regular expression elimination (#5361)

Adds support for proofs for regular expression elimination in TheoryStrings::ppRewrite via a coarse-grained rule RE_ELIM.
This rule is updated by this PR to distinguish whether we apply aggressive elimination. Aggressive elimination is not currently supported, since it is non-deterministic due to generating fresh BOUND_VARIABLE.
---
 src/expr/proof_rule.h                 | 10 +++--
 src/theory/strings/proof_checker.cpp  | 17 ++++++--
 src/theory/strings/regexp_elim.cpp    | 57 +++++++++++++++++++++------
 src/theory/strings/regexp_elim.h      | 36 +++++++++++++++--
 src/theory/strings/theory_strings.cpp | 27 +++++++------
 5 files changed, 111 insertions(+), 36 deletions(-)

diff --git a/src/expr/proof_rule.h b/src/expr/proof_rule.h
index 19efe6285..58dfd973c 100644
--- a/src/expr/proof_rule.h
+++ b/src/expr/proof_rule.h
@@ -959,11 +959,13 @@ enum class PfRule : uint32_t
   // fixed length of component i of the regular expression concatenation R.
   RE_UNFOLD_NEG_CONCAT_FIXED,
   // ======== Regular expression elimination
-  // Children: (P:F)
-  // Arguments: none
+  // Children: none
+  // Arguments: (F, b)
   // ---------------------
-  // Conclusion: R
-  // where R = strings::RegExpElimination::eliminate(F).
+  // Conclusion: (= F strings::RegExpElimination::eliminate(F, b))
+  // where b is a Boolean indicating whether we are using aggressive
+  // eliminations. Notice this rule concludes (= F F) if no eliminations
+  // are performed for F.
   RE_ELIM,
   //======================== Code points
   // Children: none
diff --git a/src/theory/strings/proof_checker.cpp b/src/theory/strings/proof_checker.cpp
index 0b6cf6652..2726097bc 100644
--- a/src/theory/strings/proof_checker.cpp
+++ b/src/theory/strings/proof_checker.cpp
@@ -438,9 +438,20 @@ Node StringProofRuleChecker::checkInternal(PfRule id,
   }
   else if (id == PfRule::RE_ELIM)
   {
-    Assert(children.size() == 1);
-    Assert(args.empty());
-    return RegExpElimination::eliminate(children[0]);
+    Assert(children.empty());
+    Assert(args.size() == 2);
+    bool isAgg;
+    if (!getBool(args[1], isAgg))
+    {
+      return Node::null();
+    }
+    Node ea = RegExpElimination::eliminate(args[0], isAgg);
+    // if we didn't eliminate, then this trivially proves the reflexive equality
+    if (ea.isNull())
+    {
+      ea = args[0];
+    }
+    return args[0].eqNode(ea);
   }
   else if (id == PfRule::STRING_CODE_INJ)
   {
diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp
index 1d0db0e4d..aaa9ffc48 100644
--- a/src/theory/strings/regexp_elim.cpp
+++ b/src/theory/strings/regexp_elim.cpp
@@ -20,30 +20,58 @@
 #include "theory/strings/regexp_entail.h"
 #include "theory/strings/theory_strings_utils.h"
 
-using namespace CVC4;
 using namespace CVC4::kind;
-using namespace CVC4::theory;
-using namespace CVC4::theory::strings;
 
-RegExpElimination::RegExpElimination()
+namespace CVC4 {
+namespace theory {
+namespace strings {
+
+RegExpElimination::RegExpElimination(bool isAgg,
+                                     ProofNodeManager* pnm,
+                                     context::Context* c)
+    : d_isAggressive(isAgg),
+      d_pnm(pnm),
+      d_epg(pnm == nullptr
+                ? nullptr
+                : new EagerProofGenerator(pnm, c, "RegExpElimination::epg"))
 {
 }
 
-Node RegExpElimination::eliminate(Node atom)
+Node RegExpElimination::eliminate(Node atom, bool isAgg)
 {
   Assert(atom.getKind() == STRING_IN_REGEXP);
   if (atom[1].getKind() == REGEXP_CONCAT)
   {
-    return eliminateConcat(atom);
+    return eliminateConcat(atom, isAgg);
   }
   else if (atom[1].getKind() == REGEXP_STAR)
   {
-    return eliminateStar(atom);
+    return eliminateStar(atom, isAgg);
   }
   return Node::null();
 }
 
-Node RegExpElimination::eliminateConcat(Node atom)
+TrustNode RegExpElimination::eliminateTrusted(Node atom)
+{
+  Node eatom = eliminate(atom, d_isAggressive);
+  if (!eatom.isNull())
+  {
+    // Currently aggressive doesnt work due to fresh bound variables
+    if (isProofEnabled() && !d_isAggressive)
+    {
+      Node eq = atom.eqNode(eatom);
+      Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive);
+      std::shared_ptr<ProofNode> pn =
+          d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq);
+      d_epg->setProofFor(eq, pn);
+      return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
+    }
+    return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
+  }
+  return TrustNode::null();
+}
+
+Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
 {
   NodeManager* nm = NodeManager::currentNM();
   Node x = atom[0];
@@ -217,7 +245,7 @@ Node RegExpElimination::eliminateConcat(Node atom)
         // otherwise, we can use indexof to represent some next occurrence
         if (gap_exact[i + 1] && i + 1 != size)
         {
-          if (!options::regExpElimAgg())
+          if (!isAgg)
           {
             canProcess = false;
             break;
@@ -330,7 +358,7 @@ Node RegExpElimination::eliminateConcat(Node atom)
     }
   }
 
-  if (!options::regExpElimAgg())
+  if (!isAgg)
   {
     return Node::null();
   }
@@ -455,9 +483,9 @@ Node RegExpElimination::eliminateConcat(Node atom)
   return Node::null();
 }
 
-Node RegExpElimination::eliminateStar(Node atom)
+Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
 {
-  if (!options::regExpElimAgg())
+  if (!isAgg)
   {
     return Node::null();
   }
@@ -580,3 +608,8 @@ Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
                    << "." << std::endl;
   return atomElim;
 }
+bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; }
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace CVC4
diff --git a/src/theory/strings/regexp_elim.h b/src/theory/strings/regexp_elim.h
index 0c1acd29d..9d6fecc93 100644
--- a/src/theory/strings/regexp_elim.h
+++ b/src/theory/strings/regexp_elim.h
@@ -18,6 +18,8 @@
 #define CVC4__THEORY__STRINGS__REGEXP_ELIM_H
 
 #include "expr/node.h"
+#include "theory/eager_proof_generator.h"
+#include "theory/trust_node.h"
 
 namespace CVC4 {
 namespace theory {
@@ -33,14 +35,32 @@ namespace strings {
 class RegExpElimination
 {
  public:
-  RegExpElimination();
+  /**
+   * @param isAgg Whether aggressive eliminations are enabled
+   * @param pnm The proof node manager to use (for proofs)
+   * @param c The context to use (for proofs)
+   */
+  RegExpElimination(bool isAgg = false,
+                    ProofNodeManager* pnm = nullptr,
+                    context::Context* c = nullptr);
   /** eliminate membership
    *
    * This method takes as input a regular expression membership atom of the
    * form (str.in.re x R). If this method returns a non-null node ret, then ret
    * is equivalent to atom.
+   *
+   * @param atom The node to eliminate
+   * @param isAgg Whether we apply aggressive elimination techniques
+   * @return The node with regular expressions eliminated, or null if atom
+   * was unchanged.
+   */
+  static Node eliminate(Node atom, bool isAgg);
+
+  /**
+   * Return the trust node corresponding to rewriting n based on eliminate
+   * above.
    */
-  static Node eliminate(Node atom);
+  TrustNode eliminateTrusted(Node atom);
 
  private:
   /** return elimination
@@ -50,9 +70,17 @@ class RegExpElimination
    */
   static Node returnElim(Node atom, Node atomElim, const char* id);
   /** elimination for regular expression concatenation */
-  static Node eliminateConcat(Node atom);
+  static Node eliminateConcat(Node atom, bool isAgg);
   /** elimination for regular expression star */
-  static Node eliminateStar(Node atom);
+  static Node eliminateStar(Node atom, bool isAgg);
+  /** Are proofs enabled? */
+  bool isProofEnabled() const;
+  /** Are aggressive eliminations enabled? */
+  bool d_isAggressive;
+  /** Pointer to the proof node manager */
+  ProofNodeManager* d_pnm;
+  /** An eager proof generator for storing proofs in eliminate trusted above */
+  std::unique_ptr<EagerProofGenerator> d_epg;
 }; /* class RegExpElimination */
 
 }  // namespace strings
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index a9e2c0051..d3e9e34f1 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -997,27 +997,28 @@ void TheoryStrings::checkRegisterTermsNormalForms()
 TrustNode TheoryStrings::ppRewrite(TNode atom)
 {
   Trace("strings-ppr") << "TheoryStrings::ppRewrite " << atom << std::endl;
+  TrustNode ret;
   Node atomRet = atom;
   if (options::regExpElim() && atom.getKind() == STRING_IN_REGEXP)
   {
     // aggressive elimination of regular expression membership
-    Node atomElim = d_regexp_elim.eliminate(atomRet);
-    if (!atomElim.isNull())
+    ret = d_regexp_elim.eliminateTrusted(atomRet);
+    if (!ret.isNull())
     {
-      Trace("strings-ppr") << "  rewrote " << atom << " -> " << atomElim
+      Trace("strings-ppr") << "  rewrote " << atom << " -> " << ret.getNode()
                            << " via regular expression elimination."
                            << std::endl;
-      atomRet = atomElim;
+      atomRet = ret.getNode();
     }
   }
   if( !options::stringLazyPreproc() ){
     //eager preprocess here
     std::vector< Node > new_nodes;
     StringsPreprocess* p = d_esolver.getPreprocess();
-    Node ret = p->processAssertion(atomRet, new_nodes);
-    if (ret != atomRet)
+    Node pret = p->processAssertion(atomRet, new_nodes);
+    if (pret != atomRet)
     {
-      Trace("strings-ppr") << "  rewrote " << atomRet << " -> " << ret
+      Trace("strings-ppr") << "  rewrote " << atomRet << " -> " << pret
                            << ", with " << new_nodes.size() << " lemmas."
                            << std::endl;
       for (const Node& lem : new_nodes)
@@ -1026,16 +1027,16 @@ TrustNode TheoryStrings::ppRewrite(TNode atom)
         ++(d_statistics.d_lemmasEagerPreproc);
         d_out->lemma(lem);
       }
-      atomRet = ret;
+      atomRet = pret;
+      // Don't support proofs yet, thus we must return nullptr. This is the
+      // case even if we had proven the elimination via regexp elimination
+      // above.
+      ret = TrustNode::mkTrustRewrite(atom, atomRet, nullptr);
     }else{
       Assert(new_nodes.empty());
     }
   }
-  if (atomRet != atom)
-  {
-    return TrustNode::mkTrustRewrite(atom, atomRet, nullptr);
-  }
-  return TrustNode::null();
+  return ret;
 }
 
 /** run the given inference step */
-- 
2.30.2