From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Sun, 18 Aug 2019 12:32:40 +0000 (-0500)
Subject: Context-independent regular expression unfolding (#3168)
X-Git-Tag: cvc5-1.0.0~4005
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b1e9c6b7f5e4beb0183e48f5a1cbbf679f52d7d7;p=cvc5.git

Context-independent regular expression unfolding (#3168)
---

diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp
index 463891839..ea38c4dd9 100644
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -80,7 +80,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
   if (!addedLemma)
   {
     // get all memberships
-    std::vector<Node> allMems;
+    std::map<Node, Node> allMems;
     for (const std::pair<const Node, std::vector<Node> >& mr : mems)
     {
       for (const Node& m : mr.second)
@@ -88,7 +88,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
         bool polarity = m.getKind() != NOT;
         if (polarity || !options::stringIgnNegMembership())
         {
-          allMems.push_back(m);
+          allMems[m] = mr.first;
         }
       }
     }
@@ -100,8 +100,10 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
     // check positive (e=0), then negative (e=1) memberships
     for (unsigned e = 0; e < 2; e++)
     {
-      for (const Node& assertion : allMems)
+      for (const std::pair<const Node, Node>& mp : allMems)
       {
+        Node assertion = mp.first;
+        Node rep = mp.second;
         // check regular expression membership
         Trace("regexp-debug")
             << "Check : " << assertion << " "
@@ -126,43 +128,67 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
         bool flag = true;
         Node x = atom[0];
         Node r = atom[1];
+        Assert(rep == d_parent.getRepresentative(x));
+        // The following code takes normal forms into account for the purposes
+        // of simplifying a regular expression membership x in R. For example,
+        // if x = "A" in the current context, then we may be interested in
+        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
+        // membership to nx in R', then:
+        // - nfexp => ( x in R ) <=> nx in R'
+        // - rnfexp => R = R'
+        // We use these explanations below as assumptions on inferences when
+        // appropriate. Notice that for inferring conflicts and tautologies,
+        // we use the normal form of x always. This is because we always want to
+        // discover conflicts/tautologies whenever possible.
+        // For inferences based on regular expression unfolding, we do not use
+        // the normal form of x. The reason is that it is better to unfold
+        // regular expression memberships in a context-indepedent manner,
+        // that is, not taking into account the current normal form of x, since
+        // this ensures these lemmas are still relevant after backtracking.
+        std::vector<Node> nfexp;
         std::vector<Node> rnfexp;
-
+        // The normal form of x is stored in nx, while x is left unchanged.
+        Node nx = x;
         if (!x.isConst())
         {
-          x = d_parent.getNormalString(x, rnfexp);
-          changed = true;
+          nx = d_parent.getNormalString(x, nfexp);
         }
+        // If r is not a constant regular expression, we update it based on
+        // normal forms, which may concretize its variables.
         if (!d_regexp_opr.checkConstRegExp(r))
         {
           r = getNormalSymRegExp(r, rnfexp);
+          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
           changed = true;
         }
         Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
-                                   << x << " IN " << r << std::endl;
-        if (changed)
+                                   << nx << " IN " << r << std::endl;
+        if (nx != x || changed)
         {
-          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, x, r));
-          if (!polarity)
-          {
-            tmp = tmp.negate();
-          }
-          if (tmp == d_true)
-          {
-            d_regexp_ccached.insert(assertion);
-            continue;
-          }
-          else if (tmp == d_false)
+          // We rewrite the membership nx IN r.
+          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
+          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
+          if (tmp.isConst())
           {
-            std::vector<Node> exp_n;
-            exp_n.push_back(assertion);
-            Node conc = Node::null();
-            d_im.sendInference(rnfexp, exp_n, conc, "REGEXP NF Conflict");
-            addedLemma = true;
-            break;
+            if (tmp.getConst<bool>() == polarity)
+            {
+              // it is satisfied in this SAT context
+              d_regexp_ccached.insert(assertion);
+              continue;
+            }
+            else
+            {
+              // we have a conflict
+              std::vector<Node> exp_n;
+              exp_n.push_back(assertion);
+              Node conc = Node::null();
+              d_im.sendInference(nfexp, exp_n, conc, "REGEXP NF Conflict");
+              addedLemma = true;
+              break;
+            }
           }
         }
-        if (e == 1 && repUnfold.find(x) != repUnfold.end())
+        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
         {
           // do not unfold negative memberships of strings that have new
           // positive unfoldings. For example:
@@ -188,9 +214,8 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
         if (flag)
         {
           // check if the term is atomic
-          Node xr = d_parent.getRepresentative(x);
           Trace("strings-regexp")
-              << "Unroll/simplify membership of atomic term " << xr
+              << "Unroll/simplify membership of atomic term " << rep
               << std::endl;
           // if so, do simple unrolling
           std::vector<Node> nvec;
@@ -216,7 +241,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
             // Remember that we have unfolded a membership for x
             // notice that we only do this here, after we have definitely
             // added a lemma.
-            repUnfold.insert(x);
+            repUnfold.insert(rep);
           }
         }
         if (d_im.hasConflict())
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 4bceb1b74..8c2950c3e 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1591,6 +1591,8 @@ set(regress_1_tests
   regress1/strings/nterm-re-inter-sigma.smt2
   regress1/strings/pierre150331.smt2
   regress1/strings/policy_variable.smt2
+  regress1/strings/query4674.smt2
+  regress1/strings/query8485.smt2
   regress1/strings/re-all-char-hard.smt2
   regress1/strings/re-agg-total1.smt2
   regress1/strings/re-agg-total2.smt2
diff --git a/test/regress/regress1/strings/query4674.smt2 b/test/regress/regress1/strings/query4674.smt2
new file mode 100644
index 000000000..7132fa6a8
--- /dev/null
+++ b/test/regress/regress1/strings/query4674.smt2
@@ -0,0 +1,8 @@
+(set-info :smt-lib-version 2.6)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(set-option :re-elim false)
+(declare-fun x () String)
+(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "A")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 re.allchar  _let_0 (str.to.re (str.++ "B" (str.++ "C" (str.++ "B" "B")))) _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" "C"))) _let_0 (str.to.re "B") _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re "C") _let_0 (str.to.re (str.++ "B" (str.++ "C" "B"))) _let_0)))))))
+(check-sat)
diff --git a/test/regress/regress1/strings/query8485.smt2 b/test/regress/regress1/strings/query8485.smt2
new file mode 100644
index 000000000..ccd3b7f9a
--- /dev/null
+++ b/test/regress/regress1/strings/query8485.smt2
@@ -0,0 +1,8 @@
+(set-info :smt-lib-version 2.6)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(set-option :re-elim false)
+(declare-fun x () String)
+(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "B" (str.++ "A" (str.++ "C" "B")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "C")))) _let_0 re.allchar  _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" (str.++ "A" "B")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" "A"))) _let_0 (str.to.re "B") _let_0)))))))
+(check-sat)