Context-independent regular expression unfolding (#3168)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)

committer GitHub <noreply@github.com>

Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)
committer GitHub <noreply@github.com>
Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)
diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp

index 463891839affb44f04d5e977b490d4f75c7e6722..ea38c4dd98f10f31b1ad25a35ce1892676e10b5d 100644 (file)
--- a/src/theory/strings/regexp_solver.cpp
+++ b/src/theory/strings/regexp_solver.cpp
@@ -80,7 +80,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
    if (!addedLemma)
    {
      // get all memberships
-    std::vector<Node> allMems;
+    std::map<Node, Node> allMems;
      for (const std::pair<const Node, std::vector<Node> >& mr : mems)
      {
        for (const Node& m : mr.second)
@@ -88,7 +88,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
          bool polarity = m.getKind() != NOT;
          if (polarity || !options::stringIgnNegMembership())
          {
-          allMems.push_back(m);
+          allMems[m] = mr.first;
          }
        }
      }
@@ -100,8 +100,10 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
      // check positive (e=0), then negative (e=1) memberships
      for (unsigned e = 0; e < 2; e++)
      {
-      for (const Node& assertion : allMems)
+      for (const std::pair<const Node, Node>& mp : allMems)
        {
+        Node assertion = mp.first;
+        Node rep = mp.second;
          // check regular expression membership
          Trace("regexp-debug")
              << "Check : " << assertion << " "
@@ -126,43 +128,67 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
          bool flag = true;
          Node x = atom[0];
          Node r = atom[1];
+        Assert(rep == d_parent.getRepresentative(x));
+        // The following code takes normal forms into account for the purposes
+        // of simplifying a regular expression membership x in R. For example,
+        // if x = "A" in the current context, then we may be interested in
+        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
+        // membership to nx in R', then:
+        // - nfexp => ( x in R ) <=> nx in R'
+        // - rnfexp => R = R'
+        // We use these explanations below as assumptions on inferences when
+        // appropriate. Notice that for inferring conflicts and tautologies,
+        // we use the normal form of x always. This is because we always want to
+        // discover conflicts/tautologies whenever possible.
+        // For inferences based on regular expression unfolding, we do not use
+        // the normal form of x. The reason is that it is better to unfold
+        // regular expression memberships in a context-indepedent manner,
+        // that is, not taking into account the current normal form of x, since
+        // this ensures these lemmas are still relevant after backtracking.
+        std::vector<Node> nfexp;
          std::vector<Node> rnfexp;
-
+        // The normal form of x is stored in nx, while x is left unchanged.
+        Node nx = x;
          if (!x.isConst())
          {
-          x = d_parent.getNormalString(x, rnfexp);
-          changed = true;
+          nx = d_parent.getNormalString(x, nfexp);
          }
+        // If r is not a constant regular expression, we update it based on
+        // normal forms, which may concretize its variables.
          if (!d_regexp_opr.checkConstRegExp(r))
          {
            r = getNormalSymRegExp(r, rnfexp);
+          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
            changed = true;
          }
          Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
-                                   << x << " IN " << r << std::endl;
-        if (changed)
+                                   << nx << " IN " << r << std::endl;
+        if (nx != x || changed)
          {
-          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, x, r));
-          if (!polarity)
-          {
-            tmp = tmp.negate();
-          }
-          if (tmp == d_true)
-          {
-            d_regexp_ccached.insert(assertion);
-            continue;
-          }
-          else if (tmp == d_false)
+          // We rewrite the membership nx IN r.
+          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
+          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
+          if (tmp.isConst())
            {
-            std::vector<Node> exp_n;
-            exp_n.push_back(assertion);
-            Node conc = Node::null();
-            d_im.sendInference(rnfexp, exp_n, conc, "REGEXP NF Conflict");
-            addedLemma = true;
-            break;
+            if (tmp.getConst<bool>() == polarity)
+            {
+              // it is satisfied in this SAT context
+              d_regexp_ccached.insert(assertion);
+              continue;
+            }
+            else
+            {
+              // we have a conflict
+              std::vector<Node> exp_n;
+              exp_n.push_back(assertion);
+              Node conc = Node::null();
+              d_im.sendInference(nfexp, exp_n, conc, "REGEXP NF Conflict");
+              addedLemma = true;
+              break;
+            }
            }
          }
-        if (e == 1 && repUnfold.find(x) != repUnfold.end())
+        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
          {
            // do not unfold negative memberships of strings that have new
            // positive unfoldings. For example:
@@ -188,9 +214,8 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
          if (flag)
          {
            // check if the term is atomic
-          Node xr = d_parent.getRepresentative(x);
            Trace("strings-regexp")
-              << "Unroll/simplify membership of atomic term " << xr
+              << "Unroll/simplify membership of atomic term " << rep
                << std::endl;
            // if so, do simple unrolling
            std::vector<Node> nvec;
@@ -216,7 +241,7 @@ void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
              // Remember that we have unfolded a membership for x
              // notice that we only do this here, after we have definitely
              // added a lemma.
-            repUnfold.insert(x);
+            repUnfold.insert(rep);
            }
          }
          if (d_im.hasConflict())
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt

index 4bceb1b74b960d1966916c72f351cef60f501bfb..8c2950c3edd9b5e2001788036c0ad8865564aa1e 100644 (file)
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1591,6 +1591,8 @@ set(regress_1_tests
    regress1/strings/nterm-re-inter-sigma.smt2
    regress1/strings/pierre150331.smt2
    regress1/strings/policy_variable.smt2
+  regress1/strings/query4674.smt2
+  regress1/strings/query8485.smt2
    regress1/strings/re-all-char-hard.smt2
    regress1/strings/re-agg-total1.smt2
    regress1/strings/re-agg-total2.smt2
diff --git a/test/regress/regress1/strings/query4674.smt2 b/test/regress/regress1/strings/query4674.smt2

new file mode 100644 (file)

index 0000000..7132fa6
--- /dev/null
+++ b/test/regress/regress1/strings/query4674.smt2
@@ -0,0 +1,8 @@
+(set-info :smt-lib-version 2.6)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(set-option :re-elim false)
+(declare-fun x () String)
+(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "A")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 re.allchar  _let_0 (str.to.re (str.++ "B" (str.++ "C" (str.++ "B" "B")))) _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" "C"))) _let_0 (str.to.re "B") _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re "C") _let_0 (str.to.re (str.++ "B" (str.++ "C" "B"))) _let_0)))))))
+(check-sat)
diff --git a/test/regress/regress1/strings/query8485.smt2 b/test/regress/regress1/strings/query8485.smt2

new file mode 100644 (file)

index 0000000..ccd3b7f
--- /dev/null
+++ b/test/regress/regress1/strings/query8485.smt2
@@ -0,0 +1,8 @@
+(set-info :smt-lib-version 2.6)
+(set-logic ALL)
+(set-info :status sat)
+(set-option :strings-exp true)
+(set-option :re-elim false)
+(declare-fun x () String)
+(assert (let ((_let_0 (re.* re.allchar ))) (and (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "B" (str.++ "A" (str.++ "C" "B")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "A" (str.++ "B" (str.++ "C" "C")))) _let_0 re.allchar  _let_0)))) (not (= (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" (str.++ "A" "B")))) _let_0 re.allchar  _let_0)) (str.in.re x (re.++ _let_0 re.allchar  _let_0 (str.to.re (str.++ "C" (str.++ "B" "A"))) _let_0 (str.to.re "B") _let_0)))))))
+(check-sat)
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)
committer	GitHub <noreply@github.com>
	Sun, 18 Aug 2019 12:32:40 +0000 (07:32 -0500)
src/theory/strings/regexp_solver.cpp		patch \| blob \| history
test/regress/CMakeLists.txt		patch \| blob \| history
test/regress/regress1/strings/query4674.smt2	[new file with mode: 0644]	patch \| blob
test/regress/regress1/strings/query8485.smt2	[new file with mode: 0644]	patch \| blob