Fix non-termination issues in simpleRegExpConsume (#4667)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Sun, 28 Jun 2020 13:34:44 +0000 (08:34 -0500)
committerGitHub <noreply@github.com>
Sun, 28 Jun 2020 13:34:44 +0000 (08:34 -0500)
There were two issues related to RE in bodies of re.* that accepted the empty string that led to non-termination in the rewriter for regular expressions.

This also improves trace messages for simpleRegExpConsume.

Fixes #4662.

src/theory/strings/regexp_entail.cpp
src/theory/strings/sequences_rewriter.cpp
test/regress/CMakeLists.txt
test/regress/regress0/strings/issue4662-consume-nterm.smt2 [new file with mode: 0644]

index f005b2b427cd5a96327b2dcce09524e45a0b059b..7e1f42f375dd2f69ed8b4891b2483d23e3277e6d 100644 (file)
@@ -48,6 +48,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
         do_next = false;
         Node xc = mchildren[mchildren.size() - 1];
         Node rc = children[children.size() - 1];
+        Trace("regexp-ext-rewrite-debug")
+            << "* " << xc << " in " << rc << std::endl;
         Assert(rc.getKind() != REGEXP_CONCAT);
         Assert(xc.getKind() != STRING_CONCAT);
         if (rc.getKind() == STRING_TO_REGEXP)
@@ -57,7 +59,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             children.pop_back();
             mchildren.pop_back();
             do_next = true;
-            Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl;
+            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
+          }
+          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
+          {
+            Trace("regexp-ext-rewrite-debug")
+                << "- ignore empty RE" << std::endl;
+            // ignore and continue
+            children.pop_back();
+            do_next = true;
           }
           else if (xc.isConst() && rc[0].isConst())
           {
@@ -65,8 +75,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             size_t index;
             Node s = Word::splitConstant(xc, rc[0], index, t == 0);
             Trace("regexp-ext-rewrite-debug")
-                << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> "
-                << s << " " << index << " " << t << std::endl;
+                << "- CRE: Regexp const split : " << xc << " " << rc[0]
+                << " -> " << s << " " << index << " " << t << std::endl;
             if (s.isNull())
             {
               Trace("regexp-ext-rewrite-debug")
@@ -76,7 +86,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             else
             {
               Trace("regexp-ext-rewrite-debug")
-                  << "...strip equal const" << std::endl;
+                  << "strip equal const" << std::endl;
               children.pop_back();
               mchildren.pop_back();
               if (index == 0)
@@ -88,6 +98,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
                 children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
               }
             }
+            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
             do_next = true;
           }
         }
@@ -97,7 +108,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
           CVC4::String s = xc.getConst<String>();
           if (Word::isEmpty(xc))
           {
-            Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl;
+            Trace("regexp-ext-rewrite-debug") << "ignore empty" << std::endl;
             // ignore and continue
             mchildren.pop_back();
             do_next = true;
@@ -127,6 +138,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             }
             else
             {
+              Trace("regexp-ext-rewrite-debug")
+                  << "...return false" << std::endl;
               return nm->mkConst(false);
             }
           }
@@ -135,19 +148,23 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             // see if any/each child does not work
             bool result_valid = true;
             Node result;
-            Node emp_s = nm->mkConst(::CVC4::String(""));
+            Node emp_s = nm->mkConst(String(""));
             for (unsigned i = 0; i < rc.getNumChildren(); i++)
             {
               std::vector<Node> mchildren_s;
               std::vector<Node> children_s;
               mchildren_s.push_back(xc);
               utils::getConcat(rc[i], children_s);
+              Trace("regexp-ext-rewrite-debug") << push;
               Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+              Trace("regexp-ext-rewrite-debug") << pop;
               if (!ret.isNull())
               {
                 // one conjunct cannot be satisfied, return false
                 if (rc.getKind() == REGEXP_INTER)
                 {
+                  Trace("regexp-ext-rewrite-debug")
+                      << "...return " << ret << std::endl;
                   return ret;
                 }
               }
@@ -182,10 +199,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
               {
                 // all disjuncts cannot be satisfied, return false
                 Assert(rc.getKind() == REGEXP_UNION);
+                Trace("regexp-ext-rewrite-debug")
+                    << "...return false" << std::endl;
                 return nm->mkConst(false);
               }
               else
               {
+                Trace("regexp-ext-rewrite-debug")
+                    << "- same result, try again, children now " << children
+                    << std::endl;
                 // all branches led to the same result
                 children.pop_back();
                 mchildren.pop_back();
@@ -210,17 +232,19 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
             std::vector<Node> children_s;
             utils::getConcat(rc[0], children_s);
             Trace("regexp-ext-rewrite-debug")
-                << "...recursive call on body of star" << std::endl;
+                << "- recursive call on body of star" << std::endl;
+            Trace("regexp-ext-rewrite-debug") << push;
             Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
+            Trace("regexp-ext-rewrite-debug") << pop;
             if (!ret.isNull())
             {
               Trace("regexp-ext-rewrite-debug")
-                  << "CRE : regexp star infeasable " << xc << " " << rc
+                  << "CRE : regexp star infeasable " << xc << " " << rc
                   << std::endl;
               children.pop_back();
               if (!children.empty())
               {
-                Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl;
+                Trace("regexp-ext-rewrite-debug") << "continue" << std::endl;
                 do_next = true;
               }
             }
@@ -244,16 +268,22 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
                     std::reverse(mchildren_ss.begin(), mchildren_ss.end());
                     std::reverse(children_ss.begin(), children_ss.end());
                   }
-                  if (simpleRegexpConsume(mchildren_ss, children_ss, t)
-                          .isNull())
+                  Trace("regexp-ext-rewrite-debug")
+                      << "- recursive call required repeat star" << std::endl;
+                  Trace("regexp-ext-rewrite-debug") << push;
+                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
+                  Trace("regexp-ext-rewrite-debug") << pop;
+                  if (rets.isNull())
                   {
                     can_skip = true;
                   }
                 }
                 if (!can_skip)
                 {
+                  TypeNode stype = nm->stringType();
+                  Node prev = utils::mkConcat(mchildren, stype);
                   Trace("regexp-ext-rewrite-debug")
-                      << "...can't skip" << std::endl;
+                      << "can't skip" << std::endl;
                   // take the result of fully consuming once
                   if (t == 1)
                   {
@@ -262,12 +292,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
                   mchildren.clear();
                   mchildren.insert(
                       mchildren.end(), mchildren_s.begin(), mchildren_s.end());
-                  do_next = true;
+                  Node curr = utils::mkConcat(mchildren, stype);
+                  do_next = (prev != curr);
+                  Trace("regexp-ext-rewrite-debug")
+                      << "- do_next = " << do_next << std::endl;
                 }
                 else
                 {
                   Trace("regexp-ext-rewrite-debug")
-                      << "...can skip " << rc << " from " << xc << std::endl;
+                      << "can skip " << rc << " from " << xc << std::endl;
                 }
               }
             }
@@ -276,7 +309,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
         if (!do_next)
         {
           Trace("regexp-ext-rewrite")
-              << "Cannot consume : " << xc << " " << rc << std::endl;
+              << "- cannot consume : " << xc << " " << rc << std::endl;
         }
       }
     }
@@ -286,6 +319,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
       std::reverse(mchildren.begin(), mchildren.end());
     }
   }
+  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
   return Node::null();
 }
 
index 9df03f32c246dfec1ba4b0df0e9e9b1585668986..20b892d0fa3e4d3c404f3145120d47c01e2ba952 100644 (file)
@@ -1315,9 +1315,13 @@ Node SequencesRewriter::rewriteMembership(TNode node)
           }
           else
           {
+            Node prev = retNode;
             retNode = nm->mkNode(
                 STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
-            success = true;
+            // Iterate again if the node changed. It may not have changed if
+            // nothing was consumed from mchildren (e.g. if the body of the
+            // re.* accepts the empty string.
+            success = (retNode != prev);
           }
         }
       }
index 7863c5ec0606843b5b425193edee00a72d9175cb..fc9cf37ac2ef8684aa7723ddd403100e1aaca57a 100644 (file)
@@ -966,6 +966,7 @@ set(regress_0_tests
   regress0/strings/issue3657-evalLeq.smt2
   regress0/strings/issue4070.smt2
   regress0/strings/issue4376.smt2
+  regress0/strings/issue4662-consume-nterm.smt2
   regress0/strings/itos-entail.smt2
   regress0/strings/large-model.smt2
   regress0/strings/leadingzero001.smt2
diff --git a/test/regress/regress0/strings/issue4662-consume-nterm.smt2 b/test/regress/regress0/strings/issue4662-consume-nterm.smt2
new file mode 100644 (file)
index 0000000..a87279b
--- /dev/null
@@ -0,0 +1,6 @@
+(set-logic QF_S)
+(set-info :status unsat)
+(declare-fun a () String)
+(define-fun b () RegLan (str.to_re "A"))
+(assert (str.in_re (str.++ "B" a) (re.+ (re.++ (re.opt b) (re.opt b)))))
+(check-sat)