Rewrite simple regexp pattern to str.contains (#2827)
authorAndres Noetzli <andres.noetzli@gmail.com>
Wed, 13 Feb 2019 20:18:09 +0000 (12:18 -0800)
committerGitHub <noreply@github.com>
Wed, 13 Feb 2019 20:18:09 +0000 (12:18 -0800)
src/theory/strings/theory_strings_rewriter.cpp
test/unit/theory/theory_strings_rewriter_white.h

index eff68ebe6bd88f183fdfe396d302f490b0045452..79667b9aa8f92e8bfc8faee3c0379517900dfdd9 100644 (file)
@@ -1244,8 +1244,12 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
     bool allSigma = true;
     bool allSigmaStrict = true;
     unsigned allSigmaMinSize = 0;
-    for (const Node& rc : r)
+    Node constStr;
+    size_t constIdx = 0;
+    size_t nchildren = r.getNumChildren();
+    for (size_t i = 0; i < nchildren; i++)
     {
+      Node rc = r[i];
       Assert(rc.getKind() != kind::REGEXP_EMPTY);
       if (rc.getKind() == kind::REGEXP_SIGMA)
       {
@@ -1255,6 +1259,19 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
       {
         allSigmaStrict = false;
       }
+      else if (rc.getKind() == STRING_TO_REGEXP)
+      {
+        if (constStr.isNull())
+        {
+          constStr = rc[0];
+          constIdx = i;
+        }
+        else
+        {
+          allSigma = false;
+          break;
+        }
+      }
       else
       {
         allSigma = false;
@@ -1263,10 +1280,21 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
     }
     if (allSigma)
     {
-      Node num = nm->mkConst(Rational(allSigmaMinSize));
-      Node lenx = nm->mkNode(STRING_LENGTH, x);
-      retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
-      return returnRewrite(node, retNode, "re-concat-pure-allchar");
+      if (constStr.isNull())
+      {
+        // x in re.++(_*, _, _) ---> str.len(x) >= 2
+        Node num = nm->mkConst(Rational(allSigmaMinSize));
+        Node lenx = nm->mkNode(STRING_LENGTH, x);
+        retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
+        return returnRewrite(node, retNode, "re-concat-pure-allchar");
+      }
+      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
+               && constIdx != nchildren - 1)
+      {
+        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
+        retNode = nm->mkNode(STRING_STRCTN, x, constStr);
+        return returnRewrite(node, retNode, "re-concat-to-contains");
+      }
     }
   }else if( r.getKind()==kind::REGEXP_INTER || r.getKind()==kind::REGEXP_UNION ){
     std::vector< Node > mvec;
index c5d20daefcc6272090f55f1e84b12fe303510f2f..79a50d5334ad42183399dd376714ac181b92a704 100644 (file)
@@ -1269,6 +1269,52 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite
     }
   }
 
+  void testRewriteMembership()
+  {
+    TypeNode strType = d_nm->stringType();
+
+    std::vector<Node> vec_empty;
+    Node abc = d_nm->mkConst(::CVC4::String("ABC"));
+    Node re_abc = d_nm->mkNode(kind::STRING_TO_REGEXP, abc);
+    Node x = d_nm->mkVar("x", strType);
+
+    {
+      // Same normal form for:
+      //
+      // (str.in.re x (re.++ (re.* re.allchar)
+      //                     (re.* re.allchar)
+      //                     (str.to.re "ABC")
+      //                     (re.* re.allchar)))
+      //
+      // (str.contains x "ABC")
+      Node sig_star = d_nm->mkNode(kind::REGEXP_STAR,
+                                   d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty));
+      Node lhs = d_nm->mkNode(
+          kind::STRING_IN_REGEXP,
+          x,
+          d_nm->mkNode(
+              kind::REGEXP_CONCAT, sig_star, sig_star, re_abc, sig_star));
+      Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc);
+      sameNormalForm(lhs, rhs);
+    }
+
+    {
+      // Different normal forms for:
+      //
+      // (str.in.re x (re.++ (re.* re.allchar) (str.to.re "ABC")))
+      //
+      // (str.contains x "ABC")
+      Node sig_star = d_nm->mkNode(kind::REGEXP_STAR,
+                                   d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty));
+      Node lhs =
+          d_nm->mkNode(kind::STRING_IN_REGEXP,
+                       x,
+                       d_nm->mkNode(kind::REGEXP_CONCAT, sig_star, re_abc));
+      Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc);
+      differentNormalForms(lhs, rhs);
+    }
+  }
+
  private:
   ExprManager* d_em;
   SmtEngine* d_smt;