From 83cd4823d6bf8e0c8e7d82afbfac824744491762 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Wed, 13 Feb 2019 12:18:09 -0800 Subject: [PATCH] Rewrite simple regexp pattern to str.contains (#2827) --- .../strings/theory_strings_rewriter.cpp | 38 +++++++++++++-- .../theory/theory_strings_rewriter_white.h | 46 +++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index eff68ebe6..79667b9aa 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1244,8 +1244,12 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { bool allSigma = true; bool allSigmaStrict = true; unsigned allSigmaMinSize = 0; - for (const Node& rc : r) + Node constStr; + size_t constIdx = 0; + size_t nchildren = r.getNumChildren(); + for (size_t i = 0; i < nchildren; i++) { + Node rc = r[i]; Assert(rc.getKind() != kind::REGEXP_EMPTY); if (rc.getKind() == kind::REGEXP_SIGMA) { @@ -1255,6 +1259,19 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { { allSigmaStrict = false; } + else if (rc.getKind() == STRING_TO_REGEXP) + { + if (constStr.isNull()) + { + constStr = rc[0]; + constIdx = i; + } + else + { + allSigma = false; + break; + } + } else { allSigma = false; @@ -1263,10 +1280,21 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { } if (allSigma) { - Node num = nm->mkConst(Rational(allSigmaMinSize)); - Node lenx = nm->mkNode(STRING_LENGTH, x); - retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); - return returnRewrite(node, retNode, "re-concat-pure-allchar"); + if (constStr.isNull()) + { + // x in re.++(_*, _, _) ---> str.len(x) >= 2 + Node num = nm->mkConst(Rational(allSigmaMinSize)); + Node lenx = nm->mkNode(STRING_LENGTH, x); + retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); + return returnRewrite(node, retNode, "re-concat-pure-allchar"); + } + else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0 + && constIdx != nchildren - 1) + { + // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc") + retNode = nm->mkNode(STRING_STRCTN, x, constStr); + return returnRewrite(node, retNode, "re-concat-to-contains"); + } } }else if( r.getKind()==kind::REGEXP_INTER || r.getKind()==kind::REGEXP_UNION ){ std::vector< Node > mvec; diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h index c5d20daef..79a50d533 100644 --- a/test/unit/theory/theory_strings_rewriter_white.h +++ b/test/unit/theory/theory_strings_rewriter_white.h @@ -1269,6 +1269,52 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite } } + void testRewriteMembership() + { + TypeNode strType = d_nm->stringType(); + + std::vector vec_empty; + Node abc = d_nm->mkConst(::CVC4::String("ABC")); + Node re_abc = d_nm->mkNode(kind::STRING_TO_REGEXP, abc); + Node x = d_nm->mkVar("x", strType); + + { + // Same normal form for: + // + // (str.in.re x (re.++ (re.* re.allchar) + // (re.* re.allchar) + // (str.to.re "ABC") + // (re.* re.allchar))) + // + // (str.contains x "ABC") + Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, + d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); + Node lhs = d_nm->mkNode( + kind::STRING_IN_REGEXP, + x, + d_nm->mkNode( + kind::REGEXP_CONCAT, sig_star, sig_star, re_abc, sig_star)); + Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); + sameNormalForm(lhs, rhs); + } + + { + // Different normal forms for: + // + // (str.in.re x (re.++ (re.* re.allchar) (str.to.re "ABC"))) + // + // (str.contains x "ABC") + Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, + d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); + Node lhs = + d_nm->mkNode(kind::STRING_IN_REGEXP, + x, + d_nm->mkNode(kind::REGEXP_CONCAT, sig_star, re_abc)); + Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); + differentNormalForms(lhs, rhs); + } + } + private: ExprManager* d_em; SmtEngine* d_smt; -- 2.30.2