From ec24a92382d0884e5b9b07a8c2f2ed056c98ae9a Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 13 Aug 2019 13:19:18 -0500 Subject: [PATCH] Add string rewrite involving allchar stars (#3167) --- .../strings/theory_strings_rewriter.cpp | 70 ++++++++++++++----- test/regress/CMakeLists.txt | 1 + test/regress/regress1/strings/rew-check1.smt2 | 10 +++ 3 files changed, 65 insertions(+), 16 deletions(-) create mode 100644 test/regress/regress1/strings/rew-check1.smt2 diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index aac2477ea..32190e093 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -763,7 +763,12 @@ Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node) Trace("strings-rewrite-debug") << "Strings::rewriteConcatRegExp start " << node << std::endl; std::vector cvec; + // the current accumulation of constant strings std::vector preReStr; + // whether the last component was (_)* + bool lastAllStar = false; + String emptyStr = String(""); + // this loop checks to see if components can be combined or dropped for (unsigned i = 0, size = vec.size(); i <= size; i++) { Node curr; @@ -771,41 +776,57 @@ Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node) { curr = vec[i]; Assert(curr.getKind() != REGEXP_CONCAT); - if (!cvec.empty() && preReStr.empty()) - { - Node cvecLast = cvec.back(); - if (cvecLast.getKind() == REGEXP_STAR && cvecLast[0] == curr) - { - // by convention, flip the order (a*)++a ---> a++(a*) - cvec[cvec.size() - 1] = curr; - cvec.push_back(cvecLast); - curr = Node::null(); - } - } } // update preReStr if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP) { + lastAllStar = false; preReStr.push_back(curr[0]); curr = Node::null(); } else if (!preReStr.empty()) { + Assert(!lastAllStar); // this groups consecutive strings a++b ---> ab Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(STRING_CONCAT, preReStr)); cvec.push_back(acc); preReStr.clear(); } - if (!curr.isNull() && curr.getKind() == REGEXP_STAR) + else if (!curr.isNull() && lastAllStar) { - // we can group stars (a*)++(a*) ---> a* - if (!cvec.empty() && cvec.back() == curr) + // if empty, drop it + // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)* + if (testConstStringInRegExp(emptyStr, 0, curr)) { curr = Node::null(); } } if (!curr.isNull()) + { + lastAllStar = false; + if (curr.getKind() == REGEXP_STAR) + { + // we can group stars (a)* ++ (a)* ---> (a)* + if (!cvec.empty() && cvec.back() == curr) + { + curr = Node::null(); + } + else if (curr[0].getKind() == REGEXP_SIGMA) + { + Assert(!lastAllStar); + lastAllStar = true; + // go back and remove empty ones from back of cvec + // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)* + while (!cvec.empty() + && testConstStringInRegExp(emptyStr, 0, cvec.back())) + { + cvec.pop_back(); + } + } + } + } + if (!curr.isNull()) { cvec.push_back(curr); } @@ -814,10 +835,27 @@ Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node) retNode = utils::mkConcat(REGEXP_CONCAT, cvec); if (retNode != node) { - // handles all cases where consecutive re constants are combined, and cases - // where arguments are swapped, as described in the loop above. + // handles all cases where consecutive re constants are combined or dropped + // as described in the loop above. return returnRewrite(node, retNode, "re.concat"); } + + // flipping adjacent star arguments + changed = false; + for (size_t i = 0, size = cvec.size() - 1; i < size; i++) + { + if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1]) + { + // by convention, flip the order (a*)++a ---> a++(a*) + std::swap(cvec[i], cvec[i+1]); + changed = true; + } + } + if (changed) + { + retNode = utils::mkConcat(REGEXP_CONCAT, cvec); + return returnRewrite(node, retNode, "re.concat.opt"); + } return node; } diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index f3c80559a..a04fd4963 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1609,6 +1609,7 @@ set(regress_1_tests regress1/strings/replaceall-len.smt2 regress1/strings/replaceall-replace.smt2 regress1/strings/rew-020618.smt2 + regress1/strings/rew-check1.smt2 regress1/strings/simple-re-consume.smt2 regress1/strings/stoi-400million.smt2 regress1/strings/stoi-solve.smt2 diff --git a/test/regress/regress1/strings/rew-check1.smt2 b/test/regress/regress1/strings/rew-check1.smt2 new file mode 100644 index 000000000..5cb85e4ca --- /dev/null +++ b/test/regress/regress1/strings/rew-check1.smt2 @@ -0,0 +1,10 @@ +(set-info :smt-lib-version 2.5) +(set-logic ALL) +(set-info :status unsat) +(declare-fun x () String) +(assert (not (= +(str.in.re x (re.++ (re.* re.allchar ) (str.to.re "A") (re.* re.allchar ) re.allchar (re.* re.allchar ) (re.* (str.to.re "A")) (re.* re.allchar ))) +(str.in.re x (re.++ (re.* (str.to.re "A")) (re.* (str.to.re "B")) (re.* re.allchar ) (str.to.re "A") (re.* re.allchar ) re.allchar (re.* re.allchar ))) +))) + +(check-sat) -- 2.30.2