From 70bfc4f67bcad32fa1b9b581b71b3a2d70e14d7e Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Wed, 16 Sep 2020 13:36:05 -0700 Subject: [PATCH] Only rewrite replace_re(_all) if regexp is const (#5075) Fixes #5074. This commit fixes an issue in the rewriter of str.replace_re and str.replace_re_all. For both operators, we do two kinds of rewrites: (1) If the first argument is a constant, then we check whether the regular expression appears in that constant and (2) we check whether the regular expression matches the empty string. Both of those checks were assuming a constant regular expression but there was no guard in place for it. This commit adds the missing guard. --- src/theory/strings/sequences_rewriter.cpp | 86 ++++++++++++----------- test/api/CMakeLists.txt | 1 + test/api/issue5074.cpp | 30 ++++++++ 3 files changed, 77 insertions(+), 40 deletions(-) create mode 100644 test/api/issue5074.cpp diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index f05c32356..29c0aa2d5 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -3021,31 +3021,34 @@ Node SequencesRewriter::rewriteReplaceRe(Node node) Node y = node[1]; Node z = node[2]; - if (x.isConst()) + if (RegExpEntail::isConstRegExp(y)) { - // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z" - std::pair match = firstMatch(x, y); - if (match.first != string::npos) + if (x.isConst()) { - String s = x.getConst(); - Node ret = nm->mkNode(STRING_CONCAT, - nm->mkConst(s.substr(0, match.first)), - z, - nm->mkConst(s.substr(match.second))); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL); + // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z" + std::pair match = firstMatch(x, y); + if (match.first != string::npos) + { + String s = x.getConst(); + Node ret = nm->mkNode(STRING_CONCAT, + nm->mkConst(s.substr(0, match.first)), + z, + nm->mkConst(s.substr(match.second))); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL); + } + else + { + return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL); + } } - else + // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true + String emptyStr(""); + if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y)) { - return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL); + Node ret = nm->mkNode(STRING_CONCAT, z, x); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE); } } - // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true - String emptyStr(""); - if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y)) - { - Node ret = nm->mkNode(STRING_CONCAT, z, x); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE); - } return node; } @@ -3057,31 +3060,34 @@ Node SequencesRewriter::rewriteReplaceReAll(Node node) Node y = node[1]; Node z = node[2]; - if (x.isConst()) + if (RegExpEntail::isConstRegExp(y)) { - // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) ---> - // "Z" ++ y ++ "Z" ++ y - TypeNode t = x.getType(); - Node emp = Word::mkEmptyWord(t); - Node yp = Rewriter::rewrite( - nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp))); - std::vector res; - String rem = x.getConst(); - std::pair match(0, 0); - while (rem.size() >= 0) + if (x.isConst()) { - match = firstMatch(nm->mkConst(rem), yp); - if (match.first == string::npos) - { - break; + // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) ---> + // "Z" ++ y ++ "Z" ++ y + TypeNode t = x.getType(); + Node emp = Word::mkEmptyWord(t); + Node yp = Rewriter::rewrite( + nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp))); + std::vector res; + String rem = x.getConst(); + std::pair match(0, 0); + while (rem.size() >= 0) + { + match = firstMatch(nm->mkConst(rem), yp); + if (match.first == string::npos) + { + break; + } + res.push_back(nm->mkConst(rem.substr(0, match.first))); + res.push_back(z); + rem = rem.substr(match.second); } - res.push_back(nm->mkConst(rem.substr(0, match.first))); - res.push_back(z); - rem = rem.substr(match.second); + res.push_back(nm->mkConst(rem)); + Node ret = utils::mkConcat(res, t); + return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL); } - res.push_back(nm->mkConst(rem)); - Node ret = utils::mkConcat(res, t); - return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL); } return node; diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index 8d9110e9f..9df87e9b5 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -34,6 +34,7 @@ cvc4_add_api_test(smt2_compliance) # TODO(cvc4-projects#209): Add methods for retrieving statistics to new API # cvc4_add_api_test(statistics) cvc4_add_api_test(two_solvers) +cvc4_add_api_test(issue5074) # if we've built using libedit, then we want the interactive shell tests if (USE_EDITLINE) diff --git a/test/api/issue5074.cpp b/test/api/issue5074.cpp new file mode 100644 index 000000000..b39701f6b --- /dev/null +++ b/test/api/issue5074.cpp @@ -0,0 +1,30 @@ +/********************* */ +/*! \file issue5074.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Test for issue #5074 + **/ + +#include "api/cvc4cpp.h" + +using namespace CVC4::api; + +int main() +{ + Solver slv; + Term c1 = slv.mkConst(slv.getIntegerSort()); + Term t6 = slv.mkTerm(Kind::STRING_FROM_CODE, c1); + Term t12 = slv.mkTerm(Kind::STRING_TO_REGEXP, t6); + Term t14 = slv.mkTerm(Kind::STRING_REPLACE_RE, {t6, t12, t6}); + Term t16 = slv.mkTerm(Kind::STRING_CONTAINS, {t14, t14}); + slv.checkEntailed(t16); + + return 0; +} -- 2.30.2