From 29f0b8f378377ed836bddaaf88883d0b2eeb545d Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Thu, 27 May 2021 15:42:10 -0700 Subject: [PATCH] Fix `str.replace_re` and `str.replace_re_all` (#6615) Fixes #6057. The reductions of `str.replace_re` and `str.replace_re_all` were not correctly enforcing that the operations replace the _first_ occurrence of some regular expression in a string. This commit fixes the issue by introducing a new operator `str.indexof_re(s, r, n)`, which, analoguously to `str.indexof`, returns the index of the first match of the regular expression `r` in `s`. The commit adds basic rewrites for evaluating the operator as well as its reduction. Additionally, it converts the reductions of `str.replace_re` and `str.replace_re_all` to use that new operator. This simplifies the reductions of the two operators and ensures that the semantics are consistent between the two. --- NEWS | 4 + src/api/cpp/cvc5.cpp | 2 + src/api/cpp/cvc5_kind.h | 16 ++ src/parser/smt2/smt2.cpp | 1 + src/printer/smt2/smt2_printer.cpp | 2 + src/theory/strings/extf_solver.cpp | 11 +- src/theory/strings/kinds | 8 +- src/theory/strings/rewrites.cpp | 4 + src/theory/strings/rewrites.h | 4 + src/theory/strings/sequences_rewriter.cpp | 59 ++++- src/theory/strings/sequences_rewriter.h | 6 + src/theory/strings/skolem_cache.cpp | 18 ++ src/theory/strings/skolem_cache.h | 21 +- src/theory/strings/term_registry.cpp | 18 +- src/theory/strings/theory_strings.cpp | 1 + .../strings/theory_strings_preprocess.cpp | 234 ++++++++++++------ src/theory/strings/theory_strings_utils.cpp | 5 +- test/regress/CMakeLists.txt | 10 + test/regress/regress0/strings/indexof_re.smt2 | 18 ++ .../regress1/strings/indexof_re_red.smt2 | 52 ++++ .../issue6057-replace-re-all-jiwonparc.smt2 | 9 + .../strings/issue6057-replace-re.smt2 | 7 + .../strings/issue6203-6-replace-re.smt2 | 9 + .../strings/issue6337-replace-re-all.smt2 | 10 + .../strings/issue6337-replace-re.smt2 | 13 + .../issue6057-replace-re-all-simplified.smt2 | 11 + .../strings/issue6057-replace-re-all.smt2 | 9 + test/regress/regress2/strings/replace_re.smt2 | 8 + .../regress3/strings/indexof_re_red.smt2 | 9 + 29 files changed, 486 insertions(+), 93 deletions(-) create mode 100644 test/regress/regress0/strings/indexof_re.smt2 create mode 100644 test/regress/regress1/strings/indexof_re_red.smt2 create mode 100644 test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 create mode 100644 test/regress/regress1/strings/issue6057-replace-re.smt2 create mode 100644 test/regress/regress1/strings/issue6203-6-replace-re.smt2 create mode 100644 test/regress/regress1/strings/issue6337-replace-re-all.smt2 create mode 100644 test/regress/regress1/strings/issue6337-replace-re.smt2 create mode 100644 test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2 create mode 100644 test/regress/regress2/strings/issue6057-replace-re-all.smt2 create mode 100644 test/regress/regress3/strings/indexof_re_red.smt2 diff --git a/NEWS b/NEWS index 26e0edbb6..54575dcc2 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,10 @@ New Features: if arrays `a` and `b` are equal on all indices within indices `i` and `j`. * Support for an integer operator `(_ iand n)` that returns the bitwise `and` of two integers, seen as integers modulo n. +* Strings: + * Support for `str.indexof_re(s, r, n)`, which returns the index of the first + occurrence of a regular expression `r` in a string `s` after index `n` or + -1 if `r` does not match a substring after `n`. Improvements: * New API: Added functions to retrieve the heap/nil term when using separation diff --git a/src/api/cpp/cvc5.cpp b/src/api/cpp/cvc5.cpp index 16366c3cf..0f2d5ad1b 100644 --- a/src/api/cpp/cvc5.cpp +++ b/src/api/cpp/cvc5.cpp @@ -312,6 +312,7 @@ const static std::unordered_map s_kinds{ {STRING_CHARAT, cvc5::Kind::STRING_CHARAT}, {STRING_CONTAINS, cvc5::Kind::STRING_STRCTN}, {STRING_INDEXOF, cvc5::Kind::STRING_STRIDOF}, + {STRING_INDEXOF_RE, cvc5::Kind::STRING_INDEXOF_RE}, {STRING_REPLACE, cvc5::Kind::STRING_STRREPL}, {STRING_REPLACE_ALL, cvc5::Kind::STRING_STRREPLALL}, {STRING_REPLACE_RE, cvc5::Kind::STRING_REPLACE_RE}, @@ -618,6 +619,7 @@ const static std::unordered_map {cvc5::Kind::STRING_CHARAT, STRING_CHARAT}, {cvc5::Kind::STRING_STRCTN, STRING_CONTAINS}, {cvc5::Kind::STRING_STRIDOF, STRING_INDEXOF}, + {cvc5::Kind::STRING_INDEXOF_RE, STRING_INDEXOF_RE}, {cvc5::Kind::STRING_STRREPL, STRING_REPLACE}, {cvc5::Kind::STRING_STRREPLALL, STRING_REPLACE_ALL}, {cvc5::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE}, diff --git a/src/api/cpp/cvc5_kind.h b/src/api/cpp/cvc5_kind.h index 6f6bf1d0e..4263eedb2 100644 --- a/src/api/cpp/cvc5_kind.h +++ b/src/api/cpp/cvc5_kind.h @@ -2630,6 +2630,22 @@ enum CVC5_EXPORT Kind : int32_t * - `Solver::mkTerm(Kind kind, const std::vector& children) const` */ STRING_INDEXOF, + /** + * String index-of regular expression match. + * Returns the first match of a regular expression r in a string s. If the + * index is negative or greater than the length of string s1, or r does not + * match a substring in s after index i, the result is -1. + * + * Parameters: + * - 1: Term of sort String (string s) + * - 2: Term of sort RegLan (regular expression r) + * - 3: Term of sort Integer (index i) + * + * Create with: + * - `Solver::mkTerm(Kind kind, const Term& child1, const Term& child2, const Term& child3) const` + * - `Solver::mkTerm(Kind kind, const std::vector& children) const` + */ + STRING_INDEXOF_RE, /** * String replace. * Replaces a string s2 in a string s1 with string s3. If s2 does not appear diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 88f4b4ef8..d32f149bf 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -158,6 +158,7 @@ void Smt2::addStringOperators() { addOperator(api::STRING_REPLACE_RE_ALL, "str.replace_re_all"); if (!strictModeEnabled()) { + addOperator(api::STRING_INDEXOF_RE, "str.indexof_re"); addOperator(api::STRING_UPDATE, "str.update"); addOperator(api::STRING_TOLOWER, "str.tolower"); addOperator(api::STRING_TOUPPER, "str.toupper"); diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 6258834c4..4607d2747 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -723,6 +723,7 @@ void Smt2Printer::toStream(std::ostream& out, case kind::STRING_CHARAT: case kind::STRING_STRCTN: case kind::STRING_STRIDOF: + case kind::STRING_INDEXOF_RE: case kind::STRING_STRREPL: case kind::STRING_STRREPLALL: case kind::STRING_REPLACE_RE: @@ -1309,6 +1310,7 @@ std::string Smt2Printer::smtKindString(Kind k, Variant v) case kind::STRING_STRCTN: return "str.contains" ; case kind::STRING_CHARAT: return "str.at" ; case kind::STRING_STRIDOF: return "str.indexof" ; + case kind::STRING_INDEXOF_RE: return "str.indexof_re"; case kind::STRING_STRREPL: return "str.replace" ; case kind::STRING_STRREPLALL: return "str.replace_all"; case kind::STRING_REPLACE_RE: return "str.replace_re"; diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index a1de5e295..9576c1d81 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -53,6 +53,7 @@ ExtfSolver::ExtfSolver(SolverState& s, d_extt.addFunctionKind(kind::STRING_SUBSTR); d_extt.addFunctionKind(kind::STRING_UPDATE); d_extt.addFunctionKind(kind::STRING_STRIDOF); + d_extt.addFunctionKind(kind::STRING_INDEXOF_RE); d_extt.addFunctionKind(kind::STRING_ITOS); d_extt.addFunctionKind(kind::STRING_STOI); d_extt.addFunctionKind(kind::STRING_STRREPL); @@ -191,11 +192,11 @@ bool ExtfSolver::doReduction(int effort, Node n) { NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN - || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH - || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER - || k == STRING_REV); + || k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS + || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL + || k == SEQ_NTH || k == STRING_REPLACE_RE + || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); std::vector new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 1353ca964..743a5c006 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -21,13 +21,12 @@ operator STRING_CHARAT 2 "string charat" operator STRING_STRCTN 2 "string contains" operator STRING_LT 2 "string less than" operator STRING_LEQ 2 "string less than or equal" -operator STRING_STRIDOF 3 "string indexof" +operator STRING_STRIDOF 3 "string index of substring" +operator STRING_INDEXOF_RE 3 "string index of regular expression match" operator STRING_STRREPL 3 "string replace" operator STRING_STRREPLALL 3 "string replace all" operator STRING_REPLACE_RE 3 "string replace regular expression match" operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches" -typerule STRING_REPLACE_RE "SimpleTypeRule" -typerule STRING_REPLACE_RE_ALL "SimpleTypeRule" operator STRING_PREFIX 2 "string prefixof" operator STRING_SUFFIX 2 "string suffixof" operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit" @@ -152,8 +151,11 @@ typerule STRING_UPDATE ::cvc5::theory::strings::StringUpdateTypeRule typerule STRING_CHARAT ::cvc5::theory::strings::StringAtTypeRule typerule STRING_STRCTN ::cvc5::theory::strings::StringRelationTypeRule typerule STRING_STRIDOF ::cvc5::theory::strings::StringIndexOfTypeRule +typerule STRING_INDEXOF_RE "SimpleTypeRule" typerule STRING_STRREPL ::cvc5::theory::strings::StringReplaceTypeRule typerule STRING_STRREPLALL ::cvc5::theory::strings::StringReplaceTypeRule +typerule STRING_REPLACE_RE "SimpleTypeRule" +typerule STRING_REPLACE_RE_ALL "SimpleTypeRule" typerule STRING_PREFIX ::cvc5::theory::strings::StringStrToBoolTypeRule typerule STRING_SUFFIX ::cvc5::theory::strings::StringStrToBoolTypeRule typerule STRING_REV ::cvc5::theory::strings::StringStrToStrTypeRule diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 0c4e35df7..549d33efc 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -68,6 +68,10 @@ const char* toString(Rewrite r) case Rewrite::IDOF_PULL_ENDPT: return "IDOF_PULL_ENDPT"; case Rewrite::IDOF_STRIP_CNST_ENDPTS: return "IDOF_STRIP_CNST_ENDPTS"; case Rewrite::IDOF_STRIP_SYM_LEN: return "IDOF_STRIP_SYM_LEN"; + case Rewrite::INDEXOF_RE_EMP_RE: return "INDEXOF_RE_EMP_RE"; + case Rewrite::INDEXOF_RE_EVAL: return "INDEXOF_RE_EVAL"; + case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX"; + case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX"; case Rewrite::ITOS_EVAL: return "ITOS_EVAL"; case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY"; case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN"; diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 482666faa..5e63c55c8 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -73,6 +73,10 @@ enum class Rewrite : uint32_t IDOF_PULL_ENDPT, IDOF_STRIP_CNST_ENDPTS, IDOF_STRIP_SYM_LEN, + INDEXOF_RE_EMP_RE, + INDEXOF_RE_EVAL, + INDEXOF_RE_INVALID_INDEX, + INDEXOF_RE_MAX_INDEX, ITOS_EVAL, RE_AND_EMPTY, RE_ANDOR_FLATTEN, diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 15c37070a..7ef1242c6 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1461,6 +1461,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteIndexof(node); } + else if (nk == kind::STRING_INDEXOF_RE) + { + retNode = rewriteIndexofRe(node); + } else if (nk == kind::STRING_STRREPL) { retNode = rewriteReplace(node); @@ -2529,6 +2533,59 @@ Node SequencesRewriter::rewriteIndexof(Node node) return node; } +Node SequencesRewriter::rewriteIndexofRe(Node node) +{ + Assert(node.getKind() == STRING_INDEXOF_RE); + NodeManager* nm = NodeManager::currentNM(); + Node s = node[0]; + Node r = node[1]; + Node n = node[2]; + Node zero = nm->mkConst(Rational(0)); + Node slen = nm->mkNode(STRING_LENGTH, s); + + if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true)) + { + Node ret = nm->mkConst(Rational(-1)); + return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX); + } + + if (RegExpEntail::isConstRegExp(r)) + { + if (s.isConst() && n.isConst()) + { + Rational nrat = n.getConst(); + cvc5::Rational rMaxInt(cvc5::String::maxSize()); + if (nrat > rMaxInt) + { + // We know that, due to limitations on the size of string constants + // in our implementation, that accessing a position greater than + // rMaxInt is guaranteed to be out of bounds. + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX); + } + + uint32_t start = nrat.getNumerator().toUnsignedInt(); + Node rem = nm->mkConst(s.getConst().substr(start)); + std::pair match = firstMatch(rem, r); + Node ret = nm->mkConst( + Rational(match.first == string::npos + ? -1 + : static_cast(start + match.first))); + return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL); + } + + if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n)) + { + String emptyStr(""); + if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r)) + { + return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE); + } + } + } + return node; +} + Node SequencesRewriter::rewriteReplace(Node node) { Assert(node.getKind() == kind::STRING_STRREPL); @@ -3150,7 +3207,7 @@ Node SequencesRewriter::rewriteReplaceReAll(Node node) std::vector res; String rem = x.getConst(); std::pair match(0, 0); - while (rem.size() >= 0) + while (rem.size() != 0) { match = firstMatch(nm->mkConst(rem), yp); if (match.first == string::npos) diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 7af24596a..37ed78786 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -194,6 +194,12 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ Node rewriteIndexof(Node node); + /** rewrite indexof regular expression match + * This is the entry point for post-rewriting terms n of the form + * str.indexof_re( s, r, n ) + * Returns the rewritten form of node. + */ + Node rewriteIndexofRe(Node node); /** rewrite replace * This is the entry point for post-rewriting terms n of the form * str.replace( s, t, r ) diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 9b23301f3..913518bc8 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -40,6 +40,16 @@ struct IndexVarAttributeId }; typedef expr::Attribute IndexVarAttribute; +/** + * A bound variable corresponding to the universally quantified integer + * variable used to range over the valid lengths of a string, used for + * axiomatizing the behavior of some term. + */ +struct LengthVarAttributeId +{ +}; +typedef expr::Attribute LengthVarAttribute; + SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts) { NodeManager* nm = NodeManager::currentNM(); @@ -300,6 +310,14 @@ Node SkolemCache::mkIndexVar(Node t) return bvm->mkBoundVar(t, intType); } +Node SkolemCache::mkLengthVar(Node t) +{ + NodeManager* nm = NodeManager::currentNM(); + TypeNode intType = nm->integerType(); + BoundVarManager* bvm = nm->getBoundVarManager(); + return bvm->mkBoundVar(t, intType); +} + } // namespace strings } // namespace theory } // namespace cvc5 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 126ee313d..f0376dbc6 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -107,9 +107,14 @@ class SkolemCache // in_re(a, re.++(_*, b, _*)) => // exists k_pre, k_match, k_post. // a = k_pre ++ k_match ++ k_post ^ - // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1), - // re.++(_*, b, _*)) ^ - // in_re(k2, y) + // len(k_pre) = indexof_re(x, y, 0) ^ + // (forall l. 0 < l < len(k_match) => + // ~in_re(substr(k_match, 0, l), r)) ^ + // in_re(k_match, b) + // + // k_pre is the prefix before the first, shortest match of b in a. k_match + // is the substring of a matched by b. It is either empty or there is no + // shorter string that matches b. SK_FIRST_MATCH_PRE, SK_FIRST_MATCH, SK_FIRST_MATCH_POST, @@ -180,6 +185,16 @@ class SkolemCache */ static Node mkIndexVar(Node t); + /** Make length variable + * + * This returns an integer variable of kind BOUND_VARIABLE that is used for + * axiomatizing the behavior of a term or predicate t. It refers to lengths + * of strings in the reduction of t. For example, the length variable for the + * term str.indexof(s, r, n) is used to quantify over the lengths of strings + * that could be matched by r. + */ + static Node mkLengthVar(Node t); + private: /** * Simplifies the arguments for a string skolem used for indexing into the diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index af2eecde8..c7b3b5300 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -88,10 +88,11 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc) LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality())))); lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); } - else if (tk == STRING_STRIDOF) + else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE) { - // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len - // x))) + // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x))) + // + // where f in { str.indexof, str.indexof_re } Node l = nm->mkNode(STRING_LENGTH, t[0]); lemma = nm->mkNode(AND, nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))), @@ -143,11 +144,12 @@ void TermRegistry::preRegisterTerm(TNode n) Kind k = n.getKind(); if (!options::stringExp()) { - if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI - || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL - || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL - || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE) + if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS + || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR + || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE + || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV + || k == STRING_UPDATE) { std::stringstream ss; ss << "Term of kind " << k diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 06a39ec64..b1f5a0765 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -130,6 +130,7 @@ void TheoryStrings::finishInit() d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval); + d_equalityEngine->addFunctionKind(kind::STRING_INDEXOF_RE, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval); diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 356ae28ac..83a4fa04a 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -253,6 +253,89 @@ Node StringsPreprocess::reduce(Node t, // Thus, indexof( x, y, n ) = skk. retNode = skk; } + else if (t.getKind() == kind::STRING_INDEXOF_RE) + { + // processing term: indexof_re(s, r, n) + Node s = t[0]; + Node r = t[1]; + Node n = t[2]; + Node skk = sc->mkTypedSkolemCached( + nm->integerType(), t, SkolemCache::SK_PURIFY, "iork"); + Node sLen = nm->mkNode(STRING_LENGTH, s); + + // n > len(x) + Node ub = nm->mkNode(GT, n, sLen); + // 0 > n + Node lb = nm->mkNode(GT, zero, n); + // n > len(x) OR 0 > n + Node condNegOne = nm->mkNode(OR, ub, lb); + // skk = -1 + Node retNegOne = skk.eqNode(negOne); + + Node emp = Word::mkEmptyWord(s.getType()); + // in_re("", r) + Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r); + // skk = n + Node retN = skk.eqNode(n); + + Node i = SkolemCache::mkIndexVar(t); + Node l = SkolemCache::mkLengthVar(t); + Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l); + Node bound = + nm->mkNode(AND, + { + nm->mkNode(GEQ, i, n), + nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)), + nm->mkNode(GT, l, zero), + nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)), + }); + Node body = nm->mkNode( + OR, + bound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r) + .negate()); + // forall il. + // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => + // ~in_re(substr(s, i, l), r) + Node firstMatch = mkForallInternal(bvl, body); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node validLen = + nm->mkNode(AND, + nm->mkNode(GEQ, l, n), + nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); + Node matchBody = nm->mkNode( + AND, + validLen, + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); + // skk != -1 => + // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + Node match = nm->mkNode( + OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate()); + + // assert: + // IF: n > len(s) OR 0 > n + // THEN: skk = -1 + // ELIF: in_re("", r) + // THEN: skk = n + // ELSE: (forall il. + // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => + // ~in_re(substr(s, i, l), r)) ^ + // (skk != -1 => + // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // + // Note that this reduction relies on eager reduction lemmas being sent to + // properly limit the range of skk. + Node rr = nm->mkNode( + ITE, + condNegOne, + retNegOne, + nm->mkNode( + ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match))); + asserts.push_back(rr); + + // Thus, indexof_re(s, r, n) = skk. + retNode = skk; + } else if (t.getKind() == STRING_ITOS) { // processing term: int.to.str( n ) @@ -596,17 +679,14 @@ Node StringsPreprocess::reduce(Node t, } else if (t.getKind() == STRING_REPLACE_RE) { + // processing term: replace_re( x, y, z ) Node x = t[0]; Node y = t[1]; Node z = t[2]; Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); - std::vector emptyVec; - Node sigmaStar = - nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); - Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar); - // in_re(x, re.++(_*, y, _*)) - Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + // indexof_re(x, y, 0) + Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero); // in_re("", y) Node matchesEmpty = @@ -620,42 +700,40 @@ Node StringsPreprocess::reduce(Node t, sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); Node k3 = sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); + Node k2Len = nm->mkNode(STRING_LENGTH, k2); // x = k1 ++ k2 ++ k3 - Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); - // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) - Node k2len = nm->mkNode(STRING_LENGTH, k2); - Node firstMatch = - nm->mkNode( - STRING_IN_REGEXP, - nm->mkNode( - STRING_CONCAT, - k1, - nm->mkNode( - STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))), - re) - .negate(); + Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); + // len(k1) = indexof_re(x, y, 0) + Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx); + Node l = SkolemCache::mkLengthVar(t); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node bound = + nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len)); + Node body = nm->mkNode( + OR, + bound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y) + .negate()); + // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r) + Node shortestMatch = mkForallInternal(bvll, body); // in_re(k2, y) - Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y); + Node match = nm->mkNode(STRING_IN_REGEXP, k2, y); // k = k1 ++ z ++ k3 - Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); + Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); - // IF in_re(x, re.++(_*, y, _*)) - // THEN: - // IF in_re("", y) - // THEN: k = z ++ x - // ELSE: - // x = k1 ++ k2 ++ k3 ^ - // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^ - // in_re(k2, y) ^ k = k1 ++ z ++ k3 - // ELSE: k = x - asserts.push_back(nm->mkNode( - ITE, - hasMatch, + // IF: indexof_re(x, y, 0) = -1 + // THEN: k = x + // ELSE: + // x = k1 ++ k2 ++ k3 ^ + // len(k1) = indexof_re(x, y, 0) ^ + // (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^ + // in_re(k2, y) ^ + // k = k1 ++ z ++ k3 + asserts.push_back( nm->mkNode(ITE, - matchesEmpty, - res1, - nm->mkNode(AND, {splitX, firstMatch, k2Match, res2})), - k.eqNode(x))); + idx.eqNode(negOne), + k.eqNode(x), + nm->mkNode(AND, {split, k1Len, shortestMatch, match, res}))); retNode = k; } else if (t.getKind() == STRING_REPLACE_RE_ALL) @@ -679,13 +757,10 @@ Node StringsPreprocess::reduce(Node t, Node emp = Word::mkEmptyWord(t.getType()); - std::vector emptyVec; - Node sigmaStar = - nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec)); Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); - Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar); - // in_re(x, _* ++ y' ++ _*) - Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re); + Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero); + // indexof_re(x, y', 0) = -1 + Node noMatch = idx.eqNode(negOne); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -700,8 +775,9 @@ Node StringsPreprocess::reduce(Node t, lemmas.push_back(usno.eqNode(rem)); // Uf(0) = 0 lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); - // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) - lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate()); + // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 + lemmas.push_back( + nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne)); Node i = SkolemCache::mkIndexVar(t); Node bvli = nm->mkNode(BOUND_VAR_LIST, i); @@ -709,50 +785,57 @@ Node StringsPreprocess::reduce(Node t, nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); Node ip1 = nm->mkNode(PLUS, i, one); Node ufi = nm->mkNode(APPLY_UF, uf, i); - Node uli = nm->mkNode(APPLY_UF, ul, i); Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1); - Node ii = nm->mkNode(MINUS, ufip1, uli); - Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli); - Node pfxMatch = - nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); - Node nonMatch = - nm->mkNode(STRING_SUBSTR, - x, - ufi, - nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi)); + Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1); + // ii = Uf(i + 1) - Ul(i + 1) + Node ii = nm->mkNode(MINUS, ufip1, ulip1); std::vector flem; // Ul(i) > 0 - flem.push_back(nm->mkNode(GT, uli, zero)); - // Uf(i + 1) >= Uf(i) + Ul(i) - flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli))); - // in_re(substr(x, ii, Ul(i)), y') - flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp)); - // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) - flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate()); + flem.push_back(nm->mkNode(GT, ulip1, zero)); + // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) + flem.push_back(ufip1.eqNode( + nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1))); + // in_re(substr(x, ii, Ul(i + 1)), y') + flem.push_back(nm->mkNode( + STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp)); + Node l = SkolemCache::mkLengthVar(t); + Node bvll = nm->mkNode(BOUND_VAR_LIST, l); + Node lenBound = + nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1)); + Node shortestMatchBody = nm->mkNode( + OR, + lenBound.negate(), + nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y) + .negate()); + // forall l. 0 < l < Ul(i + 1) => + // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') + flem.push_back(mkForallInternal(bvll, shortestMatchBody)); + Node pfxMatch = + nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) flem.push_back( nm->mkNode(APPLY_UF, us, i) .eqNode(nm->mkNode( STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); - Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); Node forall = mkForallInternal(bvli, body); lemmas.push_back(forall); - // IF in_re(x, re.++(_*, y', _*)) - // THEN: + // IF indexof(x, y', 0) = -1 + // THEN: k = x + // ELSE: // numOcc > 0 ^ // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ - // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*))) + // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ // forall i. 0 <= i < nummOcc => // Ul(i) > 0 ^ - // Uf(i + 1) >= Uf(i) + Ul(i) ^ - // in_re(substr(x, ii, Ul(i)), y') ^ - // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^ + // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ + // in_re(substr(x, ii, Ul(i + 1)), y') ^ + // forall l. 0 < l < Ul(i + 1) => + // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) // where ii = Uf(i + 1) - Ul(i) - // ELSE: k = x // where y' = re.diff(y, "") // // Conceptually, y' is the regex y but excluding the empty string (because @@ -760,8 +843,17 @@ Node StringsPreprocess::reduce(Node t, // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i) // is the length of the i^th match, and Us(i) is the result of processing // the remainder after processing the i^th occurrence of y in x. + // + // Visualization of Uf(i) and Ul(i): + // + // x = |------------| match 1 |-----------| match 2 |---| + // | | | + // Uf(0) Uf(1) Uf(2) + // + // |---------| |-----------| + // Ul(1) Ul(2) asserts.push_back( - nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x))); + nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas))); retNode = k; } else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index 6897d5272..c763557ca 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -383,8 +383,9 @@ TypeNode getOwnerStringType(Node n) { TypeNode tn; Kind k = n.getKind(); - if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN - || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX) + if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH + || k == STRING_STRCTN || k == SEQ_NTH || k == STRING_PREFIX + || k == STRING_SUFFIX) { // owning string type is the type of first argument tn = n[0].getType(); diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 35d2553de..422acd048 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1116,6 +1116,7 @@ set(regress_0_tests regress0/strings/idof-sem.smt2 regress0/strings/ilc-like.smt2 regress0/strings/indexof-sym-simp.smt2 + regress0/strings/indexof_re.smt2 regress0/strings/is_digit_simple.smt2 regress0/strings/issue1189.smt2 regress0/strings/issue2958.smt2 @@ -2047,6 +2048,7 @@ set(regress_1_tests regress1/strings/cmu-substr-rw.smt2 regress1/strings/code-sequence.smt2 regress1/strings/complement-test.smt2 + regress1/strings/indexof_re_red.smt2 regress1/strings/crash-1019.smt2 regress1/strings/csp-prefix-exp-bug.smt2 regress1/strings/double-replace.smt2 @@ -2089,6 +2091,8 @@ set(regress_1_tests regress1/strings/issue5692-infer-proxy.smt2 regress1/strings/issue5940-skc-len-conc.smt2 regress1/strings/issue5940-2-skc-len-conc.smt2 + regress1/strings/issue6057-replace-re.smt2 + regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 regress1/strings/issue6072-inc-no-const-reg.smt2 regress1/strings/issue6075-repl-len-one-rr.smt2 regress1/strings/issue6132-non-unique-skolem.smt2 @@ -2096,12 +2100,15 @@ set(regress_1_tests regress1/strings/issue6191-replace-all.smt2 regress1/strings/issue6203-1-substr-ctn-strip.smt2 regress1/strings/issue6203-2-re-ccache.smt2 + regress1/strings/issue6203-6-replace-re.smt2 regress1/strings/issue6214-2-sym-re-inc.smt2 regress1/strings/issue6214-3-sym-re-inc.smt2 regress1/strings/issue6214-4-sym-re-inc.smt2 regress1/strings/issue6270.smt2 regress1/strings/issue6271-rnf.smt2 regress1/strings/issue6271-2-rnf.smt2 + regress1/strings/issue6337-replace-re-all.smt2 + regress1/strings/issue6337-replace-re.smt2 regress1/strings/issue6567-empty-re-range.smt2 regress1/strings/issue6604-2.smt2 regress1/strings/kaluza-fl.smt2 @@ -2441,6 +2448,8 @@ set(regress_2_tests regress2/strings/issue3203.smt2 regress2/strings/issue5381.smt2 regress2/strings/issue6483.smt2 + regress2/strings/issue6057-replace-re-all.smt2 + regress2/strings/issue6057-replace-re-all-simplified.smt2 regress2/strings/issue918.smt2 regress2/strings/non_termination_regular_expression6.smt2 regress2/strings/range-perf.smt2 @@ -2532,6 +2541,7 @@ set(regress_3_tests regress3/specsharp-WindowsCard.15.RTE.Terminate_System.Int32.smt2 regress3/strings-any-term.sy regress3/strings/extf_d_perf.smt2 + regress3/strings/indexof_re_red.smt2 regress3/strings/norn-dis-0707-3.smt2 regress3/strings/replace_re_all.smt2 regress3/unbdd_inv_gen_ex7.sy diff --git a/test/regress/regress0/strings/indexof_re.smt2 b/test/regress/regress0/strings/indexof_re.smt2 new file mode 100644 index 000000000..72c5d7ee7 --- /dev/null +++ b/test/regress/regress0/strings/indexof_re.smt2 @@ -0,0 +1,18 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-const x String) +(assert (or + (not (= (str.indexof_re "abc" re.allchar (- 1)) (- 1))) + (not (= (str.indexof_re "abc" re.allchar (- 2)) (- 1))) + (not (= (str.indexof_re "abc" re.allchar 5) (- 1))) + (not (= (str.indexof_re "abc" re.allchar 0) 0)) + (not (= (str.indexof_re "abc" re.allchar 1) 1)) + (not (= (str.indexof_re "abc" re.allchar 2) 2)) + (not (= (str.indexof_re "abc" re.allchar 3) (- 1))) + (not (= (str.indexof_re "abc" (re.++ re.allchar re.allchar) 2) (- 1))) + (not (= (str.indexof_re "abc" (re.union (str.to_re "") re.allchar) 3) 3)) + (not (= (str.indexof_re (str.++ "abc" x) (re.union (str.to_re "") re.allchar) 3) 3)) + (not (= (str.indexof_re "cbabc" (re.union (str.to_re "a") (str.to_re "bab")) 0) 1)) +)) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress1/strings/indexof_re_red.smt2 b/test/regress/regress1/strings/indexof_re_red.smt2 new file mode 100644 index 000000000..f32a6723b --- /dev/null +++ b/test/regress/regress1/strings/indexof_re_red.smt2 @@ -0,0 +1,52 @@ +; COMMAND-LINE: --strings-exp --incremental +(set-logic QF_SLIA) +(declare-const x String) +(declare-const i Int) + +(push) +(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0))) +(assert (= i (str.len x))) +(set-info :status sat) +(check-sat) +(pop) + +(push) +(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0))) +(assert (= i (+ (str.len x) 1))) +(set-info :status unsat) +(check-sat) +(pop) + +(push) +(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0))) +(assert (= i 0)) +(set-info :status sat) +(check-sat) +(pop) + +(push) +(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0))) +(assert (= i (- 1))) +(set-info :status sat) +(check-sat) +(pop) + +(push) +(assert (= 2 (str.indexof_re x (str.to_re "a") 1))) +(set-info :status sat) +(check-sat) +(pop) + +(push) +(assert (= i (str.indexof_re x (str.to_re "") 0))) +(assert (= i (str.len x))) +(set-info :status sat) +(check-sat) +(pop) + +(push) +(assert (= i (str.indexof_re x (re.comp (str.to_re "")) 0))) +(assert (= i (str.len x))) +(set-info :status unsat) +(check-sat) +(pop) diff --git a/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 b/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 new file mode 100644 index 000000000..c9d93d024 --- /dev/null +++ b/test/regress/regress1/strings/issue6057-replace-re-all-jiwonparc.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +; A complicated way of saying a = "b" +(assert (str.in_re a (re.++ (re.* (re.opt (str.to_re a))) (str.to_re "b")))) +; Corresponds to replace_re_all("ab", a*b, "") contains "a" +(assert (str.contains (str.replace_re_all (str.++ "a" a) (re.++ (re.* (str.to_re "a")) (str.to_re "b")) "") "a")) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress1/strings/issue6057-replace-re.smt2 b/test/regress/regress1/strings/issue6057-replace-re.smt2 new file mode 100644 index 000000000..192b244b4 --- /dev/null +++ b/test/regress/regress1/strings/issue6057-replace-re.smt2 @@ -0,0 +1,7 @@ +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (str.suffixof "a" a)) +(assert (str.in_re a (re.* (re.union (str.to_re (str.replace_re a (re.++ (re.* (str.to_re "a")) (str.to_re "ba")) "")) (str.to_re "b"))))) +(assert (str.in_re a (re.++ (re.* (str.to_re (ite (str.in_re a (re.* (str.to_re "b"))) "" "u"))) (re.* (str.to_re a))))) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress1/strings/issue6203-6-replace-re.smt2 b/test/regress/regress1/strings/issue6203-6-replace-re.smt2 new file mode 100644 index 000000000..d5e6acd45 --- /dev/null +++ b/test/regress/regress1/strings/issue6203-6-replace-re.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (not (str.in_re (str.replace_re a (re.++ (re.* (re.union (str.to_re "a") (str.to_re ""))) (str.to_re "b")) "") (str.to_re "")))) +(assert (ite (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u"))) + (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u"))) + (str.<= a "b"))) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress1/strings/issue6337-replace-re-all.smt2 b/test/regress/regress1/strings/issue6337-replace-re-all.smt2 new file mode 100644 index 000000000..9ae168b63 --- /dev/null +++ b/test/regress/regress1/strings/issue6337-replace-re-all.smt2 @@ -0,0 +1,10 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun str3 () String) +(declare-fun str8 () String) +(declare-fun str12 () String) +(declare-fun str14 () String) +(declare-fun str15 () String) +(assert (distinct str15 (str.++ str14 str3 str8 str14) (str.replace_re_all str12 (re.comp (str.to_re str15)) (str.++ str14 str3 str8 str14)) str12)) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress1/strings/issue6337-replace-re.smt2 b/test/regress/regress1/strings/issue6337-replace-re.smt2 new file mode 100644 index 000000000..78895b24e --- /dev/null +++ b/test/regress/regress1/strings/issue6337-replace-re.smt2 @@ -0,0 +1,13 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(declare-fun b () String) +(assert + (str.in_re + (str.replace_re (str.++ a "zb") + (re.union (str.to_re "z") (str.to_re "a") + (re.++ (re.* (str.to_re a)) + (re.union (str.to_re "z") (str.to_re "a")))) b) + (re.opt (str.to_re "bb")))) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2 b/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2 new file mode 100644 index 000000000..83860ef86 --- /dev/null +++ b/test/regress/regress2/strings/issue6057-replace-re-all-simplified.smt2 @@ -0,0 +1,11 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun literal_5 () String) +(assert (not (= + literal_5 + (str.replace_re_all + literal_5 + (re.++ (re.* re.allchar) (str.to_re "\u{5c}\u{3c}\u{53}\u{43}\u{52}\u{49}\u{50}\u{54}") (re.* re.allchar)) + literal_5)))) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6057-replace-re-all.smt2 b/test/regress/regress2/strings/issue6057-replace-re-all.smt2 new file mode 100644 index 000000000..9819b75dd --- /dev/null +++ b/test/regress/regress2/strings/issue6057-replace-re-all.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun literal_0 () String) +(assert (and (str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}") +(str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0))) +(str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}") (str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0))))) +(assert (not (str.<= "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}" "\u{2f}\u{65}\u{76}\u{69}\u{6c}"))) +(set-info :status sat) +(check-sat) diff --git a/test/regress/regress2/strings/replace_re.smt2 b/test/regress/regress2/strings/replace_re.smt2 index 1f9b2a2ee..a58314150 100644 --- a/test/regress/regress2/strings/replace_re.smt2 +++ b/test/regress/regress2/strings/replace_re.smt2 @@ -40,3 +40,11 @@ (set-info :status unsat) (check-sat) (pop) + +(push) +(assert (= "FOO" (str.replace_re x (re.union (str.to_re "A") (str.to_re "ABC")) "FOO"))) +(assert (not (= x "FOO"))) +(assert (> (str.len x) 1)) +(set-info :status unsat) +(check-sat) +(pop) diff --git a/test/regress/regress3/strings/indexof_re_red.smt2 b/test/regress/regress3/strings/indexof_re_red.smt2 new file mode 100644 index 000000000..0e115cfd3 --- /dev/null +++ b/test/regress/regress3/strings/indexof_re_red.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-const x String) +(declare-const i Int) + +(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a")) 0))) +(assert (not (or (= i 0) (= i (- 1))))) +(set-info :status unsat) +(check-sat) -- 2.30.2