if arrays `a` and `b` are equal on all indices within indices `i` and `j`.
* Support for an integer operator `(_ iand n)` that returns the bitwise `and`
of two integers, seen as integers modulo n.
+* Strings:
+ * Support for `str.indexof_re(s, r, n)`, which returns the index of the first
+ occurrence of a regular expression `r` in a string `s` after index `n` or
+ -1 if `r` does not match a substring after `n`.
Improvements:
* New API: Added functions to retrieve the heap/nil term when using separation
{STRING_CHARAT, cvc5::Kind::STRING_CHARAT},
{STRING_CONTAINS, cvc5::Kind::STRING_STRCTN},
{STRING_INDEXOF, cvc5::Kind::STRING_STRIDOF},
+ {STRING_INDEXOF_RE, cvc5::Kind::STRING_INDEXOF_RE},
{STRING_REPLACE, cvc5::Kind::STRING_STRREPL},
{STRING_REPLACE_ALL, cvc5::Kind::STRING_STRREPLALL},
{STRING_REPLACE_RE, cvc5::Kind::STRING_REPLACE_RE},
{cvc5::Kind::STRING_CHARAT, STRING_CHARAT},
{cvc5::Kind::STRING_STRCTN, STRING_CONTAINS},
{cvc5::Kind::STRING_STRIDOF, STRING_INDEXOF},
+ {cvc5::Kind::STRING_INDEXOF_RE, STRING_INDEXOF_RE},
{cvc5::Kind::STRING_STRREPL, STRING_REPLACE},
{cvc5::Kind::STRING_STRREPLALL, STRING_REPLACE_ALL},
{cvc5::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE},
* - `Solver::mkTerm(Kind kind, const std::vector<Term>& children) const`
*/
STRING_INDEXOF,
+ /**
+ * String index-of regular expression match.
+ * Returns the first match of a regular expression r in a string s. If the
+ * index is negative or greater than the length of string s1, or r does not
+ * match a substring in s after index i, the result is -1.
+ *
+ * Parameters:
+ * - 1: Term of sort String (string s)
+ * - 2: Term of sort RegLan (regular expression r)
+ * - 3: Term of sort Integer (index i)
+ *
+ * Create with:
+ * - `Solver::mkTerm(Kind kind, const Term& child1, const Term& child2, const Term& child3) const`
+ * - `Solver::mkTerm(Kind kind, const std::vector<Term>& children) const`
+ */
+ STRING_INDEXOF_RE,
/**
* String replace.
* Replaces a string s2 in a string s1 with string s3. If s2 does not appear
addOperator(api::STRING_REPLACE_RE_ALL, "str.replace_re_all");
if (!strictModeEnabled())
{
+ addOperator(api::STRING_INDEXOF_RE, "str.indexof_re");
addOperator(api::STRING_UPDATE, "str.update");
addOperator(api::STRING_TOLOWER, "str.tolower");
addOperator(api::STRING_TOUPPER, "str.toupper");
case kind::STRING_CHARAT:
case kind::STRING_STRCTN:
case kind::STRING_STRIDOF:
+ case kind::STRING_INDEXOF_RE:
case kind::STRING_STRREPL:
case kind::STRING_STRREPLALL:
case kind::STRING_REPLACE_RE:
case kind::STRING_STRCTN: return "str.contains" ;
case kind::STRING_CHARAT: return "str.at" ;
case kind::STRING_STRIDOF: return "str.indexof" ;
+ case kind::STRING_INDEXOF_RE: return "str.indexof_re";
case kind::STRING_STRREPL: return "str.replace" ;
case kind::STRING_STRREPLALL: return "str.replace_all";
case kind::STRING_REPLACE_RE: return "str.replace_re";
d_extt.addFunctionKind(kind::STRING_SUBSTR);
d_extt.addFunctionKind(kind::STRING_UPDATE);
d_extt.addFunctionKind(kind::STRING_STRIDOF);
+ d_extt.addFunctionKind(kind::STRING_INDEXOF_RE);
d_extt.addFunctionKind(kind::STRING_ITOS);
d_extt.addFunctionKind(kind::STRING_STOI);
d_extt.addFunctionKind(kind::STRING_STRREPL);
{
NodeManager* nm = NodeManager::currentNM();
Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_STRCTN
- || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
- || k == STRING_STRREPL || k == STRING_STRREPLALL || k == SEQ_NTH
- || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
- || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
- || k == STRING_REV);
+ || k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+ || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL
+ || k == SEQ_NTH || k == STRING_REPLACE_RE
+ || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
+ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
std::vector<Node> new_nodes;
Node res = d_preproc.simplify(n, new_nodes);
Assert(res != n);
operator STRING_STRCTN 2 "string contains"
operator STRING_LT 2 "string less than"
operator STRING_LEQ 2 "string less than or equal"
-operator STRING_STRIDOF 3 "string indexof"
+operator STRING_STRIDOF 3 "string index of substring"
+operator STRING_INDEXOF_RE 3 "string index of regular expression match"
operator STRING_STRREPL 3 "string replace"
operator STRING_STRREPLALL 3 "string replace all"
operator STRING_REPLACE_RE 3 "string replace regular expression match"
operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches"
-typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
-typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
operator STRING_PREFIX 2 "string prefixof"
operator STRING_SUFFIX 2 "string suffixof"
operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit"
typerule STRING_CHARAT ::cvc5::theory::strings::StringAtTypeRule
typerule STRING_STRCTN ::cvc5::theory::strings::StringRelationTypeRule
typerule STRING_STRIDOF ::cvc5::theory::strings::StringIndexOfTypeRule
+typerule STRING_INDEXOF_RE "SimpleTypeRule<RInteger, AString, ARegExp, AInteger>"
typerule STRING_STRREPL ::cvc5::theory::strings::StringReplaceTypeRule
typerule STRING_STRREPLALL ::cvc5::theory::strings::StringReplaceTypeRule
+typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
+typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
typerule STRING_PREFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
typerule STRING_SUFFIX ::cvc5::theory::strings::StringStrToBoolTypeRule
typerule STRING_REV ::cvc5::theory::strings::StringStrToStrTypeRule
case Rewrite::IDOF_PULL_ENDPT: return "IDOF_PULL_ENDPT";
case Rewrite::IDOF_STRIP_CNST_ENDPTS: return "IDOF_STRIP_CNST_ENDPTS";
case Rewrite::IDOF_STRIP_SYM_LEN: return "IDOF_STRIP_SYM_LEN";
+ case Rewrite::INDEXOF_RE_EMP_RE: return "INDEXOF_RE_EMP_RE";
+ case Rewrite::INDEXOF_RE_EVAL: return "INDEXOF_RE_EVAL";
+ case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX";
+ case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX";
case Rewrite::ITOS_EVAL: return "ITOS_EVAL";
case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY";
case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN";
IDOF_PULL_ENDPT,
IDOF_STRIP_CNST_ENDPTS,
IDOF_STRIP_SYM_LEN,
+ INDEXOF_RE_EMP_RE,
+ INDEXOF_RE_EVAL,
+ INDEXOF_RE_INVALID_INDEX,
+ INDEXOF_RE_MAX_INDEX,
ITOS_EVAL,
RE_AND_EMPTY,
RE_ANDOR_FLATTEN,
{
retNode = rewriteIndexof(node);
}
+ else if (nk == kind::STRING_INDEXOF_RE)
+ {
+ retNode = rewriteIndexofRe(node);
+ }
else if (nk == kind::STRING_STRREPL)
{
retNode = rewriteReplace(node);
return node;
}
+Node SequencesRewriter::rewriteIndexofRe(Node node)
+{
+ Assert(node.getKind() == STRING_INDEXOF_RE);
+ NodeManager* nm = NodeManager::currentNM();
+ Node s = node[0];
+ Node r = node[1];
+ Node n = node[2];
+ Node zero = nm->mkConst(Rational(0));
+ Node slen = nm->mkNode(STRING_LENGTH, s);
+
+ if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
+ {
+ Node ret = nm->mkConst(Rational(-1));
+ return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
+ }
+
+ if (RegExpEntail::isConstRegExp(r))
+ {
+ if (s.isConst() && n.isConst())
+ {
+ Rational nrat = n.getConst<Rational>();
+ cvc5::Rational rMaxInt(cvc5::String::maxSize());
+ if (nrat > rMaxInt)
+ {
+ // We know that, due to limitations on the size of string constants
+ // in our implementation, that accessing a position greater than
+ // rMaxInt is guaranteed to be out of bounds.
+ Node negone = nm->mkConst(Rational(-1));
+ return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
+ }
+
+ uint32_t start = nrat.getNumerator().toUnsignedInt();
+ Node rem = nm->mkConst(s.getConst<String>().substr(start));
+ std::pair<size_t, size_t> match = firstMatch(rem, r);
+ Node ret = nm->mkConst(
+ Rational(match.first == string::npos
+ ? -1
+ : static_cast<int64_t>(start + match.first)));
+ return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
+ }
+
+ if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
+ {
+ String emptyStr("");
+ if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
+ {
+ return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
+ }
+ }
+ }
+ return node;
+}
+
Node SequencesRewriter::rewriteReplace(Node node)
{
Assert(node.getKind() == kind::STRING_STRREPL);
std::vector<Node> res;
String rem = x.getConst<String>();
std::pair<size_t, size_t> match(0, 0);
- while (rem.size() >= 0)
+ while (rem.size() != 0)
{
match = firstMatch(nm->mkConst(rem), yp);
if (match.first == string::npos)
* Returns the rewritten form of node.
*/
Node rewriteIndexof(Node node);
+ /** rewrite indexof regular expression match
+ * This is the entry point for post-rewriting terms n of the form
+ * str.indexof_re( s, r, n )
+ * Returns the rewritten form of node.
+ */
+ Node rewriteIndexofRe(Node node);
/** rewrite replace
* This is the entry point for post-rewriting terms n of the form
* str.replace( s, t, r )
};
typedef expr::Attribute<IndexVarAttributeId, Node> IndexVarAttribute;
+/**
+ * A bound variable corresponding to the universally quantified integer
+ * variable used to range over the valid lengths of a string, used for
+ * axiomatizing the behavior of some term.
+ */
+struct LengthVarAttributeId
+{
+};
+typedef expr::Attribute<LengthVarAttributeId, Node> LengthVarAttribute;
+
SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts)
{
NodeManager* nm = NodeManager::currentNM();
return bvm->mkBoundVar<IndexVarAttribute>(t, intType);
}
+Node SkolemCache::mkLengthVar(Node t)
+{
+ NodeManager* nm = NodeManager::currentNM();
+ TypeNode intType = nm->integerType();
+ BoundVarManager* bvm = nm->getBoundVarManager();
+ return bvm->mkBoundVar<LengthVarAttribute>(t, intType);
+}
+
} // namespace strings
} // namespace theory
} // namespace cvc5
// in_re(a, re.++(_*, b, _*)) =>
// exists k_pre, k_match, k_post.
// a = k_pre ++ k_match ++ k_post ^
- // ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
- // re.++(_*, b, _*)) ^
- // in_re(k2, y)
+ // len(k_pre) = indexof_re(x, y, 0) ^
+ // (forall l. 0 < l < len(k_match) =>
+ // ~in_re(substr(k_match, 0, l), r)) ^
+ // in_re(k_match, b)
+ //
+ // k_pre is the prefix before the first, shortest match of b in a. k_match
+ // is the substring of a matched by b. It is either empty or there is no
+ // shorter string that matches b.
SK_FIRST_MATCH_PRE,
SK_FIRST_MATCH,
SK_FIRST_MATCH_POST,
*/
static Node mkIndexVar(Node t);
+ /** Make length variable
+ *
+ * This returns an integer variable of kind BOUND_VARIABLE that is used for
+ * axiomatizing the behavior of a term or predicate t. It refers to lengths
+ * of strings in the reduction of t. For example, the length variable for the
+ * term str.indexof(s, r, n) is used to quantify over the lengths of strings
+ * that could be matched by r.
+ */
+ static Node mkLengthVar(Node t);
+
private:
/**
* Simplifies the arguments for a string skolem used for indexing into the
LT, t, nm->mkConst(Rational(utils::getAlphabetCardinality()))));
lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
}
- else if (tk == STRING_STRIDOF)
+ else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE)
{
- // (and (>= (str.indexof x y n) (- 1)) (<= (str.indexof x y n) (str.len
- // x)))
+ // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x)))
+ //
+ // where f in { str.indexof, str.indexof_re }
Node l = nm->mkNode(STRING_LENGTH, t[0]);
lemma = nm->mkNode(AND,
nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))),
Kind k = n.getKind();
if (!options::stringExp())
{
- if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
- || k == STRING_STRREPL || k == STRING_SUBSTR || k == STRING_STRREPLALL
- || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
- || k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
- || k == STRING_TOUPPER || k == STRING_REV || k == STRING_UPDATE)
+ if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_ITOS
+ || k == STRING_STOI || k == STRING_STRREPL || k == STRING_SUBSTR
+ || k == STRING_STRREPLALL || k == SEQ_NTH || k == STRING_REPLACE_RE
+ || k == STRING_REPLACE_RE_ALL || k == STRING_STRCTN || k == STRING_LEQ
+ || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV
+ || k == STRING_UPDATE)
{
std::stringstream ss;
ss << "Term of kind " << k
d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval);
+ d_equalityEngine->addFunctionKind(kind::STRING_INDEXOF_RE, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval);
d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval);
// Thus, indexof( x, y, n ) = skk.
retNode = skk;
}
+ else if (t.getKind() == kind::STRING_INDEXOF_RE)
+ {
+ // processing term: indexof_re(s, r, n)
+ Node s = t[0];
+ Node r = t[1];
+ Node n = t[2];
+ Node skk = sc->mkTypedSkolemCached(
+ nm->integerType(), t, SkolemCache::SK_PURIFY, "iork");
+ Node sLen = nm->mkNode(STRING_LENGTH, s);
+
+ // n > len(x)
+ Node ub = nm->mkNode(GT, n, sLen);
+ // 0 > n
+ Node lb = nm->mkNode(GT, zero, n);
+ // n > len(x) OR 0 > n
+ Node condNegOne = nm->mkNode(OR, ub, lb);
+ // skk = -1
+ Node retNegOne = skk.eqNode(negOne);
+
+ Node emp = Word::mkEmptyWord(s.getType());
+ // in_re("", r)
+ Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r);
+ // skk = n
+ Node retN = skk.eqNode(n);
+
+ Node i = SkolemCache::mkIndexVar(t);
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l);
+ Node bound =
+ nm->mkNode(AND,
+ {
+ nm->mkNode(GEQ, i, n),
+ nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)),
+ nm->mkNode(GT, l, zero),
+ nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)),
+ });
+ Node body = nm->mkNode(
+ OR,
+ bound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r)
+ .negate());
+ // forall il.
+ // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+ // ~in_re(substr(s, i, l), r)
+ Node firstMatch = mkForallInternal(bvl, body);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node validLen =
+ nm->mkNode(AND,
+ nm->mkNode(GEQ, l, n),
+ nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk)));
+ Node matchBody = nm->mkNode(
+ AND,
+ validLen,
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r));
+ // skk != -1 =>
+ // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+ Node match = nm->mkNode(
+ OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate());
+
+ // assert:
+ // IF: n > len(s) OR 0 > n
+ // THEN: skk = -1
+ // ELIF: in_re("", r)
+ // THEN: skk = n
+ // ELSE: (forall il.
+ // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
+ // ~in_re(substr(s, i, l), r)) ^
+ // (skk != -1 =>
+ // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r))
+ //
+ // Note that this reduction relies on eager reduction lemmas being sent to
+ // properly limit the range of skk.
+ Node rr = nm->mkNode(
+ ITE,
+ condNegOne,
+ retNegOne,
+ nm->mkNode(
+ ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match)));
+ asserts.push_back(rr);
+
+ // Thus, indexof_re(s, r, n) = skk.
+ retNode = skk;
+ }
else if (t.getKind() == STRING_ITOS)
{
// processing term: int.to.str( n )
}
else if (t.getKind() == STRING_REPLACE_RE)
{
+ // processing term: replace_re( x, y, z )
Node x = t[0];
Node y = t[1];
Node z = t[2];
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
- std::vector<Node> emptyVec;
- Node sigmaStar =
- nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
- Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, y, sigmaStar);
- // in_re(x, re.++(_*, y, _*))
- Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+ // indexof_re(x, y, 0)
+ Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero);
// in_re("", y)
Node matchesEmpty =
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
Node k3 =
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
+ Node k2Len = nm->mkNode(STRING_LENGTH, k2);
// x = k1 ++ k2 ++ k3
- Node splitX = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
- // ~in_re(k1 ++ str.substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*))
- Node k2len = nm->mkNode(STRING_LENGTH, k2);
- Node firstMatch =
- nm->mkNode(
- STRING_IN_REGEXP,
- nm->mkNode(
- STRING_CONCAT,
- k1,
- nm->mkNode(
- STRING_SUBSTR, k2, zero, nm->mkNode(MINUS, k2len, one))),
- re)
- .negate();
+ Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
+ // len(k1) = indexof_re(x, y, 0)
+ Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx);
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node bound =
+ nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len));
+ Node body = nm->mkNode(
+ OR,
+ bound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y)
+ .negate());
+ // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)
+ Node shortestMatch = mkForallInternal(bvll, body);
// in_re(k2, y)
- Node k2Match = nm->mkNode(STRING_IN_REGEXP, k2, y);
+ Node match = nm->mkNode(STRING_IN_REGEXP, k2, y);
// k = k1 ++ z ++ k3
- Node res2 = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
+ Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
- // IF in_re(x, re.++(_*, y, _*))
- // THEN:
- // IF in_re("", y)
- // THEN: k = z ++ x
- // ELSE:
- // x = k1 ++ k2 ++ k3 ^
- // ~in_re(k1 ++ substr(k2, 0, str.len(k2) - 1), re.++(_*, y, _*)) ^
- // in_re(k2, y) ^ k = k1 ++ z ++ k3
- // ELSE: k = x
- asserts.push_back(nm->mkNode(
- ITE,
- hasMatch,
+ // IF: indexof_re(x, y, 0) = -1
+ // THEN: k = x
+ // ELSE:
+ // x = k1 ++ k2 ++ k3 ^
+ // len(k1) = indexof_re(x, y, 0) ^
+ // (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^
+ // in_re(k2, y) ^
+ // k = k1 ++ z ++ k3
+ asserts.push_back(
nm->mkNode(ITE,
- matchesEmpty,
- res1,
- nm->mkNode(AND, {splitX, firstMatch, k2Match, res2})),
- k.eqNode(x)));
+ idx.eqNode(negOne),
+ k.eqNode(x),
+ nm->mkNode(AND, {split, k1Len, shortestMatch, match, res})));
retNode = k;
}
else if (t.getKind() == STRING_REPLACE_RE_ALL)
Node emp = Word::mkEmptyWord(t.getType());
- std::vector<Node> emptyVec;
- Node sigmaStar =
- nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
- Node re = nm->mkNode(REGEXP_CONCAT, sigmaStar, yp, sigmaStar);
- // in_re(x, _* ++ y' ++ _*)
- Node hasMatch = nm->mkNode(STRING_IN_REGEXP, x, re);
+ Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero);
+ // indexof_re(x, y', 0) = -1
+ Node noMatch = idx.eqNode(negOne);
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
Node usno = nm->mkNode(APPLY_UF, us, numOcc);
lemmas.push_back(usno.eqNode(rem));
// Uf(0) = 0
lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
- // not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
- lemmas.push_back(nm->mkNode(STRING_IN_REGEXP, rem, re).negate());
+ // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1
+ lemmas.push_back(
+ nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne));
Node i = SkolemCache::mkIndexVar(t);
Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
Node ip1 = nm->mkNode(PLUS, i, one);
Node ufi = nm->mkNode(APPLY_UF, uf, i);
- Node uli = nm->mkNode(APPLY_UF, ul, i);
Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
- Node ii = nm->mkNode(MINUS, ufip1, uli);
- Node match = nm->mkNode(STRING_SUBSTR, x, ii, uli);
- Node pfxMatch =
- nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
- Node nonMatch =
- nm->mkNode(STRING_SUBSTR,
- x,
- ufi,
- nm->mkNode(MINUS, nm->mkNode(MINUS, ufip1, one), ufi));
+ Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1);
+ // ii = Uf(i + 1) - Ul(i + 1)
+ Node ii = nm->mkNode(MINUS, ufip1, ulip1);
std::vector<Node> flem;
// Ul(i) > 0
- flem.push_back(nm->mkNode(GT, uli, zero));
- // Uf(i + 1) >= Uf(i) + Ul(i)
- flem.push_back(nm->mkNode(GEQ, ufip1, nm->mkNode(PLUS, ufi, uli)));
- // in_re(substr(x, ii, Ul(i)), y')
- flem.push_back(nm->mkNode(STRING_IN_REGEXP, match, yp));
- // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*))
- flem.push_back(nm->mkNode(STRING_IN_REGEXP, nonMatch, re).negate());
+ flem.push_back(nm->mkNode(GT, ulip1, zero));
+ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1)
+ flem.push_back(ufip1.eqNode(
+ nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1)));
+ // in_re(substr(x, ii, Ul(i + 1)), y')
+ flem.push_back(nm->mkNode(
+ STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp));
+ Node l = SkolemCache::mkLengthVar(t);
+ Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
+ Node lenBound =
+ nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1));
+ Node shortestMatchBody = nm->mkNode(
+ OR,
+ lenBound.negate(),
+ nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y)
+ .negate());
+ // forall l. 0 < l < Ul(i + 1) =>
+ // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
+ flem.push_back(mkForallInternal(bvll, shortestMatchBody));
+ Node pfxMatch =
+ nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
flem.push_back(
nm->mkNode(APPLY_UF, us, i)
.eqNode(nm->mkNode(
STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
-
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
Node forall = mkForallInternal(bvli, body);
lemmas.push_back(forall);
- // IF in_re(x, re.++(_*, y', _*))
- // THEN:
+ // IF indexof(x, y', 0) = -1
+ // THEN: k = x
+ // ELSE:
// numOcc > 0 ^
// k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
- // Uf(0) = 0 ^ not(in_re(substr(x, Uf(numOcc)), re.++(_*, y', _*)))
+ // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^
// forall i. 0 <= i < nummOcc =>
// Ul(i) > 0 ^
- // Uf(i + 1) >= Uf(i) + Ul(i) ^
- // in_re(substr(x, ii, Ul(i)), y') ^
- // ~in_re(substr(x, Uf(i), Uf(i + 1) - 1 - Uf(i)), re.++(_*, y', _*)) ^
+ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^
+ // in_re(substr(x, ii, Ul(i + 1)), y') ^
+ // forall l. 0 < l < Ul(i + 1) =>
+ // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
// where ii = Uf(i + 1) - Ul(i)
- // ELSE: k = x
// where y' = re.diff(y, "")
//
// Conceptually, y' is the regex y but excluding the empty string (because
// matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
// is the length of the i^th match, and Us(i) is the result of processing
// the remainder after processing the i^th occurrence of y in x.
+ //
+ // Visualization of Uf(i) and Ul(i):
+ //
+ // x = |------------| match 1 |-----------| match 2 |---|
+ // | | |
+ // Uf(0) Uf(1) Uf(2)
+ //
+ // |---------| |-----------|
+ // Ul(1) Ul(2)
asserts.push_back(
- nm->mkNode(ITE, hasMatch, nm->mkNode(AND, lemmas), k.eqNode(x)));
+ nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas)));
retNode = k;
}
else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
{
TypeNode tn;
Kind k = n.getKind();
- if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN
- || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX)
+ if (k == STRING_STRIDOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
+ || k == STRING_STRCTN || k == SEQ_NTH || k == STRING_PREFIX
+ || k == STRING_SUFFIX)
{
// owning string type is the type of first argument
tn = n[0].getType();
regress0/strings/idof-sem.smt2
regress0/strings/ilc-like.smt2
regress0/strings/indexof-sym-simp.smt2
+ regress0/strings/indexof_re.smt2
regress0/strings/is_digit_simple.smt2
regress0/strings/issue1189.smt2
regress0/strings/issue2958.smt2
regress1/strings/cmu-substr-rw.smt2
regress1/strings/code-sequence.smt2
regress1/strings/complement-test.smt2
+ regress1/strings/indexof_re_red.smt2
regress1/strings/crash-1019.smt2
regress1/strings/csp-prefix-exp-bug.smt2
regress1/strings/double-replace.smt2
regress1/strings/issue5692-infer-proxy.smt2
regress1/strings/issue5940-skc-len-conc.smt2
regress1/strings/issue5940-2-skc-len-conc.smt2
+ regress1/strings/issue6057-replace-re.smt2
+ regress1/strings/issue6057-replace-re-all-jiwonparc.smt2
regress1/strings/issue6072-inc-no-const-reg.smt2
regress1/strings/issue6075-repl-len-one-rr.smt2
regress1/strings/issue6132-non-unique-skolem.smt2
regress1/strings/issue6191-replace-all.smt2
regress1/strings/issue6203-1-substr-ctn-strip.smt2
regress1/strings/issue6203-2-re-ccache.smt2
+ regress1/strings/issue6203-6-replace-re.smt2
regress1/strings/issue6214-2-sym-re-inc.smt2
regress1/strings/issue6214-3-sym-re-inc.smt2
regress1/strings/issue6214-4-sym-re-inc.smt2
regress1/strings/issue6270.smt2
regress1/strings/issue6271-rnf.smt2
regress1/strings/issue6271-2-rnf.smt2
+ regress1/strings/issue6337-replace-re-all.smt2
+ regress1/strings/issue6337-replace-re.smt2
regress1/strings/issue6567-empty-re-range.smt2
regress1/strings/issue6604-2.smt2
regress1/strings/kaluza-fl.smt2
regress2/strings/issue3203.smt2
regress2/strings/issue5381.smt2
regress2/strings/issue6483.smt2
+ regress2/strings/issue6057-replace-re-all.smt2
+ regress2/strings/issue6057-replace-re-all-simplified.smt2
regress2/strings/issue918.smt2
regress2/strings/non_termination_regular_expression6.smt2
regress2/strings/range-perf.smt2
regress3/specsharp-WindowsCard.15.RTE.Terminate_System.Int32.smt2
regress3/strings-any-term.sy
regress3/strings/extf_d_perf.smt2
+ regress3/strings/indexof_re_red.smt2
regress3/strings/norn-dis-0707-3.smt2
regress3/strings/replace_re_all.smt2
regress3/unbdd_inv_gen_ex7.sy
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-const x String)
+(assert (or
+ (not (= (str.indexof_re "abc" re.allchar (- 1)) (- 1)))
+ (not (= (str.indexof_re "abc" re.allchar (- 2)) (- 1)))
+ (not (= (str.indexof_re "abc" re.allchar 5) (- 1)))
+ (not (= (str.indexof_re "abc" re.allchar 0) 0))
+ (not (= (str.indexof_re "abc" re.allchar 1) 1))
+ (not (= (str.indexof_re "abc" re.allchar 2) 2))
+ (not (= (str.indexof_re "abc" re.allchar 3) (- 1)))
+ (not (= (str.indexof_re "abc" (re.++ re.allchar re.allchar) 2) (- 1)))
+ (not (= (str.indexof_re "abc" (re.union (str.to_re "") re.allchar) 3) 3))
+ (not (= (str.indexof_re (str.++ "abc" x) (re.union (str.to_re "") re.allchar) 3) 3))
+ (not (= (str.indexof_re "cbabc" (re.union (str.to_re "a") (str.to_re "bab")) 0) 1))
+))
+(set-info :status unsat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp --incremental
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const i Int)
+
+(push)
+(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0)))
+(assert (= i (str.len x)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re (str.++ x "abc") (re.++ re.allchar (str.to_re "b")) 0)))
+(assert (= i (+ (str.len x) 1)))
+(set-info :status unsat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0)))
+(assert (= i 0))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a") re.all) 0)))
+(assert (= i (- 1)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= 2 (str.indexof_re x (str.to_re "a") 1)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (str.to_re "") 0)))
+(assert (= i (str.len x)))
+(set-info :status sat)
+(check-sat)
+(pop)
+
+(push)
+(assert (= i (str.indexof_re x (re.comp (str.to_re "")) 0)))
+(assert (= i (str.len x)))
+(set-info :status unsat)
+(check-sat)
+(pop)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+; A complicated way of saying a = "b"
+(assert (str.in_re a (re.++ (re.* (re.opt (str.to_re a))) (str.to_re "b"))))
+; Corresponds to replace_re_all("ab", a*b, "") contains "a"
+(assert (str.contains (str.replace_re_all (str.++ "a" a) (re.++ (re.* (str.to_re "a")) (str.to_re "b")) "") "a"))
+(set-info :status unsat)
+(check-sat)
--- /dev/null
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(assert (str.suffixof "a" a))
+(assert (str.in_re a (re.* (re.union (str.to_re (str.replace_re a (re.++ (re.* (str.to_re "a")) (str.to_re "ba")) "")) (str.to_re "b")))))
+(assert (str.in_re a (re.++ (re.* (str.to_re (ite (str.in_re a (re.* (str.to_re "b"))) "" "u"))) (re.* (str.to_re a)))))
+(set-info :status sat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(assert (not (str.in_re (str.replace_re a (re.++ (re.* (re.union (str.to_re "a") (str.to_re ""))) (str.to_re "b")) "") (str.to_re ""))))
+(assert (ite (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u")))
+ (str.in_re a (re.diff (re.* (re.union (str.to_re "a") (str.to_re "b"))) (re.range "a" "u")))
+ (str.<= a "b")))
+(set-info :status sat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun str3 () String)
+(declare-fun str8 () String)
+(declare-fun str12 () String)
+(declare-fun str14 () String)
+(declare-fun str15 () String)
+(assert (distinct str15 (str.++ str14 str3 str8 str14) (str.replace_re_all str12 (re.comp (str.to_re str15)) (str.++ str14 str3 str8 str14)) str12))
+(set-info :status sat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun a () String)
+(declare-fun b () String)
+(assert
+ (str.in_re
+ (str.replace_re (str.++ a "zb")
+ (re.union (str.to_re "z") (str.to_re "a")
+ (re.++ (re.* (str.to_re a))
+ (re.union (str.to_re "z") (str.to_re "a")))) b)
+ (re.opt (str.to_re "bb"))))
+(set-info :status sat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun literal_5 () String)
+(assert (not (=
+ literal_5
+ (str.replace_re_all
+ literal_5
+ (re.++ (re.* re.allchar) (str.to_re "\u{5c}\u{3c}\u{53}\u{43}\u{52}\u{49}\u{50}\u{54}") (re.* re.allchar))
+ literal_5))))
+(set-info :status sat)
+(check-sat)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-fun literal_0 () String)
+(assert (and (str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}")
+(str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0)))
+(str.<= (str.++ literal_0 "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}") (str.replace_re_all (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0) (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) (str.replace_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}" (re.++ (re.* re.allchar) (re.++ (str.to_re "\u{2f}\u{65}\u{76}\u{69}\u{6c}") (re.* re.allchar))) literal_0)))))
+(assert (not (str.<= "\u{2f}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2f}\u{74}\u{79}\u{70}\u{65}\u{2f}\u{6e}\u{75}\u{6d}\u{65}\u{72}\u{69}\u{63}\u{61}\u{6c}\u{2f}\u{65}\u{64}\u{69}\u{74}\u{71}\u{75}\u{65}\u{73}\u{74}\u{69}\u{6f}\u{6e}\u{2e}\u{68}\u{74}\u{6d}\u{6c}" "\u{2f}\u{65}\u{76}\u{69}\u{6c}")))
+(set-info :status sat)
+(check-sat)
(set-info :status unsat)
(check-sat)
(pop)
+
+(push)
+(assert (= "FOO" (str.replace_re x (re.union (str.to_re "A") (str.to_re "ABC")) "FOO")))
+(assert (not (= x "FOO")))
+(assert (> (str.len x) 1))
+(set-info :status unsat)
+(check-sat)
+(pop)
--- /dev/null
+; COMMAND-LINE: --strings-exp
+(set-logic QF_SLIA)
+(declare-const x String)
+(declare-const i Int)
+
+(assert (= i (str.indexof_re x (re.++ re.all (str.to_re "a")) 0)))
+(assert (not (or (= i 0) (= i (- 1)))))
+(set-info :status unsat)
+(check-sat)