From: Andres Noetzli Date: Fri, 4 Jun 2021 00:41:40 +0000 (-0700) Subject: Fix handling of start index in `str.indexof_re` (#6674) X-Git-Tag: cvc5-1.0.0~1643 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=adf497af7a3fc8b06b875eaf9feca2568d0ba9d8;p=cvc5.git Fix handling of start index in `str.indexof_re` (#6674) Fixes #6636, fixes #6637. When the start index was non-zero, the result of str.indexof_re was not properly restricted to be greater or equal to the start index. This commit fixes the issue by making the eager reduction lemma more precise. Additionally, the commit fixes an issue with the lower bound of the length of the match in str.indexof_re. --- diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index c7b3b5300..aac9e9313 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -90,13 +90,17 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc) } else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE) { - // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x))) + // (and + // (or (= (f x y n) (- 1)) (>= (f x y n) n)) + // (<= (f x y n) (str.len x))) // // where f in { str.indexof, str.indexof_re } Node l = nm->mkNode(STRING_LENGTH, t[0]); - lemma = nm->mkNode(AND, - nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))), - nm->mkNode(LEQ, t, l)); + lemma = nm->mkNode( + AND, + nm->mkNode( + OR, nm->mkConst(Rational(-1)).eqNode(t), nm->mkNode(GEQ, t, t[2])), + nm->mkNode(LEQ, t, l)); } else if (tk == STRING_STOI) { diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 83a4fa04a..152a3e180 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -301,14 +301,14 @@ Node StringsPreprocess::reduce(Node t, Node bvll = nm->mkNode(BOUND_VAR_LIST, l); Node validLen = nm->mkNode(AND, - nm->mkNode(GEQ, l, n), + nm->mkNode(GEQ, l, zero), nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); Node matchBody = nm->mkNode( AND, validLen, nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); // skk != -1 => - // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r)) Node match = nm->mkNode( OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate()); @@ -321,7 +321,7 @@ Node StringsPreprocess::reduce(Node t, // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => // ~in_re(substr(s, i, l), r)) ^ // (skk != -1 => - // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // exists l. 0 <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) // // Note that this reduction relies on eager reduction lemmas being sent to // properly limit the range of skk. @@ -758,9 +758,8 @@ Node StringsPreprocess::reduce(Node t, Node emp = Word::mkEmptyWord(t.getType()); Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); - Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero); // indexof_re(x, y', 0) = -1 - Node noMatch = idx.eqNode(negOne); + Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -791,7 +790,7 @@ Node StringsPreprocess::reduce(Node t, Node ii = nm->mkNode(MINUS, ufip1, ulip1); std::vector flem; - // Ul(i) > 0 + // Ul(i + 1) > 0 flem.push_back(nm->mkNode(GT, ulip1, zero)); // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) flem.push_back(ufip1.eqNode( @@ -829,13 +828,13 @@ Node StringsPreprocess::reduce(Node t, // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ // forall i. 0 <= i < nummOcc => - // Ul(i) > 0 ^ + // Ul(i + 1) > 0 ^ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ // in_re(substr(x, ii, Ul(i + 1)), y') ^ // forall l. 0 < l < Ul(i + 1) => - // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') + // ~in_re(substr(x, ii, l), y') // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) - // where ii = Uf(i + 1) - Ul(i) + // where ii = Uf(i + 1) - Ul(i + 1) // where y' = re.diff(y, "") // // Conceptually, y' is the regex y but excluding the empty string (because diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 860d1854a..6036b021b 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1120,6 +1120,7 @@ set(regress_0_tests regress0/strings/ilc-like.smt2 regress0/strings/indexof-sym-simp.smt2 regress0/strings/indexof_re.smt2 + regress0/strings/indexof_re-start-index.smt2 regress0/strings/is_digit_simple.smt2 regress0/strings/issue1189.smt2 regress0/strings/issue2958.smt2 @@ -2456,6 +2457,8 @@ set(regress_2_tests regress2/strings/issue6483.smt2 regress2/strings/issue6057-replace-re-all.smt2 regress2/strings/issue6057-replace-re-all-simplified.smt2 + regress2/strings/issue6636-replace-re-all.smt2 + regress2/strings/issue6637-replace-re-all.smt2 regress2/strings/issue918.smt2 regress2/strings/non_termination_regular_expression6.smt2 regress2/strings/range-perf.smt2 diff --git a/test/regress/regress0/strings/indexof_re-start-index.smt2 b/test/regress/regress0/strings/indexof_re-start-index.smt2 new file mode 100644 index 000000000..9f17c5bef --- /dev/null +++ b/test/regress/regress0/strings/indexof_re-start-index.smt2 @@ -0,0 +1,8 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun i () Int) +(declare-fun a () String) +(assert (= i (str.indexof_re a (str.to_re "abc") 3))) +(assert (and (>= i 0) (< i 3))) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6636-replace-re-all.smt2 b/test/regress/regress2/strings/issue6636-replace-re-all.smt2 new file mode 100644 index 000000000..779ef3af6 --- /dev/null +++ b/test/regress/regress2/strings/issue6636-replace-re-all.smt2 @@ -0,0 +1,6 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (not (= a (str.replace_re_all a (re.++ (re.* re.allchar) (str.to_re a) (re.* re.allchar)) a)))) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6637-replace-re-all.smt2 b/test/regress/regress2/strings/issue6637-replace-re-all.smt2 new file mode 100644 index 000000000..df4e0bd0e --- /dev/null +++ b/test/regress/regress2/strings/issue6637-replace-re-all.smt2 @@ -0,0 +1,7 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (= (str.len a) 2)) +(assert (= (str.len (str.replace_re_all a (str.to_re "A") "B")) 3)) +(set-info :status unsat) +(check-sat)