From adf497af7a3fc8b06b875eaf9feca2568d0ba9d8 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Thu, 3 Jun 2021 17:41:40 -0700 Subject: [PATCH] Fix handling of start index in `str.indexof_re` (#6674) Fixes #6636, fixes #6637. When the start index was non-zero, the result of str.indexof_re was not properly restricted to be greater or equal to the start index. This commit fixes the issue by making the eager reduction lemma more precise. Additionally, the commit fixes an issue with the lower bound of the length of the match in str.indexof_re. --- src/theory/strings/term_registry.cpp | 12 ++++++++---- .../strings/theory_strings_preprocess.cpp | 17 ++++++++--------- test/regress/CMakeLists.txt | 3 +++ .../strings/indexof_re-start-index.smt2 | 8 ++++++++ .../strings/issue6636-replace-re-all.smt2 | 6 ++++++ .../strings/issue6637-replace-re-all.smt2 | 7 +++++++ 6 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 test/regress/regress0/strings/indexof_re-start-index.smt2 create mode 100644 test/regress/regress2/strings/issue6636-replace-re-all.smt2 create mode 100644 test/regress/regress2/strings/issue6637-replace-re-all.smt2 diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp index c7b3b5300..aac9e9313 100644 --- a/src/theory/strings/term_registry.cpp +++ b/src/theory/strings/term_registry.cpp @@ -90,13 +90,17 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc) } else if (tk == STRING_STRIDOF || tk == STRING_INDEXOF_RE) { - // (and (>= (f x y n) (- 1)) (<= (f x y n) (str.len x))) + // (and + // (or (= (f x y n) (- 1)) (>= (f x y n) n)) + // (<= (f x y n) (str.len x))) // // where f in { str.indexof, str.indexof_re } Node l = nm->mkNode(STRING_LENGTH, t[0]); - lemma = nm->mkNode(AND, - nm->mkNode(GEQ, t, nm->mkConst(Rational(-1))), - nm->mkNode(LEQ, t, l)); + lemma = nm->mkNode( + AND, + nm->mkNode( + OR, nm->mkConst(Rational(-1)).eqNode(t), nm->mkNode(GEQ, t, t[2])), + nm->mkNode(LEQ, t, l)); } else if (tk == STRING_STOI) { diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 83a4fa04a..152a3e180 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -301,14 +301,14 @@ Node StringsPreprocess::reduce(Node t, Node bvll = nm->mkNode(BOUND_VAR_LIST, l); Node validLen = nm->mkNode(AND, - nm->mkNode(GEQ, l, n), + nm->mkNode(GEQ, l, zero), nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); Node matchBody = nm->mkNode( AND, validLen, nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); // skk != -1 => - // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r)) Node match = nm->mkNode( OR, retNegOne, mkForallInternal(bvll, matchBody.negate()).negate()); @@ -321,7 +321,7 @@ Node StringsPreprocess::reduce(Node t, // n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => // ~in_re(substr(s, i, l), r)) ^ // (skk != -1 => - // exists l. n <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) + // exists l. 0 <= l < len(s) - skk ^ in_re(substr(s, skk, l), r)) // // Note that this reduction relies on eager reduction lemmas being sent to // properly limit the range of skk. @@ -758,9 +758,8 @@ Node StringsPreprocess::reduce(Node t, Node emp = Word::mkEmptyWord(t.getType()); Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); - Node idx = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero); // indexof_re(x, y', 0) = -1 - Node noMatch = idx.eqNode(negOne); + Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne); Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); Node usno = nm->mkNode(APPLY_UF, us, numOcc); @@ -791,7 +790,7 @@ Node StringsPreprocess::reduce(Node t, Node ii = nm->mkNode(MINUS, ufip1, ulip1); std::vector flem; - // Ul(i) > 0 + // Ul(i + 1) > 0 flem.push_back(nm->mkNode(GT, ulip1, zero)); // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) flem.push_back(ufip1.eqNode( @@ -829,13 +828,13 @@ Node StringsPreprocess::reduce(Node t, // k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ // Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ // forall i. 0 <= i < nummOcc => - // Ul(i) > 0 ^ + // Ul(i + 1) > 0 ^ // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ // in_re(substr(x, ii, Ul(i + 1)), y') ^ // forall l. 0 < l < Ul(i + 1) => - // ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') + // ~in_re(substr(x, ii, l), y') // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) - // where ii = Uf(i + 1) - Ul(i) + // where ii = Uf(i + 1) - Ul(i + 1) // where y' = re.diff(y, "") // // Conceptually, y' is the regex y but excluding the empty string (because diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 860d1854a..6036b021b 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1120,6 +1120,7 @@ set(regress_0_tests regress0/strings/ilc-like.smt2 regress0/strings/indexof-sym-simp.smt2 regress0/strings/indexof_re.smt2 + regress0/strings/indexof_re-start-index.smt2 regress0/strings/is_digit_simple.smt2 regress0/strings/issue1189.smt2 regress0/strings/issue2958.smt2 @@ -2456,6 +2457,8 @@ set(regress_2_tests regress2/strings/issue6483.smt2 regress2/strings/issue6057-replace-re-all.smt2 regress2/strings/issue6057-replace-re-all-simplified.smt2 + regress2/strings/issue6636-replace-re-all.smt2 + regress2/strings/issue6637-replace-re-all.smt2 regress2/strings/issue918.smt2 regress2/strings/non_termination_regular_expression6.smt2 regress2/strings/range-perf.smt2 diff --git a/test/regress/regress0/strings/indexof_re-start-index.smt2 b/test/regress/regress0/strings/indexof_re-start-index.smt2 new file mode 100644 index 000000000..9f17c5bef --- /dev/null +++ b/test/regress/regress0/strings/indexof_re-start-index.smt2 @@ -0,0 +1,8 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun i () Int) +(declare-fun a () String) +(assert (= i (str.indexof_re a (str.to_re "abc") 3))) +(assert (and (>= i 0) (< i 3))) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6636-replace-re-all.smt2 b/test/regress/regress2/strings/issue6636-replace-re-all.smt2 new file mode 100644 index 000000000..779ef3af6 --- /dev/null +++ b/test/regress/regress2/strings/issue6636-replace-re-all.smt2 @@ -0,0 +1,6 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (not (= a (str.replace_re_all a (re.++ (re.* re.allchar) (str.to_re a) (re.* re.allchar)) a)))) +(set-info :status unsat) +(check-sat) diff --git a/test/regress/regress2/strings/issue6637-replace-re-all.smt2 b/test/regress/regress2/strings/issue6637-replace-re-all.smt2 new file mode 100644 index 000000000..df4e0bd0e --- /dev/null +++ b/test/regress/regress2/strings/issue6637-replace-re-all.smt2 @@ -0,0 +1,7 @@ +; COMMAND-LINE: --strings-exp +(set-logic QF_SLIA) +(declare-fun a () String) +(assert (= (str.len a) 2)) +(assert (= (str.len (str.replace_re_all a (str.to_re "A") "B")) 3)) +(set-info :status unsat) +(check-sat) -- 2.30.2