From: Andres Noetzli Date: Fri, 18 Sep 2020 14:40:26 +0000 (-0700) Subject: [Strings] Fix extended equality rewriter (#5092) X-Git-Tag: cvc5-1.0.0~2839 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f12e2d5a3bd09a91f0d6cd093a62016e456dd4a7;p=cvc5.git [Strings] Fix extended equality rewriter (#5092) Fixes #5090. Our extended equality rewriter was performing the following unsound rewrite: (= (str.++ Str13 Str5 Str16 Str13) (str.++ Str5 Str16 Str13 "cjyfqapanogtdznjbtqlfrmmfauwjdpvnhfpfhzsxaarlfvlvs")) ---> (and (= (str.++ Str13 Str5) Str5) (= (str.++ Str16 Str13) (str.++ Str16 Str13 "cjyfqapanogtdznjbtqlfrmmfauwjdpvnhfpfhzsxaarlfvlvs"))) The rule being applied was SPLIT_EQ_STRIP_R. The rewrite was applied due to the following circumstances: The rewriter found that (str.++ Str13 Str5) is strictly shorter than (str.++ Str5 Str16 Str13 "cjyfqapanogtdznjbtqlfrmmfauwjdpvnhfpfhzsxaarlfvlvs") The rewriter stripped the symbolic length of the former from the latter stripSymbolicLength() was only able to strip the first component, so there was a remaining length of (str.len Str13) The rule SPLIT_EQ_STRIP_R, however, was implicitly assuming that the symbolic length can either be stripped completly or not at all and was not considering the case where only a part of the length can be stripped. The commit adds a flag to stripSymbolicLength() that makes the function only return true if the whole length can be stripped from the input. The commit also refactors the code in stripSymbolicLength() slightly. Note: It is not necessary to try to do something smart in the case where only a partial prefix can be stripped because the rewriter tries to apply the rule to all the different prefix combinations anyway. --- diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 29c0aa2d5..eb59813b0 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -463,7 +463,8 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) std::vector rpfxv1; - if (StringsEntail::stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) + if (StringsEntail::stripSymbolicLength( + pfxv1, rpfxv1, 1, lenPfx0, true)) { std::vector sfxv0(v0.begin() + i, v0.end()); pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); @@ -490,7 +491,8 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) std::vector rpfxv0; - if (StringsEntail::stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) + if (StringsEntail::stripSymbolicLength( + pfxv0, rpfxv0, 1, lenPfx1, true)) { pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); std::vector sfxv1(v1.begin() + j, v1.end()); diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp index b2c2c3cd3..0d7866ca2 100644 --- a/src/theory/strings/strings_entail.cpp +++ b/src/theory/strings/strings_entail.cpp @@ -111,95 +111,92 @@ bool StringsEntail::canConstantContainList(Node c, bool StringsEntail::stripSymbolicLength(std::vector& n1, std::vector& nr, int dir, - Node& curr) + Node& curr, + bool strict) { Assert(dir == 1 || dir == -1); Assert(nr.empty()); NodeManager* nm = NodeManager::currentNM(); Node zero = nm->mkConst(CVC4::Rational(0)); bool ret = false; - bool success; + bool success = true; unsigned sindex = 0; - do + while (success && curr != zero && sindex < n1.size()) { Assert(!curr.isNull()); success = false; - if (curr != zero && sindex < n1.size()) + unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); + if (n1[sindex_use].isConst()) { - unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); - if (n1[sindex_use].isConst()) + // could strip part of a constant + Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr)); + if (!lowerBound.isNull()) { - // could strip part of a constant - Node lowerBound = - ArithEntail::getConstantBound(Rewriter::rewrite(curr)); - if (!lowerBound.isNull()) + Assert(lowerBound.isConst()); + Rational lbr = lowerBound.getConst(); + if (lbr.sgn() > 0) { - Assert(lowerBound.isConst()); - Rational lbr = lowerBound.getConst(); - if (lbr.sgn() > 0) + Assert(ArithEntail::check(curr, true)); + Node s = n1[sindex_use]; + size_t slen = Word::getLength(s); + Node ncl = nm->mkConst(CVC4::Rational(slen)); + Node next_s = nm->mkNode(MINUS, lowerBound, ncl); + next_s = Rewriter::rewrite(next_s); + Assert(next_s.isConst()); + // we can remove the entire constant + if (next_s.getConst().sgn() >= 0) { - Assert(ArithEntail::check(curr, true)); - Node s = n1[sindex_use]; - size_t slen = Word::getLength(s); - Node ncl = nm->mkConst(CVC4::Rational(slen)); - Node next_s = nm->mkNode(MINUS, lowerBound, ncl); - next_s = Rewriter::rewrite(next_s); - Assert(next_s.isConst()); - // we can remove the entire constant - if (next_s.getConst().sgn() >= 0) + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); + success = true; + sindex++; + } + else + { + // we can remove part of the constant + // lower bound minus the length of a concrete string is negative, + // hence lowerBound cannot be larger than long max + Assert(lbr < Rational(String::maxSize())); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); + uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); + Assert(lbsize < slen); + if (dir == 1) { - curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); - success = true; - sindex++; + // strip partially from the front + nr.push_back(Word::prefix(s, lbsize)); + n1[sindex_use] = Word::suffix(s, slen - lbsize); } else { - // we can remove part of the constant - // lower bound minus the length of a concrete string is negative, - // hence lowerBound cannot be larger than long max - Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); - uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < slen); - if (dir == 1) - { - // strip partially from the front - nr.push_back(Word::prefix(s, lbsize)); - n1[sindex_use] = Word::suffix(s, slen - lbsize); - } - else - { - // strip partially from the back - nr.push_back(Word::suffix(s, lbsize)); - n1[sindex_use] = Word::prefix(s, slen - lbsize); - } - ret = true; + // strip partially from the back + nr.push_back(Word::suffix(s, lbsize)); + n1[sindex_use] = Word::prefix(s, slen - lbsize); } - Assert(ArithEntail::check(curr)); - } - else - { - // we cannot remove the constant + ret = true; } + Assert(ArithEntail::check(curr)); } - } - else - { - Node next_s = NodeManager::currentNM()->mkNode( - MINUS, - curr, - NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); - next_s = Rewriter::rewrite(next_s); - if (ArithEntail::check(next_s)) + else { - success = true; - curr = next_s; - sindex++; + // we cannot remove the constant } } } - } while (success); - if (sindex > 0) + else + { + Node next_s = NodeManager::currentNM()->mkNode( + MINUS, + curr, + NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); + next_s = Rewriter::rewrite(next_s); + if (ArithEntail::check(next_s)) + { + success = true; + curr = next_s; + sindex++; + } + } + } + if (sindex > 0 && (!strict || curr == zero)) { if (dir == 1) { diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h index 3eb77c5f5..6d16842cd 100644 --- a/src/theory/strings/strings_entail.h +++ b/src/theory/strings/strings_entail.h @@ -67,10 +67,11 @@ class StringsEntail /** strip symbolic length * - * This function strips off components of n1 whose length is less than - * or equal to argument curr, and stores them in nr. The direction - * dir determines whether the components are removed from the start - * or end of n1. + * This function strips off components of n1 whose length is less than or + * equal to argument curr, and stores them in nr. The direction dir + * determines whether the components are removed from the start or end of n1. + * If `strict` is set to true, then the function only returns true if full + * length `curr` can be stripped. * * In detail, this function updates n1 to n1' such that: * If dir=1, @@ -107,7 +108,8 @@ class StringsEntail static bool stripSymbolicLength(std::vector& n1, std::vector& nr, int dir, - Node& curr); + Node& curr, + bool strict = false); /** component contains * This function is used when rewriting str.contains( t1, t2 ), where * n1 is the vector form of t1 diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index c6f3b85f5..5f9465562 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -988,6 +988,7 @@ set(regress_0_tests regress0/strings/issue4674-recomp-nf.smt2 regress0/strings/issue4820.smt2 regress0/strings/issue4915.smt2 + regress0/strings/issue5090.smt2 regress0/strings/itos-entail.smt2 regress0/strings/large-model.smt2 regress0/strings/leadingzero001.smt2 diff --git a/test/regress/regress0/strings/issue5090.smt2 b/test/regress/regress0/strings/issue5090.smt2 new file mode 100644 index 000000000..44a57d4d2 --- /dev/null +++ b/test/regress/regress0/strings/issue5090.smt2 @@ -0,0 +1,32 @@ +; COMMAND-LINE: --strings-exp --incremental +(set-logic QF_S) +(declare-const Str0 String) +(declare-const Str1 String) +(declare-const Str2 String) +(declare-const Str3 String) +(declare-const Str4 String) +(declare-const Str5 String) +(declare-const Str6 String) +(declare-const Str7 String) +(declare-const Str8 String) +(declare-const Str9 String) +(declare-const Str10 String) +(declare-const Str11 String) +(declare-const Str12 String) +(declare-const Str13 String) +(declare-const Str14 String) +(declare-const Str15 String) +(declare-const Str16 String) +(declare-const Str17 String) +(declare-const Str18 String) +(declare-const Str19 String) +(assert (str.in_re Str19(re.opt (str.to_re Str10)))) +(assert (str.in_re Str9(re.opt (str.to_re Str18)))) +(assert (str.in_re (str.replace Str12 "jkngjj" Str14)(re.opt (str.to_re (str.++ Str13 "spifluyxzmbznnejkmfajdisgnyfeogvtwxuclzmrlmjmmwhly" Str5 Str19 "rsjusudbyjoyfpwbpasemhhxoayzouhoaekszsvhbsmnysbcih"))))) +(assert (str.in_re Str13(re.opt (str.to_re Str3)))) +(push 1) +(assert (str.in_re (str.++ Str12 (str.++ Str5 Str16 Str13) (str.++ Str5 "tqckdn" "hvhftx" (str.replace Str12 "jkngjj" Str14)) "trcuij" "ovnscketrkugxyqewkvuvondgahkfzwczexnyiziwhyqlomqie")(re.opt (str.to_re Str8)))) +(push 1) +(assert (str.in_re (str.++ Str13 (str.++ Str5 Str16 Str13))(re.++ (str.to_re (str.++ Str5 Str16 Str13)) (str.to_re "cjyfqapanogtdznjbtqlfrmmfauwjdpvnhfpfhzsxaarlfvlvs" )))) +(set-info :status sat) +(check-sat)