From 4b7de240edeee362a0b9ca440c22a8b0744cf34b Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 26 Feb 2020 18:04:16 -0600 Subject: [PATCH] Some initial work on using words (#3819) Towards support for sequences (CVC4/cvc4-projects#23.) --- .../strings/theory_strings_rewriter.cpp | 91 ++++++++++--------- src/theory/strings/word.cpp | 10 ++ src/theory/strings/word.h | 78 ++++++++-------- 3 files changed, 101 insertions(+), 78 deletions(-) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 9cd4c1ecc..e58a51e63 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -25,6 +25,7 @@ #include "theory/arith/arith_msum.h" #include "theory/strings/regexp_operation.h" #include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" #include "theory/theory.h" #include "util/integer.h" #include "util/rational.h" @@ -357,20 +358,23 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) if (!c[0].empty() && !c[1].empty() && c[0].back().isConst() && c[1].back().isConst()) { - String cs[2]; + Node cs[2]; + size_t csl[2]; for (unsigned j = 0; j < 2; j++) { - cs[j] = c[j].back().getConst(); + cs[j] = c[j].back(); + csl[j] = Word::getLength(cs[j]); } - unsigned larger = cs[0].size() > cs[1].size() ? 0 : 1; - unsigned smallerSize = cs[1 - larger].size(); + size_t larger = csl[0] > csl[1] ? 0 : 1; + size_t smallerSize = csl[1 - larger]; if (cs[1 - larger] - == (i == 0 ? cs[larger].suffix(smallerSize) - : cs[larger].prefix(smallerSize))) + == (i == 0 ? Word::suffix(cs[larger], smallerSize) + : Word::prefix(cs[larger], smallerSize))) { - unsigned sizeDiff = cs[larger].size() - smallerSize; - c[larger][c[larger].size() - 1] = nm->mkConst( - i == 0 ? cs[larger].prefix(sizeDiff) : cs[larger].suffix(sizeDiff)); + size_t sizeDiff = csl[larger] - smallerSize; + c[larger][c[larger].size() - 1] = + i == 0 ? Word::prefix(cs[larger], sizeDiff) + : Word::suffix(cs[larger], sizeDiff); c[1 - larger].pop_back(); changed = true; } @@ -393,10 +397,10 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) for (unsigned i = 0; i < 2; i++) { Node cn = checkEntailHomogeneousString(node[i]); - if (!cn.isNull() && cn.getConst().size() > 0) + if (!cn.isNull() && !Word::isEmpty(cn)) { Assert(cn.isConst()); - Assert(cn.getConst().size() == 1); + Assert(Word::getLength(cn) == 1); unsigned hchar = cn.getConst().front(); // The operands of the concat on each side of the equality without @@ -758,7 +762,8 @@ Node TheoryStringsRewriter::rewriteConcat(Node node) } if(!tmpNode.isConst()) { if(!preNode.isNull()) { - if(preNode.getKind() == kind::CONST_STRING && !preNode.getConst().isEmptyString() ) { + if (preNode.isConst() && !Word::isEmpty(preNode)) + { node_vec.push_back( preNode ); } preNode = Node::null(); @@ -768,11 +773,15 @@ Node TheoryStringsRewriter::rewriteConcat(Node node) if( preNode.isNull() ){ preNode = tmpNode; }else{ - preNode = NodeManager::currentNM()->mkConst( preNode.getConst().concat( tmpNode.getConst() ) ); + std::vector vec; + vec.push_back(preNode); + vec.push_back(tmpNode); + preNode = Word::mkWord(vec); } } } - if( !preNode.isNull() && ( preNode.getKind()!=kind::CONST_STRING || !preNode.getConst().isEmptyString() ) ){ + if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode))) + { node_vec.push_back( preNode ); } @@ -828,7 +837,7 @@ Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node) } } else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst() - && c[0].getConst().isEmptyString()) + && Word::isEmpty(c[0])) { changed = true; emptyRe = c; @@ -969,9 +978,8 @@ Node TheoryStringsRewriter::rewriteStarRegExp(TNode node) // ((R)*)* ---> R* return returnRewrite(node, node[0], "re-star-nested-star"); } - else if (node[0].getKind() == STRING_TO_REGEXP - && node[0][0].getKind() == CONST_STRING - && node[0][0].getConst().isEmptyString()) + else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst() + && Word::isEmpty(node[0][0])) { // ("")* ---> "" return returnRewrite(node, node[0], "re-star-empty-string"); @@ -991,8 +999,8 @@ Node TheoryStringsRewriter::rewriteStarRegExp(TNode node) std::vector node_vec; for (const Node& nc : node[0]) { - if (nc.getKind() == STRING_TO_REGEXP && nc[0].getKind() == CONST_STRING - && nc[0].getConst().isEmptyString()) + if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst() + && Word::isEmpty(nc[0])) { // can be removed changed = true; @@ -1571,18 +1579,19 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { { Kind nk0 = node[0].getKind(); if( node[0].isConst() ){ - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( node[0].getConst().size() ) ); + retNode = nm->mkConst(Rational(Word::getLength(node[0]))); } else if (nk0 == kind::STRING_CONCAT) { Node tmpNode = node[0]; if(tmpNode.isConst()) { - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode.getConst().size() ) ); + retNode = nm->mkConst(Rational(Word::getLength(tmpNode))); }else if( tmpNode.getKind()==kind::STRING_CONCAT ){ std::vector node_vec; for(unsigned int i=0; imkConst( ::CVC4::Rational( tmpNode[i].getConst().size() ) ) ); + node_vec.push_back( + nm->mkConst(Rational(Word::getLength(tmpNode[i])))); } else { node_vec.push_back( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, tmpNode[i]) ); } @@ -1758,7 +1767,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) NodeManager* nm = NodeManager::currentNM(); if (node[0].isConst()) { - if (node[0].getConst().size() == 0) + if (Word::isEmpty(node[0])) { Node ret = node[0]; return returnRewrite(node, ret, "ss-emptystr"); @@ -1773,13 +1782,13 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) { // start beyond the maximum size of strings // thus, it must be beyond the end point of this string - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-const-start-max-oob"); } else if (node[1].getConst().sgn() < 0) { // start before the beginning of the string - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-const-start-neg"); } else @@ -1788,7 +1797,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) if (start >= s.size()) { // start beyond the end of the string - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-const-start-oob"); } } @@ -1800,7 +1809,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) } else if (node[2].getConst().sgn() <= 0) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-const-len-non-pos"); } else @@ -1827,12 +1836,12 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) // if entailed non-positive length or negative start point if (checkEntailArith(zero, node[1], true)) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-start-neg"); } else if (checkEntailArith(zero, node[2])) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-len-non-pos"); } @@ -1858,7 +1867,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) // be empty. if (checkEntailArith(node[1], node[0][2])) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-start-geq-len"); } } @@ -1942,7 +1951,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len)); if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false)) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-start-entails-zero-len"); } @@ -1951,7 +1960,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2])); if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false)) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-non-zero-len-entails-oob"); } @@ -1960,14 +1969,14 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero)); if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false)) { - Node ret = nm->mkConst(String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s"); } // (str.substr s x x) ---> "" if (str.len s) <= 1 if (node[1] == node[2] && checkEntailLengthOne(node[0])) { - Node ret = nm->mkConst(::CVC4::String("")); + Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, "ss-len-one-z-z"); } } @@ -2053,13 +2062,11 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { CVC4::String s = node[0].getConst(); if (node[1].isConst()) { - CVC4::String t = node[1].getConst(); - Node ret = - NodeManager::currentNM()->mkConst(s.find(t) != std::string::npos); + Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos); return returnRewrite(node, ret, "ctn-const"); }else{ Node t = node[1]; - if (s.size() == 0) + if (Word::isEmpty(node[0])) { Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); @@ -2103,14 +2110,14 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { } if (node[1].isConst()) { - CVC4::String t = node[1].getConst(); - if (t.size() == 0) + size_t len = Word::getLength(node[1]); + if (len == 0) { // contains( x, "" ) ---> true Node ret = NodeManager::currentNM()->mkConst(true); return returnRewrite(node, ret, "ctn-rhs-emptystr"); } - else if (t.size() == 1) + else if (len == 1) { // The following rewrites are specific to a single character second // argument of contains, where we can reason that this character is diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index 655df7da3..a0ee0d224 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -22,6 +22,16 @@ namespace CVC4 { namespace theory { namespace strings { +Node Word::mkEmptyWord(TypeNode tn) +{ + if (tn.isString()) + { + return mkEmptyWord(CONST_STRING); + } + Unimplemented(); + return Node::null(); +} + Node Word::mkEmptyWord(Kind k) { NodeManager* nm = NodeManager::currentNM(); diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index e4d10247a..74f50ee9a 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -20,6 +20,7 @@ #include #include "expr/node.h" +#include "expr/type_node.h" namespace CVC4 { namespace theory { @@ -28,7 +29,10 @@ namespace strings { // ------------------------------ for words (string or sequence constants) class Word { -public: + public: + /** make empty constant of type tn */ + static Node mkEmptyWord(TypeNode tn); + /** make empty constant of kind k */ static Node mkEmptyWord(Kind k); @@ -45,9 +49,9 @@ public: static std::size_t find(TNode x, TNode y, std::size_t start = 0); /** - * Return the first position y occurs in x searching from the end of x, or - * std::string::npos otherwise - */ + * Return the first position y occurs in x searching from the end of x, or + * std::string::npos otherwise + */ static std::size_t rfind(TNode x, TNode y, std::size_t start = 0); /** Returns true if y is a prefix of x */ @@ -62,8 +66,9 @@ public: /** Return the substring/subsequence of x starting at index i */ static Node substr(TNode x, std::size_t i); - /** Return the substring/subsequence of x starting at index i with size at most - * j */ + /** Return the substring/subsequence of x starting at index i with size at + * most + * j */ static Node substr(TNode x, std::size_t i, std::size_t j); /** Return the prefix of x of size at most i */ @@ -73,42 +78,43 @@ public: static Node suffix(TNode x, std::size_t i); /** - * Checks if there is any overlap between word x and another word y. This - * corresponds to checking whether one string contains the other and whether a - * substring/subsequence of one is a prefix of the other and vice-versa. - * - * @param x The first string - * @param y The second string - * @return True if there is an overlap, false otherwise - */ + * Checks if there is any overlap between word x and another word y. This + * corresponds to checking whether one string contains the other and whether a + * substring/subsequence of one is a prefix of the other and vice-versa. + * + * @param x The first string + * @param y The second string + * @return True if there is an overlap, false otherwise + */ static bool noOverlapWith(TNode x, TNode y); /** overlap - * - * if overlap returns m>0, - * then the maximal suffix of this string that is a prefix of y is of length m. - * - * For example, if x is "abcdef", then: - * x.overlap("defg") = 3 - * x.overlap("ab") = 0 - * x.overlap("d") = 0 - * x.overlap("bcdefdef") = 5 - */ + * + * if overlap returns m>0, + * then the maximal suffix of this string that is a prefix of y is of length + * m. + * + * For example, if x is "abcdef", then: + * x.overlap("defg") = 3 + * x.overlap("ab") = 0 + * x.overlap("d") = 0 + * x.overlap("bcdefdef") = 5 + */ static std::size_t overlap(TNode x, TNode y); /** reverse overlap - * - * if roverlap returns m>0, - * then the maximal prefix of this word that is a suffix of y is of length m. - * - * For example, if x is "abcdef", then: - * x.roverlap("aaabc") = 3 - * x.roverlap("def") = 0 - * x.roverlap("d") = 0 - * x.roverlap("defabcde") = 5 - * - * Notice that x.overlap(y) = y.roverlap(x) - */ + * + * if roverlap returns m>0, + * then the maximal prefix of this word that is a suffix of y is of length m. + * + * For example, if x is "abcdef", then: + * x.roverlap("aaabc") = 3 + * x.roverlap("def") = 0 + * x.roverlap("d") = 0 + * x.roverlap("defabcde") = 5 + * + * Notice that x.overlap(y) = y.roverlap(x) + */ static std::size_t roverlap(TNode x, TNode y); }; -- 2.30.2