From: Andrew Reynolds Date: Mon, 9 Mar 2020 18:35:59 +0000 (-0500) Subject: Convert more uses of strings to words (#3921) X-Git-Tag: cvc5-1.0.0~3547 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9f77ec44decf18fe23c738988281373795dcca0d;p=cvc5.git Convert more uses of strings to words (#3921) Towards theory of sequences. Also adds documentation to strncmp/rstrncmp and adds them to the Word interface. --- diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 5414c9b98..e2cafa4d3 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -16,10 +16,10 @@ #include "theory/strings/core_solver.h" -#include "theory/strings/theory_strings_utils.h" #include "options/strings_options.h" #include "theory/strings/theory_strings_rewriter.h" - +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" using namespace std; using namespace CVC4::context; @@ -37,7 +37,7 @@ d_nf_pairs(c) d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); - d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + d_emptyString = Word::mkEmptyWord(CONST_STRING); d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); } @@ -1071,10 +1071,10 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Assert(!d_state.areEqual(x, y)); std::vector lenExp; - Node xLen = d_state.getLength(x, lenExp); - Node yLen = d_state.getLength(y, lenExp); + Node xLenTerm = d_state.getLength(x, lenExp); + Node yLenTerm = d_state.getLength(y, lenExp); - if (d_state.areEqual(xLen, yLen)) + if (d_state.areEqual(xLenTerm, yLenTerm)) { // `x` and `y` have the same length. We infer that the two components // have to be the same. @@ -1083,7 +1083,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Trace("strings-solve-debug") << "Simple Case 2 : string lengths are equal" << std::endl; Node eq = x.eqNode(y); - Node leneq = xLen.eqNode(yLen); + Node leneq = xLenTerm.eqNode(yLenTerm); NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, lenExp); lenExp.push_back(leneq); d_im.sendInference(lenExp, eq, "N_Unify"); @@ -1137,13 +1137,14 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // Constants in both normal forms. // // E.g. "abc" ++ ... = "ab" ++ ... - String c1 = x.getConst(); - String c2 = y.getConst(); + size_t lenX = Word::getLength(x); + size_t lenY = Word::getLength(y); Trace("strings-solve-debug") << "Simple Case 4 : Const Split : " << x << " vs " << y << " at index " << index << ", isRev = " << isRev << std::endl; - size_t minLen = std::min(c1.size(), c2.size()); - bool isSameFix = isRev ? c1.rstrncmp(c2, minLen) : c1.strncmp(c2, minLen); + size_t minLen = std::min(lenX, lenY); + bool isSameFix = + isRev ? Word::rstrncmp(x, y, minLen) : Word::strncmp(x, y, minLen); if (isSameFix) { // The shorter constant is a prefix/suffix of the longer constant. We @@ -1152,20 +1153,20 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // // E.g. "abc" ++ x' ++ ... = "ab" ++ y' ++ ... ---> // "c" ++ x' ++ ... = y' ++ ... - bool c1Shorter = c1.size() < c2.size(); - NormalForm& nfl = c1Shorter ? nfj : nfi; - const String& cl = c1Shorter ? c2 : c1; - Node ns = c1Shorter ? x : y; + bool xShorter = lenX < lenY; + NormalForm& nfl = xShorter ? nfj : nfi; + Node cl = xShorter ? y : x; + Node ns = xShorter ? x : y; Node remainderStr; if (isRev) { - int newlen = cl.size() - minLen; - remainderStr = nm->mkConst(cl.substr(0, newlen)); + size_t newlen = std::max(lenX, lenY) - minLen; + remainderStr = Word::substr(cl, 0, newlen); } else { - remainderStr = nm->mkConst(cl.substr(minLen)); + remainderStr = Word::substr(cl, minLen); } Trace("strings-solve-debug-test") << "Break normal form of " << cl << " into " << ns << ", " @@ -1195,7 +1196,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, info.d_j = nfj.d_base; info.d_rev = isRev; Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc); - if (!d_state.areDisequal(xLen, yLen) && !d_state.areEqual(xLen, yLen) + if (!d_state.areDisequal(xLenTerm, yLenTerm) && !d_state.areEqual(xLenTerm, yLenTerm) && !x.isConst() && !y.isConst()) // AJR: remove the latter 2 conditions? { @@ -1207,7 +1208,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, Trace("strings-solve-debug") << "Non-simple Case 1 : string lengths neither equal nor disequal" << std::endl; - Node lenEq = nm->mkNode(EQUAL, xLen, yLen); + Node lenEq = nm->mkNode(EQUAL, xLenTerm, yLenTerm); lenEq = Rewriter::rewrite(lenEq); info.d_conc = nm->mkNode(OR, lenEq, lenEq.negate()); info.d_pending_phase[lenEq] = true; @@ -1395,7 +1396,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // is a prefix/suffix of the other. // // E.g. x ++ ... = y ++ ... ---> (x = y ++ k) v (y = x ++ k) - Assert(d_state.areDisequal(xLen, yLen)); + Assert(d_state.areDisequal(xLenTerm, yLenTerm)); Assert(x.getKind() != CONST_STRING); Assert(y.getKind() != CONST_STRING); @@ -1411,8 +1412,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, // do not infer constants are larger than variables if (t.getKind() != CONST_STRING) { - Node lt1 = e == 0 ? xLen : yLen; - Node lt2 = e == 0 ? yLen : xLen; + Node lt1 = e == 0 ? xLenTerm : yLenTerm; + Node lt2 = e == 0 ? yLenTerm : xLenTerm; Node entLit = Rewriter::rewrite(nm->mkNode(GT, lt1, lt2)); std::pair et = d_state.entailmentCheck( options::TheoryOfMode::THEORY_OF_TYPE_BASED, entLit); @@ -1465,7 +1466,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, } else { - Node ldeq = nm->mkNode(EQUAL, xLen, yLen).negate(); + Node ldeq = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate(); info.d_ant.push_back(ldeq); info.d_conc = nm->mkNode(OR, eq1, eq2); info.d_id = INFER_SSPLIT_VAR; @@ -1625,13 +1626,12 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, { Trace("strings-loop") << "Strings::Loop: Const Normal Breaking." << std::endl; - CVC4::String s = t_yz.getConst(); - unsigned size = s.size(); + unsigned size = Word::getLength(t_yz); std::vector vconc; for (unsigned len = 1; len <= size; len++) { - Node y = nm->mkConst(s.substr(0, len)); - Node z = nm->mkConst(s.substr(len, size - len)); + Node y = Word::substr(t_yz, 0, len); + Node z = Word::substr(t_yz, len, size - len); Node restr = s_zy; Node cc; if (r != d_emptyString) @@ -1770,8 +1770,9 @@ void CoreSolver::processDeq( Node ni, Node nj ) { return; }else{ //split on first character - CVC4::String str = const_k.getConst(); - Node firstChar = str.size() == 1 ? const_k : NodeManager::currentNM()->mkConst( str.prefix( 1 ) ); + Node firstChar = Word::getLength(const_k) == 1 + ? const_k + : Word::prefix(const_k, 1); if (d_state.areEqual(lnck, d_one)) { if (d_state.areDisequal(firstChar, nconst_k)) @@ -1947,26 +1948,31 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& if (!d_state.areEqual(i, j)) { if( i.getKind()==kind::CONST_STRING && j.getKind()==kind::CONST_STRING ) { - unsigned int len_short = i.getConst().size() < j.getConst().size() ? i.getConst().size() : j.getConst().size(); + size_t lenI = Word::getLength(i); + size_t lenJ = Word::getLength(j); + unsigned int len_short = lenI < lenJ ? lenI : lenJ; bool isSameFix = isRev ? i.getConst().rstrncmp(j.getConst(), len_short): i.getConst().strncmp(j.getConst(), len_short); if( isSameFix ) { //same prefix/suffix //k is the index of the string that is shorter - Node nk = i.getConst().size() < j.getConst().size() ? i : j; - Node nl = i.getConst().size() < j.getConst().size() ? j : i; + Node nk = lenI < lenJ ? i : j; + Node nl = lenI < lenJ ? j : i; Node remainderStr; if( isRev ){ - int new_len = nl.getConst().size() - len_short; - remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr(0, new_len) ); + int new_len = Word::getLength(nl) - len_short; + remainderStr = Word::substr(nl, 0, new_len); Trace("strings-solve-debug-test") << "Rev. Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; } else { - remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr( len_short ) ); + remainderStr = Word::substr(nl, len_short); Trace("strings-solve-debug-test") << "Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; } - if( i.getConst().size() < j.getConst().size() ) { + if (lenI < lenJ) + { nfj.insert( nfj.begin() + index + 1, remainderStr ); nfj[index] = nfi[index]; - } else { + } + else + { nfi.insert( nfi.begin() + index + 1, remainderStr ); nfi[index] = nfj[index]; } diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 530564a35..f91b59834 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -20,6 +20,7 @@ #include "options/strings_options.h" #include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" using namespace CVC4; using namespace CVC4::kind; @@ -29,11 +30,8 @@ namespace theory { namespace strings { RegExpOpr::RegExpOpr() - : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))), - d_true(NodeManager::currentNM()->mkConst(true)), + : d_true(NodeManager::currentNM()->mkConst(true)), d_false(NodeManager::currentNM()->mkConst(false)), - d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, - d_emptyString)), d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, std::vector{})), d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))), @@ -42,6 +40,10 @@ RegExpOpr::RegExpOpr() std::vector{})), d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) { + d_emptyString = Word::mkEmptyWord(CONST_STRING); + + d_emptySingleton = + NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); d_lastchar = utils::getAlphabetCardinality() - 1; } @@ -295,6 +297,7 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { int ret = 1; retNode = d_emptyRegexp; + NodeManager* nm = NodeManager::currentNM(); PairNodeStr dv = std::make_pair( r, c ); if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) { @@ -332,10 +335,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { if(tmp == d_emptyString) { ret = 2; } else { - if (tmp.getConst().front() == c.front()) + if (tmp.getConst().front() == c.front()) { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, - tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); + retNode = + nm->mkNode(STRING_TO_REGEXP, + Word::getLength(tmp) == 1 ? d_emptyString + : Word::substr(tmp, 1)); } else { ret = 2; } @@ -346,10 +351,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { if(tmp.getKind() == kind::STRING_CONCAT) { Node t2 = tmp[0]; if(t2.isConst()) { - if (t2.getConst().front() == c.front()) + if (t2.getConst().front() == c.front()) { - Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, - tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); + Node n = nm->mkNode(STRING_TO_REGEXP, + Word::getLength(tmp) == 1 + ? d_emptyString + : Word::substr(tmp, 1)); std::vector< Node > vec_nodes; vec_nodes.push_back(n); for(unsigned i=1; imkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); - d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + d_emptyString = Word::mkEmptyWord(CONST_STRING); d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index a0ee0d224..dd573b68c 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -78,6 +78,34 @@ size_t Word::getLength(TNode x) bool Word::isEmpty(TNode x) { return getLength(x) == 0; } +bool Word::strncmp(TNode x, TNode y, std::size_t n) +{ + Kind k = x.getKind(); + if (k == CONST_STRING) + { + Assert(y.getKind() == CONST_STRING); + String sx = x.getConst(); + String sy = y.getConst(); + return sx.strncmp(sy, n); + } + Unimplemented(); + return false; +} + +bool Word::rstrncmp(TNode x, TNode y, std::size_t n) +{ + Kind k = x.getKind(); + if (k == CONST_STRING) + { + Assert(y.getKind() == CONST_STRING); + String sx = x.getConst(); + String sy = y.getConst(); + return sx.rstrncmp(sy, n); + } + Unimplemented(); + return false; +} + std::size_t Word::find(TNode x, TNode y, std::size_t start) { Kind k = x.getKind(); diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 74f50ee9a..7b813a0b2 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -45,6 +45,22 @@ class Word /** Return true if x is empty */ static bool isEmpty(TNode x); + /** string compare + * + * Returns true if x is equal to y for their first n characters. + * If n is larger than the length of x or y, this method returns + * true if and only if x is equal to y. + */ + static bool strncmp(TNode x, TNode y, std::size_t n); + + /** reverse string compare + * + * Returns true if x is equal to y for their last n characters. + * If n is larger than the length of tx or y, this method returns + * true if and only if x is equal to y. + */ + static bool rstrncmp(TNode x, TNode y, std::size_t n); + /** Return the first position y occurs in x, or std::string::npos otherwise */ static std::size_t find(TNode x, TNode y, std::size_t start = 0); diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 4dfe68b51..00066edb6 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -88,8 +88,8 @@ String String::concat(const String &other) const { return String(ret_vec); } -bool String::strncmp(const String &y, const std::size_t np) const { - std::size_t n = np; +bool String::strncmp(const String& y, std::size_t n) const +{ std::size_t b = (size() >= y.size()) ? size() : y.size(); std::size_t s = (size() <= y.size()) ? size() : y.size(); if (n > s) { @@ -105,8 +105,8 @@ bool String::strncmp(const String &y, const std::size_t np) const { return true; } -bool String::rstrncmp(const String &y, const std::size_t np) const { - std::size_t n = np; +bool String::rstrncmp(const String& y, std::size_t n) const +{ std::size_t b = (size() >= y.size()) ? size() : y.size(); std::size_t s = (size() <= y.size()) ? size() : y.size(); if (n > s) { diff --git a/src/util/regexp.h b/src/util/regexp.h index 1cde53687..731736f72 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -109,8 +109,18 @@ class CVC4_PUBLIC String { bool operator<=(const String& y) const { return cmp(y) <= 0; } bool operator>=(const String& y) const { return cmp(y) >= 0; } - bool strncmp(const String& y, const std::size_t np) const; - bool rstrncmp(const String& y, const std::size_t np) const; + /** + * Returns true if this string is equal to y for their first n characters. + * If n is larger than the length of this string or y, this method returns + * true if and only if this string is equal to y. + */ + bool strncmp(const String& y, std::size_t n) const; + /** + * Returns true if this string is equal to y for their last n characters. + * Similar to strncmp, if n is larger than the length of this string or y, + * this method returns true if and only if this string is equal to y. + */ + bool rstrncmp(const String& y, std::size_t n) const; /* toString * Converts this string to a std::string.