From: Andrew Reynolds Date: Fri, 19 Jun 2020 21:48:10 +0000 (-0500) Subject: Convert more uses of strings to words (#4584) X-Git-Tag: cvc5-1.0.0~3193 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=22780596b561dff9b0eb5b0620252280a678944e;p=cvc5.git Convert more uses of strings to words (#4584) Towards theory of sequences. This PR also adds support for sequences in default sygus grammars. Also removes an interface for mkEmptyWord which doesn't have an equivalent for sequences. --- diff --git a/src/expr/type_node.cpp b/src/expr/type_node.cpp index edf88846f..ff4de9dc8 100644 --- a/src/expr/type_node.cpp +++ b/src/expr/type_node.cpp @@ -304,11 +304,7 @@ Node TypeNode::mkGroundValue() const return *te; } -bool TypeNode::isStringLike() const -{ - // TODO (cvc4-projects #23): sequence here - return isString(); -} +bool TypeNode::isStringLike() const { return isString() || isSequence(); } bool TypeNode::isSubtypeOf(TypeNode t) const { if(*this == t) { diff --git a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp index 8df43087f..074b023a2 100644 --- a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp +++ b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp @@ -459,6 +459,10 @@ void CegGrammarConstructor::collectSygusGrammarTypesFor( // theory of strings shares the integer type TypeNode intType = NodeManager::currentNM()->integerType(); collectSygusGrammarTypesFor(intType,types); + if (range.isSequence()) + { + collectSygusGrammarTypesFor(range.getSequenceElementType(), types); + } } else if (range.isFunction()) { @@ -805,7 +809,7 @@ void CegGrammarConstructor::mkSygusDefaultGrammar( sdts[i].addConstructor(kind, cargsBinary); } } - else if (types[i].isString()) + else if (types[i].isStringLike()) { // concatenation std::vector cargsBinary; @@ -823,6 +827,18 @@ void CegGrammarConstructor::mkSygusDefaultGrammar( std::vector cargsLen; cargsLen.push_back(unres_t); sdts[i_intType].addConstructor(STRING_LENGTH, cargsLen); + if (types[i].isSequence()) + { + TypeNode etype = types[i].getSequenceElementType(); + // retrieve element unresolved type + Assert(type_to_unres.find(etype) != type_to_unres.end()); + TypeNode unresElemType = type_to_unres[etype]; + + Trace("sygus-grammar-def") << "...add for seq.unit" << std::endl; + std::vector cargsSeqUnit; + cargsSeqUnit.push_back(unresElemType); + sdts[i].addConstructor(SEQ_UNIT, cargsSeqUnit); + } } else if (types[i].isArray()) { diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index f43babad2..a91210a7b 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -40,7 +40,7 @@ RegExpOpr::RegExpOpr() std::vector{})), d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) { - d_emptyString = Word::mkEmptyWord(CONST_STRING); + d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType()); d_emptySingleton = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp index e166845c7..9f502e1f6 100644 --- a/src/theory/strings/strings_entail.cpp +++ b/src/theory/strings/strings_entail.cpp @@ -37,8 +37,6 @@ bool StringsEntail::canConstantContainConcat(Node c, int& lastc) { Assert(c.isConst()); - CVC4::String t = c.getConst(); - const std::vector& tvec = t.getVec(); Assert(n.getKind() == STRING_CONCAT); // must find constant components in order size_t pos = 0; @@ -50,19 +48,20 @@ bool StringsEntail::canConstantContainConcat(Node c, { firstc = firstc == -1 ? i : firstc; lastc = i; - CVC4::String s = n[i].getConst(); - size_t new_pos = t.find(s, pos); + size_t new_pos = Word::find(c, n[i], pos); if (new_pos == std::string::npos) { return false; } else { - pos = new_pos + s.size(); + pos = new_pos + Word::getLength(n[i]); } } else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) { + Assert(c.getType().isString()); + const std::vector& tvec = c.getConst().getVec(); // find the first occurrence of a digit starting at pos while (pos < tvec.size() && !String::isDigit(tvec[pos])) { @@ -116,7 +115,8 @@ bool StringsEntail::stripSymbolicLength(std::vector& n1, { Assert(dir == 1 || dir == -1); Assert(nr.empty()); - Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + NodeManager* nm = NodeManager::currentNM(); + Node zero = nm->mkConst(CVC4::Rational(0)); bool ret = false; bool success; unsigned sindex = 0; @@ -139,18 +139,16 @@ bool StringsEntail::stripSymbolicLength(std::vector& n1, if (lbr.sgn() > 0) { Assert(ArithEntail::check(curr, true)); - CVC4::String s = n1[sindex_use].getConst(); - Node ncl = - NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); - Node next_s = - NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl); + Node s = n1[sindex_use]; + size_t slen = Word::getLength(s); + Node ncl = nm->mkConst(CVC4::Rational(slen)); + Node next_s = nm->mkNode(MINUS, lowerBound, ncl); next_s = Rewriter::rewrite(next_s); Assert(next_s.isConst()); // we can remove the entire constant if (next_s.getConst().sgn() >= 0) { - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(MINUS, curr, ncl)); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); success = true; sindex++; } @@ -160,25 +158,20 @@ bool StringsEntail::stripSymbolicLength(std::vector& n1, // lower bound minus the length of a concrete string is negative, // hence lowerBound cannot be larger than long max Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound)); + curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < s.size()); + Assert(lbsize < slen); if (dir == 1) { // strip partially from the front - nr.push_back( - NodeManager::currentNM()->mkConst(s.prefix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.suffix(s.size() - lbsize)); + nr.push_back(Word::prefix(s, lbsize)); + n1[sindex_use] = Word::suffix(s, slen - lbsize); } else { // strip partially from the back - nr.push_back( - NodeManager::currentNM()->mkConst(s.suffix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.prefix(s.size() - lbsize)); + nr.push_back(Word::suffix(s, lbsize)); + n1[sindex_use] = Word::prefix(s, slen - lbsize); } ret = true; } @@ -496,8 +489,6 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, { Assert(nb.empty()); Assert(ne.empty()); - - NodeManager* nm = NodeManager::currentNM(); bool changed = false; // for ( forwards, backwards ) direction for (unsigned r = 0; r < 2; r++) @@ -509,7 +500,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, bool removeComponent = false; Node n1cmp = n1[index0]; - if (n1cmp.isConst() && n1cmp.getConst().size() == 0) + if (n1cmp.isConst() && Word::isEmpty(n1cmp)) { return false; } @@ -522,14 +513,15 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, << ", dir = " << dir << std::endl; if (n1cmp.isConst()) { - CVC4::String s = n1cmp.getConst(); + Node s = n1cmp; + size_t slen = Word::getLength(s); // overlap is an overapproximation of the number of characters // n2[index1] can match in s - unsigned overlap = s.size(); + unsigned overlap = Word::getLength(s); if (n2[index1].isConst()) { - CVC4::String t = n2[index1].getConst(); - std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); + Node t = n2[index1]; + std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); if (ret == std::string::npos) { if (n1.size() == 1) @@ -545,7 +537,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, // This is used to partially strip off the endpoint // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> // str.contains( str.++( "c", x ), str.++( "cd", y ) ) - overlap = r == 0 ? s.overlap(t) : t.overlap(s); + overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); } else { @@ -553,19 +545,20 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, // if there is no overlap // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) // --> str.contains( x, "a" ) - removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); + removeComponent = + ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0); } } else if (sss.empty()) // only if not substr { - Assert(ret < s.size()); + Assert(ret < slen); // can strip off up to the find position, e.g. // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> // str.contains( str.++( "bc", x ), str.++( "b", y ) ), // and // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) - overlap = s.size() - ret; + overlap = slen - ret; } } else @@ -573,7 +566,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, // inconclusive } // process the overlap - if (overlap < s.size()) + if (overlap < slen) { changed = true; if (overlap == 0) @@ -586,13 +579,13 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, // component if (r == 0) { - nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.suffix(overlap)); + nb.push_back(Word::prefix(s, slen - overlap)); + n1[index0] = Word::suffix(s, overlap); } else { - ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.prefix(overlap)); + ne.push_back(Word::suffix(s, slen - overlap)); + n1[index0] = Word::prefix(s, overlap); } } } @@ -601,8 +594,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector& n1, { if (n2[index1].isConst()) { + Assert(n2[index1].getType().isString()); CVC4::String t = n2[index1].getConst(); - if (n1.size() == 1) { // if n1.size()==1, then if n2[index1] is not a number, we can drop diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index 484b0df2d..35b315e35 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -27,7 +27,8 @@ Node Word::mkEmptyWord(TypeNode tn) { if (tn.isString()) { - return mkEmptyWord(CONST_STRING); + std::vector vec; + return NodeManager::currentNM()->mkConst(String(vec)); } else if (tn.isSequence()) { @@ -39,17 +40,6 @@ Node Word::mkEmptyWord(TypeNode tn) return Node::null(); } -Node Word::mkEmptyWord(Kind k) -{ - if (k == CONST_STRING) - { - std::vector vec; - return NodeManager::currentNM()->mkConst(String(vec)); - } - Unimplemented(); - return Node::null(); -} - Node Word::mkWordFlatten(const std::vector& xs) { Assert(!xs.empty()); @@ -81,7 +71,8 @@ Node Word::mkWordFlatten(const std::vector& xs) seq.push_back(c.toExpr()); } } - return NodeManager::currentNM()->mkConst(ExprSequence(tn.toType(), seq)); + return NodeManager::currentNM()->mkConst( + ExprSequence(tn.getSequenceElementType().toType(), seq)); } Unimplemented(); return Node::null(); @@ -98,17 +89,17 @@ size_t Word::getLength(TNode x) { return x.getConst().getSequence().size(); } - Unimplemented(); + Unimplemented() << "Word::getLength on " << x; return 0; } std::vector Word::getChars(TNode x) { Kind k = x.getKind(); + std::vector ret; + NodeManager* nm = NodeManager::currentNM(); if (k == CONST_STRING) { - std::vector ret; - NodeManager* nm = NodeManager::currentNM(); std::vector ccVec; const std::vector& cvec = x.getConst().getVec(); for (unsigned chVal : cvec) @@ -120,8 +111,18 @@ std::vector Word::getChars(TNode x) } return ret; } + else if (k == CONST_SEQUENCE) + { + Type t = x.getConst().getType(); + const Sequence& sx = x.getConst().getSequence(); + const std::vector& vec = sx.getVec(); + for (const Node& v : vec) + { + ret.push_back(nm->mkConst(ExprSequence(t, {v.toExpr()}))); + } + return ret; + } Unimplemented(); - std::vector ret; return ret; } diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 4ebe43e2f..3b15b763a 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -33,9 +33,6 @@ class Word /** make empty constant of type tn */ static Node mkEmptyWord(TypeNode tn); - /** make empty constant of kind k */ - static Node mkEmptyWord(Kind k); - /** make word from constants in (non-empty) vector vec */ static Node mkWordFlatten(const std::vector& xs); diff --git a/test/unit/theory/theory_strings_word_white.h b/test/unit/theory/theory_strings_word_white.h index bc6279cb8..2e29d50b8 100644 --- a/test/unit/theory/theory_strings_word_white.h +++ b/test/unit/theory/theory_strings_word_white.h @@ -63,7 +63,8 @@ class TheoryStringsWordWhite : public CxxTest::TestSuite Node cac = d_nm->mkConst(String("cac")); Node abca = d_nm->mkConst(String("abca")); - TS_ASSERT(Word::mkEmptyWord(kind::CONST_STRING) == empty); + TypeNode stringType = d_nm->stringType(); + TS_ASSERT(Word::mkEmptyWord(stringType) == empty); std::vector vec; vec.push_back(abc);