From 5726447e3864c7d2289b458b2d2c5f31b5933a81 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 31 Mar 2020 10:07:29 -0500 Subject: [PATCH] Convert more uses of string-specific functions (#4158) Towards theory of sequences. --- src/theory/strings/sequences_rewriter.cpp | 87 +++++++++---------- src/theory/strings/sequences_rewriter.h | 9 +- .../strings/theory_strings_preprocess.cpp | 25 +++--- .../strings/theory_strings_preprocess.h | 1 - src/theory/strings/word.cpp | 23 +++++ src/theory/strings/word.h | 7 ++ 6 files changed, 91 insertions(+), 61 deletions(-) diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index d7ee459c7..be1e13459 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -479,13 +479,12 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) { Assert(cn.isConst()); Assert(Word::getLength(cn) == 1); - unsigned hchar = cn.getConst().front(); // The operands of the concat on each side of the equality without // constant strings std::vector trimmed[2]; - // Counts the number of `hchar`s on each side - size_t numHChars[2] = {0, 0}; + // Counts the number of `cn`s on each side + size_t numCns[2] = {0, 0}; for (size_t j = 0; j < 2; j++) { // Sort the operands of the concats on both sides of the equality @@ -496,12 +495,12 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) { if (cc.isConst()) { - // Count the number of `hchar`s in the string constant and make - // sure that all chars are `hchar`s - std::vector veccc = cc.getConst().getVec(); - for (size_t k = 0, size = veccc.size(); k < size; k++) + // Count the number of `cn`s in the string constant and make + // sure that all chars are `cn`s + std::vector veccc = Word::getChars(cc); + for (const Node& cv : veccc) { - if (veccc[k] != hchar) + if (cv != cn) { // This conflict case should mostly should be taken care of by // multiset reasoning in the strings rewriter, but we recognize @@ -510,7 +509,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) return returnRewrite( node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG); } - numHChars[j]++; + numCns[j]++; } } else @@ -520,18 +519,18 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) } } - // We have to remove the same number of `hchar`s from both sides, so the - // side with less `hchar`s determines how many we can remove - size_t trimmedConst = std::min(numHChars[0], numHChars[1]); + // We have to remove the same number of `cn`s from both sides, so the + // side with less `cn`s determines how many we can remove + size_t trimmedConst = std::min(numCns[0], numCns[1]); for (size_t j = 0; j < 2; j++) { - size_t diff = numHChars[j] - trimmedConst; + size_t diff = numCns[j] - trimmedConst; if (diff != 0) { - // Add a constant string to the side with more `hchar`s to restore - // the difference in number of `hchar`s - std::vector vec(diff, hchar); - trimmed[j].push_back(nm->mkConst(String(vec))); + // Add a constant string to the side with more `cn`s to restore + // the difference in number of `cn`s + std::vector vec(diff, cn); + trimmed[j].push_back(Word::mkWord(vec)); } } @@ -2332,7 +2331,6 @@ Node SequencesRewriter::rewriteContains(Node node) } if (node[0].isConst()) { - CVC4::String s = node[0].getConst(); if (node[1].isConst()) { Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos); @@ -2356,14 +2354,13 @@ Node SequencesRewriter::rewriteContains(Node node) } else if (checkEntailLengthOne(t)) { - const std::vector& vec = s.getVec(); - + std::vector vec = Word::getChars(node[0]); + Node emp = Word::mkEmptyWord(t.getType()); NodeBuilder<> nb(OR); - nb << nm->mkConst(String("")).eqNode(t); - for (unsigned c : vec) + nb << emp.eqNode(t); + for (const Node& c : vec) { - std::vector sv = {c}; - nb << nm->mkConst(String(sv)).eqNode(t); + nb << c.eqNode(t); } // str.contains("ABCabc", t) ---> @@ -4232,7 +4229,7 @@ bool SequencesRewriter::stripConstantEndpoints(std::vector& n1, return changed; } -Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) +Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) { NodeManager* nm = NodeManager::currentNM(); @@ -4243,7 +4240,15 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) Rational ratLen = len.getConst(); Assert(ratLen.getDenominator() == 1); Integer intLen = ratLen.getNumerator(); - res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A'))); + uint32_t u = intLen.getUnsignedInt(); + if (stype.isString()) + { + res = nm->mkConst(String(std::string(u, 'A'))); + } + else + { + Unimplemented() << "canonicalStrForSymbolicLength for non-string"; + } } else if (len.getKind() == kind::PLUS) { @@ -4251,7 +4256,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) NodeBuilder<> concatBuilder(kind::STRING_CONCAT); for (const auto& n : len) { - Node sn = canonicalStrForSymbolicLength(n); + Node sn = canonicalStrForSymbolicLength(n, stype); if (sn.isNull()) { return Node::null(); @@ -4270,7 +4275,7 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) Assert(ratReps.getDenominator() == 1); Integer intReps = ratReps.getNumerator(); - Node nRep = canonicalStrForSymbolicLength(len[1]); + Node nRep = canonicalStrForSymbolicLength(len[1], stype); std::vector nRepChildren; utils::getConcat(nRep, nRepChildren); NodeBuilder<> concatBuilder(kind::STRING_CONCAT); @@ -4292,7 +4297,7 @@ Node SequencesRewriter::lengthPreserveRewrite(Node n) { NodeManager* nm = NodeManager::currentNM(); Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); - Node res = canonicalStrForSymbolicLength(len); + Node res = canonicalStrForSymbolicLength(len, n.getType()); return res.isNull() ? n : res; } @@ -4869,8 +4874,6 @@ void SequencesRewriter::getArithApproximations(Node a, bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) { - NodeManager* nm = NodeManager::currentNM(); - std::vector avec; utils::getConcat(getMultisetApproximation(a), avec); std::vector bvec; @@ -4913,14 +4916,9 @@ bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) { Node cn = ncp.first; Assert(cn.isConst()); - std::vector cc_vec; - const std::vector& cvec = cn.getConst().getVec(); - for (unsigned i = 0, size = cvec.size(); i < size; i++) + std::vector cnChars = Word::getChars(cn); + for (const Node& ch : cnChars) { - // make the character - cc_vec.clear(); - cc_vec.insert(cc_vec.end(), cvec.begin() + i, cvec.begin() + i + 1); - Node ch = nm->mkConst(String(cc_vec)); count_const[j][ch] += ncp.second; if (std::find(chars.begin(), chars.end(), ch) == chars.end()) { @@ -4955,19 +4953,17 @@ bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) Node SequencesRewriter::checkEntailHomogeneousString(Node a) { - NodeManager* nm = NodeManager::currentNM(); - std::vector avec; utils::getConcat(getMultisetApproximation(a), avec); bool cValid = false; - unsigned c = 0; + Node c; for (const Node& ac : avec) { if (ac.isConst()) { - std::vector acv = ac.getConst().getVec(); - for (unsigned cc : acv) + std::vector acv = Word::getChars(ac); + for (const Node& cc : acv) { if (!cValid) { @@ -4990,11 +4986,10 @@ Node SequencesRewriter::checkEntailHomogeneousString(Node a) if (!cValid) { - return nm->mkConst(String("")); + return Word::mkEmptyWord(a.getType()); } - std::vector cv = {c}; - return nm->mkConst(String(cv)); + return c; } Node SequencesRewriter::getMultisetApproximation(Node a) diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index afdd2c0e1..0e5cd5705 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -512,11 +512,12 @@ class SequencesRewriter : public TheoryRewriter int dir = 0); /** - * Given a symbolic length n, returns the canonical string for that length. - * For example if n is constant, this function returns a string consisting of - * "A" repeated n times. Returns the null node if no such string exists. + * Given a symbolic length n, returns the canonical string (of type stype) + * for that length. For example if n is constant, this function returns a + * string consisting of "A" repeated n times. Returns the null node if no such + * string exists. */ - static Node canonicalStrForSymbolicLength(Node n); + static Node canonicalStrForSymbolicLength(Node n, TypeNode stype); /** length preserving rewrite * diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index b35c4a921..7777b9bd7 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -23,6 +23,7 @@ #include "proof/proof_manager.h" #include "smt/logic_exception.h" #include "theory/strings/sequences_rewriter.h" +#include "theory/strings/word.h" using namespace CVC4; using namespace CVC4::kind; @@ -40,7 +41,6 @@ StringsPreprocess::StringsPreprocess(SkolemCache* sc, d_zero = NodeManager::currentNM()->mkConst(Rational(0)); d_one = NodeManager::currentNM()->mkConst(Rational(1)); d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); - d_empty_str = NodeManager::currentNM()->mkConst(String("")); } StringsPreprocess::~StringsPreprocess(){ @@ -70,11 +70,13 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node c3 = nm->mkNode(GT, m, d_zero); Node cond = nm->mkNode(AND, c1, c2, c3); - Node sk1 = n == d_zero ? d_empty_str + Node emp = Word::mkEmptyWord(t.getType()); + + Node sk1 = n == d_zero ? emp : d_sc->mkSkolemCached( s, n, SkolemCache::SK_PREFIX, "sspre"); Node sk2 = SequencesRewriter::checkEntailArith(t12, lt0) - ? d_empty_str + ? emp : d_sc->mkSkolemCached( s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); @@ -91,7 +93,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m); Node b1 = nm->mkNode(AND, b11, b12, b13, b14); - Node b2 = skt.eqNode(d_empty_str); + Node b2 = skt.eqNode(emp); Node lemma = nm->mkNode(ITE, cond, b1, b2); // assert: @@ -151,7 +153,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node cc1 = skk.eqNode(negone); // y = "" - Node cond2 = y.eqNode(d_empty_str); + Node emp = Word::mkEmptyWord(x.getType()); + Node cond2 = y.eqNode(emp); // skk = n Node cc2 = skk.eqNode(t[2]); @@ -239,8 +242,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node nonneg = nm->mkNode(GEQ, n, d_zero); - lem = nm->mkNode( - ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(d_empty_str)); + Node emp = Word::mkEmptyWord(t.getType()); + lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp)); new_nodes.push_back(lem); // assert: // IF n>=0 @@ -277,7 +280,8 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node lem = stoit.eqNode(d_neg_one); conc1.push_back(lem); - Node sEmpty = s.eqNode(d_empty_str); + Node emp = Word::mkEmptyWord(s.getType()); + Node sEmpty = s.eqNode(emp); Node k = nm->mkSkolem("k", nm->integerType()); Node kc1 = nm->mkNode(GEQ, k, d_zero); Node kc2 = nm->mkNode(LT, k, lens); @@ -478,8 +482,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // the index to begin searching in x for y after the i^th occurrence of y in // x, and Us( i ) is the result of processing the remainder after processing // the i^th occurrence of y in x. - Node assert = nm->mkNode( - ITE, y.eqNode(d_empty_str), rpaw.eqNode(x), nm->mkNode(AND, lem)); + Node emp = Word::mkEmptyWord(t.getType()); + Node assert = + nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem)); new_nodes.push_back(assert); // Thus, replaceall( x, y, z ) = rpaw diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index 155b9014c..fb6404aa6 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -72,7 +72,6 @@ class StringsPreprocess { Node d_zero; Node d_one; Node d_neg_one; - Node d_empty_str; /** pointer to the skolem cache used by this class */ SkolemCache* d_sc; /** Reference to the statistics for the theory of strings/sequences. */ diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index 0faeffd99..b42cf3160 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -76,6 +76,29 @@ size_t Word::getLength(TNode x) return 0; } +std::vector Word::getChars(TNode x) +{ + Kind k = x.getKind(); + if (k == CONST_STRING) + { + std::vector ret; + NodeManager* nm = NodeManager::currentNM(); + std::vector ccVec; + const std::vector& cvec = x.getConst().getVec(); + for (unsigned chVal : cvec) + { + ccVec.clear(); + ccVec.push_back(chVal); + Node ch = nm->mkConst(String(ccVec)); + ret.push_back(ch); + } + return ret; + } + Unimplemented(); + std::vector ret; + return ret; +} + bool Word::isEmpty(TNode x) { return getLength(x) == 0; } bool Word::strncmp(TNode x, TNode y, std::size_t n) diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 7b813a0b2..8e6e7876e 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -42,6 +42,13 @@ class Word /** Return the length of word x */ static size_t getLength(TNode x); + /** Get characters + * + * Given word x, this returns the vector of words of length one whose + * concatenation is equivalent to x. + */ + static std::vector getChars(TNode x); + /** Return true if x is empty */ static bool isEmpty(TNode x); -- 2.30.2