From d91b52085d7e3bbda65117c0cd88433aed383aff Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 3 Apr 2020 09:43:12 -0500 Subject: [PATCH] Split sequences rewriter (#4194) This is in preparation for making the strings rewriter configurable for stats. This moves all utility functions from SequencesRewriter to a proper place. This includes three new groupings of utility functions: those involving arithmetic entailments, those involving string entailments, those involving regular expression entailments. Here, "entailments" loosely means any question regarding a (set of) terms or formulas. No major code changes. Added some missing documentation and lightly cleaned a few blocks of code in cpp. --- src/CMakeLists.txt | 6 + src/theory/strings/arith_entail.cpp | 865 +++++ src/theory/strings/arith_entail.h | 180 ++ src/theory/strings/core_solver.cpp | 13 +- src/theory/strings/regexp_elim.cpp | 4 +- src/theory/strings/regexp_entail.cpp | 623 ++++ src/theory/strings/regexp_entail.h | 117 + src/theory/strings/regexp_operation.cpp | 29 +- src/theory/strings/regexp_operation.h | 2 - src/theory/strings/regexp_solver.cpp | 2 +- src/theory/strings/sequences_rewriter.cpp | 2806 +---------------- src/theory/strings/sequences_rewriter.h | 588 +--- src/theory/strings/skolem_cache.cpp | 6 +- src/theory/strings/strings_entail.cpp | 994 ++++++ src/theory/strings/strings_entail.h | 371 +++ .../strings/theory_strings_preprocess.cpp | 3 +- src/theory/strings/theory_strings_utils.cpp | 89 + src/theory/strings/theory_strings_utils.h | 37 + src/theory/strings/word.cpp | 27 +- src/theory/strings/word.h | 13 + test/unit/theory/sequences_rewriter_white.h | 79 +- 21 files changed, 3540 insertions(+), 3314 deletions(-) create mode 100644 src/theory/strings/arith_entail.cpp create mode 100644 src/theory/strings/arith_entail.h create mode 100644 src/theory/strings/regexp_entail.cpp create mode 100644 src/theory/strings/regexp_entail.h create mode 100644 src/theory/strings/strings_entail.cpp create mode 100644 src/theory/strings/strings_entail.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1c8c15e0..447ca90f8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -656,6 +656,8 @@ libcvc4_add_sources( theory/smt_engine_subsolver.h theory/sort_inference.cpp theory/sort_inference.h + theory/strings/arith_entail.cpp + theory/strings/arith_entail.h theory/strings/base_solver.cpp theory/strings/base_solver.h theory/strings/core_solver.cpp @@ -672,6 +674,8 @@ libcvc4_add_sources( theory/strings/normal_form.h theory/strings/regexp_elim.cpp theory/strings/regexp_elim.h + theory/strings/regexp_entail.cpp + theory/strings/regexp_entail.h theory/strings/regexp_operation.cpp theory/strings/regexp_operation.h theory/strings/regexp_solver.cpp @@ -686,6 +690,8 @@ libcvc4_add_sources( theory/strings/skolem_cache.h theory/strings/solver_state.cpp theory/strings/solver_state.h + theory/strings/strings_entail.cpp + theory/strings/strings_entail.h theory/strings/strings_fmf.cpp theory/strings/strings_fmf.h theory/strings/strings_rewriter.cpp diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp new file mode 100644 index 000000000..5933f2586 --- /dev/null +++ b/src/theory/strings/arith_entail.cpp @@ -0,0 +1,865 @@ +/********************* */ +/*! \file arith_entail.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of arithmetic entailment computation for string terms. + **/ + +#include "theory/strings/arith_entail.h" + +#include "expr/attribute.h" +#include "theory/arith/arith_msum.h" +#include "theory/rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" +#include "theory/theory.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +bool ArithEntail::checkEq(Node a, Node b) +{ + if (a == b) + { + return true; + } + Node ar = Rewriter::rewrite(a); + Node br = Rewriter::rewrite(b); + return ar == br; +} + +bool ArithEntail::check(Node a, Node b, bool strict) +{ + if (a == b) + { + return !strict; + } + Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); + return check(diff, strict); +} + +struct StrCheckEntailArithTag +{ +}; +struct StrCheckEntailArithComputedTag +{ +}; +/** Attribute true for expressions for which check returned true */ +typedef expr::Attribute StrCheckEntailArithAttr; +typedef expr::Attribute + StrCheckEntailArithComputedAttr; + +bool ArithEntail::check(Node a, bool strict) +{ + if (a.isConst()) + { + return a.getConst().sgn() >= (strict ? 1 : 0); + } + + Node ar = + strict + ? NodeManager::currentNM()->mkNode( + kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1))) + : a; + ar = Rewriter::rewrite(ar); + + if (ar.getAttribute(StrCheckEntailArithComputedAttr())) + { + return ar.getAttribute(StrCheckEntailArithAttr()); + } + + bool ret = checkInternal(ar); + if (!ret) + { + // try with approximations + ret = checkApprox(ar); + } + // cache the result + ar.setAttribute(StrCheckEntailArithAttr(), ret); + ar.setAttribute(StrCheckEntailArithComputedAttr(), true); + return ret; +} + +bool ArithEntail::checkApprox(Node ar) +{ + Assert(Rewriter::rewrite(ar) == ar); + NodeManager* nm = NodeManager::currentNM(); + std::map msum; + Trace("strings-ent-approx-debug") + << "Setup arithmetic approximations for " << ar << std::endl; + if (!ArithMSum::getMonomialSum(ar, msum)) + { + Trace("strings-ent-approx-debug") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // for each monomial v*c, mApprox[v] a list of + // possibilities for how the term can be soundly approximated, that is, + // if mApprox[v] contains av, then v*c > av*c. Notice that if c + // is positive, then v > av, otherwise if c is negative, then v < av. + // In other words, av is an under-approximation if c is positive, and an + // over-approximation if c is negative. + bool changed = false; + std::map > mApprox; + // map from approximations to their monomial sums + std::map > approxMsums; + // aarSum stores each monomial that does not have multiple approximations + std::vector aarSum; + for (std::pair& m : msum) + { + Node v = m.first; + Node c = m.second; + Trace("strings-ent-approx-debug") + << "Get approximations " << v << "..." << std::endl; + if (v.isNull()) + { + Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c; + aarSum.push_back(mn); + } + else + { + // c.isNull() means c = 1 + bool isOverApprox = !c.isNull() && c.getConst().sgn() == -1; + std::vector& approx = mApprox[v]; + std::unordered_set visited; + std::vector toProcess; + toProcess.push_back(v); + do + { + Node curr = toProcess.back(); + Trace("strings-ent-approx-debug") << " process " << curr << std::endl; + curr = Rewriter::rewrite(curr); + toProcess.pop_back(); + if (visited.find(curr) == visited.end()) + { + visited.insert(curr); + std::vector currApprox; + getArithApproximations(curr, currApprox, isOverApprox); + if (currApprox.empty()) + { + Trace("strings-ent-approx-debug") + << "...approximation: " << curr << std::endl; + // no approximations, thus curr is a possibility + approx.push_back(curr); + } + else + { + toProcess.insert( + toProcess.end(), currApprox.begin(), currApprox.end()); + } + } + } while (!toProcess.empty()); + Assert(!approx.empty()); + // if we have only one approximation, move it to final + if (approx.size() == 1) + { + changed = v != approx[0]; + Node mn = ArithMSum::mkCoeffTerm(c, approx[0]); + aarSum.push_back(mn); + mApprox.erase(v); + } + else + { + // compute monomial sum form for each approximation, used below + for (const Node& aa : approx) + { + if (approxMsums.find(aa) == approxMsums.end()) + { + CVC4_UNUSED bool ret = + ArithMSum::getMonomialSum(aa, approxMsums[aa]); + Assert(ret); + } + } + changed = true; + } + } + } + if (!changed) + { + // approximations had no effect, return + Trace("strings-ent-approx-debug") << "...no approximations" << std::endl; + return false; + } + // get the current "fixed" sum for the abstraction of ar + Node aar = aarSum.empty() + ? nm->mkConst(Rational(0)) + : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum)); + aar = Rewriter::rewrite(aar); + Trace("strings-ent-approx-debug") + << "...processed fixed sum " << aar << " with " << mApprox.size() + << " approximated monomials." << std::endl; + // if we have a choice of how to approximate + if (!mApprox.empty()) + { + // convert aar back to monomial sum + std::map msumAar; + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + return false; + } + if (Trace.isOn("strings-ent-approx")) + { + Trace("strings-ent-approx") + << "---- Check arithmetic entailment by under-approximation " << ar + << " >= 0" << std::endl; + Trace("strings-ent-approx") << "FIXED:" << std::endl; + ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx"); + Trace("strings-ent-approx") << "APPROX:" << std::endl; + for (std::pair >& a : mApprox) + { + Node c = msum[a.first]; + Trace("strings-ent-approx") << " "; + if (!c.isNull()) + { + Trace("strings-ent-approx") << c << " * "; + } + Trace("strings-ent-approx") + << a.second << " ...from " << a.first << std::endl; + } + Trace("strings-ent-approx") << std::endl; + } + Rational one(1); + // incorporate monomials one at a time that have a choice of approximations + while (!mApprox.empty()) + { + Node v; + Node vapprox; + int maxScore = -1; + // Look at each approximation, take the one with the best score. + // Notice that we are in the process of trying to prove + // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0, + // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar) + // and approx_1 ... approx_m are possible approximations. The + // intution here is that we want coefficients c1...cn to be positive. + // This is because arithmetic string terms t1...tn (which may be + // applications of len, indexof, str.to.int) are never entailed to be + // negative. Hence, we add the approx_i that contributes the "most" + // towards making all constants c1...cn positive and cancelling negative + // monomials in approx_i itself. + for (std::pair >& nam : mApprox) + { + Node cr = msum[nam.first]; + for (const Node& aa : nam.second) + { + unsigned helpsCancelCount = 0; + unsigned addsObligationCount = 0; + std::map::iterator it; + // we are processing an approximation cr*( c1*t1 + ... + cn*tn ) + for (std::pair& aam : approxMsums[aa]) + { + // Say aar is of the form t + c*ti, and aam is the monomial ci*ti + // where ci != 0. We say aam: + // (1) helps cancel if c != 0 and c>0 != ci>0 + // (2) adds obligation if c>=0 and c+ci<0 + Node ti = aam.first; + Node ci = aam.second; + if (!cr.isNull()) + { + ci = ci.isNull() ? cr + : Rewriter::rewrite(nm->mkNode(MULT, ci, cr)); + } + Trace("strings-ent-approx-debug") << ci << "*" << ti << " "; + int ciSgn = ci.isNull() ? 1 : ci.getConst().sgn(); + it = msumAar.find(ti); + if (it != msumAar.end()) + { + Node c = it->second; + int cSgn = c.isNull() ? 1 : c.getConst().sgn(); + if (cSgn == 0) + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + else if (cSgn != ciSgn) + { + helpsCancelCount++; + Rational r1 = c.isNull() ? one : c.getConst(); + Rational r2 = ci.isNull() ? one : ci.getConst(); + Rational r12 = r1 + r2; + if (r12.sgn() == -1) + { + addsObligationCount++; + } + } + } + else + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + } + Trace("strings-ent-approx-debug") + << "counts=" << helpsCancelCount << "," << addsObligationCount + << " for " << aa << " into " << aar << std::endl; + int score = (addsObligationCount > 0 ? 0 : 2) + + (helpsCancelCount > 0 ? 1 : 0); + // if its the best, update v and vapprox + if (v.isNull() || score > maxScore) + { + v = nam.first; + vapprox = aa; + maxScore = score; + } + } + if (!v.isNull()) + { + break; + } + } + Trace("strings-ent-approx") + << "- Decide " << v << " = " << vapprox << std::endl; + // we incorporate v approximated by vapprox into the overall approximation + // for ar + Assert(!v.isNull() && !vapprox.isNull()); + Assert(msum.find(v) != msum.end()); + Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox); + aar = nm->mkNode(PLUS, aar, mn); + // update the msumAar map + aar = Rewriter::rewrite(aar); + msumAar.clear(); + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + Assert(false); + Trace("strings-ent-approx") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // we have processed the approximation for v + mApprox.erase(v); + } + Trace("strings-ent-approx") << "-----------------" << std::endl; + } + if (aar == ar) + { + Trace("strings-ent-approx-debug") + << "...approximation had no effect" << std::endl; + // this should never happen, but we avoid the infinite loop for sanity here + Assert(false); + return false; + } + // Check entailment on the approximation of ar. + // Notice that this may trigger further reasoning by approximation. For + // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be + // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on + // this call, where in the recursive call we may over-approximate + // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer + // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two + // steps. + if (check(aar)) + { + Trace("strings-ent-approx") + << "*** StrArithApprox: showed " << ar + << " >= 0 using under-approximation!" << std::endl; + Trace("strings-ent-approx") + << "*** StrArithApprox: under-approximation was " << aar << std::endl; + return true; + } + return false; +} + +void ArithEntail::getArithApproximations(Node a, + std::vector& approx, + bool isOverApprox) +{ + NodeManager* nm = NodeManager::currentNM(); + // We do not handle PLUS here since this leads to exponential behavior. + // Instead, this is managed, e.g. during checkApprox, where + // PLUS terms are expanded "on-demand" during the reasoning. + Trace("strings-ent-approx-debug") + << "Get arith approximations " << a << std::endl; + Kind ak = a.getKind(); + if (ak == MULT) + { + Node c; + Node v; + if (ArithMSum::getMonomial(a, c, v)) + { + bool isNeg = c.getConst().sgn() == -1; + getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox); + for (unsigned i = 0, size = approx.size(); i < size; i++) + { + approx[i] = nm->mkNode(MULT, c, approx[i]); + } + } + } + else if (ak == STRING_LENGTH) + { + Kind aak = a[0].getKind(); + if (aak == STRING_SUBSTR) + { + // over,under-approximations for len( substr( x, n, m ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + if (isOverApprox) + { + // m >= 0 implies + // m >= len( substr( x, n, m ) ) + if (check(a[0][2])) + { + approx.push_back(a[0][2]); + } + if (check(lenx, a[0][1])) + { + // n <= len( x ) implies + // len( x ) - n >= len( substr( x, n, m ) ) + approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); + } + else + { + // len( x ) >= len( substr( x, n, m ) ) + approx.push_back(lenx); + } + } + else + { + // 0 <= n and n+m <= len( x ) implies + // m <= len( substr( x, n, m ) ) + Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]); + if (check(a[0][1]) && check(lenx, npm)) + { + approx.push_back(a[0][2]); + } + // 0 <= n and n+m >= len( x ) implies + // len(x)-n <= len( substr( x, n, m ) ) + if (check(a[0][1]) && check(npm, lenx)) + { + approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); + } + } + } + else if (aak == STRING_STRREPL) + { + // over,under-approximations for len( replace( x, y, z ) ) + // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + Node leny = nm->mkNode(STRING_LENGTH, a[0][1]); + Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]); + if (isOverApprox) + { + if (check(leny, lenz)) + { + // len( y ) >= len( z ) implies + // len( x ) >= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) + len( z ) >= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(PLUS, lenx, lenz)); + } + } + else + { + if (check(lenz, leny) || check(lenz, lenx)) + { + // len( y ) <= len( z ) or len( x ) <= len( z ) implies + // len( x ) <= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) - len( y ) <= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + } + } + else if (aak == STRING_ITOS) + { + // over,under-approximations for len( int.to.str( x ) ) + if (isOverApprox) + { + if (check(a[0][0], false)) + { + if (check(a[0][0], true)) + { + // x > 0 implies + // x >= len( int.to.str( x ) ) + approx.push_back(a[0][0]); + } + else + { + // x >= 0 implies + // x+1 >= len( int.to.str( x ) ) + approx.push_back( + nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0])); + } + } + } + else + { + if (check(a[0][0])) + { + // x >= 0 implies + // len( int.to.str( x ) ) >= 1 + approx.push_back(nm->mkConst(Rational(1))); + } + // other crazy things are possible here, e.g. + // len( int.to.str( len( y ) + 10 ) ) >= 2 + } + } + } + else if (ak == STRING_STRIDOF) + { + // over,under-approximations for indexof( x, y, n ) + if (isOverApprox) + { + Node lenx = nm->mkNode(STRING_LENGTH, a[0]); + Node leny = nm->mkNode(STRING_LENGTH, a[1]); + if (check(lenx, leny)) + { + // len( x ) >= len( y ) implies + // len( x ) - len( y ) >= indexof( x, y, n ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + else + { + // len( x ) >= indexof( x, y, n ) + approx.push_back(lenx); + } + } + else + { + // TODO?: + // contains( substr( x, n, len( x ) ), y ) implies + // n <= indexof( x, y, n ) + // ...hard to test, runs risk of non-termination + + // -1 <= indexof( x, y, n ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + else if (ak == STRING_STOI) + { + // over,under-approximations for str.to.int( x ) + if (isOverApprox) + { + // TODO?: + // y >= 0 implies + // y >= str.to.int( int.to.str( y ) ) + } + else + { + // -1 <= str.to.int( x ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl; +} + +bool ArithEntail::checkWithEqAssumption(Node assumption, Node a, bool strict) +{ + Assert(assumption.getKind() == kind::EQUAL); + Assert(Rewriter::rewrite(assumption) == assumption); + + // Find candidates variables to compute substitutions for + std::unordered_set candVars; + std::vector toVisit = {assumption}; + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT + || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL) + { + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + } + else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH) + { + candVars.insert(curr); + } + else if (curr.getKind() == kind::STRING_LENGTH) + { + candVars.insert(curr); + } + } + + // Check if any of the candidate variables are in n + Node v; + Assert(toVisit.empty()); + toVisit.push_back(a); + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + + if (candVars.find(curr) != candVars.end()) + { + v = curr; + break; + } + } + + if (v.isNull()) + { + // No suitable candidate found + return false; + } + + Node solution = ArithMSum::solveEqualityFor(assumption, v); + if (solution.isNull()) + { + // Could not solve for v + return false; + } + + a = a.substitute(TNode(v), TNode(solution)); + return check(a, strict); +} + +bool ArithEntail::checkWithAssumption(Node assumption, + Node a, + Node b, + bool strict) +{ + Assert(Rewriter::rewrite(assumption) == assumption); + + NodeManager* nm = NodeManager::currentNM(); + + if (!assumption.isConst() && assumption.getKind() != kind::EQUAL) + { + // We rewrite inequality assumptions from x <= y to x + (str.len s) = y + // where s is some fresh string variable. We use (str.len s) because + // (str.len s) must be non-negative for the equation to hold. + Node x, y; + if (assumption.getKind() == kind::GEQ) + { + x = assumption[0]; + y = assumption[1]; + } + else + { + // (not (>= s t)) --> (>= (t - 1) s) + Assert(assumption.getKind() == kind::NOT + && assumption[0].getKind() == kind::GEQ); + x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1))); + y = assumption[0][0]; + } + + Node s = nm->mkBoundVar("slackVal", nm->stringType()); + Node slen = nm->mkNode(kind::STRING_LENGTH, s); + assumption = Rewriter::rewrite( + nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen))); + } + + Node diff = nm->mkNode(kind::MINUS, a, b); + bool res = false; + if (assumption.isConst()) + { + bool assumptionBool = assumption.getConst(); + if (assumptionBool) + { + res = check(diff, strict); + } + else + { + res = true; + } + } + else + { + res = checkWithEqAssumption(assumption, diff, strict); + } + return res; +} + +bool ArithEntail::checkWithAssumptions(std::vector assumptions, + Node a, + Node b, + bool strict) +{ + // TODO: We currently try to show the entailment with each assumption + // independently. In the future, we should make better use of multiple + // assumptions. + bool res = false; + for (const auto& assumption : assumptions) + { + Assert(Rewriter::rewrite(assumption) == assumption); + + if (checkWithAssumption(assumption, a, b, strict)) + { + res = true; + break; + } + } + return res; +} + +Node ArithEntail::getConstantBound(Node a, bool isLower) +{ + Assert(Rewriter::rewrite(a) == a); + Node ret; + if (a.isConst()) + { + ret = a; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + if (isLower) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + std::vector children; + bool success = true; + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + Node ac = getConstantBound(a[i], isLower); + if (ac.isNull()) + { + ret = ac; + success = false; + break; + } + else + { + if (ac.getConst().sgn() == 0) + { + if (a.getKind() == kind::MULT) + { + ret = ac; + success = false; + break; + } + } + else + { + if (a.getKind() == kind::MULT) + { + if ((ac.getConst().sgn() > 0) != isLower) + { + ret = Node::null(); + success = false; + break; + } + } + children.push_back(ac); + } + } + } + if (success) + { + if (children.empty()) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + else if (children.size() == 1) + { + ret = children[0]; + } + else + { + ret = NodeManager::currentNM()->mkNode(a.getKind(), children); + ret = Rewriter::rewrite(ret); + } + } + } + Trace("strings-rewrite-cbound") + << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a + << " is " << ret << std::endl; + Assert(ret.isNull() || ret.isConst()); + // entailment check should be at least as powerful as computing a lower bound + Assert(!isLower || ret.isNull() || ret.getConst().sgn() < 0 + || check(a, false)); + Assert(!isLower || ret.isNull() || ret.getConst().sgn() <= 0 + || check(a, true)); + return ret; +} + +bool ArithEntail::checkInternal(Node a) +{ + Assert(Rewriter::rewrite(a) == a); + // check whether a >= 0 + if (a.isConst()) + { + return a.getConst().sgn() >= 0; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + // str.len( t ) >= 0 + return true; + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + if (!checkInternal(a[i])) + { + return false; + } + } + // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0 + return true; + } + + return false; +} + +bool ArithEntail::inferZerosInSumGeq(Node x, + std::vector& ys, + std::vector& zeroYs) +{ + Assert(zeroYs.empty()); + + NodeManager* nm = NodeManager::currentNM(); + + // Check if we can show that y1 + ... + yn >= x + Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0]; + if (!check(sum, x)) + { + return false; + } + + // Try to remove yi one-by-one and check if we can still show: + // + // y1 + ... + yi-1 + yi+1 + ... + yn >= x + // + // If that's the case, we know that yi can be zero and the inequality still + // holds. + size_t i = 0; + while (i < ys.size()) + { + Node yi = ys[i]; + std::vector::iterator pos = ys.erase(ys.begin() + i); + if (ys.size() > 1) + { + sum = nm->mkNode(PLUS, ys); + } + else + { + sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0)); + } + + if (check(sum, x)) + { + zeroYs.push_back(yi); + } + else + { + ys.insert(pos, yi); + i++; + } + } + return true; +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h new file mode 100644 index 000000000..266f726c7 --- /dev/null +++ b/src/theory/strings/arith_entail.h @@ -0,0 +1,180 @@ +/********************* */ +/*! \file arith_entail.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Arithmetic entailment computation for string terms. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__ARITH_ENTAIL_H +#define CVC4__THEORY__STRINGS__ARITH_ENTAIL_H + +#include + +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * Techniques for computing arithmetic entailment for string terms. This + * is an implementation of the techniques from Reynolds et al, "High Level + * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019. + */ +class ArithEntail +{ + public: + /** check arithmetic entailment equal + * Returns true if it is always the case that a = b. + */ + static bool checkEq(Node a, Node b); + /** check arithmetic entailment + * Returns true if it is always the case that a >= b, + * and a>b if strict is true. + */ + static bool check(Node a, Node b, bool strict = false); + /** check arithmetic entailment + * Returns true if it is always the case that a >= 0. + */ + static bool check(Node a, bool strict = false); + /** check arithmetic entailment with approximations + * + * Returns true if it is always the case that a >= 0. We expect that a is in + * rewritten form. + * + * This function uses "approximation" techniques that under-approximate + * the value of a for the purposes of showing the entailment holds. For + * example, given: + * len( x ) - len( substr( y, 0, len( x ) ) ) + * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above + * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0, + * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0 + * holds. + */ + static bool checkApprox(Node a); + + /** + * Checks whether assumption |= a >= 0 (if strict is false) or + * assumption |= a > 0 (if strict is true), where assumption is an equality + * assumption. The assumption must be in rewritten form. + * + * Example: + * + * checkWithEqAssumption(x + (str.len y) = 0, -x, false) = true + * + * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true + */ + static bool checkWithEqAssumption(Node assumption, + Node a, + bool strict = false); + + /** + * Checks whether assumption |= a >= b (if strict is false) or + * assumption |= a > b (if strict is true). The function returns true if it + * can be shown that the entailment holds and false otherwise. Assumption + * must be in rewritten form. Assumption may be an equality or an inequality. + * + * Example: + * + * checkWithAssumption(x + (str.len y) = 0, 0, x, false) = true + * + * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true + */ + static bool checkWithAssumption(Node assumption, + Node a, + Node b, + bool strict = false); + + /** + * Checks whether assumptions |= a >= b (if strict is false) or + * assumptions |= a > b (if strict is true). The function returns true if it + * can be shown that the entailment holds and false otherwise. Assumptions + * must be in rewritten form. Assumptions may be an equalities or an + * inequalities. + * + * Example: + * + * checkWithAssumptions([x + (str.len y) = 0], 0, x, false) = true + * + * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true + */ + static bool checkWithAssumptions(std::vector assumptions, + Node a, + Node b, + bool strict = false); + + /** get arithmetic lower bound + * If this function returns a non-null Node ret, + * then ret is a rational constant and + * we know that n >= ret always if isLower is true, + * or n <= ret if isLower is false. + * + * Notice the following invariant. + * If getConstantArithBound(a, true) = ret where ret is non-null, then for + * strict = { true, false } : + * ret >= strict ? 1 : 0 + * if and only if + * check( a, strict ) = true. + */ + static Node getConstantBound(Node a, bool isLower = true); + + /** + * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the + * original inequality still holds. Returns true if the original inequality + * holds and false otherwise. The list of ys is modified to contain a subset + * of the original ys. + * + * Example: + * + * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] ) + * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ] + * (can be used to rewrite the inequality to false) + * + * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] ) + * --> returns false because it is not possible to show + * str.len(y) >= str.len(x) + */ + static bool inferZerosInSumGeq(Node x, + std::vector& ys, + std::vector& zeroYs); + + private: + /** check entail arithmetic internal + * Returns true if we can show a >= 0 always. + * a is in rewritten form. + */ + static bool checkInternal(Node a); + /** Get arithmetic approximations + * + * This gets the (set of) arithmetic approximations for term a and stores + * them in approx. If isOverApprox is true, these are over-approximations + * for the value of a, otherwise, they are underapproximations. For example, + * an over-approximation for len( substr( y, n, m ) ) is m; an + * under-approximation for indexof( x, y, n ) is -1. + * + * Notice that this function is not generally recursive (although it may make + * a small bounded of recursive calls). Instead, it returns the shape + * of the approximations for a. For example, an under-approximation + * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this + * function might be len( substr( x, 0, n ) ) - len( y ), where we don't + * consider (recursively) the approximations for len( substr( x, 0, n ) ). + */ + static void getArithApproximations(Node a, + std::vector& approx, + bool isOverApprox = false); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 3384499a2..dc076e734 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -18,6 +18,7 @@ #include "options/strings_options.h" #include "theory/strings/sequences_rewriter.h" +#include "theory/strings/strings_entail.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -154,7 +155,7 @@ void CoreSolver::checkFlatForms() for (const Node& n : it->second) { int firstc, lastc; - if (!SequencesRewriter::canConstantContainList( + if (!StringsEntail::canConstantContainList( c, d_flat_form[n], firstc, lastc)) { Trace("strings-ff-debug") << "Flat form for " << n @@ -348,9 +349,8 @@ void CoreSolver::checkFlatForm(std::vector& eqc, if (!curr_c.isNull() && !cc_c.isNull()) { // check for constant conflict - int index; - Node s = - SequencesRewriter::splitConstant(cc_c, curr_c, index, isRev); + size_t index; + Node s = Word::splitConstant(cc_c, curr_c, index, isRev); if (s.isNull()) { d_bsolver.explainConstantEqc(ac,curr,exp); @@ -915,8 +915,7 @@ void CoreSolver::getNormalForms(Node eqc, { NormalForm& nf = normal_forms[i]; int firstc, lastc; - if (!SequencesRewriter::canConstantContainList( - c, nf.d_nf, firstc, lastc)) + if (!StringsEntail::canConstantContainList(c, nf.d_nf, firstc, lastc)) { Node n = nf.d_base; //conflict @@ -1918,7 +1917,7 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& if (!c.isNull()) { int findex, lindex; - if (!SequencesRewriter::canConstantContainList( + if (!StringsEntail::canConstantContainList( c, i == 0 ? nfj : nfi, findex, lindex)) { Trace("strings-solve-debug") diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index 259588789..d4f301e23 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -17,7 +17,7 @@ #include "options/strings_options.h" #include "theory/rewriter.h" -#include "theory/strings/sequences_rewriter.h" +#include "theory/strings/regexp_entail.h" #include "theory/strings/theory_strings_utils.h" using namespace CVC4; @@ -71,7 +71,7 @@ Node RegExpElimination::eliminateConcat(Node atom) for (unsigned i = 0, size = children.size(); i < size; i++) { Node c = children[i]; - Node fl = SequencesRewriter::getFixedLengthForRegexp(c); + Node fl = RegExpEntail::getFixedLengthForRegexp(c); if (fl.isNull()) { if (!hasPivotIndex && c.getKind() == REGEXP_STAR diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp new file mode 100644 index 000000000..d03893483 --- /dev/null +++ b/src/theory/strings/regexp_entail.cpp @@ -0,0 +1,623 @@ +/********************* */ +/*! \file regexp_entail.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of entailment tests involving regular expressions + **/ + +#include "theory/strings/regexp_entail.h" + +#include "theory/rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace std; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, + std::vector& children, + int dir) +{ + Trace("regexp-ext-rewrite-debug") + << "Simple reg exp consume, dir=" << dir << ":" << std::endl; + Trace("regexp-ext-rewrite-debug") + << " mchildren : " << mchildren << std::endl; + Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; + NodeManager* nm = NodeManager::currentNM(); + unsigned tmin = dir < 0 ? 0 : dir; + unsigned tmax = dir < 0 ? 1 : dir; + // try to remove off front and back + for (unsigned t = 0; t < 2; t++) + { + if (tmin <= t && t <= tmax) + { + bool do_next = true; + while (!children.empty() && !mchildren.empty() && do_next) + { + do_next = false; + Node xc = mchildren[mchildren.size() - 1]; + Node rc = children[children.size() - 1]; + Assert(rc.getKind() != REGEXP_CONCAT); + Assert(xc.getKind() != STRING_CONCAT); + if (rc.getKind() == STRING_TO_REGEXP) + { + if (xc == rc[0]) + { + children.pop_back(); + mchildren.pop_back(); + do_next = true; + Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; + } + else if (xc.isConst() && rc[0].isConst()) + { + // split the constant + size_t index; + Node s = Word::splitConstant(xc, rc[0], index, t == 0); + Trace("regexp-ext-rewrite-debug") + << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " + << s << " " << index << " " << t << std::endl; + if (s.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; + return nm->mkConst(false); + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...strip equal const" << std::endl; + children.pop_back(); + mchildren.pop_back(); + if (index == 0) + { + mchildren.push_back(s); + } + else + { + children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); + } + } + do_next = true; + } + } + else if (xc.isConst()) + { + // check for constants + CVC4::String s = xc.getConst(); + if (Word::isEmpty(xc)) + { + Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; + // ignore and continue + mchildren.pop_back(); + do_next = true; + } + else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA) + { + std::vector ssVec; + ssVec.push_back(t == 0 ? s.back() : s.front()); + CVC4::String ss(ssVec); + if (testConstStringInRegExp(ss, 0, rc)) + { + // strip off one character + mchildren.pop_back(); + if (s.size() > 1) + { + if (t == 0) + { + mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1))); + } + else + { + mchildren.push_back(nm->mkConst(s.substr(1))); + } + } + children.pop_back(); + do_next = true; + } + else + { + return nm->mkConst(false); + } + } + else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION) + { + // see if any/each child does not work + bool result_valid = true; + Node result; + Node emp_s = nm->mkConst(::CVC4::String("")); + for (unsigned i = 0; i < rc.getNumChildren(); i++) + { + std::vector mchildren_s; + std::vector children_s; + mchildren_s.push_back(xc); + utils::getConcat(rc[i], children_s); + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + // one conjunct cannot be satisfied, return false + if (rc.getKind() == REGEXP_INTER) + { + return ret; + } + } + else + { + if (children_s.empty()) + { + // if we were able to fully consume, store the result + Assert(mchildren_s.size() <= 1); + if (mchildren_s.empty()) + { + mchildren_s.push_back(emp_s); + } + if (result.isNull()) + { + result = mchildren_s[0]; + } + else if (result != mchildren_s[0]) + { + result_valid = false; + } + } + else + { + result_valid = false; + } + } + } + if (result_valid) + { + if (result.isNull()) + { + // all disjuncts cannot be satisfied, return false + Assert(rc.getKind() == REGEXP_UNION); + return nm->mkConst(false); + } + else + { + // all branches led to the same result + children.pop_back(); + mchildren.pop_back(); + if (result != emp_s) + { + mchildren.push_back(result); + } + do_next = true; + } + } + } + else if (rc.getKind() == REGEXP_STAR) + { + // check if there is no way that this star can be unrolled even once + std::vector mchildren_s; + mchildren_s.insert( + mchildren_s.end(), mchildren.begin(), mchildren.end()); + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + std::vector children_s; + utils::getConcat(rc[0], children_s); + Trace("regexp-ext-rewrite-debug") + << "...recursive call on body of star" << std::endl; + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "CRE : regexp star infeasable " << xc << " " << rc + << std::endl; + children.pop_back(); + if (!children.empty()) + { + Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; + do_next = true; + } + } + else + { + if (children_s.empty()) + { + // check if beyond this, we can't do it or there is nothing + // left, if so, repeat + bool can_skip = false; + if (children.size() > 1) + { + std::vector mchildren_ss; + mchildren_ss.insert( + mchildren_ss.end(), mchildren.begin(), mchildren.end()); + std::vector children_ss; + children_ss.insert( + children_ss.end(), children.begin(), children.end() - 1); + if (t == 1) + { + std::reverse(mchildren_ss.begin(), mchildren_ss.end()); + std::reverse(children_ss.begin(), children_ss.end()); + } + if (simpleRegexpConsume(mchildren_ss, children_ss, t) + .isNull()) + { + can_skip = true; + } + } + if (!can_skip) + { + Trace("regexp-ext-rewrite-debug") + << "...can't skip" << std::endl; + // take the result of fully consuming once + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + mchildren.clear(); + mchildren.insert( + mchildren.end(), mchildren_s.begin(), mchildren_s.end()); + do_next = true; + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...can skip " << rc << " from " << xc << std::endl; + } + } + } + } + } + if (!do_next) + { + Trace("regexp-ext-rewrite") + << "Cannot consume : " << xc << " " << rc << std::endl; + } + } + } + if (dir != 0) + { + std::reverse(children.begin(), children.end()); + std::reverse(mchildren.begin(), mchildren.end()); + } + } + return Node::null(); +} + +bool RegExpEntail::isConstRegExp(TNode t) +{ + if (t.getKind() == STRING_TO_REGEXP) + { + return t[0].isConst(); + } + else if (t.isVar()) + { + return false; + } + for (unsigned i = 0; i < t.getNumChildren(); ++i) + { + if (!isConstRegExp(t[i])) + { + return false; + } + } + return true; +} + +bool RegExpEntail::testConstStringInRegExp(CVC4::String& s, + unsigned index_start, + TNode r) +{ + Assert(index_start <= s.size()); + Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " + << index_start << std::endl; + Assert(!r.isVar()); + Kind k = r.getKind(); + switch (k) + { + case STRING_TO_REGEXP: + { + CVC4::String s2 = s.substr(index_start, s.size() - index_start); + if (r[0].isConst()) + { + return (s2 == r[0].getConst()); + } + Assert(false) << "RegExp contains variables"; + return false; + } + case REGEXP_CONCAT: + { + if (s.size() != index_start) + { + std::vector vec_k(r.getNumChildren(), -1); + int start = 0; + int left = (int)s.size() - index_start; + int i = 0; + while (i < (int)r.getNumChildren()) + { + bool flag = true; + if (i == (int)r.getNumChildren() - 1) + { + if (testConstStringInRegExp(s, index_start + start, r[i])) + { + return true; + } + } + else if (i == -1) + { + return false; + } + else + { + for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) + { + CVC4::String t = s.substr(index_start + start, vec_k[i]); + if (testConstStringInRegExp(t, 0, r[i])) + { + start += vec_k[i]; + left -= vec_k[i]; + flag = false; + ++i; + vec_k[i] = -1; + break; + } + } + } + + if (flag) + { + --i; + if (i >= 0) + { + start -= vec_k[i]; + left += vec_k[i]; + } + } + } + return false; + } + else + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) + { + return false; + } + } + return true; + } + } + case REGEXP_UNION: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (testConstStringInRegExp(s, index_start, r[i])) + { + return true; + } + } + return false; + } + case REGEXP_INTER: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) + { + return false; + } + } + return true; + } + case REGEXP_STAR: + { + if (s.size() != index_start) + { + for (unsigned i = s.size() - index_start; i > 0; --i) + { + CVC4::String t = s.substr(index_start, i); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (index_start + i == s.size() + || testConstStringInRegExp(s, index_start + i, r)) + { + return true; + } + } + } + return false; + } + else + { + return true; + } + } + case REGEXP_EMPTY: { return false; + } + case REGEXP_SIGMA: + { + if (s.size() == index_start + 1) + { + return true; + } + else + { + return false; + } + } + case REGEXP_RANGE: + { + if (s.size() == index_start + 1) + { + unsigned a = r[0].getConst().front(); + unsigned b = r[1].getConst().front(); + unsigned c = s.back(); + return (a <= c && c <= b); + } + else + { + return false; + } + } + case REGEXP_LOOP: + { + NodeManager* nm = NodeManager::currentNM(); + uint32_t l = r[1].getConst().getNumerator().toUnsignedInt(); + if (s.size() == index_start) + { + return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]); + } + else if (l == 0 && r[1] == r[2]) + { + return false; + } + else + { + Assert(r.getNumChildren() == 3) + << "String rewriter error: LOOP has 2 children"; + if (l == 0) + { + // R{0,u} + uint32_t u = r[2].getConst().getNumerator().toUnsignedInt(); + for (unsigned len = s.size() - index_start; len >= 1; len--) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (len + index_start == s.size()) + { + return true; + } + else + { + Node num2 = nm->mkConst(CVC4::Rational(u - 1)); + Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + } + return false; + } + else + { + // R{l,l} + Assert(r[1] == r[2]) + << "String rewriter error: LOOP nums are not equal"; + if (l > s.size() - index_start) + { + if (testConstStringInRegExp(s, s.size(), r[0])) + { + l = s.size() - index_start; + } + else + { + return false; + } + } + for (unsigned len = 1; len <= s.size() - index_start; len++) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + Node num2 = nm->mkConst(CVC4::Rational(l - 1)); + Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + return false; + } + } + } + case REGEXP_COMPLEMENT: + { + return !testConstStringInRegExp(s, index_start, r[0]); + break; + } + default: + { + Assert(!utils::isRegExpKind(k)); + return false; + } + } +} + +bool RegExpEntail::hasEpsilonNode(TNode node) +{ + for (const Node& nc : node) + { + if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0])) + { + return true; + } + } + return false; +} + +Node RegExpEntail::getFixedLengthForRegexp(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + if (n.getKind() == STRING_TO_REGEXP) + { + Node ret = nm->mkNode(STRING_LENGTH, n[0]); + ret = Rewriter::rewrite(ret); + if (ret.isConst()) + { + return ret; + } + } + else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) + { + return nm->mkConst(Rational(1)); + } + else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) + { + Node ret; + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull() || (!ret.isNull() && ret != flc)) + { + return Node::null(); + } + else if (ret.isNull()) + { + // first time + ret = flc; + } + } + return ret; + } + else if (n.getKind() == REGEXP_CONCAT) + { + NodeBuilder<> nb(PLUS); + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull()) + { + return flc; + } + nb << flc; + } + Node ret = nb.constructNode(); + ret = Rewriter::rewrite(ret); + return ret; + } + return Node::null(); +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h new file mode 100644 index 000000000..0732a6770 --- /dev/null +++ b/src/theory/strings/regexp_entail.h @@ -0,0 +1,117 @@ +/********************* */ +/*! \file regexp_entail.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Entailment tests involving regular expressions + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H +#define CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H + +#include +#include +#include + +#include "expr/attribute.h" +#include "theory/strings/rewrites.h" +#include "theory/theory_rewriter.h" +#include "theory/type_enumerator.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +class RegExpEntail +{ + public: + /** simple regular expression consume + * + * This method is called when we are rewriting a membership of the form + * s1 ++ ... ++ sn in r1 ++ ... ++ rm + * We have that mchildren consists of the strings s1...sn, and children + * consists of the regular expressions r1...rm. + * + * This method tries to strip off parts of the concatenation terms. It updates + * the vectors such that the resulting vectors are such that the membership + * mchildren[n'...n''] in children[m'...m''] is equivalent to the input + * membership. The argument dir indicates the direction to consider, where + * 0 means strip off the front, 1 off the back, and < 0 off of both. + * + * If this method returns the false node, then we have inferred that no + * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or + * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null + * node. + * + * For example, given input + * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0, + * this method updates: + * mchildren = { "b", x }, children = { ("cd")* } + * and returns null. + * + * For example, given input + * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1, + * this method updates: + * { "b" }, { [[y]] } + * where [[.]] denotes str.to.re, and returns null. + * + * Notice that the above requirement for returning false is stronger than + * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false. + * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false + * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb". + * We do not return false even though the above membership is equivalent + * to false. We do this because the function is used e.g. to test whether a + * possible unrolling leads to a conflict. This is demonstrated by the + * following examples: + * + * For example, given input + * { "bb", x }, { "b", ("a")* } and dir=-1, + * this method updates: + * { "b" }, { ("a")* } + * and returns null. + * + * For example, given input + * { "cb", x }, { "b", ("a")* } and dir=-1, + * this method leaves children and mchildren unchanged and returns false. + * + * Notice that based on this, we can determine that: + * "cb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false, whereas we cannot determine that: + * "bb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false. + */ + static Node simpleRegexpConsume(std::vector& mchildren, + std::vector& children, + int dir = -1); + /** Is t a constant regular expression? */ + static bool isConstRegExp(TNode t); + /** + * Does the substring of s starting at index_start occur in constant regular + * expression r? + */ + static bool testConstStringInRegExp(CVC4::String& s, + unsigned index_start, + TNode r); + /** Does regular expression node have (str.to.re "") as a child? */ + static bool hasEpsilonNode(TNode node); + /** get length for regular expression + * + * Given regular expression n, if this method returns a non-null value c, then + * x in n entails len( x ) = c. + */ + static Node getFixedLengthForRegexp(Node n); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__REGEXP_ENTAIL_H */ diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index d104f3ade..0520ec88a 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -18,7 +18,7 @@ #include "expr/kind.h" #include "options/strings_options.h" -#include "theory/strings/sequences_rewriter.h" +#include "theory/strings/regexp_entail.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -83,7 +83,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) { d_constCache[cur] = RE_C_CONSTANT; } - else if (!isRegExpKind(ck)) + else if (!utils::isRegExpKind(ck)) { // non-regular expression applications, e.g. function applications // with regular expression return type are treated as variables. @@ -115,15 +115,6 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) return d_constCache[r]; } -bool RegExpOpr::isRegExpKind(Kind k) -{ - return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP - || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER - || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT - || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV - || k == REGEXP_COMPLEMENT; -} - // 0-unknown, 1-yes, 2-no int RegExpOpr::delta( Node r, Node &exp ) { Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl; @@ -270,7 +261,7 @@ int RegExpOpr::delta( Node r, Node &exp ) { break; } default: { - Assert(!isRegExpKind(k)); + Assert(!utils::isRegExpKind(k)); break; } } @@ -521,7 +512,7 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { break; } default: { - Assert(!isRegExpKind(r.getKind())); + Assert(!utils::isRegExpKind(r.getKind())); return 0; break; } @@ -808,7 +799,7 @@ void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset) // aren't a standard regular expression kind. However, if we do, then // the following code is conservative and says that the current // regular expression can begin with any character. - Assert(isRegExpKind(k)); + Assert(utils::isRegExpKind(k)); // can start with any character Assert(d_lastchar < std::numeric_limits::max()); for (unsigned i = 0; i <= d_lastchar; i++) @@ -908,12 +899,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes // all strings in the language of R1 have the same length, say n, // then the conclusion of the reduction is quantifier-free: // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) - Node reLength = SequencesRewriter::getFixedLengthForRegexp(r[0]); + Node reLength = RegExpEntail::getFixedLengthForRegexp(r[0]); if (reLength.isNull()) { // try from the opposite end unsigned indexE = r.getNumChildren() - 1; - reLength = SequencesRewriter::getFixedLengthForRegexp(r[indexE]); + reLength = RegExpEntail::getFixedLengthForRegexp(r[indexE]); if (!reLength.isNull()) { indexRm = indexE; @@ -1068,7 +1059,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes break; } default: { - Assert(!isRegExpKind(k)); + Assert(!utils::isRegExpKind(k)); break; } } @@ -1267,7 +1258,7 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes break; } default: { - Assert(!isRegExpKind(k)); + Assert(!utils::isRegExpKind(k)); break; } } @@ -1744,7 +1735,7 @@ std::string RegExpOpr::mkString( Node r ) { std::stringstream ss; ss << r; retStr = ss.str(); - Assert(!isRegExpKind(r.getKind())); + Assert(!utils::isRegExpKind(r.getKind())); break; } } diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 7845b2e00..53d60cd40 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -121,8 +121,6 @@ class RegExpOpr { bool checkConstRegExp( Node r ); /** get the constant type for regular expression r */ RegExpConstType getRegExpConstType(Node r); - /** is k a native operator whose return type is a regular expression? */ - static bool isRegExpKind(Kind k); void simplify(Node t, std::vector< Node > &new_nodes, bool polarity); /** * This method returns 1 if the empty string is in r, 2 if the empty string diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index f6ef92b4d..35c52646d 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -686,7 +686,7 @@ Node RegExpSolver::getNormalSymRegExp(Node r, std::vector& nf_exp) { Trace("strings-error") << "Unsupported term: " << r << " in normalization SymRegExp." << std::endl; - Assert(!RegExpOpr::isRegExpKind(r.getKind())); + Assert(!utils::isRegExpKind(r.getKind())); } } return ret; diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index be1e13459..9ccda55c2 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -16,294 +16,22 @@ #include "theory/strings/sequences_rewriter.h" -#include -#include - +#include "expr/attribute.h" #include "expr/node_builder.h" -#include "options/strings_options.h" -#include "smt/logic_exception.h" -#include "theory/arith/arith_msum.h" -#include "theory/strings/regexp_operation.h" +#include "theory/rewriter.h" +#include "theory/strings/arith_entail.h" +#include "theory/strings/regexp_entail.h" +#include "theory/strings/strings_entail.h" #include "theory/strings/strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" -#include "theory/theory.h" -#include "util/integer.h" -#include "util/rational.h" using namespace std; -using namespace CVC4; using namespace CVC4::kind; -using namespace CVC4::theory; -using namespace CVC4::theory::strings; -Node SequencesRewriter::simpleRegexpConsume(std::vector& mchildren, - std::vector& children, - int dir) -{ - Trace("regexp-ext-rewrite-debug") - << "Simple reg exp consume, dir=" << dir << ":" << std::endl; - Trace("regexp-ext-rewrite-debug") - << " mchildren : " << mchildren << std::endl; - Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; - NodeManager* nm = NodeManager::currentNM(); - unsigned tmin = dir < 0 ? 0 : dir; - unsigned tmax = dir < 0 ? 1 : dir; - // try to remove off front and back - for (unsigned t = 0; t < 2; t++) - { - if (tmin <= t && t <= tmax) - { - bool do_next = true; - while (!children.empty() && !mchildren.empty() && do_next) - { - do_next = false; - Node xc = mchildren[mchildren.size() - 1]; - Node rc = children[children.size() - 1]; - Assert(rc.getKind() != kind::REGEXP_CONCAT); - Assert(xc.getKind() != kind::STRING_CONCAT); - if (rc.getKind() == kind::STRING_TO_REGEXP) - { - if (xc == rc[0]) - { - children.pop_back(); - mchildren.pop_back(); - do_next = true; - Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; - } - else if (xc.isConst() && rc[0].isConst()) - { - // split the constant - int index; - Node s = splitConstant(xc, rc[0], index, t == 0); - Trace("regexp-ext-rewrite-debug") - << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " - << s << " " << index << " " << t << std::endl; - if (s.isNull()) - { - Trace("regexp-ext-rewrite-debug") - << "...return false" << std::endl; - return NodeManager::currentNM()->mkConst(false); - } - else - { - Trace("regexp-ext-rewrite-debug") - << "...strip equal const" << std::endl; - children.pop_back(); - mchildren.pop_back(); - if (index == 0) - { - mchildren.push_back(s); - } - else - { - children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); - } - } - do_next = true; - } - } - else if (xc.isConst()) - { - // check for constants - CVC4::String s = xc.getConst(); - if (Word::isEmpty(xc)) - { - Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; - // ignore and continue - mchildren.pop_back(); - do_next = true; - } - else if (rc.getKind() == kind::REGEXP_RANGE - || rc.getKind() == kind::REGEXP_SIGMA) - { - std::vector ssVec; - ssVec.push_back(t == 0 ? s.back() : s.front()); - CVC4::String ss(ssVec); - if (testConstStringInRegExp(ss, 0, rc)) - { - // strip off one character - mchildren.pop_back(); - if (s.size() > 1) - { - if (t == 0) - { - mchildren.push_back(NodeManager::currentNM()->mkConst( - s.substr(0, s.size() - 1))); - } - else - { - mchildren.push_back( - NodeManager::currentNM()->mkConst(s.substr(1))); - } - } - children.pop_back(); - do_next = true; - } - else - { - return NodeManager::currentNM()->mkConst(false); - } - } - else if (rc.getKind() == kind::REGEXP_INTER - || rc.getKind() == kind::REGEXP_UNION) - { - // see if any/each child does not work - bool result_valid = true; - Node result; - Node emp_s = NodeManager::currentNM()->mkConst(::CVC4::String("")); - for (unsigned i = 0; i < rc.getNumChildren(); i++) - { - std::vector mchildren_s; - std::vector children_s; - mchildren_s.push_back(xc); - utils::getConcat(rc[i], children_s); - Node ret = simpleRegexpConsume(mchildren_s, children_s, t); - if (!ret.isNull()) - { - // one conjunct cannot be satisfied, return false - if (rc.getKind() == kind::REGEXP_INTER) - { - return ret; - } - } - else - { - if (children_s.empty()) - { - // if we were able to fully consume, store the result - Assert(mchildren_s.size() <= 1); - if (mchildren_s.empty()) - { - mchildren_s.push_back(emp_s); - } - if (result.isNull()) - { - result = mchildren_s[0]; - } - else if (result != mchildren_s[0]) - { - result_valid = false; - } - } - else - { - result_valid = false; - } - } - } - if (result_valid) - { - if (result.isNull()) - { - // all disjuncts cannot be satisfied, return false - Assert(rc.getKind() == kind::REGEXP_UNION); - return NodeManager::currentNM()->mkConst(false); - } - else - { - // all branches led to the same result - children.pop_back(); - mchildren.pop_back(); - if (result != emp_s) - { - mchildren.push_back(result); - } - do_next = true; - } - } - } - else if (rc.getKind() == kind::REGEXP_STAR) - { - // check if there is no way that this star can be unrolled even once - std::vector mchildren_s; - mchildren_s.insert( - mchildren_s.end(), mchildren.begin(), mchildren.end()); - if (t == 1) - { - std::reverse(mchildren_s.begin(), mchildren_s.end()); - } - std::vector children_s; - utils::getConcat(rc[0], children_s); - Trace("regexp-ext-rewrite-debug") - << "...recursive call on body of star" << std::endl; - Node ret = simpleRegexpConsume(mchildren_s, children_s, t); - if (!ret.isNull()) - { - Trace("regexp-ext-rewrite-debug") - << "CRE : regexp star infeasable " << xc << " " << rc - << std::endl; - children.pop_back(); - if (!children.empty()) - { - Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; - do_next = true; - } - } - else - { - if (children_s.empty()) - { - // check if beyond this, we can't do it or there is nothing - // left, if so, repeat - bool can_skip = false; - if (children.size() > 1) - { - std::vector mchildren_ss; - mchildren_ss.insert( - mchildren_ss.end(), mchildren.begin(), mchildren.end()); - std::vector children_ss; - children_ss.insert( - children_ss.end(), children.begin(), children.end() - 1); - if (t == 1) - { - std::reverse(mchildren_ss.begin(), mchildren_ss.end()); - std::reverse(children_ss.begin(), children_ss.end()); - } - if (simpleRegexpConsume(mchildren_ss, children_ss, t) - .isNull()) - { - can_skip = true; - } - } - if (!can_skip) - { - Trace("regexp-ext-rewrite-debug") - << "...can't skip" << std::endl; - // take the result of fully consuming once - if (t == 1) - { - std::reverse(mchildren_s.begin(), mchildren_s.end()); - } - mchildren.clear(); - mchildren.insert( - mchildren.end(), mchildren_s.begin(), mchildren_s.end()); - do_next = true; - } - else - { - Trace("regexp-ext-rewrite-debug") - << "...can skip " << rc << " from " << xc << std::endl; - } - } - } - } - } - if (!do_next) - { - Trace("regexp-ext-rewrite") - << "Cannot consume : " << xc << " " << rc << std::endl; - } - } - } - if (dir != 0) - { - std::reverse(children.begin(), children.end()); - std::reverse(mchildren.begin(), mchildren.end()); - } - } - return Node::null(); -} +namespace CVC4 { +namespace theory { +namespace strings { Node SequencesRewriter::rewriteEquality(Node node) { @@ -325,7 +53,7 @@ Node SequencesRewriter::rewriteEquality(Node node) // must call rewrite contains directly to avoid infinite loop // we do a fix point since we may rewrite contains terms to simpler // contains terms. - Node ctn = checkEntailContains(node[r], node[1 - r], false); + Node ctn = StringsEntail::checkContains(node[r], node[1 - r], false); if (!ctn.isNull()) { if (!ctn.getConst()) @@ -474,7 +202,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // ------- homogeneous constants for (unsigned i = 0; i < 2; i++) { - Node cn = checkEntailHomogeneousString(node[i]); + Node cn = StringsEntail::checkHomogeneousString(node[i]); if (!cn.isNull() && !Word::isEmpty(cn)) { Assert(cn.isConst()); @@ -564,7 +292,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) } // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y ""))) - if (checkEntailNonEmpty(ne[2])) + if (StringsEntail::checkNonEmpty(ne[2])) { Node ret = nm->mkNode(AND, @@ -574,7 +302,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) } // (= "" (str.replace x "A" "")) ---> (str.prefix x "A") - if (checkEntailLengthOne(ne[1]) && ne[2] == empty) + if (StringsEntail::checkLengthOne(ne[1]) && ne[2] == empty) { Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]); return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP); @@ -584,7 +312,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) { Node zero = nm->mkConst(Rational(0)); - if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true)) + if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true)) { // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0 if (ne[1] == zero) @@ -600,7 +328,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) } // (= "" (str.substr "A" 0 z)) ---> (<= z 0) - if (checkEntailNonEmpty(ne[0]) && ne[1] == zero) + if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero) { Node ret = nm->mkNode(LEQ, ne[2], zero); return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z); @@ -618,7 +346,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) Node x = node[1 - i]; // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x)) - if (checkEntailNonEmpty(x) && repl[0] == empty) + if (StringsEntail::checkNonEmpty(x) && repl[0] == empty) { Node ret = nm->mkNode( EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1])); @@ -649,7 +377,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) { Node lenY = nm->mkNode(STRING_LENGTH, repl[1]); Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]); - if (checkEntailArithEq(lenY, lenZ)) + if (ArithEntail::checkEq(lenY, lenZ)) { Node ret = nm->mkNode(OR, nm->mkNode(EQUAL, repl[0], repl[1]), @@ -672,7 +400,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) { if (node[1 - i].getKind() == STRING_CONCAT) { - new_ret = inferEqsFromContains(node[i], node[1 - i]); + new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]); if (!new_ret.isNull()) { return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL); @@ -709,7 +437,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0); Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1); - if (checkEntailArithEq(lenPfx0, lenPfx1)) + if (ArithEntail::checkEq(lenPfx0, lenPfx1)) { std::vector sfxv0(v0.begin() + i, v0.end()); std::vector sfxv1(v1.begin() + j, v1.end()); @@ -719,7 +447,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) .eqNode(utils::mkConcat(sfxv1, stype))); return returnRewrite(node, ret, Rewrite::SPLIT_EQ); } - else if (checkEntailArith(lenPfx1, lenPfx0, true)) + else if (ArithEntail::check(lenPfx1, lenPfx0, true)) { // The prefix on the right-hand side is strictly longer than the // prefix on the left-hand side, so we try to strip the right-hand @@ -729,7 +457,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) std::vector rpfxv1; - if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) + if (StringsEntail::stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) { std::vector sfxv0(v0.begin() + i, v0.end()); pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); @@ -746,7 +474,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // in the inner loop) break; } - else if (checkEntailArith(lenPfx0, lenPfx1, true)) + else if (ArithEntail::check(lenPfx0, lenPfx1, true)) { // The prefix on the left-hand side is strictly longer than the // prefix on the right-hand side, so we try to strip the left-hand @@ -756,7 +484,7 @@ Node SequencesRewriter::rewriteStrEqualityExt(Node node) // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) std::vector rpfxv0; - if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) + if (StringsEntail::stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) { pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); std::vector sfxv1(v1.begin() + j, v1.end()); @@ -927,7 +655,7 @@ Node SequencesRewriter::rewriteConcat(Node node) Node lastX; for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++) { - Node s = getStringOrEmpty(node_vec[i]); + Node s = StringsEntail::getStringOrEmpty(node_vec[i]); bool nextX = false; if (s != lastX) { @@ -1058,7 +786,8 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node) { // if empty, drop it // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)* - if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr)) + if (RegExpEntail::isConstRegExp(curr) + && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr)) { curr = Node::null(); } @@ -1079,8 +808,9 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node) lastAllStar = true; // go back and remove empty ones from back of cvec // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)* - while (!cvec.empty() && isConstRegExp(cvec.back()) - && testConstStringInRegExp(emptyStr, 0, cvec.back())) + while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back()) + && RegExpEntail::testConstStringInRegExp( + emptyStr, 0, cvec.back())) { cvec.pop_back(); } @@ -1145,7 +875,7 @@ Node SequencesRewriter::rewriteStarRegExp(TNode node) else if (node[0].getKind() == REGEXP_UNION) { // simplification of unions under star - if (hasEpsilonNode(node[0])) + if (RegExpEntail::hasEpsilonNode(node[0])) { bool changed = false; std::vector node_vec; @@ -1348,270 +1078,6 @@ Node SequencesRewriter::rewriteRangeRegExp(TNode node) return node; } -bool SequencesRewriter::isConstRegExp(TNode t) -{ - if (t.getKind() == kind::STRING_TO_REGEXP) - { - return t[0].isConst(); - } - else if (t.isVar()) - { - return false; - } - else - { - for (unsigned i = 0; i < t.getNumChildren(); ++i) - { - if (!isConstRegExp(t[i])) - { - return false; - } - } - return true; - } -} - -bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s, - unsigned int index_start, - TNode r) -{ - Assert(index_start <= s.size()); - Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " - << index_start << std::endl; - Assert(!r.isVar()); - Kind k = r.getKind(); - switch (k) - { - case kind::STRING_TO_REGEXP: - { - CVC4::String s2 = s.substr(index_start, s.size() - index_start); - if (r[0].isConst()) - { - return (s2 == r[0].getConst()); - } - else - { - Assert(false) << "RegExp contains variables"; - return false; - } - } - case kind::REGEXP_CONCAT: - { - if (s.size() != index_start) - { - std::vector vec_k(r.getNumChildren(), -1); - int start = 0; - int left = (int)s.size() - index_start; - int i = 0; - while (i < (int)r.getNumChildren()) - { - bool flag = true; - if (i == (int)r.getNumChildren() - 1) - { - if (testConstStringInRegExp(s, index_start + start, r[i])) - { - return true; - } - } - else if (i == -1) - { - return false; - } - else - { - for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) - { - CVC4::String t = s.substr(index_start + start, vec_k[i]); - if (testConstStringInRegExp(t, 0, r[i])) - { - start += vec_k[i]; - left -= vec_k[i]; - flag = false; - ++i; - vec_k[i] = -1; - break; - } - } - } - - if (flag) - { - --i; - if (i >= 0) - { - start -= vec_k[i]; - left += vec_k[i]; - } - } - } - return false; - } - else - { - for (unsigned i = 0; i < r.getNumChildren(); ++i) - { - if (!testConstStringInRegExp(s, index_start, r[i])) return false; - } - return true; - } - } - case kind::REGEXP_UNION: - { - for (unsigned i = 0; i < r.getNumChildren(); ++i) - { - if (testConstStringInRegExp(s, index_start, r[i])) return true; - } - return false; - } - case kind::REGEXP_INTER: - { - for (unsigned i = 0; i < r.getNumChildren(); ++i) - { - if (!testConstStringInRegExp(s, index_start, r[i])) return false; - } - return true; - } - case kind::REGEXP_STAR: - { - if (s.size() != index_start) - { - for (unsigned i = s.size() - index_start; i > 0; --i) - { - CVC4::String t = s.substr(index_start, i); - if (testConstStringInRegExp(t, 0, r[0])) - { - if (index_start + i == s.size() - || testConstStringInRegExp(s, index_start + i, r)) - { - return true; - } - } - } - return false; - } - else - { - return true; - } - } - case kind::REGEXP_EMPTY: { return false; - } - case kind::REGEXP_SIGMA: - { - if (s.size() == index_start + 1) - { - return true; - } - else - { - return false; - } - } - case kind::REGEXP_RANGE: - { - if (s.size() == index_start + 1) - { - unsigned a = r[0].getConst().front(); - unsigned b = r[1].getConst().front(); - unsigned c = s.back(); - return (a <= c && c <= b); - } - else - { - return false; - } - } - case kind::REGEXP_LOOP: - { - uint32_t l = r[1].getConst().getNumerator().toUnsignedInt(); - if (s.size() == index_start) - { - return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]); - } - else if (l == 0 && r[1] == r[2]) - { - return false; - } - else - { - Assert(r.getNumChildren() == 3) - << "String rewriter error: LOOP has 2 children"; - if (l == 0) - { - // R{0,u} - uint32_t u = r[2].getConst().getNumerator().toUnsignedInt(); - for (unsigned len = s.size() - index_start; len >= 1; len--) - { - CVC4::String t = s.substr(index_start, len); - if (testConstStringInRegExp(t, 0, r[0])) - { - if (len + index_start == s.size()) - { - return true; - } - else - { - Node num2 = - NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); - Node r2 = NodeManager::currentNM()->mkNode( - kind::REGEXP_LOOP, r[0], r[1], num2); - if (testConstStringInRegExp(s, index_start + len, r2)) - { - return true; - } - } - } - } - return false; - } - else - { - // R{l,l} - Assert(r[1] == r[2]) - << "String rewriter error: LOOP nums are not equal"; - if (l > s.size() - index_start) - { - if (testConstStringInRegExp(s, s.size(), r[0])) - { - l = s.size() - index_start; - } - else - { - return false; - } - } - for (unsigned len = 1; len <= s.size() - index_start; len++) - { - CVC4::String t = s.substr(index_start, len); - if (testConstStringInRegExp(t, 0, r[0])) - { - Node num2 = - NodeManager::currentNM()->mkConst(CVC4::Rational(l - 1)); - Node r2 = NodeManager::currentNM()->mkNode( - kind::REGEXP_LOOP, r[0], num2, num2); - if (testConstStringInRegExp(s, index_start + len, r2)) - { - return true; - } - } - } - return false; - } - } - } - case REGEXP_COMPLEMENT: - { - return !testConstStringInRegExp(s, index_start, r[0]); - break; - } - default: - { - Assert(!RegExpOpr::isRegExpKind(k)); - return false; - } - } -} - Node SequencesRewriter::rewriteMembership(TNode node) { NodeManager* nm = NodeManager::currentNM(); @@ -1626,12 +1092,12 @@ Node SequencesRewriter::rewriteMembership(TNode node) Node retNode = NodeManager::currentNM()->mkConst(false); return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY); } - else if (x.isConst() && isConstRegExp(r)) + else if (x.isConst() && RegExpEntail::isConstRegExp(r)) { // test whether x in node[1] CVC4::String s = x.getConst(); - Node retNode = - NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r)); + bool test = RegExpEntail::testConstStringInRegExp(s, 0, r); + Node retNode = NodeManager::currentNM()->mkConst(test); return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL); } else if (r.getKind() == kind::REGEXP_SIGMA) @@ -1666,7 +1132,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) // (str.in.re (str.++ x1 ... xn) (re.* R)) --> // (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R)) // if the length of all strings in R is one. - Node flr = getFixedLengthForRegexp(r[0]); + Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]); if (!flr.isNull()) { Node one = nm->mkConst(Rational(1)); @@ -1794,7 +1260,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) success = false; std::vector children; utils::getConcat(r[0], children); - Node scn = simpleRegexpConsume(mchildren, children, dir); + Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir); if (!scn.isNull()) { Trace("regexp-ext-rewrite") @@ -1834,7 +1300,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) std::vector mchildren; utils::getConcat(x, mchildren); unsigned prevSize = children.size() + mchildren.size(); - Node scn = simpleRegexpConsume(mchildren, children); + Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children); if (!scn.isNull()) { Trace("regexp-ext-rewrite") @@ -2003,19 +1469,6 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) return RewriteResponse(REWRITE_DONE, retNode); } -bool SequencesRewriter::hasEpsilonNode(TNode node) -{ - for (unsigned int i = 0; i < node.getNumChildren(); i++) - { - if (node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].isConst() - && Word::isEmpty(node[i][0])) - { - return true; - } - } - return false; -} - RewriteResponse SequencesRewriter::preRewrite(TNode node) { return RewriteResponse(REWRITE_DONE, node); @@ -2106,12 +1559,12 @@ Node SequencesRewriter::rewriteSubstr(Node node) Node zero = nm->mkConst(CVC4::Rational(0)); // if entailed non-positive length or negative start point - if (checkEntailArith(zero, node[1], true)) + if (ArithEntail::check(zero, node[1], true)) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_START_NEG); } - else if (checkEntailArith(zero, node[2])) + else if (ArithEntail::check(zero, node[2])) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS); @@ -2137,7 +1590,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) // over-approximation of the length of (str.substr x a a), which // then allows us to reason that the result of the whole term must // be empty. - if (checkEntailArith(node[1], node[0][2])) + if (ArithEntail::check(node[1], node[0][2])) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN); @@ -2150,8 +1603,8 @@ Node SequencesRewriter::rewriteSubstr(Node node) // if (str.len y) = 1 and (str.len z) = 1 if (node[1] == zero) { - if (checkEntailLengthOne(node[0][1], true) - && checkEntailLengthOne(node[0][2], true)) + if (StringsEntail::checkLengthOne(node[0][1], true) + && StringsEntail::checkLengthOne(node[0][2], true)) { Node ret = nm->mkNode( kind::STRING_STRREPL, @@ -2172,7 +1625,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) { Node curr = node[2]; std::vector childrenr; - if (stripSymbolicLength(n1, childrenr, 1, curr)) + if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr)) { if (curr != zero && !n1.empty()) { @@ -2204,7 +1657,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2])); if (node[2] != tot_len) { - if (checkEntailArith(node[2], tot_len)) + if (ArithEntail::check(node[2], tot_len)) { // end point beyond end point of string, map to tot_len Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len); @@ -2220,7 +1673,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) // (str.substr s x y) --> "" if x < len(s) |= 0 >= y Node n1_lt_tot_len = Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len)); - if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false)) + if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false)) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN); @@ -2229,7 +1682,8 @@ Node SequencesRewriter::rewriteSubstr(Node node) // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s) Node non_zero_len = Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2])); - if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false)) + if (ArithEntail::checkWithAssumption( + non_zero_len, node[1], tot_len, false)) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB); @@ -2238,7 +1692,8 @@ Node SequencesRewriter::rewriteSubstr(Node node) // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s) Node geq_zero_start = Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero)); - if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false)) + if (ArithEntail::checkWithAssumption( + geq_zero_start, zero, tot_len, false)) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite( @@ -2246,7 +1701,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) } // (str.substr s x x) ---> "" if (str.len s) <= 1 - if (node[1] == node[2] && checkEntailLengthOne(node[0])) + if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0])) { Node ret = Word::mkEmptyWord(node.getType()); return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z); @@ -2257,7 +1712,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) // strip off components while quantity is entailed positive int dir = r == 0 ? 1 : -1; std::vector childrenr; - if (stripSymbolicLength(n1, childrenr, dir, curr)) + if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr)) { if (r == 0) { @@ -2281,7 +1736,7 @@ Node SequencesRewriter::rewriteSubstr(Node node) { Node start_inner = node[0][1]; Node start_outer = node[1]; - if (checkEntailArith(start_outer) && checkEntailArith(start_inner)) + if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner)) { // both are positive // thus, start point is definitely start_inner+start_outer. @@ -2298,11 +1753,11 @@ Node SequencesRewriter::rewriteSubstr(Node node) { new_len = len_from_inner; } - else if (checkEntailArith(len_from_inner, len_from_outer)) + else if (ArithEntail::check(len_from_inner, len_from_outer)) { new_len = len_from_outer; } - else if (checkEntailArith(len_from_outer, len_from_inner)) + else if (ArithEntail::check(len_from_outer, len_from_inner)) { new_len = len_from_inner; } @@ -2343,7 +1798,7 @@ Node SequencesRewriter::rewriteContains(Node node) { Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(len1, true)) + if (ArithEntail::check(len1, true)) { // we handle the false case here since the rewrite for equality // uses this function, hence we want to conclude false if possible. @@ -2352,7 +1807,7 @@ Node SequencesRewriter::rewriteContains(Node node) return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR); } } - else if (checkEntailLengthOne(t)) + else if (StringsEntail::checkLengthOne(t)) { std::vector vec = Word::getChars(node[0]); Node emp = Word::mkEmptyWord(t.getType()); @@ -2372,7 +1827,8 @@ Node SequencesRewriter::rewriteContains(Node node) else if (node[1].getKind() == kind::STRING_CONCAT) { int firstc, lastc; - if (!canConstantContainConcat(node[0], node[1], firstc, lastc)) + if (!StringsEntail::canConstantContainConcat( + node[0], node[1], firstc, lastc)) { Node ret = NodeManager::currentNM()->mkConst(false); return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT); @@ -2410,7 +1866,7 @@ Node SequencesRewriter::rewriteContains(Node node) } else if (node[0].getKind() == STRING_STRREPL) { - Node rplDomain = checkEntailContains(node[0][1], node[1]); + Node rplDomain = StringsEntail::checkContains(node[0][1], node[1]); if (!rplDomain.isNull() && !rplDomain.getConst()) { Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); @@ -2436,7 +1892,7 @@ Node SequencesRewriter::rewriteContains(Node node) // component-wise containment std::vector nc1rb; std::vector nc1re; - if (componentContains(nc1, nc2, nc1rb, nc1re) != -1) + if (StringsEntail::componentContains(nc1, nc2, nc1rb, nc1re) != -1) { Node ret = NodeManager::currentNM()->mkConst(true); return returnRewrite(node, ret, Rewrite::CTN_COMPONENT); @@ -2446,7 +1902,7 @@ Node SequencesRewriter::rewriteContains(Node node) // strip endpoints std::vector nb; std::vector ne; - if (stripConstantEndpoints(nc1, nc2, nb, ne)) + if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne)) { Node ret = NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, utils::mkConcat(nc1, stype), node[1]); @@ -2464,10 +1920,10 @@ Node SequencesRewriter::rewriteContains(Node node) // replacement does not change y. (str.contains x w) checks that if the // replacement changes anything in y, the w makes it impossible for it to // occur in x. - Node ctnConst = checkEntailContains(node[0], n[0]); + Node ctnConst = StringsEntail::checkContains(node[0], n[0]); if (!ctnConst.isNull() && !ctnConst.getConst()) { - Node ctnConst2 = checkEntailContains(node[0], n[2]); + Node ctnConst2 = StringsEntail::checkContains(node[0], n[2]); if (!ctnConst2.isNull() && !ctnConst2.getConst()) { Node res = nm->mkConst(false); @@ -2511,7 +1967,7 @@ Node SequencesRewriter::rewriteContains(Node node) // length entailment Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(len_n2, len_n1, true)) + if (ArithEntail::check(len_n2, len_n1, true)) { // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false Node ret = NodeManager::currentNM()->mkConst(false); @@ -2522,13 +1978,13 @@ Node SequencesRewriter::rewriteContains(Node node) // For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false // since the number of a's in the second argument is greater than the number // of a's in the first argument - if (checkEntailMultisetSubset(node[0], node[1])) + if (StringsEntail::checkMultisetSubset(node[0], node[1])) { Node ret = nm->mkConst(false); return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS); } - if (checkEntailArith(len_n2, len_n1, false)) + if (ArithEntail::check(len_n2, len_n1, false)) { // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2 Node ret = node[0].eqNode(node[1]); @@ -2613,7 +2069,7 @@ Node SequencesRewriter::rewriteContains(Node node) // (str.contains (str.replace x y x) z) ---> (str.contains x z) // if (str.len z) <= 1 - if (checkEntailLengthOne(node[1])) + if (StringsEntail::checkLengthOne(node[1])) { Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN); @@ -2633,9 +2089,9 @@ Node SequencesRewriter::rewriteContains(Node node) // (str.contains (str.replace x y z) w) ---> // (str.contains (str.replace x y "") w) // if (str.contains z w) ---> false and (str.len w) = 1 - if (checkEntailLengthOne(node[1])) + if (StringsEntail::checkLengthOne(node[1])) { - Node ctn = checkEntailContains(node[1], node[0][2]); + Node ctn = StringsEntail::checkContains(node[1], node[0][2]); if (!ctn.isNull() && !ctn.getConst()) { Node empty = nm->mkConst(String("")); @@ -2734,7 +2190,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START); } } - if (checkEntailArith(node[2], true)) + if (ArithEntail::check(node[2], true)) { // y>0 implies indexof( x, x, y ) --> -1 Node negone = nm->mkConst(Rational(-1)); @@ -2757,7 +2213,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) { if (Word::isEmpty(node[1])) { - if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2])) + if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2])) { // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF); @@ -2765,7 +2221,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) } } - if (checkEntailArith(len1, len0m2, true)) + if (ArithEntail::check(len1, len0m2, true)) { // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1 Node negone = nm->mkConst(Rational(-1)); @@ -2779,7 +2235,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) fstr = Rewriter::rewrite(fstr); } - Node cmp_conr = checkEntailContains(fstr, node[1]); + Node cmp_conr = StringsEntail::checkContains(fstr, node[1]); Trace("strings-rewrite-debug") << "For " << node << ", check contains(" << fstr << ", " << node[1] << ")" << std::endl; Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl; @@ -2794,7 +2250,8 @@ Node SequencesRewriter::rewriteIndexof(Node node) // past the first position in node[0] that contains node[1], we can drop std::vector nb; std::vector ne; - int cc = componentContains(children0, children1, nb, ne, true, 1); + int cc = StringsEntail::componentContains( + children0, children1, nb, ne, true, 1); if (cc != -1 && !ne.empty()) { // For example: @@ -2805,7 +2262,8 @@ Node SequencesRewriter::rewriteIndexof(Node node) } // Strip components from the beginning that are guaranteed not to match - if (stripConstantEndpoints(children0, children1, nb, ne, 1)) + if (StringsEntail::stripConstantEndpoints( + children0, children1, nb, ne, 1)) { // str.indexof(str.++("AB", x, "C"), "C", 0) ---> // 2 + str.indexof(str.++(x, "C"), "C", 0) @@ -2823,7 +2281,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) // strip symbolic length Node new_len = node[2]; std::vector nr; - if (stripSymbolicLength(children0, nr, 1, new_len)) + if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len)) { // For example: // z>str.len( x1 ) and str.contains( x2, y )-->true @@ -2849,7 +2307,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) { Node new_len = node[2]; std::vector nr; - if (stripSymbolicLength(children0, nr, 1, new_len)) + if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len)) { // Normalize the string before the start index. // @@ -2875,7 +2333,7 @@ Node SequencesRewriter::rewriteIndexof(Node node) { std::vector cb; std::vector ce; - if (stripConstantEndpoints(children0, children1, cb, ce, -1)) + if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1)) { Node ret = utils::mkConcat(children0, stype); ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]); @@ -2950,14 +2408,14 @@ Node SequencesRewriter::rewriteReplace(Node node) // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(l1, l0)) + if (ArithEntail::check(l1, l0)) { return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID); } // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x) // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "") - if (checkEntailLengthOne(node[0])) + if (StringsEntail::checkLengthOne(node[0])) { Node empty = nm->mkConst(String("")); Node rn1 = Rewriter::rewrite( @@ -2966,7 +2424,7 @@ Node SequencesRewriter::rewriteReplace(Node node) { std::vector emptyNodes; bool allEmptyEqs; - std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1); + std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1); if (allEmptyEqs) { @@ -2987,14 +2445,15 @@ Node SequencesRewriter::rewriteReplace(Node node) // check if contains definitely does (or does not) hold Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]); Node cmp_conr = Rewriter::rewrite(cmp_con); - if (!checkEntailContains(node[0], node[1]).isNull()) + if (!StringsEntail::checkContains(node[0], node[1]).isNull()) { if (cmp_conr.getConst()) { // component-wise containment std::vector cb; std::vector ce; - int cc = componentContains(children0, children1, cb, ce, true, 1); + int cc = StringsEntail::componentContains( + children0, children1, cb, ce, true, 1); if (cc != -1) { if (cc == 0 && children0[0] == children1[0]) @@ -3052,7 +2511,7 @@ Node SequencesRewriter::rewriteReplace(Node node) std::vector emptyNodes; bool allEmptyEqs; - std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr); + std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr); if (emptyNodes.size() > 0) { @@ -3089,7 +2548,7 @@ Node SequencesRewriter::rewriteReplace(Node node) if (cmp_conr != cmp_con) { - if (checkEntailNonEmpty(node[1])) + if (StringsEntail::checkNonEmpty(node[1])) { // pull endpoints that can be stripped // for example, @@ -3097,7 +2556,7 @@ Node SequencesRewriter::rewriteReplace(Node node) // str.++( "b", str.replace( x, "a", y ), "b" ) std::vector cb; std::vector ce; - if (stripConstantEndpoints(children0, children1, cb, ce)) + if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce)) { std::vector cc; cc.insert(cc.end(), cb.begin(), cb.end()); @@ -3138,7 +2597,7 @@ Node SequencesRewriter::rewriteReplace(Node node) Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]); Node len0_1 = nm->mkNode(kind::PLUS, len0, one); // Check len(t) + j > len(x) + 1 - if (checkEntailArith(maxLen1, len0_1, true)) + if (ArithEntail::check(maxLen1, len0_1, true)) { children1.push_back(nm->mkNode( kind::STRING_SUBSTR, @@ -3188,7 +2647,7 @@ Node SequencesRewriter::rewriteReplace(Node node) // (str.len w) >= (str.len z) Node wlen = nm->mkNode(kind::STRING_LENGTH, w); Node zlen = nm->mkNode(kind::STRING_LENGTH, z); - if (checkEntailArith(wlen, zlen)) + if (ArithEntail::check(wlen, zlen)) { // w != z Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z)); @@ -3214,7 +2673,7 @@ Node SequencesRewriter::rewriteReplace(Node node) return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID); } bool dualReplIteSuccess = false; - Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]); if (!cmp_con2.isNull() && !cmp_con2.getConst()) { // str.contains( x, z ) ---> false @@ -3229,10 +2688,10 @@ Node SequencesRewriter::rewriteReplace(Node node) // implies // str.replace( x, str.replace( x, y, z ), w ) ---> // ite( str.contains( x, y ), x, w ) - cmp_con2 = checkEntailContains(node[1][1], node[1][2]); + cmp_con2 = StringsEntail::checkContains(node[1][1], node[1][2]); if (!cmp_con2.isNull() && !cmp_con2.getConst()) { - cmp_con2 = checkEntailContains(node[1][2], node[1][1]); + cmp_con2 = StringsEntail::checkContains(node[1][2], node[1][1]); if (!cmp_con2.isNull() && !cmp_con2.getConst()) { dualReplIteSuccess = true; @@ -3262,7 +2721,7 @@ Node SequencesRewriter::rewriteReplace(Node node) // str.contains(y, z) ----> false and ( y == w or x == w ) implies // implies // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w) - Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + Node cmp_con2 = StringsEntail::checkContains(node[1][0], node[1][2]); invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); } } @@ -3271,10 +2730,10 @@ Node SequencesRewriter::rewriteReplace(Node node) // str.contains(x, z) ----> false and str.contains(x, w) ----> false // implies // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u) - Node cmp_con2 = checkEntailContains(node[0], node[1][1]); + Node cmp_con2 = StringsEntail::checkContains(node[0], node[1][1]); if (!cmp_con2.isNull() && !cmp_con2.getConst()) { - cmp_con2 = checkEntailContains(node[0], node[1][2]); + cmp_con2 = StringsEntail::checkContains(node[0], node[1][2]); invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); } } @@ -3290,7 +2749,7 @@ Node SequencesRewriter::rewriteReplace(Node node) { // str.contains( z, w ) ----> false implies // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z ) - Node cmp_con2 = checkEntailContains(node[1], node[2][0]); + Node cmp_con2 = StringsEntail::checkContains(node[1], node[2][0]); if (!cmp_con2.isNull() && !cmp_con2.getConst()) { Node res = @@ -3310,7 +2769,7 @@ Node SequencesRewriter::rewriteReplace(Node node) { // str.contains( x, z ) ----> false implies // str.replace( x, y, str.replace( y, z, w ) ) ---> x - cmp_con = checkEntailContains(node[0], node[2][1]); + cmp_con = StringsEntail::checkContains(node[0], node[2][1]); success = !cmp_con.isNull() && !cmp_con.getConst(); } if (success) @@ -3324,7 +2783,7 @@ Node SequencesRewriter::rewriteReplace(Node node) // str.replace( x ++ y ++ x ++ y, "A", z ) --> // str.replace( x ++ y, "A", z ) ++ x ++ y // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y. - if (checkEntailLengthOne(node[1])) + if (StringsEntail::checkLengthOne(node[1])) { Node lastLhs; unsigned lastCheckIndex = 0; @@ -3336,7 +2795,7 @@ Node SequencesRewriter::rewriteReplace(Node node) checkLhs.end(), children0.begin(), children0.begin() + checkIndex); Node lhs = utils::mkConcat(checkLhs, stype); Node rhs = children0[checkIndex]; - Node ctn = checkEntailContains(lhs, rhs); + Node ctn = StringsEntail::checkContains(lhs, rhs); if (!ctn.isNull() && ctn.getConst()) { lastLhs = lhs; @@ -3442,7 +2901,7 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node) if (node[0] == node[1]) { // only holds for replaceall if non-empty - if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1])) + if (nk == STRING_STRREPL || StringsEntail::checkNonEmpty(node[1])) { return returnRewrite(node, node[2], Rewrite::RPL_REPLACE); } @@ -3550,7 +3009,7 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n) // Check if we can turn the prefix/suffix into equalities by showing that the // prefix/suffix is at least as long as the string - Node eqs = inferEqsFromContains(n[1], n[0]); + Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]); if (!eqs.isNull()) { return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS); @@ -3563,2041 +3022,78 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n) return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM); } -Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev) +Node SequencesRewriter::lengthPreserveRewrite(Node n) { - Assert(a.isConst() && b.isConst()); - size_t lenA = Word::getLength(a); - size_t lenB = Word::getLength(b); - index = lenA <= lenB ? 1 : 0; - size_t len_short = index == 1 ? lenA : lenB; - bool cmp = - isRev ? a.getConst().rstrncmp(b.getConst(), len_short) - : a.getConst().strncmp(b.getConst(), len_short); - if (cmp) - { - Node l = index == 0 ? a : b; - if (isRev) - { - int new_len = l.getConst().size() - len_short; - return Word::substr(l, 0, new_len); - } - else - { - return Word::substr(l, len_short); - } - } - // not the same prefix/suffix - return Node::null(); + NodeManager* nm = NodeManager::currentNM(); + Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); + Node res = canonicalStrForSymbolicLength(len, n.getType()); + return res.isNull() ? n : res; } -bool SequencesRewriter::canConstantContainConcat(Node c, - Node n, - int& firstc, - int& lastc) +Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) { - Assert(c.isConst()); - CVC4::String t = c.getConst(); - const std::vector& tvec = t.getVec(); - Assert(n.getKind() == kind::STRING_CONCAT); - // must find constant components in order - size_t pos = 0; - firstc = -1; - lastc = -1; - for (unsigned i = 0; i < n.getNumChildren(); i++) - { - if (n[i].isConst()) + NodeManager* nm = NodeManager::currentNM(); + + Node res; + if (len.getKind() == CONST_RATIONAL) + { + // c -> "A" repeated c times + Rational ratLen = len.getConst(); + Assert(ratLen.getDenominator() == 1); + Integer intLen = ratLen.getNumerator(); + uint32_t u = intLen.getUnsignedInt(); + if (stype.isString()) { - firstc = firstc == -1 ? i : firstc; - lastc = i; - CVC4::String s = n[i].getConst(); - size_t new_pos = t.find(s, pos); - if (new_pos == std::string::npos) - { - return false; - } - else - { - pos = new_pos + s.size(); - } + res = nm->mkConst(String(std::string(u, 'A'))); } - else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0])) + else { - // find the first occurrence of a digit starting at pos - while (pos < tvec.size() && !String::isDigit(tvec[pos])) - { - pos++; - } - if (pos == tvec.size()) - { - return false; - } - // must consume at least one digit here - pos++; + Unimplemented() << "canonicalStrForSymbolicLength for non-string"; } } - return true; -} - -bool SequencesRewriter::canConstantContainList(Node c, - std::vector& l, - int& firstc, - int& lastc) -{ - Assert(c.isConst()); - // must find constant components in order - size_t pos = 0; - firstc = -1; - lastc = -1; - for (unsigned i = 0; i < l.size(); i++) - { - if (l[i].isConst()) + else if (len.getKind() == PLUS) + { + // x + y -> norm(x) + norm(y) + NodeBuilder<> concatBuilder(STRING_CONCAT); + for (const auto& n : len) { - firstc = firstc == -1 ? i : firstc; - lastc = i; - size_t new_pos = Word::find(c, l[i], pos); - if (new_pos == std::string::npos) - { - return false; - } - else + Node sn = canonicalStrForSymbolicLength(n, stype); + if (sn.isNull()) { - pos = new_pos + Word::getLength(l[i]); - } - } - } - return true; -} - -bool SequencesRewriter::stripSymbolicLength(std::vector& n1, - std::vector& nr, - int dir, - Node& curr) -{ - Assert(dir == 1 || dir == -1); - Assert(nr.empty()); - Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); - bool ret = false; - bool success; - unsigned sindex = 0; - do - { - Assert(!curr.isNull()); - success = false; - if (curr != zero && sindex < n1.size()) - { - unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); - if (n1[sindex_use].isConst()) - { - // could strip part of a constant - Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr)); - if (!lowerBound.isNull()) - { - Assert(lowerBound.isConst()); - Rational lbr = lowerBound.getConst(); - if (lbr.sgn() > 0) - { - Assert(checkEntailArith(curr, true)); - CVC4::String s = n1[sindex_use].getConst(); - Node ncl = - NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); - Node next_s = - NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl); - next_s = Rewriter::rewrite(next_s); - Assert(next_s.isConst()); - // we can remove the entire constant - if (next_s.getConst().sgn() >= 0) - { - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl)); - success = true; - sindex++; - } - else - { - // we can remove part of the constant - // lower bound minus the length of a concrete string is negative, - // hence lowerBound cannot be larger than long max - Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::MINUS, curr, lowerBound)); - uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < s.size()); - if (dir == 1) - { - // strip partially from the front - nr.push_back( - NodeManager::currentNM()->mkConst(s.prefix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.suffix(s.size() - lbsize)); - } - else - { - // strip partially from the back - nr.push_back( - NodeManager::currentNM()->mkConst(s.suffix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.prefix(s.size() - lbsize)); - } - ret = true; - } - Assert(checkEntailArith(curr)); - } - else - { - // we cannot remove the constant - } - } - } - else - { - Node next_s = NodeManager::currentNM()->mkNode( - kind::MINUS, - curr, - NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, - n1[sindex_use])); - next_s = Rewriter::rewrite(next_s); - if (checkEntailArith(next_s)) - { - success = true; - curr = next_s; - sindex++; - } - } - } - } while (success); - if (sindex > 0) - { - if (dir == 1) - { - nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); - n1.erase(n1.begin(), n1.begin() + sindex); - } - else - { - nr.insert(nr.end(), n1.end() - sindex, n1.end()); - n1.erase(n1.end() - sindex, n1.end()); - } - ret = true; - } - return ret; -} - -int SequencesRewriter::componentContains(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - bool computeRemainder, - int remainderDir) -{ - Assert(nb.empty()); - Assert(ne.empty()); - // if n2 is a singleton, we can do optimized version here - if (n2.size() == 1) - { - for (unsigned i = 0; i < n1.size(); i++) - { - Node n1rb; - Node n1re; - if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) - { - if (computeRemainder) - { - n1[i] = n2[0]; - if (remainderDir != -1) - { - if (!n1re.isNull()) - { - ne.push_back(n1re); - } - ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); - n1.erase(n1.begin() + i + 1, n1.end()); - } - else if (!n1re.isNull()) - { - n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::STRING_CONCAT, n1[i], n1re)); - } - if (remainderDir != 1) - { - nb.insert(nb.end(), n1.begin(), n1.begin() + i); - n1.erase(n1.begin(), n1.begin() + i); - if (!n1rb.isNull()) - { - nb.push_back(n1rb); - } - } - else if (!n1rb.isNull()) - { - n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::STRING_CONCAT, n1rb, n1[i])); - } - } - return i; - } - } - } - else if (n1.size() >= n2.size()) - { - unsigned diff = n1.size() - n2.size(); - for (unsigned i = 0; i <= diff; i++) - { - Node n1rb_first; - Node n1re_first; - // first component of n2 must be a suffix - if (componentContainsBase(n1[i], - n2[0], - n1rb_first, - n1re_first, - 1, - computeRemainder && remainderDir != 1)) - { - Assert(n1re_first.isNull()); - for (unsigned j = 1; j < n2.size(); j++) - { - // are we in the last component? - if (j + 1 == n2.size()) - { - Node n1rb_last; - Node n1re_last; - // last component of n2 must be a prefix - if (componentContainsBase(n1[i + j], - n2[j], - n1rb_last, - n1re_last, - -1, - computeRemainder && remainderDir != -1)) - { - Assert(n1rb_last.isNull()); - if (computeRemainder) - { - if (remainderDir != -1) - { - if (!n1re_last.isNull()) - { - ne.push_back(n1re_last); - } - ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); - n1.erase(n1.begin() + i + j + 1, n1.end()); - n1[i + j] = n2[j]; - } - if (remainderDir != 1) - { - n1[i] = n2[0]; - nb.insert(nb.end(), n1.begin(), n1.begin() + i); - n1.erase(n1.begin(), n1.begin() + i); - if (!n1rb_first.isNull()) - { - nb.push_back(n1rb_first); - } - } - } - return i; - } - else - { - break; - } - } - else if (n1[i + j] != n2[j]) - { - break; - } - } - } - } - } - return -1; -} - -bool SequencesRewriter::componentContainsBase( - Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) -{ - Assert(n1rb.isNull()); - Assert(n1re.isNull()); - - NodeManager* nm = NodeManager::currentNM(); - - if (n1 == n2) - { - return true; - } - else - { - if (n1.isConst() && n2.isConst()) - { - size_t len1 = Word::getLength(n1); - size_t len2 = Word::getLength(n2); - if (len2 < len1) - { - if (dir == 1) - { - if (Word::suffix(n1, len2) == n2) - { - if (computeRemainder) - { - n1rb = Word::prefix(n1, len1 - len2); - } - return true; - } - } - else if (dir == -1) - { - if (Word::prefix(n1, len2) == n2) - { - if (computeRemainder) - { - n1re = Word::suffix(n1, len1 - len2); - } - return true; - } - } - else - { - size_t f = Word::find(n1, n2); - if (f != std::string::npos) - { - if (computeRemainder) - { - if (f > 0) - { - n1rb = Word::prefix(n1, f); - } - if (len1 > f + len2) - { - n1re = Word::suffix(n1, len1 - (f + len2)); - } - } - return true; - } - } - } - } - else - { - // cases for: - // n1 = x containing n2 = substr( x, n2[1], n2[2] ) - if (n2.getKind() == kind::STRING_SUBSTR) - { - if (n2[0] == n1) - { - bool success = true; - Node start_pos = n2[1]; - Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]); - Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]); - if (dir == 1) - { - // To be a suffix, start + length must be greater than - // or equal to the length of the string. - success = checkEntailArith(end_pos, len_n2s); - } - else if (dir == -1) - { - // To be a prefix, must literally start at 0, since - // if we knew it started at <0, it should be rewritten to "", - // if we knew it started at 0, then n2[1] should be rewritten to - // 0. - success = start_pos.isConst() - && start_pos.getConst().sgn() == 0; - } - if (success) - { - if (computeRemainder) - { - // we can only compute the remainder if start_pos and end_pos - // are known to be non-negative. - if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos)) - { - return false; - } - if (dir != 1) - { - n1rb = nm->mkNode(kind::STRING_SUBSTR, - n2[0], - nm->mkConst(Rational(0)), - start_pos); - } - if (dir != -1) - { - n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s); - } - } - return true; - } - } - } - - if (!computeRemainder && dir == 0) - { - if (n1.getKind() == STRING_STRREPL) - { - // (str.contains (str.replace x y z) w) ---> true - // if (str.contains x w) --> true and (str.contains z w) ---> true - Node xCtnW = checkEntailContains(n1[0], n2); - if (!xCtnW.isNull() && xCtnW.getConst()) - { - Node zCtnW = checkEntailContains(n1[2], n2); - if (!zCtnW.isNull() && zCtnW.getConst()) - { - return true; - } - } - } - } - } - } - return false; -} - -bool SequencesRewriter::stripConstantEndpoints(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - int dir) -{ - Assert(nb.empty()); - Assert(ne.empty()); - - NodeManager* nm = NodeManager::currentNM(); - bool changed = false; - // for ( forwards, backwards ) direction - for (unsigned r = 0; r < 2; r++) - { - if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) - { - unsigned index0 = r == 0 ? 0 : n1.size() - 1; - unsigned index1 = r == 0 ? 0 : n2.size() - 1; - bool removeComponent = false; - Node n1cmp = n1[index0]; - - if (n1cmp.isConst() && n1cmp.getConst().size() == 0) - { - return false; - } - - std::vector sss; - std::vector sls; - n1cmp = decomposeSubstrChain(n1cmp, sss, sls); - Trace("strings-rewrite-debug2") - << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] - << ", dir = " << dir << std::endl; - if (n1cmp.isConst()) - { - CVC4::String s = n1cmp.getConst(); - // overlap is an overapproximation of the number of characters - // n2[index1] can match in s - unsigned overlap = s.size(); - if (n2[index1].isConst()) - { - CVC4::String t = n2[index1].getConst(); - std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); - if (ret == std::string::npos) - { - if (n1.size() == 1) - { - // can remove everything - // e.g. str.contains( "abc", str.++( "ba", x ) ) --> - // str.contains( "", str.++( "ba", x ) ) - removeComponent = true; - } - else if (sss.empty()) // only if not substr - { - // check how much overlap there is - // This is used to partially strip off the endpoint - // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> - // str.contains( str.++( "c", x ), str.++( "cd", y ) ) - overlap = r == 0 ? s.overlap(t) : t.overlap(s); - } - else - { - // if we are looking at a substring, we can remove the component - // if there is no overlap - // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) - // --> str.contains( x, "a" ) - removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); - } - } - else if (sss.empty()) // only if not substr - { - Assert(ret < s.size()); - // can strip off up to the find position, e.g. - // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> - // str.contains( str.++( "bc", x ), str.++( "b", y ) ), - // and - // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> - // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) - overlap = s.size() - ret; - } - } - else - { - // inconclusive - } - // process the overlap - if (overlap < s.size()) - { - changed = true; - if (overlap == 0) - { - removeComponent = true; - } - else - { - // can drop the prefix (resp. suffix) from the first (resp. last) - // component - if (r == 0) - { - nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.suffix(overlap)); - } - else - { - ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.prefix(overlap)); - } - } - } - } - else if (n1cmp.getKind() == kind::STRING_ITOS) - { - if (n2[index1].isConst()) - { - CVC4::String t = n2[index1].getConst(); - - if (n1.size() == 1) - { - // if n1.size()==1, then if n2[index1] is not a number, we can drop - // the entire component - // e.g. str.contains( int.to.str(x), "123a45") --> false - if (!t.isNumber()) - { - removeComponent = true; - } - } - else - { - const std::vector& tvec = t.getVec(); - Assert(tvec.size() > 0); - - // if n1.size()>1, then if the first (resp. last) character of - // n2[index1] - // is not a digit, we can drop the entire component, e.g.: - // str.contains( str.++( int.to.str(x), y ), "a12") --> - // str.contains( y, "a12" ) - // str.contains( str.++( y, int.to.str(x) ), "a0b") --> - // str.contains( y, "a0b" ) - unsigned i = r == 0 ? 0 : (tvec.size() - 1); - if (!String::isDigit(tvec[i])) - { - removeComponent = true; - } - } - } - } - if (removeComponent) - { - // can drop entire first (resp. last) component - if (r == 0) - { - nb.push_back(n1[index0]); - n1.erase(n1.begin(), n1.begin() + 1); - } - else - { - ne.push_back(n1[index0]); - n1.pop_back(); - } - if (n1.empty()) - { - // if we've removed everything, just return (we will rewrite to false) - return true; - } - else - { - changed = true; - } - } - } - } - // TODO (#1180) : computing the maximal overlap in this function may be - // important. - // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> - // false - // ...since str.to.int(x) can contain at most 1 character from "2aaaa", - // leaving 4 characters - // which is larger that the upper bound for length of str.substr(y,0,3), - // which is 3. - return changed; -} - -Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) -{ - NodeManager* nm = NodeManager::currentNM(); - - Node res; - if (len.getKind() == kind::CONST_RATIONAL) - { - // c -> "A" repeated c times - Rational ratLen = len.getConst(); - Assert(ratLen.getDenominator() == 1); - Integer intLen = ratLen.getNumerator(); - uint32_t u = intLen.getUnsignedInt(); - if (stype.isString()) - { - res = nm->mkConst(String(std::string(u, 'A'))); - } - else - { - Unimplemented() << "canonicalStrForSymbolicLength for non-string"; - } - } - else if (len.getKind() == kind::PLUS) - { - // x + y -> norm(x) + norm(y) - NodeBuilder<> concatBuilder(kind::STRING_CONCAT); - for (const auto& n : len) - { - Node sn = canonicalStrForSymbolicLength(n, stype); - if (sn.isNull()) - { - return Node::null(); - } - std::vector snChildren; - utils::getConcat(sn, snChildren); - concatBuilder.append(snChildren); - } - res = concatBuilder.constructNode(); - } - else if (len.getKind() == kind::MULT && len.getNumChildren() == 2 - && len[0].isConst()) - { - // c * x -> norm(x) repeated c times - Rational ratReps = len[0].getConst(); - Assert(ratReps.getDenominator() == 1); - Integer intReps = ratReps.getNumerator(); - - Node nRep = canonicalStrForSymbolicLength(len[1], stype); - std::vector nRepChildren; - utils::getConcat(nRep, nRepChildren); - NodeBuilder<> concatBuilder(kind::STRING_CONCAT); - for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++) - { - concatBuilder.append(nRepChildren); - } - res = concatBuilder.constructNode(); - } - else if (len.getKind() == kind::STRING_LENGTH) - { - // len(x) -> x - res = len[0]; - } - return res; -} - -Node SequencesRewriter::lengthPreserveRewrite(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); - Node res = canonicalStrForSymbolicLength(len, n.getType()); - return res.isNull() ? n : res; -} - -Node SequencesRewriter::checkEntailContains(Node a, Node b, bool fullRewriter) -{ - NodeManager* nm = NodeManager::currentNM(); - Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b); - - if (fullRewriter) - { - ctn = Rewriter::rewrite(ctn); - } - else - { - Node prev; - do - { - prev = ctn; - ctn = rewriteContains(ctn); - } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN); - } - - Assert(ctn.getType().isBoolean()); - return ctn.isConst() ? ctn : Node::null(); -} - -bool SequencesRewriter::checkEntailNonEmpty(Node a) -{ - Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); - len = Rewriter::rewrite(len); - return checkEntailArith(len, true); -} - -bool SequencesRewriter::checkEntailLengthOne(Node s, bool strict) -{ - NodeManager* nm = NodeManager::currentNM(); - Node one = nm->mkConst(Rational(1)); - Node len = nm->mkNode(STRING_LENGTH, s); - len = Rewriter::rewrite(len); - return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true)); -} - -bool SequencesRewriter::checkEntailArithEq(Node a, Node b) -{ - if (a == b) - { - return true; - } - else - { - Node ar = Rewriter::rewrite(a); - Node br = Rewriter::rewrite(b); - return ar == br; - } -} - -bool SequencesRewriter::checkEntailArith(Node a, Node b, bool strict) -{ - if (a == b) - { - return !strict; - } - else - { - Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); - return checkEntailArith(diff, strict); - } -} - -struct StrCheckEntailArithTag -{ -}; -struct StrCheckEntailArithComputedTag -{ -}; -/** Attribute true for expressions for which checkEntailArith returned true */ -typedef expr::Attribute StrCheckEntailArithAttr; -typedef expr::Attribute - StrCheckEntailArithComputedAttr; - -bool SequencesRewriter::checkEntailArith(Node a, bool strict) -{ - if (a.isConst()) - { - return a.getConst().sgn() >= (strict ? 1 : 0); - } - - Node ar = - strict - ? NodeManager::currentNM()->mkNode( - kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1))) - : a; - ar = Rewriter::rewrite(ar); - - if (ar.getAttribute(StrCheckEntailArithComputedAttr())) - { - return ar.getAttribute(StrCheckEntailArithAttr()); - } - - bool ret = checkEntailArithInternal(ar); - if (!ret) - { - // try with approximations - ret = checkEntailArithApprox(ar); - } - // cache the result - ar.setAttribute(StrCheckEntailArithAttr(), ret); - ar.setAttribute(StrCheckEntailArithComputedAttr(), true); - return ret; -} - -bool SequencesRewriter::checkEntailArithApprox(Node ar) -{ - Assert(Rewriter::rewrite(ar) == ar); - NodeManager* nm = NodeManager::currentNM(); - std::map msum; - Trace("strings-ent-approx-debug") - << "Setup arithmetic approximations for " << ar << std::endl; - if (!ArithMSum::getMonomialSum(ar, msum)) - { - Trace("strings-ent-approx-debug") - << "...failed to get monomial sum!" << std::endl; - return false; - } - // for each monomial v*c, mApprox[v] a list of - // possibilities for how the term can be soundly approximated, that is, - // if mApprox[v] contains av, then v*c > av*c. Notice that if c - // is positive, then v > av, otherwise if c is negative, then v < av. - // In other words, av is an under-approximation if c is positive, and an - // over-approximation if c is negative. - bool changed = false; - std::map > mApprox; - // map from approximations to their monomial sums - std::map > approxMsums; - // aarSum stores each monomial that does not have multiple approximations - std::vector aarSum; - for (std::pair& m : msum) - { - Node v = m.first; - Node c = m.second; - Trace("strings-ent-approx-debug") - << "Get approximations " << v << "..." << std::endl; - if (v.isNull()) - { - Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c; - aarSum.push_back(mn); - } - else - { - // c.isNull() means c = 1 - bool isOverApprox = !c.isNull() && c.getConst().sgn() == -1; - std::vector& approx = mApprox[v]; - std::unordered_set visited; - std::vector toProcess; - toProcess.push_back(v); - do - { - Node curr = toProcess.back(); - Trace("strings-ent-approx-debug") << " process " << curr << std::endl; - curr = Rewriter::rewrite(curr); - toProcess.pop_back(); - if (visited.find(curr) == visited.end()) - { - visited.insert(curr); - std::vector currApprox; - getArithApproximations(curr, currApprox, isOverApprox); - if (currApprox.empty()) - { - Trace("strings-ent-approx-debug") - << "...approximation: " << curr << std::endl; - // no approximations, thus curr is a possibility - approx.push_back(curr); - } - else - { - toProcess.insert( - toProcess.end(), currApprox.begin(), currApprox.end()); - } - } - } while (!toProcess.empty()); - Assert(!approx.empty()); - // if we have only one approximation, move it to final - if (approx.size() == 1) - { - changed = v != approx[0]; - Node mn = ArithMSum::mkCoeffTerm(c, approx[0]); - aarSum.push_back(mn); - mApprox.erase(v); - } - else - { - // compute monomial sum form for each approximation, used below - for (const Node& aa : approx) - { - if (approxMsums.find(aa) == approxMsums.end()) - { - CVC4_UNUSED bool ret = - ArithMSum::getMonomialSum(aa, approxMsums[aa]); - Assert(ret); - } - } - changed = true; - } - } - } - if (!changed) - { - // approximations had no effect, return - Trace("strings-ent-approx-debug") << "...no approximations" << std::endl; - return false; - } - // get the current "fixed" sum for the abstraction of ar - Node aar = aarSum.empty() - ? nm->mkConst(Rational(0)) - : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum)); - aar = Rewriter::rewrite(aar); - Trace("strings-ent-approx-debug") - << "...processed fixed sum " << aar << " with " << mApprox.size() - << " approximated monomials." << std::endl; - // if we have a choice of how to approximate - if (!mApprox.empty()) - { - // convert aar back to monomial sum - std::map msumAar; - if (!ArithMSum::getMonomialSum(aar, msumAar)) - { - return false; - } - if (Trace.isOn("strings-ent-approx")) - { - Trace("strings-ent-approx") - << "---- Check arithmetic entailment by under-approximation " << ar - << " >= 0" << std::endl; - Trace("strings-ent-approx") << "FIXED:" << std::endl; - ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx"); - Trace("strings-ent-approx") << "APPROX:" << std::endl; - for (std::pair >& a : mApprox) - { - Node c = msum[a.first]; - Trace("strings-ent-approx") << " "; - if (!c.isNull()) - { - Trace("strings-ent-approx") << c << " * "; - } - Trace("strings-ent-approx") - << a.second << " ...from " << a.first << std::endl; - } - Trace("strings-ent-approx") << std::endl; - } - Rational one(1); - // incorporate monomials one at a time that have a choice of approximations - while (!mApprox.empty()) - { - Node v; - Node vapprox; - int maxScore = -1; - // Look at each approximation, take the one with the best score. - // Notice that we are in the process of trying to prove - // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0, - // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar) - // and approx_1 ... approx_m are possible approximations. The - // intution here is that we want coefficients c1...cn to be positive. - // This is because arithmetic string terms t1...tn (which may be - // applications of len, indexof, str.to.int) are never entailed to be - // negative. Hence, we add the approx_i that contributes the "most" - // towards making all constants c1...cn positive and cancelling negative - // monomials in approx_i itself. - for (std::pair >& nam : mApprox) - { - Node cr = msum[nam.first]; - for (const Node& aa : nam.second) - { - unsigned helpsCancelCount = 0; - unsigned addsObligationCount = 0; - std::map::iterator it; - // we are processing an approximation cr*( c1*t1 + ... + cn*tn ) - for (std::pair& aam : approxMsums[aa]) - { - // Say aar is of the form t + c*ti, and aam is the monomial ci*ti - // where ci != 0. We say aam: - // (1) helps cancel if c != 0 and c>0 != ci>0 - // (2) adds obligation if c>=0 and c+ci<0 - Node ti = aam.first; - Node ci = aam.second; - if (!cr.isNull()) - { - ci = ci.isNull() ? cr - : Rewriter::rewrite(nm->mkNode(MULT, ci, cr)); - } - Trace("strings-ent-approx-debug") << ci << "*" << ti << " "; - int ciSgn = ci.isNull() ? 1 : ci.getConst().sgn(); - it = msumAar.find(ti); - if (it != msumAar.end()) - { - Node c = it->second; - int cSgn = c.isNull() ? 1 : c.getConst().sgn(); - if (cSgn == 0) - { - addsObligationCount += (ciSgn == -1 ? 1 : 0); - } - else if (cSgn != ciSgn) - { - helpsCancelCount++; - Rational r1 = c.isNull() ? one : c.getConst(); - Rational r2 = ci.isNull() ? one : ci.getConst(); - Rational r12 = r1 + r2; - if (r12.sgn() == -1) - { - addsObligationCount++; - } - } - } - else - { - addsObligationCount += (ciSgn == -1 ? 1 : 0); - } - } - Trace("strings-ent-approx-debug") - << "counts=" << helpsCancelCount << "," << addsObligationCount - << " for " << aa << " into " << aar << std::endl; - int score = (addsObligationCount > 0 ? 0 : 2) - + (helpsCancelCount > 0 ? 1 : 0); - // if its the best, update v and vapprox - if (v.isNull() || score > maxScore) - { - v = nam.first; - vapprox = aa; - maxScore = score; - } - } - if (!v.isNull()) - { - break; - } - } - Trace("strings-ent-approx") - << "- Decide " << v << " = " << vapprox << std::endl; - // we incorporate v approximated by vapprox into the overall approximation - // for ar - Assert(!v.isNull() && !vapprox.isNull()); - Assert(msum.find(v) != msum.end()); - Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox); - aar = nm->mkNode(PLUS, aar, mn); - // update the msumAar map - aar = Rewriter::rewrite(aar); - msumAar.clear(); - if (!ArithMSum::getMonomialSum(aar, msumAar)) - { - Assert(false); - Trace("strings-ent-approx") - << "...failed to get monomial sum!" << std::endl; - return false; - } - // we have processed the approximation for v - mApprox.erase(v); - } - Trace("strings-ent-approx") << "-----------------" << std::endl; - } - if (aar == ar) - { - Trace("strings-ent-approx-debug") - << "...approximation had no effect" << std::endl; - // this should never happen, but we avoid the infinite loop for sanity here - Assert(false); - return false; - } - // Check entailment on the approximation of ar. - // Notice that this may trigger further reasoning by approximation. For - // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be - // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on - // this call, where in the recursive call we may over-approximate - // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer - // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two - // steps. - if (checkEntailArith(aar)) - { - Trace("strings-ent-approx") - << "*** StrArithApprox: showed " << ar - << " >= 0 using under-approximation!" << std::endl; - Trace("strings-ent-approx") - << "*** StrArithApprox: under-approximation was " << aar << std::endl; - return true; - } - return false; -} - -void SequencesRewriter::getArithApproximations(Node a, - std::vector& approx, - bool isOverApprox) -{ - NodeManager* nm = NodeManager::currentNM(); - // We do not handle PLUS here since this leads to exponential behavior. - // Instead, this is managed, e.g. during checkEntailArithApprox, where - // PLUS terms are expanded "on-demand" during the reasoning. - Trace("strings-ent-approx-debug") - << "Get arith approximations " << a << std::endl; - Kind ak = a.getKind(); - if (ak == MULT) - { - Node c; - Node v; - if (ArithMSum::getMonomial(a, c, v)) - { - bool isNeg = c.getConst().sgn() == -1; - getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox); - for (unsigned i = 0, size = approx.size(); i < size; i++) - { - approx[i] = nm->mkNode(MULT, c, approx[i]); - } - } - } - else if (ak == STRING_LENGTH) - { - Kind aak = a[0].getKind(); - if (aak == STRING_SUBSTR) - { - // over,under-approximations for len( substr( x, n, m ) ) - Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); - if (isOverApprox) - { - // m >= 0 implies - // m >= len( substr( x, n, m ) ) - if (checkEntailArith(a[0][2])) - { - approx.push_back(a[0][2]); - } - if (checkEntailArith(lenx, a[0][1])) - { - // n <= len( x ) implies - // len( x ) - n >= len( substr( x, n, m ) ) - approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); - } - else - { - // len( x ) >= len( substr( x, n, m ) ) - approx.push_back(lenx); - } - } - else - { - // 0 <= n and n+m <= len( x ) implies - // m <= len( substr( x, n, m ) ) - Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]); - if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm)) - { - approx.push_back(a[0][2]); - } - // 0 <= n and n+m >= len( x ) implies - // len(x)-n <= len( substr( x, n, m ) ) - if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx)) - { - approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); - } - } - } - else if (aak == STRING_STRREPL) - { - // over,under-approximations for len( replace( x, y, z ) ) - // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) - Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); - Node leny = nm->mkNode(STRING_LENGTH, a[0][1]); - Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]); - if (isOverApprox) - { - if (checkEntailArith(leny, lenz)) - { - // len( y ) >= len( z ) implies - // len( x ) >= len( replace( x, y, z ) ) - approx.push_back(lenx); - } - else - { - // len( x ) + len( z ) >= len( replace( x, y, z ) ) - approx.push_back(nm->mkNode(PLUS, lenx, lenz)); - } - } - else - { - if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx)) - { - // len( y ) <= len( z ) or len( x ) <= len( z ) implies - // len( x ) <= len( replace( x, y, z ) ) - approx.push_back(lenx); - } - else - { - // len( x ) - len( y ) <= len( replace( x, y, z ) ) - approx.push_back(nm->mkNode(MINUS, lenx, leny)); - } - } - } - else if (aak == STRING_ITOS) - { - // over,under-approximations for len( int.to.str( x ) ) - if (isOverApprox) - { - if (checkEntailArith(a[0][0], false)) - { - if (checkEntailArith(a[0][0], true)) - { - // x > 0 implies - // x >= len( int.to.str( x ) ) - approx.push_back(a[0][0]); - } - else - { - // x >= 0 implies - // x+1 >= len( int.to.str( x ) ) - approx.push_back( - nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0])); - } - } - } - else - { - if (checkEntailArith(a[0][0])) - { - // x >= 0 implies - // len( int.to.str( x ) ) >= 1 - approx.push_back(nm->mkConst(Rational(1))); - } - // other crazy things are possible here, e.g. - // len( int.to.str( len( y ) + 10 ) ) >= 2 - } - } - } - else if (ak == STRING_STRIDOF) - { - // over,under-approximations for indexof( x, y, n ) - if (isOverApprox) - { - Node lenx = nm->mkNode(STRING_LENGTH, a[0]); - Node leny = nm->mkNode(STRING_LENGTH, a[1]); - if (checkEntailArith(lenx, leny)) - { - // len( x ) >= len( y ) implies - // len( x ) - len( y ) >= indexof( x, y, n ) - approx.push_back(nm->mkNode(MINUS, lenx, leny)); - } - else - { - // len( x ) >= indexof( x, y, n ) - approx.push_back(lenx); - } - } - else - { - // TODO?: - // contains( substr( x, n, len( x ) ), y ) implies - // n <= indexof( x, y, n ) - // ...hard to test, runs risk of non-termination - - // -1 <= indexof( x, y, n ) - approx.push_back(nm->mkConst(Rational(-1))); - } - } - else if (ak == STRING_STOI) - { - // over,under-approximations for str.to.int( x ) - if (isOverApprox) - { - // TODO?: - // y >= 0 implies - // y >= str.to.int( int.to.str( y ) ) - } - else - { - // -1 <= str.to.int( x ) - approx.push_back(nm->mkConst(Rational(-1))); - } - } - Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl; -} - -bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) -{ - std::vector avec; - utils::getConcat(getMultisetApproximation(a), avec); - std::vector bvec; - utils::getConcat(b, bvec); - - std::map num_nconst[2]; - std::map num_const[2]; - for (unsigned j = 0; j < 2; j++) - { - std::vector& jvec = j == 0 ? avec : bvec; - for (const Node& cc : jvec) - { - if (cc.isConst()) - { - num_const[j][cc]++; - } - else - { - num_nconst[j][cc]++; - } - } - } - bool ms_success = true; - for (std::pair& nncp : num_nconst[0]) - { - if (nncp.second > num_nconst[1][nncp.first]) - { - ms_success = false; - break; - } - } - if (ms_success) - { - // count the number of constant characters in the first argument - std::map count_const[2]; - std::vector chars; - for (unsigned j = 0; j < 2; j++) - { - for (std::pair& ncp : num_const[j]) - { - Node cn = ncp.first; - Assert(cn.isConst()); - std::vector cnChars = Word::getChars(cn); - for (const Node& ch : cnChars) - { - count_const[j][ch] += ncp.second; - if (std::find(chars.begin(), chars.end(), ch) == chars.end()) - { - chars.push_back(ch); - } - } - } - } - Trace("strings-entail-ms-ss") - << "For " << a << " and " << b << " : " << std::endl; - for (const Node& ch : chars) - { - Trace("strings-entail-ms-ss") << " # occurrences of substring "; - Trace("strings-entail-ms-ss") << ch << " in arguments is "; - Trace("strings-entail-ms-ss") - << count_const[0][ch] << " / " << count_const[1][ch] << std::endl; - if (count_const[0][ch] < count_const[1][ch]) - { - return true; - } - } - - // TODO (#1180): count the number of 2,3,4,.. character substrings - // for example: - // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false - // since the second argument contains more occurrences of "bb". - // note this is orthogonal reasoning to inductive reasoning - // via regular membership reduction in Liang et al CAV 2015. - } - return false; -} - -Node SequencesRewriter::checkEntailHomogeneousString(Node a) -{ - std::vector avec; - utils::getConcat(getMultisetApproximation(a), avec); - - bool cValid = false; - Node c; - for (const Node& ac : avec) - { - if (ac.isConst()) - { - std::vector acv = Word::getChars(ac); - for (const Node& cc : acv) - { - if (!cValid) - { - cValid = true; - c = cc; - } - else if (c != cc) - { - // Found a different character - return Node::null(); - } - } - } - else - { - // Could produce a different character - return Node::null(); - } - } - - if (!cValid) - { - return Word::mkEmptyWord(a.getType()); - } - - return c; -} - -Node SequencesRewriter::getMultisetApproximation(Node a) -{ - NodeManager* nm = NodeManager::currentNM(); - if (a.getKind() == STRING_SUBSTR) - { - return a[0]; - } - else if (a.getKind() == STRING_STRREPL) - { - return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); - } - else if (a.getKind() == STRING_CONCAT) - { - NodeBuilder<> nb(STRING_CONCAT); - for (const Node& ac : a) - { - nb << getMultisetApproximation(ac); - } - return nb.constructNode(); - } - else - { - return a; - } -} - -bool SequencesRewriter::checkEntailArithWithEqAssumption(Node assumption, - Node a, - bool strict) -{ - Assert(assumption.getKind() == kind::EQUAL); - Assert(Rewriter::rewrite(assumption) == assumption); - - // Find candidates variables to compute substitutions for - std::unordered_set candVars; - std::vector toVisit = {assumption}; - while (!toVisit.empty()) - { - Node curr = toVisit.back(); - toVisit.pop_back(); - - if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT - || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL) - { - for (const auto& currChild : curr) - { - toVisit.push_back(currChild); - } - } - else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH) - { - candVars.insert(curr); - } - else if (curr.getKind() == kind::STRING_LENGTH) - { - candVars.insert(curr); - } - } - - // Check if any of the candidate variables are in n - Node v; - Assert(toVisit.empty()); - toVisit.push_back(a); - while (!toVisit.empty()) - { - Node curr = toVisit.back(); - toVisit.pop_back(); - - for (const auto& currChild : curr) - { - toVisit.push_back(currChild); - } - - if (candVars.find(curr) != candVars.end()) - { - v = curr; - break; - } - } - - if (v.isNull()) - { - // No suitable candidate found - return false; - } - - Node solution = ArithMSum::solveEqualityFor(assumption, v); - if (solution.isNull()) - { - // Could not solve for v - return false; - } - - a = a.substitute(TNode(v), TNode(solution)); - return checkEntailArith(a, strict); -} - -bool SequencesRewriter::checkEntailArithWithAssumption(Node assumption, - Node a, - Node b, - bool strict) -{ - Assert(Rewriter::rewrite(assumption) == assumption); - - NodeManager* nm = NodeManager::currentNM(); - - if (!assumption.isConst() && assumption.getKind() != kind::EQUAL) - { - // We rewrite inequality assumptions from x <= y to x + (str.len s) = y - // where s is some fresh string variable. We use (str.len s) because - // (str.len s) must be non-negative for the equation to hold. - Node x, y; - if (assumption.getKind() == kind::GEQ) - { - x = assumption[0]; - y = assumption[1]; - } - else - { - // (not (>= s t)) --> (>= (t - 1) s) - Assert(assumption.getKind() == kind::NOT - && assumption[0].getKind() == kind::GEQ); - x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1))); - y = assumption[0][0]; - } - - Node s = nm->mkBoundVar("slackVal", nm->stringType()); - Node slen = nm->mkNode(kind::STRING_LENGTH, s); - assumption = Rewriter::rewrite( - nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen))); - } - - Node diff = nm->mkNode(kind::MINUS, a, b); - bool res = false; - if (assumption.isConst()) - { - bool assumptionBool = assumption.getConst(); - if (assumptionBool) - { - res = checkEntailArith(diff, strict); - } - else - { - res = true; - } - } - else - { - res = checkEntailArithWithEqAssumption(assumption, diff, strict); - } - return res; -} - -bool SequencesRewriter::checkEntailArithWithAssumptions( - std::vector assumptions, Node a, Node b, bool strict) -{ - // TODO: We currently try to show the entailment with each assumption - // independently. In the future, we should make better use of multiple - // assumptions. - bool res = false; - for (const auto& assumption : assumptions) - { - Assert(Rewriter::rewrite(assumption) == assumption); - - if (checkEntailArithWithAssumption(assumption, a, b, strict)) - { - res = true; - break; - } - } - return res; -} - -Node SequencesRewriter::getConstantArithBound(Node a, bool isLower) -{ - Assert(Rewriter::rewrite(a) == a); - Node ret; - if (a.isConst()) - { - ret = a; - } - else if (a.getKind() == kind::STRING_LENGTH) - { - if (isLower) - { - ret = NodeManager::currentNM()->mkConst(Rational(0)); - } - } - else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) - { - std::vector children; - bool success = true; - for (unsigned i = 0; i < a.getNumChildren(); i++) - { - Node ac = getConstantArithBound(a[i], isLower); - if (ac.isNull()) - { - ret = ac; - success = false; - break; - } - else - { - if (ac.getConst().sgn() == 0) - { - if (a.getKind() == kind::MULT) - { - ret = ac; - success = false; - break; - } - } - else - { - if (a.getKind() == kind::MULT) - { - if ((ac.getConst().sgn() > 0) != isLower) - { - ret = Node::null(); - success = false; - break; - } - } - children.push_back(ac); - } - } - } - if (success) - { - if (children.empty()) - { - ret = NodeManager::currentNM()->mkConst(Rational(0)); - } - else if (children.size() == 1) - { - ret = children[0]; - } - else - { - ret = NodeManager::currentNM()->mkNode(a.getKind(), children); - ret = Rewriter::rewrite(ret); - } - } - } - Trace("strings-rewrite-cbound") - << "Constant " << (isLower ? "lower" : "upper") << " bound for " << a - << " is " << ret << std::endl; - Assert(ret.isNull() || ret.isConst()); - // entailment check should be at least as powerful as computing a lower bound - Assert(!isLower || ret.isNull() || ret.getConst().sgn() < 0 - || checkEntailArith(a, false)); - Assert(!isLower || ret.isNull() || ret.getConst().sgn() <= 0 - || checkEntailArith(a, true)); - return ret; -} - -Node SequencesRewriter::getFixedLengthForRegexp(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - if (n.getKind() == STRING_TO_REGEXP) - { - Node ret = nm->mkNode(STRING_LENGTH, n[0]); - ret = Rewriter::rewrite(ret); - if (ret.isConst()) - { - return ret; - } - } - else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) - { - return nm->mkConst(Rational(1)); - } - else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) - { - Node ret; - for (const Node& nc : n) - { - Node flc = getFixedLengthForRegexp(nc); - if (flc.isNull() || (!ret.isNull() && ret != flc)) - { - return Node::null(); - } - else if (ret.isNull()) - { - // first time - ret = flc; + return Node::null(); } + std::vector snChildren; + utils::getConcat(sn, snChildren); + concatBuilder.append(snChildren); } - return ret; + res = concatBuilder.constructNode(); } - else if (n.getKind() == REGEXP_CONCAT) + else if (len.getKind() == MULT && len.getNumChildren() == 2 + && len[0].isConst()) { - NodeBuilder<> nb(PLUS); - for (const Node& nc : n) - { - Node flc = getFixedLengthForRegexp(nc); - if (flc.isNull()) - { - return flc; - } - nb << flc; - } - Node ret = nb.constructNode(); - ret = Rewriter::rewrite(ret); - return ret; - } - return Node::null(); -} + // c * x -> norm(x) repeated c times + Rational ratReps = len[0].getConst(); + Assert(ratReps.getDenominator() == 1); + Integer intReps = ratReps.getNumerator(); -bool SequencesRewriter::checkEntailArithInternal(Node a) -{ - Assert(Rewriter::rewrite(a) == a); - // check whether a >= 0 - if (a.isConst()) - { - return a.getConst().sgn() >= 0; - } - else if (a.getKind() == kind::STRING_LENGTH) - { - // str.len( t ) >= 0 - return true; - } - else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) - { - for (unsigned i = 0; i < a.getNumChildren(); i++) + Node nRep = canonicalStrForSymbolicLength(len[1], stype); + std::vector nRepChildren; + utils::getConcat(nRep, nRepChildren); + NodeBuilder<> concatBuilder(STRING_CONCAT); + for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++) { - if (!checkEntailArithInternal(a[i])) - { - return false; - } + concatBuilder.append(nRepChildren); } - // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... op tn >= 0 - return true; - } - - return false; -} - -Node SequencesRewriter::decomposeSubstrChain(Node s, - std::vector& ss, - std::vector& ls) -{ - Assert(ss.empty()); - Assert(ls.empty()); - while (s.getKind() == STRING_SUBSTR) - { - ss.push_back(s[1]); - ls.push_back(s[2]); - s = s[0]; - } - std::reverse(ss.begin(), ss.end()); - std::reverse(ls.begin(), ls.end()); - return s; -} - -Node SequencesRewriter::mkSubstrChain(Node base, - const std::vector& ss, - const std::vector& ls) -{ - NodeManager* nm = NodeManager::currentNM(); - for (unsigned i = 0, size = ss.size(); i < size; i++) - { - base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); + res = concatBuilder.constructNode(); } - return base; -} - -Node SequencesRewriter::getStringOrEmpty(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - Node res; - while (res.isNull()) + else if (len.getKind() == STRING_LENGTH) { - switch (n.getKind()) - { - case kind::STRING_STRREPL: - { - Node empty = nm->mkConst(::CVC4::String("")); - if (n[0] == empty) - { - // (str.replace "" x y) --> y - n = n[2]; - break; - } - - if (checkEntailLengthOne(n[0]) && n[2] == empty) - { - // (str.replace "A" x "") --> "A" - res = n[0]; - break; - } - - res = n; - break; - } - case kind::STRING_SUBSTR: - { - if (checkEntailLengthOne(n[0])) - { - // (str.substr "A" x y) --> "A" - res = n[0]; - break; - } - res = n; - break; - } - default: - { - res = n; - break; - } - } + // len(x) -> x + res = len[0]; } return res; } -bool SequencesRewriter::inferZerosInSumGeq(Node x, - std::vector& ys, - std::vector& zeroYs) -{ - Assert(zeroYs.empty()); - - NodeManager* nm = NodeManager::currentNM(); - - // Check if we can show that y1 + ... + yn >= x - Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0]; - if (!checkEntailArith(sum, x)) - { - return false; - } - - // Try to remove yi one-by-one and check if we can still show: - // - // y1 + ... + yi-1 + yi+1 + ... + yn >= x - // - // If that's the case, we know that yi can be zero and the inequality still - // holds. - size_t i = 0; - while (i < ys.size()) - { - Node yi = ys[i]; - std::vector::iterator pos = ys.erase(ys.begin() + i); - if (ys.size() > 1) - { - sum = nm->mkNode(PLUS, ys); - } - else - { - sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0)); - } - - if (checkEntailArith(sum, x)) - { - zeroYs.push_back(yi); - } - else - { - ys.insert(pos, yi); - i++; - } - } - return true; -} - -Node SequencesRewriter::inferEqsFromContains(Node x, Node y) -{ - NodeManager* nm = NodeManager::currentNM(); - Node emp = nm->mkConst(String("")); - Assert(x.getType() == y.getType()); - TypeNode stype = x.getType(); - - Node xLen = nm->mkNode(STRING_LENGTH, x); - std::vector yLens; - if (y.getKind() != STRING_CONCAT) - { - yLens.push_back(nm->mkNode(STRING_LENGTH, y)); - } - else - { - for (const Node& yi : y) - { - yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); - } - } - - std::vector zeroLens; - if (x == emp) - { - // If x is the empty string, then all ys must be empty, too, and we can - // skip the expensive checks. Note that this is just a performance - // optimization. - zeroLens.swap(yLens); - } - else - { - // Check if we can infer that str.len(x) <= str.len(y). If that is the - // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + - // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality - // true. The terms that can have length zero without making the inequality - // false must be all be empty if (str.contains x y) is true. - if (!inferZerosInSumGeq(xLen, yLens, zeroLens)) - { - // We could not prove that the inequality holds - return Node::null(); - } - else if (yLens.size() == y.getNumChildren()) - { - // We could only prove that the inequality holds but not that any of the - // ys must be empty - return nm->mkNode(EQUAL, x, y); - } - } - - if (y.getKind() != STRING_CONCAT) - { - if (zeroLens.size() == 1) - { - // y is not a concatenation and we found that it must be empty, so just - // return (= y "") - Assert(zeroLens[0][0] == y); - return nm->mkNode(EQUAL, y, emp); - } - else - { - Assert(yLens.size() == 1 && yLens[0][0] == y); - return nm->mkNode(EQUAL, x, y); - } - } - - std::vector cs; - for (const Node& yiLen : yLens) - { - Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); - cs.push_back(yiLen[0]); - } - - NodeBuilder<> nb(AND); - // (= x (str.++ y1' ... ym')) - if (!cs.empty()) - { - nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); - } - // (= y1'' "") ... (= yk'' "") - for (const Node& zeroLen : zeroLens) - { - Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); - nb << nm->mkNode(EQUAL, zeroLen[0], emp); - } - - // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) - return nb.constructNode(); -} - -std::pair > SequencesRewriter::collectEmptyEqs(Node x) -{ - NodeManager* nm = NodeManager::currentNM(); - Node empty = nm->mkConst(::CVC4::String("")); - - // Collect the equalities of the form (= x "") (sorted) - std::set emptyNodes; - bool allEmptyEqs = true; - if (x.getKind() == kind::EQUAL) - { - if (x[0] == empty) - { - emptyNodes.insert(x[1]); - } - else if (x[1] == empty) - { - emptyNodes.insert(x[0]); - } - else - { - allEmptyEqs = false; - } - } - else if (x.getKind() == kind::AND) - { - for (const Node& c : x) - { - if (c.getKind() == kind::EQUAL) - { - if (c[0] == empty) - { - emptyNodes.insert(c[1]); - } - else if (c[1] == empty) - { - emptyNodes.insert(c[0]); - } - } - else - { - allEmptyEqs = false; - } - } - } - - if (emptyNodes.size() == 0) - { - allEmptyEqs = false; - } - - return std::make_pair( - allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); -} Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r) { @@ -5646,3 +3142,7 @@ Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r) } return ret; } + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 0e5cd5705..7391a7bd0 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -18,14 +18,11 @@ #ifndef CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H #define CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H -#include -#include #include -#include "expr/attribute.h" +#include "expr/node.h" #include "theory/strings/rewrites.h" #include "theory/theory_rewriter.h" -#include "theory/type_enumerator.h" namespace CVC4 { namespace theory { @@ -34,69 +31,6 @@ namespace strings { class SequencesRewriter : public TheoryRewriter { protected: - /** simple regular expression consume - * - * This method is called when we are rewriting a membership of the form - * s1 ++ ... ++ sn in r1 ++ ... ++ rm - * We have that mchildren consists of the strings s1...sn, and children - * consists of the regular expressions r1...rm. - * - * This method tries to strip off parts of the concatenation terms. It updates - * the vectors such that the resulting vectors are such that the membership - * mchildren[n'...n''] in children[m'...m''] is equivalent to the input - * membership. The argument dir indicates the direction to consider, where - * 0 means strip off the front, 1 off the back, and < 0 off of both. - * - * If this method returns the false node, then we have inferred that no - * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or - * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null - * node. - * - * For example, given input - * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0, - * this method updates: - * mchildren = { "b", x }, children = { ("cd")* } - * and returns null. - * - * For example, given input - * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1, - * this method updates: - * { "b" }, { [[y]] } - * where [[.]] denotes str.to.re, and returns null. - * - * Notice that the above requirement for returning false is stronger than - * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false. - * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false - * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb". - * We do not return false even though the above membership is equivalent - * to false. We do this because the function is used e.g. to test whether a - * possible unrolling leads to a conflict. This is demonstrated by the - * following examples: - * - * For example, given input - * { "bb", x }, { "b", ("a")* } and dir=-1, - * this method updates: - * { "b" }, { ("a")* } - * and returns null. - * - * For example, given input - * { "cb", x }, { "b", ("a")* } and dir=-1, - * this method leaves children and mchildren unchanged and returns false. - * - * Notice that based on this, we can determine that: - * "cb" ++ x in ( "b" ++ ("a")* )* - * is equivalent to false, whereas we cannot determine that: - * "bb" ++ x in ( "b" ++ ("a")* )* - * is equivalent to false. - */ - static Node simpleRegexpConsume(std::vector& mchildren, - std::vector& children, - int dir = -1); - static bool isConstRegExp(TNode t); - static bool testConstStringInRegExp(CVC4::String& s, - unsigned int index_start, - TNode r); - /** rewrite regular expression concatenation * * This is the entry point for post-rewriting applications of re.++. @@ -159,12 +93,6 @@ class SequencesRewriter : public TheoryRewriter */ static Node rewriteMembership(TNode node); - static bool hasEpsilonNode(TNode node); - /** check entail arithmetic internal - * Returns true if we can show a >= 0 always. - * a is in rewritten form. - */ - static bool checkEntailArithInternal(Node a); /** rewrite string equality extended * * This method returns a formula that is equivalent to the equality between @@ -292,233 +220,6 @@ class SequencesRewriter : public TheoryRewriter */ static Node rewriteStringToCode(Node node); - static Node splitConstant(Node a, Node b, int& index, bool isRev); - /** can constant contain list - * return true if constant c can contain the list l in order - * firstc/lastc store which indices in l were used to determine the return - * value. - * (This is typically used when this function returns false, for minimizing - * explanations) - * - * For example: - * canConstantContainList( "abc", { x, "c", y } ) returns true - * firstc/lastc are updated to 1/1 - * canConstantContainList( "abc", { x, "d", y } ) returns false - * firstc/lastc are updated to 1/1 - * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w } - * returns false - * firstc/lastc are updated to 1/3 - * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w } - * returns false - * firstc/lastc are updated to 1/5 - */ - static bool canConstantContainList(Node c, - std::vector& l, - int& firstc, - int& lastc); - /** can constant contain concat - * same as above but with n = str.++( l ) instead of l - */ - static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc); - - /** strip symbolic length - * - * This function strips off components of n1 whose length is less than - * or equal to argument curr, and stores them in nr. The direction - * dir determines whether the components are removed from the start - * or end of n1. - * - * In detail, this function updates n1 to n1' such that: - * If dir=1, - * n1 = str.++( nr, n1' ) - * If dir=-1 - * n1 = str.++( n1', nr ) - * It updates curr to curr' such that: - * curr' = curr - str.len( str.++( nr ) ), and - * curr' >= 0 - * where the latter fact is determined by checkArithEntail. - * - * This function returns true if n1 is modified. - * - * For example: - * - * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 ) - * returns true - * n1 is updated to { "bc", y } - * nr is updated to { x, "a" } - * curr is updated to 0 * - * - * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 ) - * returns false - * - * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 ) - * returns false - * - * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 ) - * returns true - * n1 is updated to { x } - * nr is updated to { "abc", y } - * curr is updated to str.len(y)+1 - */ - static bool stripSymbolicLength(std::vector& n1, - std::vector& nr, - int dir, - Node& curr); - /** component contains - * This function is used when rewriting str.contains( t1, t2 ), where - * n1 is the vector form of t1 - * n2 is the vector form of t2 - * - * If this function returns n>=0 for some n, then - * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm }, - * n2 = { y1...ys }, - * y1 is a suffix of xn, - * y2...y{s-1} = x{n+1}...x{n+s-1}, and - * ys is a prefix of x{n+s} - * Otherwise it returns -1. - * - * This function may update n1 if computeRemainder = true. - * We maintain the invariant that the resulting value n1' - * of n1 after this function is such that: - * n1 = str.++( nb, n1', ne ) - * The vectors nb and ne have the following properties. - * If computeRemainder = true, then - * If remainderDir != -1, then - * ne is { x{n+s}' x{n+s+1}...xm } - * where x{n+s} = str.++( ys, x{n+s}' ). - * If remainderDir != 1, then - * nb is { x1, ..., x{n-1}, xn' } - * where xn = str.++( xn', y1 ). - * - * For example: - * - * componentContains({ x, "abc", x }, { "b" }, {}, true, 0) - * returns 1, - * n1 is updated to { "b" }, - * nb is updated to { x, "a" }, - * ne is updated to { "c", x } - * - * componentContains({ x, "abc", x }, { "b" }, {}, true, 1) - * returns 1, - * n1 is updated to { x, "ab" }, - * ne is updated to { "c", x } - * - * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1) - * returns 2, - * n1 is updated to { y, z, "abc", x, "de" }, - * ne is updated to { "f" } - * - * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1) - * returns 1, - * n1 is updated to { "c", x, "def" }, - * nb is updated to { y, "ab" } - */ - static int componentContains(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - bool computeRemainder = false, - int remainderDir = 0); - /** component contains base - * - * This function is a helper for the above function. - * - * It returns true if n2 is contained in n1 with the following - * restrictions: - * If dir=1, then n2 must be a suffix of n1. - * If dir=-1, then n2 must be a prefix of n1. - * - * If computeRemainder is true, then n1rb and n1re are - * updated such that : - * n1 = str.++( n1rb, n2, n1re ) - * where a null value of n1rb and n1re indicates the - * empty string. - * - * For example: - * - * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false) - * returns false. - * - * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true) - * returns true, - * n1rb is set to "c", - * n1re is set to "e". - * - * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true) - * returns true, - * n1re is set to str.substr(y,5,str.len(y)). - * - * - * Notice that this function may return false when it cannot compute a - * remainder when it otherwise would have returned true. For example: - * - * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false) - * returns true. - * - * Hence, we know that str.substr(y,x,z) is contained in y. However: - * - * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true) - * returns false. - * - * The reason is since computeRemainder=true, it must be that - * y = str.++( n1rb, str.substr(y,x,z), n1re ) - * for some n1rb, n1re. However, to construct such n1rb, n1re would require - * e.g. the terms: - * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ), - * str.substr(y,x,z), - * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) ) - * - * Since we do not wish to introduce ITE terms in the rewriter, we instead - * return false, indicating that we cannot compute the remainder. - */ - static bool componentContainsBase( - Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); - /** strip constant endpoints - * This function is used when rewriting str.contains( t1, t2 ), where - * n1 is the vector form of t1 - * n2 is the vector form of t2 - * - * It modifies n1 to a new vector n1' such that: - * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to - * str.contains( str.++( n1' ), str.++( n2 ) ) - * (2) str.++( n1 ) = str.++( nb, n1', ne ) - * - * "dir" is the direction in which we can modify n1: - * if dir = 1, then we allow dropping components from the front of n1, - * if dir = -1, then we allow dropping components from the back of n1, - * if dir = 0, then we allow dropping components from either. - * - * It returns true if n1 is modified. - * - * For example: - * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1) - * returns true, - * n1 is updated to { x, "de" } - * nb is updated to { "ab" } - * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0) - * returns true, - * n1 is updated to { "b", x, "d" } - * nb is updated to { "a" } - * ne is updated to { "e" } - * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1) - * returns true, - * n1 is updated to {"ad"} - * ne is updated to { substr("ccc",x,y) } - */ - static bool stripConstantEndpoints(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - int dir = 0); - - /** - * Given a symbolic length n, returns the canonical string (of type stype) - * for that length. For example if n is constant, this function returns a - * string consisting of "A" repeated n times. Returns the null node if no such - * string exists. - */ - static Node canonicalStrForSymbolicLength(Node n, TypeNode stype); - /** length preserving rewrite * * Given input n, this returns a string n' whose length is equivalent to n. @@ -528,289 +229,12 @@ class SequencesRewriter : public TheoryRewriter static Node lengthPreserveRewrite(Node n); /** - * Checks whether a string term `a` is entailed to contain or not contain a - * string term `b`. - * - * @param a The string that is checked whether it contains `b` - * @param b The string that is checked whether it is contained in `a` - * @param fullRewriter Determines whether the function can use the full - * rewriter or only `rewriteContains()` (useful for avoiding loops) - * @return true node if it can be shown that `a` contains `b`, false node if - * it can be shown that `a` does not contain `b`, null node otherwise - */ - static Node checkEntailContains(Node a, Node b, bool fullRewriter = true); - - /** entail non-empty - * - * Checks whether string a is entailed to be non-empty. Is equivalent to - * the call checkArithEntail( len( a ), true ). - */ - static bool checkEntailNonEmpty(Node a); - - /** - * Checks whether string has at most/exactly length one. Length one strings - * can be used for more aggressive rewriting because there is guaranteed that - * it cannot be overlap multiple components in a string concatenation. - * - * @param s The string to check - * @param strict If true, the string must have exactly length one, otherwise - * at most length one - * @return True if the string has at most/exactly length one, false otherwise - */ - static bool checkEntailLengthOne(Node s, bool strict = false); - - /** check arithmetic entailment equal - * Returns true if it is always the case that a = b. - */ - static bool checkEntailArithEq(Node a, Node b); - /** check arithmetic entailment - * Returns true if it is always the case that a >= b, - * and a>b if strict is true. - */ - static bool checkEntailArith(Node a, Node b, bool strict = false); - /** check arithmetic entailment - * Returns true if it is always the case that a >= 0. - */ - static bool checkEntailArith(Node a, bool strict = false); - /** check arithmetic entailment with approximations - * - * Returns true if it is always the case that a >= 0. We expect that a is in - * rewritten form. - * - * This function uses "approximation" techniques that under-approximate - * the value of a for the purposes of showing the entailment holds. For - * example, given: - * len( x ) - len( substr( y, 0, len( x ) ) ) - * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above - * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0, - * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0 - * holds. - */ - static bool checkEntailArithApprox(Node a); - /** Get arithmetic approximations - * - * This gets the (set of) arithmetic approximations for term a and stores - * them in approx. If isOverApprox is true, these are over-approximations - * for the value of a, otherwise, they are underapproximations. For example, - * an over-approximation for len( substr( y, n, m ) ) is m; an - * under-approximation for indexof( x, y, n ) is -1. - * - * Notice that this function is not generally recursive (although it may make - * a small bounded of recursive calls). Instead, it returns the shape - * of the approximations for a. For example, an under-approximation - * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this - * function might be len( substr( x, 0, n ) ) - len( y ), where we don't - * consider (recursively) the approximations for len( substr( x, 0, n ) ). - */ - static void getArithApproximations(Node a, - std::vector& approx, - bool isOverApprox = false); - - /** - * Checks whether it is always true that `a` is a strict subset of `b` in the - * multiset domain. - * - * Examples: - * - * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true - * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true - * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false - * - * @param a The term for which it should be checked if it is a strict subset - * of `b` in the multiset domain - * @param b The term for which it should be checked if it is a strict - * superset of `a` in the multiset domain - * @return True if it is always the case that `a` is a strict subset of `b`, - * false otherwise. - */ - static bool checkEntailMultisetSubset(Node a, Node b); - - /** - * Returns a character `c` if it is always the case that str.in.re(a, c*), - * i.e. if all possible values of `a` only consist of `c` characters, and the - * null node otherwise. If `a` is the empty string, the function returns an - * empty string. - * - * @param a The node to check for homogeneity - * @return If `a` is homogeneous, the only character that it may contain, the - * empty string if `a` is empty, and the null node otherwise - */ - static Node checkEntailHomogeneousString(Node a); - - /** - * Simplifies a given node `a` s.t. the result is a concatenation of string - * terms that can be interpreted as a multiset and which contains all - * multisets that `a` could form. - * - * Examples: - * - * (str.substr "AA" 0 n) ---> "AA" - * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB") - * - * @param a The node to simplify - * @return A concatenation that can be interpreted as a multiset - */ - static Node getMultisetApproximation(Node a); - - /** - * Checks whether assumption |= a >= 0 (if strict is false) or - * assumption |= a > 0 (if strict is true), where assumption is an equality - * assumption. The assumption must be in rewritten form. - * - * Example: - * - * checkEntailArithWithEqAssumption(x + (str.len y) = 0, -x, false) = true - * - * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true - */ - static bool checkEntailArithWithEqAssumption(Node assumption, - Node a, - bool strict = false); - - /** - * Checks whether assumption |= a >= b (if strict is false) or - * assumption |= a > b (if strict is true). The function returns true if it - * can be shown that the entailment holds and false otherwise. Assumption - * must be in rewritten form. Assumption may be an equality or an inequality. - * - * Example: - * - * checkEntailArithWithAssumption(x + (str.len y) = 0, 0, x, false) = true - * - * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true - */ - static bool checkEntailArithWithAssumption(Node assumption, - Node a, - Node b, - bool strict = false); - - /** - * Checks whether assumptions |= a >= b (if strict is false) or - * assumptions |= a > b (if strict is true). The function returns true if it - * can be shown that the entailment holds and false otherwise. Assumptions - * must be in rewritten form. Assumptions may be an equalities or an - * inequalities. - * - * Example: - * - * checkEntailArithWithAssumptions([x + (str.len y) = 0], 0, x, false) = true - * - * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true - */ - static bool checkEntailArithWithAssumptions(std::vector assumptions, - Node a, - Node b, - bool strict = false); - - /** get arithmetic lower bound - * If this function returns a non-null Node ret, - * then ret is a rational constant and - * we know that n >= ret always if isLower is true, - * or n <= ret if isLower is false. - * - * Notice the following invariant. - * If getConstantArithBound(a, true) = ret where ret is non-null, then for - * strict = { true, false } : - * ret >= strict ? 1 : 0 - * if and only if - * checkEntailArith( a, strict ) = true. - */ - static Node getConstantArithBound(Node a, bool isLower = true); - /** get length for regular expression - * - * Given regular expression n, if this method returns a non-null value c, then - * x in n entails len( x ) = c. - */ - static Node getFixedLengthForRegexp(Node n); - /** decompose substr chain - * - * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this - * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls. - */ - static Node decomposeSubstrChain(Node s, - std::vector& ss, - std::vector& ls); - /** make substr chain - * - * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term - * substr( ... substr( base, x1, y1 ) ..., xn, yn ). - */ - static Node mkSubstrChain(Node base, - const std::vector& ss, - const std::vector& ls); - - /** - * Overapproximates the possible values of node n. This overapproximation - * assumes that n can return a value x or the empty string and tries to find - * the simplest x such that this holds. In the general case, x is the same as - * the input n. This overapproximation can be used to sort terms with the - * same possible values in string concatenation for example. - * - * Example: - * - * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y) - * either returns y or "" - * - * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y) - * because the function could not compute a simpler - */ - static Node getStringOrEmpty(Node n); - - /** - * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the - * original inequality still holds. Returns true if the original inequality - * holds and false otherwise. The list of ys is modified to contain a subset - * of the original ys. - * - * Example: - * - * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] ) - * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ] - * (can be used to rewrite the inequality to false) - * - * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] ) - * --> returns false because it is not possible to show - * str.len(y) >= str.len(x) - */ - static bool inferZerosInSumGeq(Node x, - std::vector& ys, - std::vector& zeroYs); - - /** - * Infers a conjunction of equalities that correspond to (str.contains x y) - * if it can show that the length of y is greater or equal to the length of - * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction - * is of the form: - * - * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) - * - * where each yi'' are yi that must be empty for (= x y) to hold and yi' are - * yi that the function could not infer anything about. Returns a null node - * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x - * y) if the function can infer that str.len(y) >= str.len(x) but cannot - * infer that any of the yi must be empty. - */ - static Node inferEqsFromContains(Node x, Node y); - - /** - * Collects equal-to-empty nodes from a conjunction or a single - * node. Returns a list of nodes that are compared to empty nodes - * and a boolean that indicates whether all nodes in the - * conjunction were a comparison with the empty node. The nodes in - * the list are sorted and duplicates removed. - * - * Examples: - * - * collectEmptyEqs( (= "" x) ) = { true, [x] } - * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] } - * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] } - * - * @param x The conjunction of equalities or a single equality - * @return A pair of a boolean that indicates whether the - * conjunction consists only of comparisons to the empty string - * and the list of nodes that are compared to the empty string + * Given a symbolic length n, returns the canonical string (of type stype) + * for that length. For example if n is constant, this function returns a + * string consisting of "A" repeated n times. Returns the null node if no such + * string exists. */ - static std::pair > collectEmptyEqs(Node x); + static Node canonicalStrForSymbolicLength(Node n, TypeNode stype); }; /* class SequencesRewriter */ } // namespace strings diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 7396a5013..bdce86d06 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -15,7 +15,7 @@ #include "theory/strings/skolem_cache.h" #include "theory/rewriter.h" -#include "theory/strings/sequences_rewriter.h" +#include "theory/strings/arith_entail.h" #include "util/rational.h" using namespace CVC4::kind; @@ -163,8 +163,8 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) a = s; b = m; } - else if (SequencesRewriter::checkEntailArith(nm->mkNode(PLUS, n, m), - nm->mkNode(STRING_LENGTH, s))) + else if (ArithEntail::check(nm->mkNode(PLUS, n, m), + nm->mkNode(STRING_LENGTH, s))) { // SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n) // if n + m >= (str.len x) diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp new file mode 100644 index 000000000..a1abfabe5 --- /dev/null +++ b/src/theory/strings/strings_entail.cpp @@ -0,0 +1,994 @@ +/********************* */ +/*! \file strings_entail.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of entailment tests involving strings. + **/ + +#include "theory/strings/strings_entail.h" + +#include "expr/node_builder.h" +#include "theory/rewriter.h" +#include "theory/strings/arith_entail.h" +#include "theory/strings/sequences_rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +bool StringsEntail::canConstantContainConcat(Node c, + Node n, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + CVC4::String t = c.getConst(); + const std::vector& tvec = t.getVec(); + Assert(n.getKind() == STRING_CONCAT); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < n.getNumChildren(); i++) + { + if (n[i].isConst()) + { + firstc = firstc == -1 ? i : firstc; + lastc = i; + CVC4::String s = n[i].getConst(); + size_t new_pos = t.find(s, pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + s.size(); + } + } + else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) + { + // find the first occurrence of a digit starting at pos + while (pos < tvec.size() && !String::isDigit(tvec[pos])) + { + pos++; + } + if (pos == tvec.size()) + { + return false; + } + // must consume at least one digit here + pos++; + } + } + return true; +} + +bool StringsEntail::canConstantContainList(Node c, + std::vector& l, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < l.size(); i++) + { + if (l[i].isConst()) + { + firstc = firstc == -1 ? i : firstc; + lastc = i; + size_t new_pos = Word::find(c, l[i], pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + Word::getLength(l[i]); + } + } + } + return true; +} + +bool StringsEntail::stripSymbolicLength(std::vector& n1, + std::vector& nr, + int dir, + Node& curr) +{ + Assert(dir == 1 || dir == -1); + Assert(nr.empty()); + Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + bool ret = false; + bool success; + unsigned sindex = 0; + do + { + Assert(!curr.isNull()); + success = false; + if (curr != zero && sindex < n1.size()) + { + unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); + if (n1[sindex_use].isConst()) + { + // could strip part of a constant + Node lowerBound = + ArithEntail::getConstantBound(Rewriter::rewrite(curr)); + if (!lowerBound.isNull()) + { + Assert(lowerBound.isConst()); + Rational lbr = lowerBound.getConst(); + if (lbr.sgn() > 0) + { + Assert(ArithEntail::check(curr, true)); + CVC4::String s = n1[sindex_use].getConst(); + Node ncl = + NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); + Node next_s = + NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl); + next_s = Rewriter::rewrite(next_s); + Assert(next_s.isConst()); + // we can remove the entire constant + if (next_s.getConst().sgn() >= 0) + { + curr = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(MINUS, curr, ncl)); + success = true; + sindex++; + } + else + { + // we can remove part of the constant + // lower bound minus the length of a concrete string is negative, + // hence lowerBound cannot be larger than long max + Assert(lbr < Rational(String::maxSize())); + curr = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound)); + uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); + Assert(lbsize < s.size()); + if (dir == 1) + { + // strip partially from the front + nr.push_back( + NodeManager::currentNM()->mkConst(s.prefix(lbsize))); + n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.suffix(s.size() - lbsize)); + } + else + { + // strip partially from the back + nr.push_back( + NodeManager::currentNM()->mkConst(s.suffix(lbsize))); + n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.prefix(s.size() - lbsize)); + } + ret = true; + } + Assert(ArithEntail::check(curr)); + } + else + { + // we cannot remove the constant + } + } + } + else + { + Node next_s = NodeManager::currentNM()->mkNode( + MINUS, + curr, + NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); + next_s = Rewriter::rewrite(next_s); + if (ArithEntail::check(next_s)) + { + success = true; + curr = next_s; + sindex++; + } + } + } + } while (success); + if (sindex > 0) + { + if (dir == 1) + { + nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); + n1.erase(n1.begin(), n1.begin() + sindex); + } + else + { + nr.insert(nr.end(), n1.end() - sindex, n1.end()); + n1.erase(n1.end() - sindex, n1.end()); + } + ret = true; + } + return ret; +} + +int StringsEntail::componentContains(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + bool computeRemainder, + int remainderDir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + // if n2 is a singleton, we can do optimized version here + if (n2.size() == 1) + { + for (unsigned i = 0; i < n1.size(); i++) + { + Node n1rb; + Node n1re; + if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) + { + if (computeRemainder) + { + n1[i] = n2[0]; + if (remainderDir != -1) + { + if (!n1re.isNull()) + { + ne.push_back(n1re); + } + ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); + n1.erase(n1.begin() + i + 1, n1.end()); + } + else if (!n1re.isNull()) + { + n1[i] = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); + } + if (remainderDir != 1) + { + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb.isNull()) + { + nb.push_back(n1rb); + } + } + else if (!n1rb.isNull()) + { + n1[i] = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); + } + } + return i; + } + } + } + else if (n1.size() >= n2.size()) + { + unsigned diff = n1.size() - n2.size(); + for (unsigned i = 0; i <= diff; i++) + { + Node n1rb_first; + Node n1re_first; + // first component of n2 must be a suffix + if (componentContainsBase(n1[i], + n2[0], + n1rb_first, + n1re_first, + 1, + computeRemainder && remainderDir != 1)) + { + Assert(n1re_first.isNull()); + for (unsigned j = 1; j < n2.size(); j++) + { + // are we in the last component? + if (j + 1 == n2.size()) + { + Node n1rb_last; + Node n1re_last; + // last component of n2 must be a prefix + if (componentContainsBase(n1[i + j], + n2[j], + n1rb_last, + n1re_last, + -1, + computeRemainder && remainderDir != -1)) + { + Assert(n1rb_last.isNull()); + if (computeRemainder) + { + if (remainderDir != -1) + { + if (!n1re_last.isNull()) + { + ne.push_back(n1re_last); + } + ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); + n1.erase(n1.begin() + i + j + 1, n1.end()); + n1[i + j] = n2[j]; + } + if (remainderDir != 1) + { + n1[i] = n2[0]; + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb_first.isNull()) + { + nb.push_back(n1rb_first); + } + } + } + return i; + } + else + { + break; + } + } + else if (n1[i + j] != n2[j]) + { + break; + } + } + } + } + } + return -1; +} + +bool StringsEntail::componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) +{ + Assert(n1rb.isNull()); + Assert(n1re.isNull()); + + NodeManager* nm = NodeManager::currentNM(); + + if (n1 == n2) + { + return true; + } + else + { + if (n1.isConst() && n2.isConst()) + { + size_t len1 = Word::getLength(n1); + size_t len2 = Word::getLength(n2); + if (len2 < len1) + { + if (dir == 1) + { + if (Word::suffix(n1, len2) == n2) + { + if (computeRemainder) + { + n1rb = Word::prefix(n1, len1 - len2); + } + return true; + } + } + else if (dir == -1) + { + if (Word::prefix(n1, len2) == n2) + { + if (computeRemainder) + { + n1re = Word::suffix(n1, len1 - len2); + } + return true; + } + } + else + { + size_t f = Word::find(n1, n2); + if (f != std::string::npos) + { + if (computeRemainder) + { + if (f > 0) + { + n1rb = Word::prefix(n1, f); + } + if (len1 > f + len2) + { + n1re = Word::suffix(n1, len1 - (f + len2)); + } + } + return true; + } + } + } + } + else + { + // cases for: + // n1 = x containing n2 = substr( x, n2[1], n2[2] ) + if (n2.getKind() == STRING_SUBSTR) + { + if (n2[0] == n1) + { + bool success = true; + Node start_pos = n2[1]; + Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]); + Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]); + if (dir == 1) + { + // To be a suffix, start + length must be greater than + // or equal to the length of the string. + success = ArithEntail::check(end_pos, len_n2s); + } + else if (dir == -1) + { + // To be a prefix, must literally start at 0, since + // if we knew it started at <0, it should be rewritten to "", + // if we knew it started at 0, then n2[1] should be rewritten to + // 0. + success = start_pos.isConst() + && start_pos.getConst().sgn() == 0; + } + if (success) + { + if (computeRemainder) + { + // we can only compute the remainder if start_pos and end_pos + // are known to be non-negative. + if (!ArithEntail::check(start_pos) + || !ArithEntail::check(end_pos)) + { + return false; + } + if (dir != 1) + { + n1rb = nm->mkNode( + STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); + } + if (dir != -1) + { + n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); + } + } + return true; + } + } + } + + if (!computeRemainder && dir == 0) + { + if (n1.getKind() == STRING_STRREPL) + { + // (str.contains (str.replace x y z) w) ---> true + // if (str.contains x w) --> true and (str.contains z w) ---> true + Node xCtnW = StringsEntail::checkContains(n1[0], n2); + if (!xCtnW.isNull() && xCtnW.getConst()) + { + Node zCtnW = StringsEntail::checkContains(n1[2], n2); + if (!zCtnW.isNull() && zCtnW.getConst()) + { + return true; + } + } + } + } + } + } + return false; +} + +bool StringsEntail::stripConstantEndpoints(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + int dir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + + NodeManager* nm = NodeManager::currentNM(); + bool changed = false; + // for ( forwards, backwards ) direction + for (unsigned r = 0; r < 2; r++) + { + if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) + { + unsigned index0 = r == 0 ? 0 : n1.size() - 1; + unsigned index1 = r == 0 ? 0 : n2.size() - 1; + bool removeComponent = false; + Node n1cmp = n1[index0]; + + if (n1cmp.isConst() && n1cmp.getConst().size() == 0) + { + return false; + } + + std::vector sss; + std::vector sls; + n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls); + Trace("strings-rewrite-debug2") + << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] + << ", dir = " << dir << std::endl; + if (n1cmp.isConst()) + { + CVC4::String s = n1cmp.getConst(); + // overlap is an overapproximation of the number of characters + // n2[index1] can match in s + unsigned overlap = s.size(); + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst(); + std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); + if (ret == std::string::npos) + { + if (n1.size() == 1) + { + // can remove everything + // e.g. str.contains( "abc", str.++( "ba", x ) ) --> + // str.contains( "", str.++( "ba", x ) ) + removeComponent = true; + } + else if (sss.empty()) // only if not substr + { + // check how much overlap there is + // This is used to partially strip off the endpoint + // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> + // str.contains( str.++( "c", x ), str.++( "cd", y ) ) + overlap = r == 0 ? s.overlap(t) : t.overlap(s); + } + else + { + // if we are looking at a substring, we can remove the component + // if there is no overlap + // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) + // --> str.contains( x, "a" ) + removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); + } + } + else if (sss.empty()) // only if not substr + { + Assert(ret < s.size()); + // can strip off up to the find position, e.g. + // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> + // str.contains( str.++( "bc", x ), str.++( "b", y ) ), + // and + // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> + // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) + overlap = s.size() - ret; + } + } + else + { + // inconclusive + } + // process the overlap + if (overlap < s.size()) + { + changed = true; + if (overlap == 0) + { + removeComponent = true; + } + else + { + // can drop the prefix (resp. suffix) from the first (resp. last) + // component + if (r == 0) + { + nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.suffix(overlap)); + } + else + { + ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.prefix(overlap)); + } + } + } + } + else if (n1cmp.getKind() == STRING_ITOS) + { + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst(); + + if (n1.size() == 1) + { + // if n1.size()==1, then if n2[index1] is not a number, we can drop + // the entire component + // e.g. str.contains( int.to.str(x), "123a45") --> false + if (!t.isNumber()) + { + removeComponent = true; + } + } + else + { + const std::vector& tvec = t.getVec(); + Assert(tvec.size() > 0); + + // if n1.size()>1, then if the first (resp. last) character of + // n2[index1] + // is not a digit, we can drop the entire component, e.g.: + // str.contains( str.++( int.to.str(x), y ), "a12") --> + // str.contains( y, "a12" ) + // str.contains( str.++( y, int.to.str(x) ), "a0b") --> + // str.contains( y, "a0b" ) + unsigned i = r == 0 ? 0 : (tvec.size() - 1); + if (!String::isDigit(tvec[i])) + { + removeComponent = true; + } + } + } + } + if (removeComponent) + { + // can drop entire first (resp. last) component + if (r == 0) + { + nb.push_back(n1[index0]); + n1.erase(n1.begin(), n1.begin() + 1); + } + else + { + ne.push_back(n1[index0]); + n1.pop_back(); + } + if (n1.empty()) + { + // if we've removed everything, just return (we will rewrite to false) + return true; + } + else + { + changed = true; + } + } + } + } + // TODO (#1180) : computing the maximal overlap in this function may be + // important. + // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> + // false + // ...since str.to.int(x) can contain at most 1 character from "2aaaa", + // leaving 4 characters + // which is larger that the upper bound for length of str.substr(y,0,3), + // which is 3. + return changed; +} + +Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter) +{ + NodeManager* nm = NodeManager::currentNM(); + Node ctn = nm->mkNode(STRING_STRCTN, a, b); + + if (fullRewriter) + { + ctn = Rewriter::rewrite(ctn); + } + else + { + Node prev; + do + { + prev = ctn; + ctn = SequencesRewriter::rewriteContains(ctn); + } while (prev != ctn && ctn.getKind() == STRING_STRCTN); + } + + Assert(ctn.getType().isBoolean()); + return ctn.isConst() ? ctn : Node::null(); +} + +bool StringsEntail::checkNonEmpty(Node a) +{ + Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); + len = Rewriter::rewrite(len); + return ArithEntail::check(len, true); +} + +bool StringsEntail::checkLengthOne(Node s, bool strict) +{ + NodeManager* nm = NodeManager::currentNM(); + Node one = nm->mkConst(Rational(1)); + Node len = nm->mkNode(STRING_LENGTH, s); + len = Rewriter::rewrite(len); + return ArithEntail::check(one, len) + && (!strict || ArithEntail::check(len, true)); +} + +bool StringsEntail::checkMultisetSubset(Node a, Node b) +{ + std::vector avec; + utils::getConcat(getMultisetApproximation(a), avec); + std::vector bvec; + utils::getConcat(b, bvec); + + std::map num_nconst[2]; + std::map num_const[2]; + for (unsigned j = 0; j < 2; j++) + { + std::vector& jvec = j == 0 ? avec : bvec; + for (const Node& cc : jvec) + { + if (cc.isConst()) + { + num_const[j][cc]++; + } + else + { + num_nconst[j][cc]++; + } + } + } + bool ms_success = true; + for (std::pair& nncp : num_nconst[0]) + { + if (nncp.second > num_nconst[1][nncp.first]) + { + ms_success = false; + break; + } + } + if (ms_success) + { + // count the number of constant characters in the first argument + std::map count_const[2]; + std::vector chars; + for (unsigned j = 0; j < 2; j++) + { + for (std::pair& ncp : num_const[j]) + { + Node cn = ncp.first; + Assert(cn.isConst()); + std::vector cnChars = Word::getChars(cn); + for (const Node& ch : cnChars) + { + count_const[j][ch] += ncp.second; + if (std::find(chars.begin(), chars.end(), ch) == chars.end()) + { + chars.push_back(ch); + } + } + } + } + Trace("strings-entail-ms-ss") + << "For " << a << " and " << b << " : " << std::endl; + for (const Node& ch : chars) + { + Trace("strings-entail-ms-ss") << " # occurrences of substring "; + Trace("strings-entail-ms-ss") << ch << " in arguments is "; + Trace("strings-entail-ms-ss") + << count_const[0][ch] << " / " << count_const[1][ch] << std::endl; + if (count_const[0][ch] < count_const[1][ch]) + { + return true; + } + } + + // TODO (#1180): count the number of 2,3,4,.. character substrings + // for example: + // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false + // since the second argument contains more occurrences of "bb". + // note this is orthogonal reasoning to inductive reasoning + // via regular membership reduction in Liang et al CAV 2015. + } + return false; +} + +Node StringsEntail::checkHomogeneousString(Node a) +{ + std::vector avec; + utils::getConcat(getMultisetApproximation(a), avec); + + bool cValid = false; + Node c; + for (const Node& ac : avec) + { + if (ac.isConst()) + { + std::vector acv = Word::getChars(ac); + for (const Node& cc : acv) + { + if (!cValid) + { + cValid = true; + c = cc; + } + else if (c != cc) + { + // Found a different character + return Node::null(); + } + } + } + else + { + // Could produce a different character + return Node::null(); + } + } + + if (!cValid) + { + return Word::mkEmptyWord(a.getType()); + } + + return c; +} + +Node StringsEntail::getMultisetApproximation(Node a) +{ + NodeManager* nm = NodeManager::currentNM(); + if (a.getKind() == STRING_SUBSTR) + { + return a[0]; + } + else if (a.getKind() == STRING_STRREPL) + { + return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); + } + else if (a.getKind() == STRING_CONCAT) + { + NodeBuilder<> nb(STRING_CONCAT); + for (const Node& ac : a) + { + nb << getMultisetApproximation(ac); + } + return nb.constructNode(); + } + else + { + return a; + } +} + +Node StringsEntail::getStringOrEmpty(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node res; + while (res.isNull()) + { + switch (n.getKind()) + { + case STRING_STRREPL: + { + Node empty = nm->mkConst(::CVC4::String("")); + if (n[0] == empty) + { + // (str.replace "" x y) --> y + n = n[2]; + break; + } + + if (checkLengthOne(n[0]) && n[2] == empty) + { + // (str.replace "A" x "") --> "A" + res = n[0]; + break; + } + + res = n; + break; + } + case STRING_SUBSTR: + { + if (checkLengthOne(n[0])) + { + // (str.substr "A" x y) --> "A" + res = n[0]; + break; + } + res = n; + break; + } + default: + { + res = n; + break; + } + } + } + return res; +} + +Node StringsEntail::inferEqsFromContains(Node x, Node y) +{ + NodeManager* nm = NodeManager::currentNM(); + Node emp = nm->mkConst(String("")); + Assert(x.getType() == y.getType()); + TypeNode stype = x.getType(); + + Node xLen = nm->mkNode(STRING_LENGTH, x); + std::vector yLens; + if (y.getKind() != STRING_CONCAT) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, y)); + } + else + { + for (const Node& yi : y) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); + } + } + + std::vector zeroLens; + if (x == emp) + { + // If x is the empty string, then all ys must be empty, too, and we can + // skip the expensive checks. Note that this is just a performance + // optimization. + zeroLens.swap(yLens); + } + else + { + // Check if we can infer that str.len(x) <= str.len(y). If that is the + // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + + // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality + // true. The terms that can have length zero without making the inequality + // false must be all be empty if (str.contains x y) is true. + if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens)) + { + // We could not prove that the inequality holds + return Node::null(); + } + else if (yLens.size() == y.getNumChildren()) + { + // We could only prove that the inequality holds but not that any of the + // ys must be empty + return nm->mkNode(EQUAL, x, y); + } + } + + if (y.getKind() != STRING_CONCAT) + { + if (zeroLens.size() == 1) + { + // y is not a concatenation and we found that it must be empty, so just + // return (= y "") + Assert(zeroLens[0][0] == y); + return nm->mkNode(EQUAL, y, emp); + } + else + { + Assert(yLens.size() == 1 && yLens[0][0] == y); + return nm->mkNode(EQUAL, x, y); + } + } + + std::vector cs; + for (const Node& yiLen : yLens) + { + Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); + cs.push_back(yiLen[0]); + } + + NodeBuilder<> nb(AND); + // (= x (str.++ y1' ... ym')) + if (!cs.empty()) + { + nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); + } + // (= y1'' "") ... (= yk'' "") + for (const Node& zeroLen : zeroLens) + { + Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); + nb << nm->mkNode(EQUAL, zeroLen[0], emp); + } + + // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + return nb.constructNode(); +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/strings_entail.h b/src/theory/strings/strings_entail.h new file mode 100644 index 000000000..d4993faf4 --- /dev/null +++ b/src/theory/strings/strings_entail.h @@ -0,0 +1,371 @@ +/********************* */ +/*! \file strings_entail.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Entailment tests involving strings + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__STRING_ENTAIL_H +#define CVC4__THEORY__STRINGS__STRING_ENTAIL_H + +#include + +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * Entailment tests involving strings. + * Some of these techniques are described in Reynolds et al, "High Level + * Abstractions for Simplifying Extended String Constraints in SMT", CAV 2019. + */ +class StringsEntail +{ + public: + /** can constant contain list + * return true if constant c can contain the list l in order + * firstc/lastc store which indices in l were used to determine the return + * value. + * (This is typically used when this function returns false, for minimizing + * explanations) + * + * For example: + * canConstantContainList( "abc", { x, "c", y } ) returns true + * firstc/lastc are updated to 1/1 + * canConstantContainList( "abc", { x, "d", y } ) returns false + * firstc/lastc are updated to 1/1 + * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w } + * returns false + * firstc/lastc are updated to 1/3 + * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w } + * returns false + * firstc/lastc are updated to 1/5 + */ + static bool canConstantContainList(Node c, + std::vector& l, + int& firstc, + int& lastc); + /** can constant contain concat + * same as above but with n = str.++( l ) instead of l + */ + static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc); + + /** strip symbolic length + * + * This function strips off components of n1 whose length is less than + * or equal to argument curr, and stores them in nr. The direction + * dir determines whether the components are removed from the start + * or end of n1. + * + * In detail, this function updates n1 to n1' such that: + * If dir=1, + * n1 = str.++( nr, n1' ) + * If dir=-1 + * n1 = str.++( n1', nr ) + * It updates curr to curr' such that: + * curr' = curr - str.len( str.++( nr ) ), and + * curr' >= 0 + * where the latter fact is determined by checkArithEntail. + * + * This function returns true if n1 is modified. + * + * For example: + * + * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 ) + * returns true + * n1 is updated to { "bc", y } + * nr is updated to { x, "a" } + * curr is updated to 0 * + * + * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 ) + * returns false + * + * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 ) + * returns false + * + * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 ) + * returns true + * n1 is updated to { x } + * nr is updated to { "abc", y } + * curr is updated to str.len(y)+1 + */ + static bool stripSymbolicLength(std::vector& n1, + std::vector& nr, + int dir, + Node& curr); + /** component contains + * This function is used when rewriting str.contains( t1, t2 ), where + * n1 is the vector form of t1 + * n2 is the vector form of t2 + * + * If this function returns n>=0 for some n, then + * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm }, + * n2 = { y1...ys }, + * y1 is a suffix of xn, + * y2...y{s-1} = x{n+1}...x{n+s-1}, and + * ys is a prefix of x{n+s} + * Otherwise it returns -1. + * + * This function may update n1 if computeRemainder = true. + * We maintain the invariant that the resulting value n1' + * of n1 after this function is such that: + * n1 = str.++( nb, n1', ne ) + * The vectors nb and ne have the following properties. + * If computeRemainder = true, then + * If remainderDir != -1, then + * ne is { x{n+s}' x{n+s+1}...xm } + * where x{n+s} = str.++( ys, x{n+s}' ). + * If remainderDir != 1, then + * nb is { x1, ..., x{n-1}, xn' } + * where xn = str.++( xn', y1 ). + * + * For example: + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 0) + * returns 1, + * n1 is updated to { "b" }, + * nb is updated to { x, "a" }, + * ne is updated to { "c", x } + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 1) + * returns 1, + * n1 is updated to { x, "ab" }, + * ne is updated to { "c", x } + * + * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1) + * returns 2, + * n1 is updated to { y, z, "abc", x, "de" }, + * ne is updated to { "f" } + * + * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1) + * returns 1, + * n1 is updated to { "c", x, "def" }, + * nb is updated to { y, "ab" } + */ + static int componentContains(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + bool computeRemainder = false, + int remainderDir = 0); + /** strip constant endpoints + * This function is used when rewriting str.contains( t1, t2 ), where + * n1 is the vector form of t1 + * n2 is the vector form of t2 + * + * It modifies n1 to a new vector n1' such that: + * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to + * str.contains( str.++( n1' ), str.++( n2 ) ) + * (2) str.++( n1 ) = str.++( nb, n1', ne ) + * + * "dir" is the direction in which we can modify n1: + * if dir = 1, then we allow dropping components from the front of n1, + * if dir = -1, then we allow dropping components from the back of n1, + * if dir = 0, then we allow dropping components from either. + * + * It returns true if n1 is modified. + * + * For example: + * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1) + * returns true, + * n1 is updated to { x, "de" } + * nb is updated to { "ab" } + * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0) + * returns true, + * n1 is updated to { "b", x, "d" } + * nb is updated to { "a" } + * ne is updated to { "e" } + * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1) + * returns true, + * n1 is updated to {"ad"} + * ne is updated to { substr("ccc",x,y) } + */ + static bool stripConstantEndpoints(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + int dir = 0); + + /** + * Checks whether a string term `a` is entailed to contain or not contain a + * string term `b`. + * + * @param a The string that is checked whether it contains `b` + * @param b The string that is checked whether it is contained in `a` + * @param fullRewriter Determines whether the function can use the full + * rewriter or only `rewriteContains()` (useful for avoiding loops) + * @return true node if it can be shown that `a` contains `b`, false node if + * it can be shown that `a` does not contain `b`, null node otherwise + */ + static Node checkContains(Node a, Node b, bool fullRewriter = true); + + /** entail non-empty + * + * Checks whether string a is entailed to be non-empty. Is equivalent to + * the call checkArithEntail( len( a ), true ). + */ + static bool checkNonEmpty(Node a); + + /** + * Checks whether string has at most/exactly length one. Length one strings + * can be used for more aggressive rewriting because there is guaranteed that + * it cannot be overlap multiple components in a string concatenation. + * + * @param s The string to check + * @param strict If true, the string must have exactly length one, otherwise + * at most length one + * @return True if the string has at most/exactly length one, false otherwise + */ + static bool checkLengthOne(Node s, bool strict = false); + + /** + * Checks whether it is always true that `a` is a strict subset of `b` in the + * multiset domain. + * + * Examples: + * + * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true + * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true + * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false + * + * @param a The term for which it should be checked if it is a strict subset + * of `b` in the multiset domain + * @param b The term for which it should be checked if it is a strict + * superset of `a` in the multiset domain + * @return True if it is always the case that `a` is a strict subset of `b`, + * false otherwise. + */ + static bool checkMultisetSubset(Node a, Node b); + + /** + * Returns a character `c` if it is always the case that str.in.re(a, c*), + * i.e. if all possible values of `a` only consist of `c` characters, and the + * null node otherwise. If `a` is the empty string, the function returns an + * empty string. + * + * @param a The node to check for homogeneity + * @return If `a` is homogeneous, the only character that it may contain, the + * empty string if `a` is empty, and the null node otherwise + */ + static Node checkHomogeneousString(Node a); + + /** + * Overapproximates the possible values of node n. This overapproximation + * assumes that n can return a value x or the empty string and tries to find + * the simplest x such that this holds. In the general case, x is the same as + * the input n. This overapproximation can be used to sort terms with the + * same possible values in string concatenation for example. + * + * Example: + * + * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y) + * either returns y or "" + * + * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y) + * because the function could not compute a simpler + */ + static Node getStringOrEmpty(Node n); + + /** + * Infers a conjunction of equalities that correspond to (str.contains x y) + * if it can show that the length of y is greater or equal to the length of + * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction + * is of the form: + * + * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + * + * where each yi'' are yi that must be empty for (= x y) to hold and yi' are + * yi that the function could not infer anything about. Returns a null node + * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x + * y) if the function can infer that str.len(y) >= str.len(x) but cannot + * infer that any of the yi must be empty. + */ + static Node inferEqsFromContains(Node x, Node y); + + private: + /** component contains base + * + * This function is a helper for the above function. + * + * It returns true if n2 is contained in n1 with the following + * restrictions: + * If dir=1, then n2 must be a suffix of n1. + * If dir=-1, then n2 must be a prefix of n1. + * + * If computeRemainder is true, then n1rb and n1re are + * updated such that : + * n1 = str.++( n1rb, n2, n1re ) + * where a null value of n1rb and n1re indicates the + * empty string. + * + * For example: + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false) + * returns false. + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true) + * returns true, + * n1rb is set to "c", + * n1re is set to "e". + * + * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true) + * returns true, + * n1re is set to str.substr(y,5,str.len(y)). + * + * + * Notice that this function may return false when it cannot compute a + * remainder when it otherwise would have returned true. For example: + * + * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false) + * returns true. + * + * Hence, we know that str.substr(y,x,z) is contained in y. However: + * + * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true) + * returns false. + * + * The reason is since computeRemainder=true, it must be that + * y = str.++( n1rb, str.substr(y,x,z), n1re ) + * for some n1rb, n1re. However, to construct such n1rb, n1re would require + * e.g. the terms: + * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ), + * str.substr(y,x,z), + * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) ) + * + * Since we do not wish to introduce ITE terms in the rewriter, we instead + * return false, indicating that we cannot compute the remainder. + */ + static bool componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); + /** + * Simplifies a given node `a` s.t. the result is a concatenation of string + * terms that can be interpreted as a multiset and which contains all + * multisets that `a` could form. + * + * Examples: + * + * (str.substr "AA" 0 n) ---> "AA" + * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB") + * + * @param a The node to simplify + * @return A concatenation that can be interpreted as a multiset + */ + static Node getMultisetApproximation(Node a); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__STRING_ENTAIL_H */ diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 7777b9bd7..097cef235 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -22,6 +22,7 @@ #include "options/strings_options.h" #include "proof/proof_manager.h" #include "smt/logic_exception.h" +#include "theory/strings/arith_entail.h" #include "theory/strings/sequences_rewriter.h" #include "theory/strings/word.h" @@ -75,7 +76,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node sk1 = n == d_zero ? emp : d_sc->mkSkolemCached( s, n, SkolemCache::SK_PREFIX, "sspre"); - Node sk2 = SequencesRewriter::checkEntailArith(t12, lt0) + Node sk2 = ArithEntail::check(t12, lt0) ? emp : d_sc->mkSkolemCached( s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index 3b4c757f2..b2a62cc62 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -18,6 +18,8 @@ #include "options/strings_options.h" #include "theory/rewriter.h" +#include "theory/strings/arith_entail.h" +#include "theory/strings/strings_entail.h" #include "theory/strings/word.h" using namespace CVC4::kind; @@ -178,6 +180,84 @@ Node getConstantEndpoint(Node e, bool isSuf) return getConstantComponent(e); } +Node decomposeSubstrChain(Node s, std::vector& ss, std::vector& ls) +{ + Assert(ss.empty()); + Assert(ls.empty()); + while (s.getKind() == STRING_SUBSTR) + { + ss.push_back(s[1]); + ls.push_back(s[2]); + s = s[0]; + } + std::reverse(ss.begin(), ss.end()); + std::reverse(ls.begin(), ls.end()); + return s; +} + +Node mkSubstrChain(Node base, + const std::vector& ss, + const std::vector& ls) +{ + NodeManager* nm = NodeManager::currentNM(); + for (unsigned i = 0, size = ss.size(); i < size; i++) + { + base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); + } + return base; +} + +std::pair > collectEmptyEqs(Node x) +{ + // Collect the equalities of the form (= x "") (sorted) + std::set emptyNodes; + bool allEmptyEqs = true; + if (x.getKind() == EQUAL) + { + if (Word::isEmpty(x[0])) + { + emptyNodes.insert(x[1]); + } + else if (Word::isEmpty(x[1])) + { + emptyNodes.insert(x[0]); + } + else + { + allEmptyEqs = false; + } + } + else if (x.getKind() == AND) + { + for (const Node& c : x) + { + if (c.getKind() == EQUAL) + { + if (Word::isEmpty(c[0])) + { + emptyNodes.insert(c[1]); + } + else if (Word::isEmpty(c[1])) + { + emptyNodes.insert(c[0]); + } + } + else + { + allEmptyEqs = false; + } + } + } + + if (emptyNodes.size() == 0) + { + allEmptyEqs = false; + } + + return std::make_pair( + allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); +} + bool isUnboundedWildcard(const std::vector& rs, size_t start) { size_t i = start; @@ -270,6 +350,15 @@ bool isStringKind(Kind k) || k == STRING_FROM_CODE || k == STRING_TO_CODE; } +bool isRegExpKind(Kind k) +{ + return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP + || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER + || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT + || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV + || k == REGEXP_COMPLEMENT; +} + TypeNode getOwnerStringType(Node n) { TypeNode tn; diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 846d3d563..9fc23499a 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -102,6 +102,41 @@ Node getConstantComponent(Node t); */ Node getConstantEndpoint(Node e, bool isSuf); +/** decompose substr chain + * + * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this + * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls. + */ +Node decomposeSubstrChain(Node s, std::vector& ss, std::vector& ls); +/** make substr chain + * + * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term + * substr( ... substr( base, x1, y1 ) ..., xn, yn ). + */ +Node mkSubstrChain(Node base, + const std::vector& ss, + const std::vector& ls); + +/** + * Collects equal-to-empty nodes from a conjunction or a single + * node. Returns a list of nodes that are compared to empty nodes + * and a boolean that indicates whether all nodes in the + * conjunction were a comparison with the empty node. The nodes in + * the list are sorted and duplicates removed. + * + * Examples: + * + * collectEmptyEqs( (= "" x) ) = { true, [x] } + * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] } + * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] } + * + * @param x The conjunction of equalities or a single equality + * @return A pair of a boolean that indicates whether the + * conjunction consists only of comparisons to the empty string + * and the list of nodes that are compared to the empty string + */ +std::pair > collectEmptyEqs(Node x); + /** * Given a vector of regular expression nodes and a start index that points to * a wildcard, returns true if the wildcard is unbounded (i.e. it is followed @@ -142,6 +177,8 @@ void printConcatTrace(std::vector& n, const char* c); /** Is k a string-specific kind? */ bool isStringKind(Kind k); +/** is k a native operator whose return type is a regular expression? */ +bool isRegExpKind(Kind k); /** Get owner string type * diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp index b42cf3160..085078dea 100644 --- a/src/theory/strings/word.cpp +++ b/src/theory/strings/word.cpp @@ -99,7 +99,7 @@ std::vector Word::getChars(TNode x) return ret; } -bool Word::isEmpty(TNode x) { return getLength(x) == 0; } +bool Word::isEmpty(TNode x) { return x.isConst() && getLength(x) == 0; } bool Word::strncmp(TNode x, TNode y, std::size_t n) { @@ -283,6 +283,31 @@ std::size_t Word::roverlap(TNode x, TNode y) return 0; } +Node Word::splitConstant(Node x, Node y, size_t& index, bool isRev) +{ + Assert(x.isConst() && y.isConst()); + size_t lenA = getLength(x); + size_t lenB = getLength(y); + index = lenA <= lenB ? 1 : 0; + size_t lenShort = index == 1 ? lenA : lenB; + bool cmp = isRev ? rstrncmp(x, y, lenShort) : strncmp(x, y, lenShort); + if (cmp) + { + Node l = index == 0 ? x : y; + if (isRev) + { + size_t new_len = getLength(l) - lenShort; + return substr(l, 0, new_len); + } + else + { + return substr(l, lenShort); + } + } + // not the same prefix/suffix + return Node::null(); +} + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h index 8e6e7876e..454710c98 100644 --- a/src/theory/strings/word.h +++ b/src/theory/strings/word.h @@ -139,6 +139,19 @@ class Word * Notice that x.overlap(y) = y.roverlap(x) */ static std::size_t roverlap(TNode x, TNode y); + /** Split constant + * + * This returns the suffix remainder (resp. prefix remainder when isRev is + * true) of words a and b, call it r, such that: + * (1) a = b ++ r , or + * (2) a ++ r = b + * when isRev = false. The argument index is set to 1 if we are in the second + * case, and 0 otherwise. + * + * If a and b do not share a common prefix (resp. suffix), then this method + * returns the null node. + */ + static Node splitConstant(Node a, Node b, size_t& index, bool isRev); }; // ------------------------------ end for words (string or sequence constants) diff --git a/test/unit/theory/sequences_rewriter_white.h b/test/unit/theory/sequences_rewriter_white.h index c823c0704..4cc679ca8 100644 --- a/test/unit/theory/sequences_rewriter_white.h +++ b/test/unit/theory/sequences_rewriter_white.h @@ -20,7 +20,9 @@ #include "smt/smt_engine_scope.h" #include "theory/quantifiers/extended_rewrite.h" #include "theory/rewriter.h" +#include "theory/strings/arith_entail.h" #include "theory/strings/sequences_rewriter.h" +#include "theory/strings/strings_entail.h" #include #include @@ -108,23 +110,23 @@ class SequencesRewriterWhite : public CxxTest::TestSuite Node three = d_nm->mkConst(Rational(3)); Node i = d_nm->mkVar("i", intType); - TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a)); - TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a, true)); + TS_ASSERT(StringsEntail::checkLengthOne(a)); + TS_ASSERT(StringsEntail::checkLengthOne(a, true)); Node substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one); - TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true)); + TS_ASSERT(StringsEntail::checkLengthOne(substr)); + TS_ASSERT(!StringsEntail::checkLengthOne(substr, true)); substr = d_nm->mkNode(kind::STRING_SUBSTR, d_nm->mkNode(kind::STRING_CONCAT, a, x), zero, one); - TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr, true)); + TS_ASSERT(StringsEntail::checkLengthOne(substr)); + TS_ASSERT(StringsEntail::checkLengthOne(substr, true)); substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, two); - TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true)); + TS_ASSERT(!StringsEntail::checkLengthOne(substr)); + TS_ASSERT(!StringsEntail::checkLengthOne(substr, true)); } void testCheckEntailArith() @@ -139,10 +141,10 @@ class SequencesRewriterWhite : public CxxTest::TestSuite // 1 >= (str.len (str.substr z n 1)) ---> true Node substr_z = d_nm->mkNode(kind::STRING_LENGTH, d_nm->mkNode(kind::STRING_SUBSTR, z, n, one)); - TS_ASSERT(SequencesRewriter::checkEntailArith(one, substr_z)); + TS_ASSERT(ArithEntail::check(one, substr_z)); // (str.len (str.substr z n 1)) >= 1 ---> false - TS_ASSERT(!SequencesRewriter::checkEntailArith(substr_z, one)); + TS_ASSERT(!ArithEntail::check(substr_z, one)); } void testCheckEntailArithWithAssumption() @@ -166,25 +168,25 @@ class SequencesRewriterWhite : public CxxTest::TestSuite Rewriter::rewrite(d_nm->mkNode(kind::EQUAL, x_plus_slen_y, zero)); // x + (str.len y) = 0 |= 0 >= x --> true - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(ArithEntail::checkWithAssumption( x_plus_slen_y_eq_zero, zero, x, false)); // x + (str.len y) = 0 |= 0 > x --> false - TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(!ArithEntail::checkWithAssumption( x_plus_slen_y_eq_zero, zero, x, true)); Node x_plus_slen_y_plus_z_eq_zero = Rewriter::rewrite(d_nm->mkNode( kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, z), zero)); // x + (str.len y) + z = 0 |= 0 > x --> false - TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(!ArithEntail::checkWithAssumption( x_plus_slen_y_plus_z_eq_zero, zero, x, true)); Node x_plus_slen_y_plus_slen_y_eq_zero = Rewriter::rewrite(d_nm->mkNode( kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, slen_y), zero)); // x + (str.len y) + (str.len y) = 0 |= 0 >= x --> true - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(ArithEntail::checkWithAssumption( x_plus_slen_y_plus_slen_y_eq_zero, zero, x, false)); Node five = d_nm->mkConst(Rational(5)); @@ -194,28 +196,28 @@ class SequencesRewriterWhite : public CxxTest::TestSuite Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, six)); // x + 5 < 6 |= 0 >= x --> true - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_six, zero, x, false)); + TS_ASSERT( + ArithEntail::checkWithAssumption(x_plus_five_lt_six, zero, x, false)); // x + 5 < 6 |= 0 > x --> false - TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_six, zero, x, true)); + TS_ASSERT( + !ArithEntail::checkWithAssumption(x_plus_five_lt_six, zero, x, true)); Node neg_x = d_nm->mkNode(kind::UMINUS, x); Node x_plus_five_lt_five = Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, five)); // x + 5 < 5 |= -x >= 0 --> true - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(ArithEntail::checkWithAssumption( x_plus_five_lt_five, neg_x, zero, false)); // x + 5 < 5 |= 0 > x --> true - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_five, zero, x, false)); + TS_ASSERT( + ArithEntail::checkWithAssumption(x_plus_five_lt_five, zero, x, false)); // 0 < x |= x >= (str.len (int.to.str x)) Node assm = Rewriter::rewrite(d_nm->mkNode(kind::LT, zero, x)); - TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + TS_ASSERT(ArithEntail::checkWithAssumption( assm, x, d_nm->mkNode(kind::STRING_LENGTH, d_nm->mkNode(kind::STRING_ITOS, x)), @@ -1050,33 +1052,30 @@ class SequencesRewriterWhite : public CxxTest::TestSuite Node empty_x_y = d_nm->mkNode(kind::AND, d_nm->mkNode(kind::EQUAL, empty, x), d_nm->mkNode(kind::EQUAL, empty, y)); - sameNormalForm(SequencesRewriter::inferEqsFromContains(empty, xy), - empty_x_y); + sameNormalForm(StringsEntail::inferEqsFromContains(empty, xy), empty_x_y); // inferEqsFromContains(x, (str.++ x y)) returns false Node bxya = d_nm->mkNode(kind::STRING_CONCAT, b, y, x, a); - sameNormalForm(SequencesRewriter::inferEqsFromContains(x, bxya), f); + sameNormalForm(StringsEntail::inferEqsFromContains(x, bxya), f); // inferEqsFromContains(x, y) returns null - Node n = SequencesRewriter::inferEqsFromContains(x, y); + Node n = StringsEntail::inferEqsFromContains(x, y); TS_ASSERT(n.isNull()); // inferEqsFromContains(x, x) returns something equivalent to (= x x) Node eq_x_x = d_nm->mkNode(kind::EQUAL, x, x); - sameNormalForm(SequencesRewriter::inferEqsFromContains(x, x), eq_x_x); + sameNormalForm(StringsEntail::inferEqsFromContains(x, x), eq_x_x); // inferEqsFromContains((str.replace x "B" "A"), x) returns something // equivalent to (= (str.replace x "B" "A") x) Node repl = d_nm->mkNode(kind::STRING_STRREPL, x, b, a); Node eq_repl_x = d_nm->mkNode(kind::EQUAL, repl, x); - sameNormalForm(SequencesRewriter::inferEqsFromContains(repl, x), - eq_repl_x); + sameNormalForm(StringsEntail::inferEqsFromContains(repl, x), eq_repl_x); // inferEqsFromContains(x, (str.replace x "B" "A")) returns something // equivalent to (= (str.replace x "B" "A") x) Node eq_x_repl = d_nm->mkNode(kind::EQUAL, x, repl); - sameNormalForm(SequencesRewriter::inferEqsFromContains(x, repl), - eq_x_repl); + sameNormalForm(StringsEntail::inferEqsFromContains(x, repl), eq_x_repl); } void testRewritePrefixSuffix() @@ -1402,8 +1401,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector n2 = {a}; std::vector nb; std::vector ne; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 0); TS_ASSERT(!res); } @@ -1414,8 +1412,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector n2 = {a, d_nm->mkNode(kind::STRING_ITOS, n)}; std::vector nb; std::vector ne; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 0); TS_ASSERT(!res); } @@ -1430,8 +1427,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector ne; std::vector n1r = {cd}; std::vector nbr = {ab}; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 1); TS_ASSERT(res); TS_ASSERT_EQUALS(n1, n1r); TS_ASSERT_EQUALS(nb, nbr); @@ -1448,8 +1444,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector ne; std::vector n1r = {c, x}; std::vector nbr = {ab}; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, 1); TS_ASSERT(res); TS_ASSERT_EQUALS(n1, n1r); TS_ASSERT_EQUALS(nb, nbr); @@ -1466,8 +1461,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector ne; std::vector n1r = {a}; std::vector ner = {bc}; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, -1); TS_ASSERT(res); TS_ASSERT_EQUALS(n1, n1r); TS_ASSERT_EQUALS(ne, ner); @@ -1484,8 +1478,7 @@ class SequencesRewriterWhite : public CxxTest::TestSuite std::vector ne; std::vector n1r = {x, a}; std::vector ner = {bc}; - bool res = - SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); + bool res = StringsEntail::stripConstantEndpoints(n1, n2, nb, ne, -1); TS_ASSERT(res); TS_ASSERT_EQUALS(n1, n1r); TS_ASSERT_EQUALS(ne, ner); -- 2.30.2