From: Andrew Reynolds Date: Fri, 20 Mar 2020 19:07:37 +0000 (-0500) Subject: Split string-specific operators from TheoryStringsRewriter (#3920) X-Git-Tag: cvc5-1.0.0~3465 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=964760cf81eb7414a11bbd89ef3a16e8927d6947;p=cvc5.git Split string-specific operators from TheoryStringsRewriter (#3920) Organization towards theory of sequences. The motivation of this PR is to ensure that string-specific operators in the rewriter are in their own file; thus the use of mkConst / getConst is allowable in rewriter_str.cpp. --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7e31d1494..c35a14800 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -679,18 +679,20 @@ libcvc4_add_sources( theory/strings/regexp_operation.h theory/strings/regexp_solver.cpp theory/strings/regexp_solver.h + theory/strings/sequences_rewriter.cpp + theory/strings/sequences_rewriter.h theory/strings/skolem_cache.cpp theory/strings/skolem_cache.h theory/strings/solver_state.cpp theory/strings/solver_state.h theory/strings/strings_fmf.cpp theory/strings/strings_fmf.h + theory/strings/strings_rewriter.cpp + theory/strings/strings_rewriter.h theory/strings/theory_strings.cpp theory/strings/theory_strings.h theory/strings/theory_strings_preprocess.cpp theory/strings/theory_strings_preprocess.h - theory/strings/theory_strings_rewriter.cpp - theory/strings/theory_strings_rewriter.h theory/strings/theory_strings_type_rules.h theory/strings/theory_strings_utils.cpp theory/strings/theory_strings_utils.h diff --git a/src/theory/quantifiers/extended_rewrite.cpp b/src/theory/quantifiers/extended_rewrite.cpp index 44d3666e8..7920ecbeb 100644 --- a/src/theory/quantifiers/extended_rewrite.cpp +++ b/src/theory/quantifiers/extended_rewrite.cpp @@ -20,7 +20,7 @@ #include "theory/datatypes/datatypes_rewriter.h" #include "theory/quantifiers/term_util.h" #include "theory/rewriter.h" -#include "theory/strings/theory_strings_rewriter.h" +#include 
"theory/strings/sequences_rewriter.h" using namespace CVC4::kind; using namespace std; @@ -1692,7 +1692,7 @@ Node ExtendedRewriter::extendedRewriteStrings(Node ret) if (ret.getKind() == EQUAL) { - new_ret = strings::TheoryStringsRewriter::rewriteEqualityExt(ret); + new_ret = strings::SequencesRewriter::rewriteEqualityExt(ret); } return new_ret; diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp index c23041914..6958d2528 100644 --- a/src/theory/strings/base_solver.cpp +++ b/src/theory/strings/base_solver.cpp @@ -16,7 +16,6 @@ #include "theory/strings/base_solver.h" #include "options/strings_options.h" -#include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" using namespace std; diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 723a8c08e..2a95b41ba 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -17,7 +17,7 @@ #include "theory/strings/core_solver.h" #include "options/strings_options.h" -#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/sequences_rewriter.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -155,7 +155,7 @@ void CoreSolver::checkFlatForms() for (const Node& n : it->second) { int firstc, lastc; - if (!TheoryStringsRewriter::canConstantContainList( + if (!SequencesRewriter::canConstantContainList( c, d_flat_form[n], firstc, lastc)) { Trace("strings-ff-debug") << "Flat form for " << n @@ -348,8 +348,8 @@ void CoreSolver::checkFlatForm(std::vector& eqc, { // check for constant conflict int index; - Node s = TheoryStringsRewriter::splitConstant( - cc_c, curr_c, index, isRev); + Node s = + SequencesRewriter::splitConstant(cc_c, curr_c, index, isRev); if (s.isNull()) { d_bsolver.explainConstantEqc(ac,curr,exp); @@ -905,7 +905,7 @@ void CoreSolver::getNormalForms(Node eqc, { NormalForm& nf = normal_forms[i]; int firstc, lastc; - if 
(!TheoryStringsRewriter::canConstantContainList( + if (!SequencesRewriter::canConstantContainList( c, nf.d_nf, firstc, lastc)) { Node n = nf.d_base; @@ -1910,7 +1910,7 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& if (!c.isNull()) { int findex, lindex; - if (!TheoryStringsRewriter::canConstantContainList( + if (!SequencesRewriter::canConstantContainList( c, i == 0 ? nfj : nfi, findex, lindex)) { Trace("strings-solve-debug") diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index af114e361..c586df6dd 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -15,8 +15,8 @@ #include "theory/strings/extf_solver.h" #include "options/strings_options.h" +#include "theory/strings/sequences_rewriter.h" #include "theory/strings/theory_strings_preprocess.h" -#include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" using namespace std; @@ -617,7 +617,7 @@ void ExtfSolver::checkExtfInference(Node n, if (inferEqr.getKind() == EQUAL) { // try to use the extended rewriter for equalities - inferEqrr = TheoryStringsRewriter::rewriteEqualityExt(inferEqr); + inferEqrr = SequencesRewriter::rewriteEqualityExt(inferEqr); } if (inferEqrr != inferEqr) { diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index 67ba2d5a3..389c4e7bf 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -19,7 +19,6 @@ #include "theory/ext_theory.h" #include "theory/rewriter.h" #include "theory/strings/theory_strings.h" -#include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" using namespace std; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 6c7846737..5b988061b 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -8,7 +8,7 @@ theory THEORY_STRINGS 
::CVC4::theory::strings::TheoryStrings "theory/strings/the properties check parametric propagate presolve -rewriter ::CVC4::theory::strings::TheoryStringsRewriter "theory/strings/theory_strings_rewriter.h" +rewriter ::CVC4::theory::strings::SequencesRewriter "theory/strings/sequences_rewriter.h" typechecker "theory/strings/theory_strings_type_rules.h" diff --git a/src/theory/strings/regexp_elim.cpp b/src/theory/strings/regexp_elim.cpp index 86995736e..976efad3c 100644 --- a/src/theory/strings/regexp_elim.cpp +++ b/src/theory/strings/regexp_elim.cpp @@ -17,7 +17,7 @@ #include "options/strings_options.h" #include "theory/rewriter.h" -#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/sequences_rewriter.h" #include "theory/strings/theory_strings_utils.h" using namespace CVC4; @@ -70,7 +70,7 @@ Node RegExpElimination::eliminateConcat(Node atom) for (unsigned i = 0, size = children.size(); i < size; i++) { Node c = children[i]; - Node fl = TheoryStringsRewriter::getFixedLengthForRegexp(c); + Node fl = SequencesRewriter::getFixedLengthForRegexp(c); if (fl.isNull()) { if (!hasPivotIndex && c.getKind() == REGEXP_STAR diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index f91b59834..d5105a489 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -18,7 +18,7 @@ #include "expr/kind.h" #include "options/strings_options.h" -#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/sequences_rewriter.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/word.h" @@ -920,12 +920,12 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes // all strings in the language of R1 have the same length, say n, // then the conclusion of the reduction is quantifier-free: // ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) - Node reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[0]); + 
Node reLength = SequencesRewriter::getFixedLengthForRegexp(r[0]); if (reLength.isNull()) { // try from the opposite end unsigned indexE = r.getNumChildren() - 1; - reLength = TheoryStringsRewriter::getFixedLengthForRegexp(r[indexE]); + reLength = SequencesRewriter::getFixedLengthForRegexp(r[indexE]); if (!reLength.isNull()) { indexRm = indexE; diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index 9d9c66ec2..cd66c0ebf 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -21,7 +21,6 @@ #include "options/strings_options.h" #include "theory/ext_theory.h" #include "theory/strings/theory_strings.h" -#include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" #include "theory/theory_model.h" diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp new file mode 100644 index 000000000..f4a1cd411 --- /dev/null +++ b/src/theory/strings/sequences_rewriter.cpp @@ -0,0 +1,5597 @@ +/********************* */ +/*! \file sequences_rewriter.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the theory of strings. + ** + ** Implementation of the theory of strings. 
+ **/ + +#include "theory/strings/sequences_rewriter.h" + +#include +#include + +#include "expr/node_builder.h" +#include "options/strings_options.h" +#include "smt/logic_exception.h" +#include "theory/arith/arith_msum.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/strings_rewriter.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" +#include "theory/theory.h" +#include "util/integer.h" +#include "util/rational.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::kind; +using namespace CVC4::theory; +using namespace CVC4::theory::strings; + +Node SequencesRewriter::simpleRegexpConsume(std::vector& mchildren, + std::vector& children, + int dir) +{ + Trace("regexp-ext-rewrite-debug") + << "Simple reg exp consume, dir=" << dir << ":" << std::endl; + Trace("regexp-ext-rewrite-debug") + << " mchildren : " << mchildren << std::endl; + Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; + NodeManager* nm = NodeManager::currentNM(); + unsigned tmin = dir < 0 ? 0 : dir; + unsigned tmax = dir < 0 ? 
1 : dir; + // try to remove off front and back + for (unsigned t = 0; t < 2; t++) + { + if (tmin <= t && t <= tmax) + { + bool do_next = true; + while (!children.empty() && !mchildren.empty() && do_next) + { + do_next = false; + Node xc = mchildren[mchildren.size() - 1]; + Node rc = children[children.size() - 1]; + Assert(rc.getKind() != kind::REGEXP_CONCAT); + Assert(xc.getKind() != kind::STRING_CONCAT); + if (rc.getKind() == kind::STRING_TO_REGEXP) + { + if (xc == rc[0]) + { + children.pop_back(); + mchildren.pop_back(); + do_next = true; + Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; + } + else if (xc.isConst() && rc[0].isConst()) + { + // split the constant + int index; + Node s = splitConstant(xc, rc[0], index, t == 0); + Trace("regexp-ext-rewrite-debug") + << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " + << s << " " << index << " " << t << std::endl; + if (s.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; + return NodeManager::currentNM()->mkConst(false); + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...strip equal const" << std::endl; + children.pop_back(); + mchildren.pop_back(); + if (index == 0) + { + mchildren.push_back(s); + } + else + { + children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); + } + } + do_next = true; + } + } + else if (xc.isConst()) + { + // check for constants + CVC4::String s = xc.getConst(); + if (Word::isEmpty(xc)) + { + Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; + // ignore and continue + mchildren.pop_back(); + do_next = true; + } + else if (rc.getKind() == kind::REGEXP_RANGE + || rc.getKind() == kind::REGEXP_SIGMA) + { + std::vector ssVec; + ssVec.push_back(t == 0 ? 
s.back() : s.front()); + CVC4::String ss(ssVec); + if (testConstStringInRegExp(ss, 0, rc)) + { + // strip off one character + mchildren.pop_back(); + if (s.size() > 1) + { + if (t == 0) + { + mchildren.push_back(NodeManager::currentNM()->mkConst( + s.substr(0, s.size() - 1))); + } + else + { + mchildren.push_back( + NodeManager::currentNM()->mkConst(s.substr(1))); + } + } + children.pop_back(); + do_next = true; + } + else + { + return NodeManager::currentNM()->mkConst(false); + } + } + else if (rc.getKind() == kind::REGEXP_INTER + || rc.getKind() == kind::REGEXP_UNION) + { + // see if any/each child does not work + bool result_valid = true; + Node result; + Node emp_s = NodeManager::currentNM()->mkConst(::CVC4::String("")); + for (unsigned i = 0; i < rc.getNumChildren(); i++) + { + std::vector mchildren_s; + std::vector children_s; + mchildren_s.push_back(xc); + utils::getConcat(rc[i], children_s); + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + // one conjunct cannot be satisfied, return false + if (rc.getKind() == kind::REGEXP_INTER) + { + return ret; + } + } + else + { + if (children_s.empty()) + { + // if we were able to fully consume, store the result + Assert(mchildren_s.size() <= 1); + if (mchildren_s.empty()) + { + mchildren_s.push_back(emp_s); + } + if (result.isNull()) + { + result = mchildren_s[0]; + } + else if (result != mchildren_s[0]) + { + result_valid = false; + } + } + else + { + result_valid = false; + } + } + } + if (result_valid) + { + if (result.isNull()) + { + // all disjuncts cannot be satisfied, return false + Assert(rc.getKind() == kind::REGEXP_UNION); + return NodeManager::currentNM()->mkConst(false); + } + else + { + // all branches led to the same result + children.pop_back(); + mchildren.pop_back(); + if (result != emp_s) + { + mchildren.push_back(result); + } + do_next = true; + } + } + } + else if (rc.getKind() == kind::REGEXP_STAR) + { + // check if there is no way that this star can be 
unrolled even once + std::vector mchildren_s; + mchildren_s.insert( + mchildren_s.end(), mchildren.begin(), mchildren.end()); + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + std::vector children_s; + utils::getConcat(rc[0], children_s); + Trace("regexp-ext-rewrite-debug") + << "...recursive call on body of star" << std::endl; + Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + if (!ret.isNull()) + { + Trace("regexp-ext-rewrite-debug") + << "CRE : regexp star infeasable " << xc << " " << rc + << std::endl; + children.pop_back(); + if (!children.empty()) + { + Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; + do_next = true; + } + } + else + { + if (children_s.empty()) + { + // check if beyond this, we can't do it or there is nothing + // left, if so, repeat + bool can_skip = false; + if (children.size() > 1) + { + std::vector mchildren_ss; + mchildren_ss.insert( + mchildren_ss.end(), mchildren.begin(), mchildren.end()); + std::vector children_ss; + children_ss.insert( + children_ss.end(), children.begin(), children.end() - 1); + if (t == 1) + { + std::reverse(mchildren_ss.begin(), mchildren_ss.end()); + std::reverse(children_ss.begin(), children_ss.end()); + } + if (simpleRegexpConsume(mchildren_ss, children_ss, t) + .isNull()) + { + can_skip = true; + } + } + if (!can_skip) + { + Trace("regexp-ext-rewrite-debug") + << "...can't skip" << std::endl; + // take the result of fully consuming once + if (t == 1) + { + std::reverse(mchildren_s.begin(), mchildren_s.end()); + } + mchildren.clear(); + mchildren.insert( + mchildren.end(), mchildren_s.begin(), mchildren_s.end()); + do_next = true; + } + else + { + Trace("regexp-ext-rewrite-debug") + << "...can skip " << rc << " from " << xc << std::endl; + } + } + } + } + } + if (!do_next) + { + Trace("regexp-ext-rewrite") + << "Cannot consume : " << xc << " " << rc << std::endl; + } + } + } + if (dir != 0) + { + std::reverse(children.begin(), children.end()); + 
std::reverse(mchildren.begin(), mchildren.end()); + } + } + return Node::null(); +} + +Node SequencesRewriter::rewriteEquality(Node node) +{ + Assert(node.getKind() == kind::EQUAL); + if (node[0] == node[1]) + { + return NodeManager::currentNM()->mkConst(true); + } + else if (node[0].isConst() && node[1].isConst()) + { + return NodeManager::currentNM()->mkConst(false); + } + + // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false ) + for (unsigned r = 0; r < 2; r++) + { + // must call rewrite contains directly to avoid infinite loop + // we do a fix point since we may rewrite contains terms to simpler + // contains terms. + Node ctn = checkEntailContains(node[r], node[1 - r], false); + if (!ctn.isNull()) + { + if (!ctn.getConst()) + { + return returnRewrite(node, ctn, "eq-nctn"); + } + else + { + // definitely contains but not syntactically equal + // We may be able to simplify, e.g. + // str.++( x, "a" ) == "a" ----> x = "" + } + } + } + + // ( len( s ) != len( t ) ) => ( s == t ---> false ) + // This covers cases like str.++( x, x ) == "a" ---> false + Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + Node len_eq = len0.eqNode(len1); + len_eq = Rewriter::rewrite(len_eq); + if (len_eq.isConst() && !len_eq.getConst()) + { + return returnRewrite(node, len_eq, "eq-len-deq"); + } + + std::vector c[2]; + for (unsigned i = 0; i < 2; i++) + { + utils::getConcat(node[i], c[i]); + } + + // check if the prefix, suffix mismatches + // For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false + unsigned minsize = std::min(c[0].size(), c[1].size()); + for (unsigned r = 0; r < 2; r++) + { + for (unsigned i = 0; i < minsize; i++) + { + unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i; + unsigned index2 = r == 0 ? 
i : (c[1].size() - 1) - i; + if (c[0][index1].isConst() && c[1][index2].isConst()) + { + CVC4::String s = c[0][index1].getConst(); + CVC4::String t = c[1][index2].getConst(); + unsigned len_short = s.size() <= t.size() ? s.size() : t.size(); + bool isSameFix = + r == 1 ? s.rstrncmp(t, len_short) : s.strncmp(t, len_short); + if (!isSameFix) + { + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "eq-nfix"); + } + } + if (c[0][index1] != c[1][index2]) + { + break; + } + } + } + + // standard ordering + if (node[0] > node[1]) + { + return NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]); + } + return node; +} + +Node SequencesRewriter::rewriteEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL); + if (node[0].getType().isInteger()) + { + return rewriteArithEqualityExt(node); + } + if (node[0].getType().isString()) + { + return rewriteStrEqualityExt(node); + } + return node; +} + +Node SequencesRewriter::rewriteStrEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL && node[0].getType().isString()); + + NodeManager* nm = NodeManager::currentNM(); + std::vector c[2]; + Node new_ret; + for (unsigned i = 0; i < 2; i++) + { + utils::getConcat(node[i], c[i]); + } + // ------- equality unification + bool changed = false; + for (unsigned i = 0; i < 2; i++) + { + while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back()) + { + c[0].pop_back(); + c[1].pop_back(); + changed = true; + } + // splice constants + if (!c[0].empty() && !c[1].empty() && c[0].back().isConst() + && c[1].back().isConst()) + { + Node cs[2]; + size_t csl[2]; + for (unsigned j = 0; j < 2; j++) + { + cs[j] = c[j].back(); + csl[j] = Word::getLength(cs[j]); + } + size_t larger = csl[0] > csl[1] ? 0 : 1; + size_t smallerSize = csl[1 - larger]; + if (cs[1 - larger] + == (i == 0 ? 
Word::suffix(cs[larger], smallerSize) + : Word::prefix(cs[larger], smallerSize))) + { + size_t sizeDiff = csl[larger] - smallerSize; + c[larger][c[larger].size() - 1] = + i == 0 ? Word::prefix(cs[larger], sizeDiff) + : Word::suffix(cs[larger], sizeDiff); + c[1 - larger].pop_back(); + changed = true; + } + } + for (unsigned j = 0; j < 2; j++) + { + std::reverse(c[j].begin(), c[j].end()); + } + } + if (changed) + { + // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y + Node s1 = utils::mkConcat(STRING_CONCAT, c[0]); + Node s2 = utils::mkConcat(STRING_CONCAT, c[1]); + new_ret = s1.eqNode(s2); + node = returnRewrite(node, new_ret, "str-eq-unify"); + } + + // ------- homogeneous constants + for (unsigned i = 0; i < 2; i++) + { + Node cn = checkEntailHomogeneousString(node[i]); + if (!cn.isNull() && !Word::isEmpty(cn)) + { + Assert(cn.isConst()); + Assert(Word::getLength(cn) == 1); + unsigned hchar = cn.getConst().front(); + + // The operands of the concat on each side of the equality without + // constant strings + std::vector trimmed[2]; + // Counts the number of `hchar`s on each side + size_t numHChars[2] = {0, 0}; + for (size_t j = 0; j < 2; j++) + { + // Sort the operands of the concats on both sides of the equality + // (since both sides may only contain one char, the order does not + // matter) + std::sort(c[j].begin(), c[j].end()); + for (const Node& cc : c[j]) + { + if (cc.isConst()) + { + // Count the number of `hchar`s in the string constant and make + // sure that all chars are `hchar`s + std::vector veccc = cc.getConst().getVec(); + for (size_t k = 0, size = veccc.size(); k < size; k++) + { + if (veccc[k] != hchar) + { + // This conflict case should mostly should be taken care of by + // multiset reasoning in the strings rewriter, but we recognize + // this conflict just in case. 
+ new_ret = nm->mkConst(false); + return returnRewrite( + node, new_ret, "string-eq-const-conflict-non-homog"); + } + numHChars[j]++; + } + } + else + { + trimmed[j].push_back(cc); + } + } + } + + // We have to remove the same number of `hchar`s from both sides, so the + // side with less `hchar`s determines how many we can remove + size_t trimmedConst = std::min(numHChars[0], numHChars[1]); + for (size_t j = 0; j < 2; j++) + { + size_t diff = numHChars[j] - trimmedConst; + if (diff != 0) + { + // Add a constant string to the side with more `hchar`s to restore + // the difference in number of `hchar`s + std::vector vec(diff, hchar); + trimmed[j].push_back(nm->mkConst(String(vec))); + } + } + + Node lhs = utils::mkConcat(STRING_CONCAT, trimmed[i]); + Node ss = utils::mkConcat(STRING_CONCAT, trimmed[1 - i]); + if (lhs != node[i] || ss != node[1 - i]) + { + // e.g. + // "AA" = y ++ x ---> "AA" = x ++ y if x < y + // "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z + new_ret = lhs.eqNode(ss); + node = returnRewrite(node, new_ret, "str-eq-homog-const"); + } + } + } + + // ------- rewrites for (= "" _) + Node empty = nm->mkConst(::CVC4::String("")); + for (size_t i = 0; i < 2; i++) + { + if (node[i] == empty) + { + Node ne = node[1 - i]; + if (ne.getKind() == STRING_STRREPL) + { + // (= "" (str.replace x y x)) ---> (= x "") + if (ne[0] == ne[2]) + { + Node ret = nm->mkNode(EQUAL, ne[0], empty); + return returnRewrite(node, ret, "str-emp-repl-x-y-x"); + } + + // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y ""))) + if (checkEntailNonEmpty(ne[2])) + { + Node ret = + nm->mkNode(AND, + nm->mkNode(EQUAL, ne[0], empty), + nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty))); + return returnRewrite(node, ret, "str-emp-repl-emp"); + } + + // (= "" (str.replace x "A" "")) ---> (str.prefix x "A") + if (checkEntailLengthOne(ne[1]) && ne[2] == empty) + { + Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]); + return returnRewrite(node, ret, "str-emp-repl-emp"); + } + } + else if 
(ne.getKind() == STRING_SUBSTR) + { + Node zero = nm->mkConst(Rational(0)); + + if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true)) + { + // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0 + if (ne[1] == zero) + { + Node ret = nm->mkNode(EQUAL, ne[0], empty); + return returnRewrite(node, ret, "str-emp-substr-leq-len"); + } + + // (= "" (str.substr x n m)) ---> (<= (str.len x) n) + // if n >= 0 and m > 0 + Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]); + return returnRewrite(node, ret, "str-emp-substr-leq-len"); + } + + // (= "" (str.substr "A" 0 z)) ---> (<= z 0) + if (checkEntailNonEmpty(ne[0]) && ne[1] == zero) + { + Node ret = nm->mkNode(LEQ, ne[2], zero); + return returnRewrite(node, ret, "str-emp-substr-leq-z"); + } + } + } + } + + // ------- rewrites for (= (str.replace _ _ _) _) + for (size_t i = 0; i < 2; i++) + { + if (node[i].getKind() == STRING_STRREPL) + { + Node repl = node[i]; + Node x = node[1 - i]; + + // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x)) + if (checkEntailNonEmpty(x) && repl[0] == empty) + { + Node ret = nm->mkNode( + EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1])); + return returnRewrite(node, ret, "str-eq-repl-emp"); + } + + // (= x (str.replace y x y)) ---> (= x y) + if (repl[0] == repl[2] && x == repl[1]) + { + Node ret = nm->mkNode(EQUAL, x, repl[0]); + return returnRewrite(node, ret, "str-eq-repl-to-eq"); + } + + // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A")) + if (x == repl[0]) + { + Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2])); + if (eq.isConst() && !eq.getConst()) + { + Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1])); + return returnRewrite(node, ret, "str-eq-repl-not-ctn"); + } + } + + // (= (str.replace x y z) z) --> (or (= x y) (= x z)) + // if (str.len y) = (str.len z) + if (repl[2] == x) + { + Node lenY = nm->mkNode(STRING_LENGTH, repl[1]); + Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]); + if 
(checkEntailArithEq(lenY, lenZ)) + { + Node ret = nm->mkNode(OR, + nm->mkNode(EQUAL, repl[0], repl[1]), + nm->mkNode(EQUAL, repl[0], repl[2])); + return returnRewrite(node, ret, "str-eq-repl-to-dis"); + } + } + } + } + + // Try to rewrite (= x y) into a conjunction of equalities based on length + // entailment. + // + // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) ---> + // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + // + // where yi' and yi'' correspond to some yj and + // (<= (str.len x) (str.++ y1' ... ym')) + for (unsigned i = 0; i < 2; i++) + { + if (node[1 - i].getKind() == STRING_CONCAT) + { + new_ret = inferEqsFromContains(node[i], node[1 - i]); + if (!new_ret.isNull()) + { + return returnRewrite(node, new_ret, "str-eq-conj-len-entail"); + } + } + } + + if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT) + { + // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n) + // (str.++ y_1 ... y_j y_{j + 1} ... y_m)) ---> + // (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j)) + // (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m))) + // + // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j)) + // + // This rewrite performs length-based equality splitting: If we can show + // that two prefixes have the same length, we can split an equality into + // two equalities, one over the prefixes and another over the suffixes. 
+ std::vector v0, v1; + utils::getConcat(node[0], v0); + utils::getConcat(node[1], v1); + size_t startRhs = 0; + for (size_t i = 0, size0 = v0.size(); i <= size0; i++) + { + std::vector pfxv0(v0.begin(), v0.begin() + i); + Node pfx0 = utils::mkConcat(STRING_CONCAT, pfxv0); + for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++) + { + if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size())) + { + std::vector pfxv1(v1.begin(), v1.begin() + j); + Node pfx1 = utils::mkConcat(STRING_CONCAT, pfxv1); + Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0); + Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1); + + if (checkEntailArithEq(lenPfx0, lenPfx1)) + { + std::vector sfxv0(v0.begin() + i, v0.end()); + std::vector sfxv1(v1.begin() + j, v1.end()); + Node ret = + nm->mkNode(kind::AND, + pfx0.eqNode(pfx1), + utils::mkConcat(STRING_CONCAT, sfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); + return returnRewrite(node, ret, "split-eq"); + } + else if (checkEntailArith(lenPfx1, lenPfx0, true)) + { + // The prefix on the right-hand side is strictly longer than the + // prefix on the left-hand side, so we try to strip the right-hand + // prefix by the length of the left-hand prefix + // + // Example: + // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> + // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) + std::vector rpfxv1; + if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) + { + std::vector sfxv0(v0.begin() + i, v0.end()); + pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); + Node ret = nm->mkNode( + kind::AND, + pfx0.eqNode(utils::mkConcat(STRING_CONCAT, rpfxv1)), + utils::mkConcat(STRING_CONCAT, sfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, pfxv1))); + return returnRewrite(node, ret, "split-eq-strip-r"); + } + + // If the prefix of the right-hand side is (strictly) longer than + // the prefix of the left-hand side, we can advance the left-hand + // side (since the length of the right-hand side is only increasing + // in the inner loop) + 
break; + } + else if (checkEntailArith(lenPfx0, lenPfx1, true)) + { + // The prefix on the left-hand side is strictly longer than the + // prefix on the right-hand side, so we try to strip the left-hand + // prefix by the length of the right-hand prefix + // + // Example: + // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> + // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) + std::vector rpfxv0; + if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) + { + pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); + std::vector sfxv1(v1.begin() + j, v1.end()); + Node ret = nm->mkNode( + kind::AND, + utils::mkConcat(STRING_CONCAT, rpfxv0).eqNode(pfx1), + utils::mkConcat(STRING_CONCAT, pfxv0) + .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); + return returnRewrite(node, ret, "split-eq-strip-l"); + } + + // If the prefix of the left-hand side is (strictly) longer than + // the prefix of the right-hand side, then we don't need to check + // that right-hand prefix for future left-hand prefixes anymore + // (since they are increasing in length) + startRhs = j + 1; + } + } + } + } + } + + return node; +} + +Node SequencesRewriter::rewriteArithEqualityExt(Node node) +{ + Assert(node.getKind() == EQUAL && node[0].getType().isInteger()); + + // cases where we can solve the equality + + // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes. 
+ + return node; +} + +// TODO (#1180) add rewrite +// str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) ---> +// str.substr( x, n1, n2+n3 ) +Node SequencesRewriter::rewriteConcat(Node node) +{ + Assert(node.getKind() == kind::STRING_CONCAT); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcat start " << node << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + std::vector node_vec; + Node preNode = Node::null(); + for (Node tmpNode : node) + { + if (tmpNode.getKind() == STRING_CONCAT) + { + unsigned j = 0; + // combine the first term with the previous constant if applicable + if (!preNode.isNull()) + { + if (tmpNode[0].isConst()) + { + preNode = nm->mkConst( + preNode.getConst().concat(tmpNode[0].getConst())); + node_vec.push_back(preNode); + } + else + { + node_vec.push_back(preNode); + node_vec.push_back(tmpNode[0]); + } + preNode = Node::null(); + ++j; + } + // insert the middle terms to node_vec + if (j <= tmpNode.getNumChildren() - 1) + { + node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1); + } + // take the last term as the current + tmpNode = tmpNode[tmpNode.getNumChildren() - 1]; + } + if (!tmpNode.isConst()) + { + if (!preNode.isNull()) + { + if (preNode.isConst() && !Word::isEmpty(preNode)) + { + node_vec.push_back(preNode); + } + preNode = Node::null(); + } + node_vec.push_back(tmpNode); + } + else + { + if (preNode.isNull()) + { + preNode = tmpNode; + } + else + { + std::vector vec; + vec.push_back(preNode); + vec.push_back(tmpNode); + preNode = Word::mkWord(vec); + } + } + } + if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode))) + { + node_vec.push_back(preNode); + } + + // Sort adjacent operands in str.++ that all result in the same string or the + // empty string. + // + // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) --> + // (str.++ ... [sort those 3 arguments] ... 
) + size_t lastIdx = 0; + Node lastX; + for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++) + { + Node s = getStringOrEmpty(node_vec[i]); + bool nextX = false; + if (s != lastX) + { + nextX = true; + } + + if (nextX) + { + std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i); + lastX = s; + lastIdx = i; + } + } + std::sort(node_vec.begin() + lastIdx, node_vec.end()); + + retNode = utils::mkConcat(STRING_CONCAT, node_vec); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcat end " << retNode << std::endl; + return retNode; +} + +Node SequencesRewriter::rewriteConcatRegExp(TNode node) +{ + Assert(node.getKind() == kind::REGEXP_CONCAT); + NodeManager* nm = NodeManager::currentNM(); + Trace("strings-rewrite-debug") + << "Strings::rewriteConcatRegExp flatten " << node << std::endl; + Node retNode = node; + std::vector vec; + bool changed = false; + Node emptyRe; + for (const Node& c : node) + { + if (c.getKind() == REGEXP_CONCAT) + { + changed = true; + for (const Node& cc : c) + { + vec.push_back(cc); + } + } + else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst() + && Word::isEmpty(c[0])) + { + changed = true; + emptyRe = c; + } + else if (c.getKind() == REGEXP_EMPTY) + { + // re.++( ..., empty, ... ) ---> empty + std::vector nvec; + return nm->mkNode(REGEXP_EMPTY, nvec); + } + else + { + vec.push_back(c); + } + } + if (changed) + { + // flatten + // this handles nested re.++ and elimination of str.to.re(""), e.g.: + // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 ) + if (vec.empty()) + { + Assert(!emptyRe.isNull()); + retNode = emptyRe; + } + else + { + retNode = vec.size() == 1 ? 
vec[0] : nm->mkNode(REGEXP_CONCAT, vec); + } + return returnRewrite(node, retNode, "re.concat-flatten"); + } + Trace("strings-rewrite-debug") + << "Strings::rewriteConcatRegExp start " << node << std::endl; + std::vector cvec; + // the current accumulation of constant strings + std::vector preReStr; + // whether the last component was (_)* + bool lastAllStar = false; + String emptyStr = String(""); + // this loop checks to see if components can be combined or dropped + for (unsigned i = 0, size = vec.size(); i <= size; i++) + { + Node curr; + if (i < size) + { + curr = vec[i]; + Assert(curr.getKind() != REGEXP_CONCAT); + } + // update preReStr + if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP) + { + lastAllStar = false; + preReStr.push_back(curr[0]); + curr = Node::null(); + } + else if (!preReStr.empty()) + { + Assert(!lastAllStar); + // this groups consecutive strings a++b ---> ab + Node acc = nm->mkNode(STRING_TO_REGEXP, + utils::mkConcat(STRING_CONCAT, preReStr)); + cvec.push_back(acc); + preReStr.clear(); + } + else if (!curr.isNull() && lastAllStar) + { + // if empty, drop it + // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)* + if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr)) + { + curr = Node::null(); + } + } + if (!curr.isNull()) + { + lastAllStar = false; + if (curr.getKind() == REGEXP_STAR) + { + // we can group stars (a)* ++ (a)* ---> (a)* + if (!cvec.empty() && cvec.back() == curr) + { + curr = Node::null(); + } + else if (curr[0].getKind() == REGEXP_SIGMA) + { + Assert(!lastAllStar); + lastAllStar = true; + // go back and remove empty ones from back of cvec + // e.g. 
this ensures we rewrite (a)* ++ (_)* ---> (_)* + while (!cvec.empty() && isConstRegExp(cvec.back()) + && testConstStringInRegExp(emptyStr, 0, cvec.back())) + { + cvec.pop_back(); + } + } + } + } + if (!curr.isNull()) + { + cvec.push_back(curr); + } + } + Assert(!cvec.empty()); + retNode = utils::mkConcat(REGEXP_CONCAT, cvec); + if (retNode != node) + { + // handles all cases where consecutive re constants are combined or dropped + // as described in the loop above. + return returnRewrite(node, retNode, "re.concat"); + } + + // flipping adjacent star arguments + changed = false; + for (size_t i = 0, size = cvec.size() - 1; i < size; i++) + { + if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1]) + { + // by convention, flip the order (a*)++a ---> a++(a*) + std::swap(cvec[i], cvec[i + 1]); + changed = true; + } + } + if (changed) + { + retNode = utils::mkConcat(REGEXP_CONCAT, cvec); + return returnRewrite(node, retNode, "re.concat.opt"); + } + return node; +} + +Node SequencesRewriter::rewriteStarRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_STAR); + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + if (node[0].getKind() == REGEXP_STAR) + { + // ((R)*)* ---> R* + return returnRewrite(node, node[0], "re-star-nested-star"); + } + else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst() + && Word::isEmpty(node[0][0])) + { + // ("")* ---> "" + return returnRewrite(node, node[0], "re-star-empty-string"); + } + else if (node[0].getKind() == REGEXP_EMPTY) + { + // (empty)* ---> "" + retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))); + return returnRewrite(node, retNode, "re-star-empty"); + } + else if (node[0].getKind() == REGEXP_UNION) + { + // simplification of unions under star + if (hasEpsilonNode(node[0])) + { + bool changed = false; + std::vector node_vec; + for (const Node& nc : node[0]) + { + if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst() + && Word::isEmpty(nc[0])) + { + // can be 
removed + changed = true; + } + else + { + node_vec.push_back(nc); + } + } + if (changed) + { + retNode = node_vec.size() == 1 ? node_vec[0] + : nm->mkNode(REGEXP_UNION, node_vec); + retNode = nm->mkNode(REGEXP_STAR, retNode); + // simplification of union beneath star based on loop above + // for example, ( "" | "a" )* ---> ("a")* + return returnRewrite(node, retNode, "re-star-union"); + } + } + } + return node; +} + +Node SequencesRewriter::rewriteAndOrRegExp(TNode node) +{ + Kind nk = node.getKind(); + Assert(nk == REGEXP_UNION || nk == REGEXP_INTER); + Trace("strings-rewrite-debug") + << "Strings::rewriteAndOrRegExp start " << node << std::endl; + std::vector node_vec; + for (const Node& ni : node) + { + if (ni.getKind() == nk) + { + for (const Node& nic : ni) + { + if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end()) + { + node_vec.push_back(nic); + } + } + } + else if (ni.getKind() == REGEXP_EMPTY) + { + if (nk == REGEXP_INTER) + { + return returnRewrite(node, ni, "re.and-empty"); + } + // otherwise, can ignore + } + else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA) + { + if (nk == REGEXP_UNION) + { + return returnRewrite(node, ni, "re.or-all"); + } + // otherwise, can ignore + } + else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end()) + { + node_vec.push_back(ni); + } + } + NodeManager* nm = NodeManager::currentNM(); + std::vector nvec; + Node retNode; + if (node_vec.empty()) + { + if (nk == REGEXP_INTER) + { + retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, nvec)); + } + else + { + retNode = nm->mkNode(kind::REGEXP_EMPTY, nvec); + } + } + else + { + retNode = node_vec.size() == 1 ? 
node_vec[0] : nm->mkNode(nk, node_vec); + } + if (retNode != node) + { + // flattening and removing children, based on loop above + return returnRewrite(node, retNode, "re.andor-flatten"); + } + return node; +} + +Node SequencesRewriter::rewriteLoopRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_LOOP); + Node retNode = node; + Node r = node[0]; + if (r.getKind() == REGEXP_STAR) + { + return returnRewrite(node, r, "re.loop-star"); + } + TNode n1 = node[1]; + NodeManager* nm = NodeManager::currentNM(); + CVC4::Rational rMaxInt(String::maxSize()); + AlwaysAssert(n1.isConst()) << "re.loop contains non-constant integer (1)."; + AlwaysAssert(n1.getConst().sgn() >= 0) + << "Negative integer in string REGEXP_LOOP (1)"; + Assert(n1.getConst() <= rMaxInt) + << "Exceeded UINT32_MAX in string REGEXP_LOOP (1)"; + uint32_t l = n1.getConst().getNumerator().toUnsignedInt(); + std::vector vec_nodes; + for (unsigned i = 0; i < l; i++) + { + vec_nodes.push_back(r); + } + if (node.getNumChildren() == 3) + { + TNode n2 = Rewriter::rewrite(node[2]); + Node n = + vec_nodes.size() == 0 + ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))) + : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes); + AlwaysAssert(n2.isConst()) << "re.loop contains non-constant integer (2)."; + AlwaysAssert(n2.getConst().sgn() >= 0) + << "Negative integer in string REGEXP_LOOP (2)"; + Assert(n2.getConst() <= rMaxInt) + << "Exceeded UINT32_MAX in string REGEXP_LOOP (2)"; + uint32_t u = n2.getConst().getNumerator().toUnsignedInt(); + if (u <= l) + { + retNode = n; + } + else + { + std::vector vec2; + vec2.push_back(n); + for (unsigned j = l; j < u; j++) + { + vec_nodes.push_back(r); + n = utils::mkConcat(REGEXP_CONCAT, vec_nodes); + vec2.push_back(n); + } + retNode = nm->mkNode(REGEXP_UNION, vec2); + } + } + else + { + Node rest = nm->mkNode(REGEXP_STAR, r); + retNode = vec_nodes.size() == 0 + ? rest + : vec_nodes.size() == 1 + ? 
nm->mkNode(REGEXP_CONCAT, r, rest) + : nm->mkNode(REGEXP_CONCAT, + nm->mkNode(REGEXP_CONCAT, vec_nodes), + rest); + } + Trace("strings-lp") << "Strings::lp " << node << " => " << retNode + << std::endl; + if (retNode != node) + { + return returnRewrite(node, retNode, "re.loop"); + } + return node; +} + +bool SequencesRewriter::isConstRegExp(TNode t) +{ + if (t.getKind() == kind::STRING_TO_REGEXP) + { + return t[0].isConst(); + } + else if (t.isVar()) + { + return false; + } + else + { + for (unsigned i = 0; i < t.getNumChildren(); ++i) + { + if (!isConstRegExp(t[i])) + { + return false; + } + } + return true; + } +} + +bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s, + unsigned int index_start, + TNode r) +{ + Assert(index_start <= s.size()); + Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " + << index_start << std::endl; + Assert(!r.isVar()); + Kind k = r.getKind(); + switch (k) + { + case kind::STRING_TO_REGEXP: + { + CVC4::String s2 = s.substr(index_start, s.size() - index_start); + if (r[0].isConst()) + { + return (s2 == r[0].getConst()); + } + else + { + Assert(false) << "RegExp contains variables"; + return false; + } + } + case kind::REGEXP_CONCAT: + { + if (s.size() != index_start) + { + std::vector vec_k(r.getNumChildren(), -1); + int start = 0; + int left = (int)s.size() - index_start; + int i = 0; + while (i < (int)r.getNumChildren()) + { + bool flag = true; + if (i == (int)r.getNumChildren() - 1) + { + if (testConstStringInRegExp(s, index_start + start, r[i])) + { + return true; + } + } + else if (i == -1) + { + return false; + } + else + { + for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) + { + CVC4::String t = s.substr(index_start + start, vec_k[i]); + if (testConstStringInRegExp(t, 0, r[i])) + { + start += vec_k[i]; + left -= vec_k[i]; + flag = false; + ++i; + vec_k[i] = -1; + break; + } + } + } + + if (flag) + { + --i; + if (i >= 0) + { + start -= vec_k[i]; + left += vec_k[i]; + } + } + } + 
return false; + } + else + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) return false; + } + return true; + } + } + case kind::REGEXP_UNION: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (testConstStringInRegExp(s, index_start, r[i])) return true; + } + return false; + } + case kind::REGEXP_INTER: + { + for (unsigned i = 0; i < r.getNumChildren(); ++i) + { + if (!testConstStringInRegExp(s, index_start, r[i])) return false; + } + return true; + } + case kind::REGEXP_STAR: + { + if (s.size() != index_start) + { + for (unsigned i = s.size() - index_start; i > 0; --i) + { + CVC4::String t = s.substr(index_start, i); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (index_start + i == s.size() + || testConstStringInRegExp(s, index_start + i, r)) + { + return true; + } + } + } + return false; + } + else + { + return true; + } + } + case kind::REGEXP_EMPTY: { return false; + } + case kind::REGEXP_SIGMA: + { + if (s.size() == index_start + 1) + { + return true; + } + else + { + return false; + } + } + case kind::REGEXP_RANGE: + { + if (s.size() == index_start + 1) + { + unsigned a = r[0].getConst().front(); + a = String::convertUnsignedIntToCode(a); + unsigned b = r[1].getConst().front(); + b = String::convertUnsignedIntToCode(b); + unsigned c = s.back(); + c = String::convertUnsignedIntToCode(c); + return (a <= c && c <= b); + } + else + { + return false; + } + } + case kind::REGEXP_LOOP: + { + uint32_t l = r[1].getConst().getNumerator().toUnsignedInt(); + if (s.size() == index_start) + { + return l == 0 ? 
true : testConstStringInRegExp(s, index_start, r[0]); + } + else if (l == 0 && r[1] == r[2]) + { + return false; + } + else + { + Assert(r.getNumChildren() == 3) + << "String rewriter error: LOOP has 2 children"; + if (l == 0) + { + // R{0,u} + uint32_t u = r[2].getConst().getNumerator().toUnsignedInt(); + for (unsigned len = s.size() - index_start; len >= 1; len--) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + if (len + index_start == s.size()) + { + return true; + } + else + { + Node num2 = + NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); + Node r2 = NodeManager::currentNM()->mkNode( + kind::REGEXP_LOOP, r[0], r[1], num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + } + return false; + } + else + { + // R{l,l} + Assert(r[1] == r[2]) + << "String rewriter error: LOOP nums are not equal"; + if (l > s.size() - index_start) + { + if (testConstStringInRegExp(s, s.size(), r[0])) + { + l = s.size() - index_start; + } + else + { + return false; + } + } + for (unsigned len = 1; len <= s.size() - index_start; len++) + { + CVC4::String t = s.substr(index_start, len); + if (testConstStringInRegExp(t, 0, r[0])) + { + Node num2 = + NodeManager::currentNM()->mkConst(CVC4::Rational(l - 1)); + Node r2 = NodeManager::currentNM()->mkNode( + kind::REGEXP_LOOP, r[0], num2, num2); + if (testConstStringInRegExp(s, index_start + len, r2)) + { + return true; + } + } + } + return false; + } + } + } + case REGEXP_COMPLEMENT: + { + return !testConstStringInRegExp(s, index_start, r[0]); + break; + } + default: + { + Assert(!RegExpOpr::isRegExpKind(k)); + return false; + } + } +} + +Node SequencesRewriter::rewriteMembership(TNode node) +{ + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + Node x = node[0]; + Node r = node[1]; + + if (r.getKind() == kind::REGEXP_EMPTY) + { + retNode = NodeManager::currentNM()->mkConst(false); + } + else if (x.isConst() && 
isConstRegExp(r)) + { + // test whether x in node[1] + CVC4::String s = x.getConst(); + retNode = + NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r)); + } + else if (r.getKind() == kind::REGEXP_SIGMA) + { + Node one = nm->mkConst(Rational(1)); + retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x)); + } + else if (r.getKind() == kind::REGEXP_STAR) + { + if (x.isConst()) + { + String s = x.getConst(); + if (s.size() == 0) + { + retNode = nm->mkConst(true); + // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true + return returnRewrite(node, retNode, "re-empty-in-str-star"); + } + else if (s.size() == 1) + { + if (r[0].getKind() == STRING_TO_REGEXP) + { + retNode = r[0][0].eqNode(x); + // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x + return returnRewrite(node, retNode, "re-char-in-str-star"); + } + } + } + else if (x.getKind() == STRING_CONCAT) + { + // (str.in.re (str.++ x1 ... xn) (re.* R)) --> + // (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R)) + // if the length of all strings in R is one. 
+ Node flr = getFixedLengthForRegexp(r[0]); + if (!flr.isNull()) + { + Node one = nm->mkConst(Rational(1)); + if (flr == one) + { + NodeBuilder<> nb(AND); + for (const Node& xc : x) + { + nb << nm->mkNode(STRING_IN_REGEXP, xc, r); + } + return returnRewrite( + node, nb.constructNode(), "re-in-dist-char-star"); + } + } + } + if (r[0].getKind() == kind::REGEXP_SIGMA) + { + retNode = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, retNode, "re-in-sigma-star"); + } + } + else if (r.getKind() == kind::REGEXP_CONCAT) + { + bool allSigma = true; + bool allSigmaStrict = true; + unsigned allSigmaMinSize = 0; + Node constStr; + size_t constIdx = 0; + size_t nchildren = r.getNumChildren(); + for (size_t i = 0; i < nchildren; i++) + { + Node rc = r[i]; + Assert(rc.getKind() != kind::REGEXP_EMPTY); + if (rc.getKind() == kind::REGEXP_SIGMA) + { + allSigmaMinSize++; + } + else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA) + { + allSigmaStrict = false; + } + else if (rc.getKind() == STRING_TO_REGEXP) + { + if (constStr.isNull()) + { + constStr = rc[0]; + constIdx = i; + } + else + { + allSigma = false; + break; + } + } + else + { + allSigma = false; + break; + } + } + if (allSigma) + { + if (constStr.isNull()) + { + // x in re.++(_*, _, _) ---> str.len(x) >= 2 + Node num = nm->mkConst(Rational(allSigmaMinSize)); + Node lenx = nm->mkNode(STRING_LENGTH, x); + retNode = nm->mkNode(allSigmaStrict ? 
EQUAL : GEQ, lenx, num); + return returnRewrite(node, retNode, "re-concat-pure-allchar"); + } + else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0 + && constIdx != nchildren - 1) + { + // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc") + retNode = nm->mkNode(STRING_STRCTN, x, constStr); + return returnRewrite(node, retNode, "re-concat-to-contains"); + } + } + } + else if (r.getKind() == kind::REGEXP_INTER + || r.getKind() == kind::REGEXP_UNION) + { + std::vector mvec; + for (unsigned i = 0; i < r.getNumChildren(); i++) + { + mvec.push_back( + NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i])); + } + retNode = NodeManager::currentNM()->mkNode( + r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec); + } + else if (r.getKind() == kind::STRING_TO_REGEXP) + { + retNode = x.eqNode(r[0]); + } + else if (r.getKind() == REGEXP_RANGE) + { + // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j + Node xcode = nm->mkNode(STRING_TO_CODE, x); + retNode = + nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), + nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); + } + else if (r.getKind() == REGEXP_COMPLEMENT) + { + retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); + } + else if (x != node[0] || r != node[1]) + { + retNode = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r); + } + + // do simple consumes + if (retNode == node) + { + if (r.getKind() == kind::REGEXP_STAR) + { + for (unsigned dir = 0; dir <= 1; dir++) + { + std::vector mchildren; + utils::getConcat(x, mchildren); + bool success = true; + while (success) + { + success = false; + std::vector children; + utils::getConcat(r[0], children); + Node scn = simpleRegexpConsume(mchildren, children, dir); + if (!scn.isNull()) + { + Trace("regexp-ext-rewrite") + << "Regexp star : const conflict : " << node << std::endl; + return scn; + } + else if (children.empty()) + { + // fully consumed one copy of the STAR + if 
(mchildren.empty()) + { + Trace("regexp-ext-rewrite") + << "Regexp star : full consume : " << node << std::endl; + return NodeManager::currentNM()->mkConst(true); + } + else + { + retNode = nm->mkNode(STRING_IN_REGEXP, + utils::mkConcat(STRING_CONCAT, mchildren), + r); + success = true; + } + } + } + if (retNode != node) + { + Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node + << " -> " << retNode << std::endl; + break; + } + } + } + else + { + std::vector children; + utils::getConcat(r, children); + std::vector mchildren; + utils::getConcat(x, mchildren); + unsigned prevSize = children.size() + mchildren.size(); + Node scn = simpleRegexpConsume(mchildren, children); + if (!scn.isNull()) + { + Trace("regexp-ext-rewrite") + << "Regexp : const conflict : " << node << std::endl; + return scn; + } + else + { + if ((children.size() + mchildren.size()) != prevSize) + { + // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm), + // above, we strip components to construct an equivalent membership: + // (str.++ xi .. xj) in (re.++ rk ... rl). + Node xn = utils::mkConcat(STRING_CONCAT, mchildren); + Node emptyStr = nm->mkConst(String("")); + if (children.empty()) + { + // If we stripped all components on the right, then the left is + // equal to the empty string. + // e.g. 
(str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "") + retNode = xn.eqNode(emptyStr); + } + else + { + // otherwise, construct the updated regular expression + retNode = nm->mkNode( + STRING_IN_REGEXP, xn, utils::mkConcat(REGEXP_CONCAT, children)); + } + Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " + << retNode << std::endl; + return returnRewrite(node, retNode, "re-simple-consume"); + } + } + } + } + return retNode; +} + +RewriteResponse SequencesRewriter::postRewrite(TNode node) +{ + Trace("strings-postrewrite") + << "Strings::postRewrite start " << node << std::endl; + NodeManager* nm = NodeManager::currentNM(); + Node retNode = node; + Node orig = retNode; + Kind nk = node.getKind(); + if (nk == kind::STRING_CONCAT) + { + retNode = rewriteConcat(node); + } + else if (nk == kind::EQUAL) + { + retNode = rewriteEquality(node); + } + else if (nk == kind::STRING_LENGTH) + { + Kind nk0 = node[0].getKind(); + if (node[0].isConst()) + { + retNode = nm->mkConst(Rational(Word::getLength(node[0]))); + } + else if (nk0 == kind::STRING_CONCAT) + { + Node tmpNode = node[0]; + if (tmpNode.isConst()) + { + retNode = nm->mkConst(Rational(Word::getLength(tmpNode))); + } + else if (tmpNode.getKind() == kind::STRING_CONCAT) + { + std::vector node_vec; + for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i) + { + if (tmpNode[i].isConst()) + { + node_vec.push_back( + nm->mkConst(Rational(Word::getLength(tmpNode[i])))); + } + else + { + node_vec.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_LENGTH, tmpNode[i])); + } + } + retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); + } + } + else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) + { + Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); + Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); + if (len1 == len2) + { + // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x ) + retNode = nm->mkNode(STRING_LENGTH, 
node[0][0]); + } + } + else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER + || nk0 == STRING_REV) + { + // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. + retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + } + } + else if (nk == kind::STRING_CHARAT) + { + Node one = NodeManager::currentNM()->mkConst(Rational(1)); + retNode = NodeManager::currentNM()->mkNode( + kind::STRING_SUBSTR, node[0], node[1], one); + } + else if (nk == kind::STRING_SUBSTR) + { + retNode = rewriteSubstr(node); + } + else if (nk == kind::STRING_STRCTN) + { + retNode = rewriteContains(node); + } + else if (nk == kind::STRING_LT) + { + // eliminate s < t ---> s != t AND s <= t + retNode = nm->mkNode(AND, + node[0].eqNode(node[1]).negate(), + nm->mkNode(STRING_LEQ, node[0], node[1])); + } + else if (nk == kind::STRING_LEQ) + { + retNode = StringsRewriter::rewriteStringLeq(node); + } + else if (nk == kind::STRING_STRIDOF) + { + retNode = rewriteIndexof(node); + } + else if (nk == kind::STRING_STRREPL) + { + retNode = rewriteReplace(node); + } + else if (nk == kind::STRING_STRREPLALL) + { + retNode = rewriteReplaceAll(node); + } + else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) + { + retNode = StringsRewriter::rewriteStrConvert(node); + } + else if (nk == STRING_REV) + { + retNode = rewriteStrReverse(node); + } + else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX) + { + retNode = rewritePrefixSuffix(node); + } + else if (nk == STRING_IS_DIGIT) + { + // eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 + Node t = nm->mkNode(STRING_TO_CODE, node[0]); + retNode = nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), + nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); + } + else if (nk == kind::STRING_ITOS) + { + retNode = StringsRewriter::rewriteIntToStr(node); + } + else if (nk == kind::STRING_STOI) + { + retNode = StringsRewriter::rewriteStrToInt(node); + } + else if (nk == kind::STRING_IN_REGEXP) + { + retNode = rewriteMembership(node); + } 
+ else if (nk == STRING_TO_CODE) + { + retNode = StringsRewriter::rewriteStringToCode(node); + } + else if (nk == STRING_FROM_CODE) + { + retNode = StringsRewriter::rewriteStringFromCode(node); + } + else if (nk == REGEXP_CONCAT) + { + retNode = rewriteConcatRegExp(node); + } + else if (nk == REGEXP_UNION || nk == REGEXP_INTER) + { + retNode = rewriteAndOrRegExp(node); + } + else if (nk == REGEXP_DIFF) + { + retNode = nm->mkNode( + REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1])); + } + else if (nk == REGEXP_STAR) + { + retNode = rewriteStarRegExp(node); + } + else if (nk == REGEXP_PLUS) + { + retNode = + nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0])); + } + else if (nk == REGEXP_OPT) + { + retNode = nm->mkNode(REGEXP_UNION, + nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))), + node[0]); + } + else if (nk == REGEXP_RANGE) + { + if (node[0] == node[1]) + { + retNode = nm->mkNode(STRING_TO_REGEXP, node[0]); + } + } + else if (nk == REGEXP_LOOP) + { + retNode = rewriteLoopRegExp(node); + } + + Trace("strings-postrewrite") + << "Strings::postRewrite returning " << retNode << std::endl; + if (orig != retNode) + { + Trace("strings-rewrite-debug") + << "Strings: post-rewrite " << orig << " to " << retNode << std::endl; + } + return RewriteResponse(orig == retNode ? 
REWRITE_DONE : REWRITE_AGAIN_FULL, + retNode); +} + +bool SequencesRewriter::hasEpsilonNode(TNode node) +{ + for (unsigned int i = 0; i < node.getNumChildren(); i++) + { + if (node[i].getKind() == kind::STRING_TO_REGEXP && node[i][0].isConst() + && Word::isEmpty(node[i][0])) + { + return true; + } + } + return false; +} + +RewriteResponse SequencesRewriter::preRewrite(TNode node) +{ + return RewriteResponse(REWRITE_DONE, node); +} + +Node SequencesRewriter::rewriteSubstr(Node node) +{ + Assert(node.getKind() == kind::STRING_SUBSTR); + + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + if (Word::isEmpty(node[0])) + { + Node ret = node[0]; + return returnRewrite(node, ret, "ss-emptystr"); + } + // rewriting for constant arguments + if (node[1].isConst() && node[2].isConst()) + { + Node s = node[0]; + CVC4::Rational rMaxInt(String::maxSize()); + uint32_t start; + if (node[1].getConst() > rMaxInt) + { + // start beyond the maximum size of strings + // thus, it must be beyond the end point of this string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-max-oob"); + } + else if (node[1].getConst().sgn() < 0) + { + // start before the beginning of the string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-neg"); + } + else + { + start = node[1].getConst().getNumerator().toUnsignedInt(); + if (start >= Word::getLength(node[0])) + { + // start beyond the end of the string + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-start-oob"); + } + } + if (node[2].getConst() > rMaxInt) + { + // take up to the end of the string + size_t lenS = Word::getLength(s); + Node ret = Word::suffix(s, lenS - start); + return returnRewrite(node, ret, "ss-const-len-max-oob"); + } + else if (node[2].getConst().sgn() <= 0) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-const-len-non-pos"); + 
} + else + { + uint32_t len = + node[2].getConst().getNumerator().toUnsignedInt(); + if (start + len > Word::getLength(node[0])) + { + // take up to the end of the string + size_t lenS = Word::getLength(s); + Node ret = Word::suffix(s, lenS - start); + return returnRewrite(node, ret, "ss-const-end-oob"); + } + else + { + // compute the substr using the constant string + Node ret = Word::substr(s, start, len); + return returnRewrite(node, ret, "ss-const-ss"); + } + } + } + } + Node zero = nm->mkConst(CVC4::Rational(0)); + + // if entailed non-positive length or negative start point + if (checkEntailArith(zero, node[1], true)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-neg"); + } + else if (checkEntailArith(zero, node[2])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-len-non-pos"); + } + + if (node[0].getKind() == STRING_SUBSTR) + { + // (str.substr (str.substr x a b) c d) ---> "" if c >= b + // + // Note that this rewrite can be generalized to: + // + // (str.substr x a b) ---> "" if a >= (str.len x) + // + // This can be done when we generalize our entailment methods to + // accept an optional context. Then we could conjecture that + // (str.substr x a b) rewrites to "" and do a case analysis: + // + // - a < 0 or b < 0 (the result is trivially empty in these cases) + // - a >= (str.len x) assuming that { a >= 0, b >= 0 } + // + // For example, for (str.substr (str.substr x a a) a a), we could + // then deduce that under those assumptions, "a" is an + // over-approximation of the length of (str.substr x a a), which + // then allows us to reason that the result of the whole term must + // be empty. 
+ if (checkEntailArith(node[1], node[0][2])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-geq-len"); + } + } + else if (node[0].getKind() == STRING_STRREPL) + { + // (str.substr (str.replace x y z) 0 n) + // ---> (str.replace (str.substr x 0 n) y z) + // if (str.len y) = 1 and (str.len z) = 1 + if (node[1] == zero) + { + if (checkEntailLengthOne(node[0][1], true) + && checkEntailLengthOne(node[0][2], true)) + { + Node ret = nm->mkNode( + kind::STRING_STRREPL, + nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]), + node[0][1], + node[0][2]); + return returnRewrite(node, ret, "substr-repl-swap"); + } + } + } + + std::vector n1; + utils::getConcat(node[0], n1); + + // definite inclusion + if (node[1] == zero) + { + Node curr = node[2]; + std::vector childrenr; + if (stripSymbolicLength(n1, childrenr, 1, curr)) + { + if (curr != zero && !n1.empty()) + { + childrenr.push_back(nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + node[1], + curr)); + } + Node ret = utils::mkConcat(STRING_CONCAT, childrenr); + return returnRewrite(node, ret, "ss-len-include"); + } + } + + // symbolic length analysis + for (unsigned r = 0; r < 2; r++) + { + // the amount of characters we can strip + Node curr; + if (r == 0) + { + if (node[1] != zero) + { + // strip up to start point off the start of the string + curr = node[1]; + } + } + else if (r == 1) + { + Node tot_len = + Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0])); + Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2])); + if (node[2] != tot_len) + { + if (checkEntailArith(node[2], tot_len)) + { + // end point beyond end point of string, map to tot_len + Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len); + return returnRewrite(node, ret, "ss-end-pt-norm"); + } + else + { + // strip up to ( str.len(node[0]) - end_pt ) off the end of the string + curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, 
tot_len, end_pt)); + } + } + + // (str.substr s x y) --> "" if x < len(s) |= 0 >= y + Node n1_lt_tot_len = + Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len)); + if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-start-entails-zero-len"); + } + + // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s) + Node non_zero_len = + Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2])); + if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-non-zero-len-entails-oob"); + } + + // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s) + Node geq_zero_start = + Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero)); + if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false)) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s"); + } + + // (str.substr s x x) ---> "" if (str.len s) <= 1 + if (node[1] == node[2] && checkEntailLengthOne(node[0])) + { + Node ret = Word::mkEmptyWord(node.getType()); + return returnRewrite(node, ret, "ss-len-one-z-z"); + } + } + if (!curr.isNull()) + { + // strip off components while quantity is entailed positive + int dir = r == 0 ? 
1 : -1; + std::vector childrenr; + if (stripSymbolicLength(n1, childrenr, dir, curr)) + { + if (r == 0) + { + Node ret = nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + curr, + node[2]); + return returnRewrite(node, ret, "ss-strip-start-pt"); + } + else + { + Node ret = nm->mkNode(kind::STRING_SUBSTR, + utils::mkConcat(STRING_CONCAT, n1), + node[1], + node[2]); + return returnRewrite(node, ret, "ss-strip-end-pt"); + } + } + } + } + // combine substr + if (node[0].getKind() == kind::STRING_SUBSTR) + { + Node start_inner = node[0][1]; + Node start_outer = node[1]; + if (checkEntailArith(start_outer) && checkEntailArith(start_inner)) + { + // both are positive + // thus, start point is definitely start_inner+start_outer. + // We can rewrite if it for certain what the length is + + // the length of a string from the inner substr subtracts the start point + // of the outer substr + Node len_from_inner = + Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer)); + Node len_from_outer = node[2]; + Node new_len; + // take quantity that is for sure smaller than the other + if (len_from_inner == len_from_outer) + { + new_len = len_from_inner; + } + else if (checkEntailArith(len_from_inner, len_from_outer)) + { + new_len = len_from_outer; + } + else if (checkEntailArith(len_from_outer, len_from_inner)) + { + new_len = len_from_inner; + } + if (!new_len.isNull()) + { + Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer); + Node ret = + nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len); + return returnRewrite(node, ret, "ss-combine"); + } + } + } + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteContains(Node node) +{ + Assert(node.getKind() == kind::STRING_STRCTN); + NodeManager* nm = NodeManager::currentNM(); + + if (node[0] == node[1]) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, 
"ctn-eq"); + } + if (node[0].isConst()) + { + CVC4::String s = node[0].getConst(); + if (node[1].isConst()) + { + Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos); + return returnRewrite(node, ret, "ctn-const"); + } + else + { + Node t = node[1]; + if (Word::isEmpty(node[0])) + { + Node len1 = + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len1, true)) + { + // we handle the false case here since the rewrite for equality + // uses this function, hence we want to conclude false if possible. + // len(x)>0 => contains( "", x ) ---> false + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-lhs-emptystr"); + } + } + else if (checkEntailLengthOne(t)) + { + const std::vector& vec = s.getVec(); + + NodeBuilder<> nb(OR); + nb << nm->mkConst(String("")).eqNode(t); + for (unsigned c : vec) + { + std::vector sv = {c}; + nb << nm->mkConst(String(sv)).eqNode(t); + } + + // str.contains("ABCabc", t) ---> + // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c" + // if len(t) <= 1 + Node ret = nb; + return returnRewrite(node, ret, "ctn-split"); + } + else if (node[1].getKind() == kind::STRING_CONCAT) + { + int firstc, lastc; + if (!canConstantContainConcat(node[0], node[1], firstc, lastc)) + { + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-nconst-ctn-concat"); + } + } + } + } + if (node[1].isConst()) + { + size_t len = Word::getLength(node[1]); + if (len == 0) + { + // contains( x, "" ) ---> true + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-rhs-emptystr"); + } + else if (len == 1) + { + // The following rewrites are specific to a single character second + // argument of contains, where we can reason that this character is + // not split over multiple components in the first argument. 
+ if (node[0].getKind() == STRING_CONCAT) + { + std::vector nc1; + utils::getConcat(node[0], nc1); + NodeBuilder<> nb(OR); + for (const Node& ncc : nc1) + { + nb << nm->mkNode(STRING_STRCTN, ncc, node[1]); + } + Node ret = nb.constructNode(); + // str.contains( x ++ y, "A" ) ---> + // str.contains( x, "A" ) OR str.contains( y, "A" ) + return returnRewrite(node, ret, "ctn-concat-char"); + } + else if (node[0].getKind() == STRING_STRREPL) + { + Node rplDomain = checkEntailContains(node[0][1], node[1]); + if (!rplDomain.isNull() && !rplDomain.getConst()) + { + Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); + Node d2 = + nm->mkNode(AND, + nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), + nm->mkNode(STRING_STRCTN, node[0][2], node[1])); + Node ret = nm->mkNode(OR, d1, d2); + // If str.contains( y, "A" ) ---> false, then: + // str.contains( str.replace( x, y, z ), "A" ) ---> + // str.contains( x, "A" ) OR + // ( str.contains( x, y ) AND str.contains( z, "A" ) ) + return returnRewrite(node, ret, "ctn-repl-char"); + } + } + } + } + std::vector nc1; + utils::getConcat(node[0], nc1); + std::vector nc2; + utils::getConcat(node[1], nc2); + + // component-wise containment + std::vector nc1rb; + std::vector nc1re; + if (componentContains(nc1, nc2, nc1rb, nc1re) != -1) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, "ctn-component"); + } + + // strip endpoints + std::vector nb; + std::vector ne; + if (stripConstantEndpoints(nc1, nc2, nb, ne)) + { + Node ret = NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, utils::mkConcat(STRING_CONCAT, nc1), node[1]); + return returnRewrite(node, ret, "ctn-strip-endpt"); + } + + for (const Node& n : nc2) + { + if (n.getKind() == kind::STRING_STRREPL) + { + // (str.contains x (str.replace y z w)) --> false + // if (str.contains x y) = false and (str.contains x w) = false + // + // Reasoning: (str.contains x y) checks that x does not contain y if the + // replacement does not 
change y. (str.contains x w) checks that if the + // replacement changes anything in y, the w makes it impossible for it to + // occur in x. + Node ctnConst = checkEntailContains(node[0], n[0]); + if (!ctnConst.isNull() && !ctnConst.getConst()) + { + Node ctnConst2 = checkEntailContains(node[0], n[2]); + if (!ctnConst2.isNull() && !ctnConst2.getConst()) + { + Node res = nm->mkConst(false); + return returnRewrite(node, res, "ctn-rpl-non-ctn"); + } + } + + // (str.contains x (str.++ w (str.replace x y x) z)) ---> + // (and (= w "") (= x (str.replace x y x)) (= z "")) + // + // TODO: Remove with under-/over-approximation + if (node[0] == n[0] && node[0] == n[2]) + { + Node ret; + if (nc2.size() > 1) + { + Node emp = nm->mkConst(CVC4::String("")); + NodeBuilder<> nb2(kind::AND); + for (const Node& n2 : nc2) + { + if (n2 == n) + { + nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]); + } + else + { + nb2 << nm->mkNode(kind::EQUAL, emp, n2); + } + } + ret = nb2.constructNode(); + } + else + { + ret = nm->mkNode(kind::EQUAL, node[0], node[1]); + } + return returnRewrite(node, ret, "ctn-repl-self"); + } + } + } + + // length entailment + Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(len_n2, len_n1, true)) + { + // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, "ctn-len-ineq"); + } + + // multi-set reasoning + // For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false + // since the number of a's in the second argument is greater than the number + // of a's in the first argument + if (checkEntailMultisetSubset(node[0], node[1])) + { + Node ret = nm->mkConst(false); + return returnRewrite(node, ret, "ctn-mset-nss"); + } + + if (checkEntailArith(len_n2, len_n1, false)) + { + // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2 
+ Node ret = node[0].eqNode(node[1]); + return returnRewrite(node, ret, "ctn-len-ineq-nstrict"); + } + + // splitting + if (node[0].getKind() == kind::STRING_CONCAT) + { + if (node[1].isConst()) + { + CVC4::String t = node[1].getConst(); + // Below, we are looking for a constant component of node[0] + // has no overlap with node[1], which means we can split. + // Notice that if the first or last components had no + // overlap, these would have been removed by strip + // constant endpoints above. + // Hence, we consider only the inner children. + for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++) + { + // constant contains + if (node[0][i].isConst()) + { + CVC4::String s = node[0][i].getConst(); + // if no overlap, we can split into disjunction + if (s.noOverlapWith(t)) + { + std::vector nc0; + utils::getConcat(node[0], nc0); + std::vector spl[2]; + spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i); + Assert(i < nc0.size() - 1); + spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end()); + Node ret = NodeManager::currentNM()->mkNode( + kind::OR, + NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, + utils::mkConcat(STRING_CONCAT, spl[0]), + node[1]), + NodeManager::currentNM()->mkNode( + kind::STRING_STRCTN, + utils::mkConcat(STRING_CONCAT, spl[1]), + node[1])); + return returnRewrite(node, ret, "ctn-split"); + } + } + } + } + } + else if (node[0].getKind() == kind::STRING_SUBSTR) + { + // (str.contains (str.substr x n (str.len y)) y) ---> + // (= (str.substr x n (str.len y)) y) + // + // TODO: Remove with under-/over-approximation + if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1])) + { + Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]); + return returnRewrite(node, ret, "ctn-substr"); + } + } + else if (node[0].getKind() == kind::STRING_STRREPL) + { + if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst()) + { + if (Word::noOverlapWith(node[1], node[0][1]) + && Word::noOverlapWith(node[1], node[0][2])) + { + 
// (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3) + // if there is no overlap between c1 and c3 and none between c2 and c3 + Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-cnsts-to-ctn"); + } + } + + if (node[0][0] == node[0][2]) + { + // (str.contains (str.replace x y x) y) ---> (str.contains x y) + if (node[0][1] == node[1]) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-to-ctn"); + } + + // (str.contains (str.replace x y x) z) ---> (str.contains x z) + // if (str.len z) <= 1 + if (checkEntailLengthOne(node[1])) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); + return returnRewrite(node, ret, "ctn-repl-len-one-to-ctn"); + } + } + + // (str.contains (str.replace x y z) z) ---> + // (or (str.contains x y) (str.contains x z)) + if (node[0][2] == node[1]) + { + Node ret = nm->mkNode(OR, + nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), + nm->mkNode(STRING_STRCTN, node[0][0], node[0][2])); + return returnRewrite(node, ret, "ctn-repl-to-ctn-disj"); + } + + // (str.contains (str.replace x y z) w) ---> + // (str.contains (str.replace x y "") w) + // if (str.contains z w) ---> false and (str.len w) = 1 + if (checkEntailLengthOne(node[1])) + { + Node ctn = checkEntailContains(node[1], node[0][2]); + if (!ctn.isNull() && !ctn.getConst()) + { + Node empty = nm->mkConst(String("")); + Node ret = nm->mkNode( + kind::STRING_STRCTN, + nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty), + node[1]); + return returnRewrite(node, ret, "ctn-repl-simp-repl"); + } + } + } + + if (node[1].getKind() == kind::STRING_STRREPL) + { + // (str.contains x (str.replace y x y)) ---> + // (str.contains x y) + if (node[0] == node[1][1] && node[1][0] == node[1][2]) + { + Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]); + return returnRewrite(node, ret, "ctn-repl"); + } + + // (str.contains x (str.replace 
"" x y)) ---> + // (= "" (str.replace "" x y)) + // + // Note: Length-based reasoning is not sufficient to get this rewrite. We + // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0 + // nor str.len(x) - str.len(str.replace("", x, y)) >= 0 + Node emp = nm->mkConst(CVC4::String("")); + if (node[0] == node[1][1] && node[1][0] == emp) + { + Node ret = nm->mkNode(kind::EQUAL, emp, node[1]); + return returnRewrite(node, ret, "ctn-repl-empty"); + } + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteIndexof(Node node) +{ + Assert(node.getKind() == kind::STRING_STRIDOF); + NodeManager* nm = NodeManager::currentNM(); + + if (node[2].isConst() && node[2].getConst().sgn() < 0) + { + // z<0 implies str.indexof( x, y, z ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-neg"); + } + + // evaluation and simple cases + std::vector children0; + utils::getConcat(node[0], children0); + if (children0[0].isConst() && node[1].isConst() && node[2].isConst()) + { + CVC4::Rational rMaxInt(CVC4::String::maxSize()); + if (node[2].getConst() > rMaxInt) + { + // We know that, due to limitations on the size of string constants + // in our implementation, that accessing a position greater than + // rMaxInt is guaranteed to be out of bounds. 
+ Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-max"); + } + Assert(node[2].getConst().sgn() >= 0); + Node s = children0[0]; + Node t = node[1]; + uint32_t start = + node[2].getConst().getNumerator().toUnsignedInt(); + std::size_t ret = Word::find(s, t, start); + if (ret != std::string::npos) + { + Node retv = nm->mkConst(Rational(static_cast(ret))); + return returnRewrite(node, retv, "idof-find"); + } + else if (children0.size() == 1) + { + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nfind"); + } + } + + if (node[0] == node[1]) + { + if (node[2].isConst()) + { + if (node[2].getConst().sgn() == 0) + { + // indexof( x, x, 0 ) --> 0 + Node zero = nm->mkConst(Rational(0)); + return returnRewrite(node, zero, "idof-eq-cst-start"); + } + } + if (checkEntailArith(node[2], true)) + { + // y>0 implies indexof( x, x, y ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-eq-nstart"); + } + Node emp = nm->mkConst(CVC4::String("")); + if (node[0] != emp) + { + // indexof( x, x, z ) ---> indexof( "", "", z ) + Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]); + return returnRewrite(node, ret, "idof-eq-norm"); + } + } + + Node len0 = nm->mkNode(STRING_LENGTH, node[0]); + Node len1 = nm->mkNode(STRING_LENGTH, node[1]); + Node len0m2 = nm->mkNode(MINUS, len0, node[2]); + + if (node[1].isConst()) + { + if (Word::isEmpty(node[1])) + { + if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2])) + { + // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z + return returnRewrite(node, node[2], "idof-emp-idof"); + } + } + } + + if (checkEntailArith(len1, len0m2, true)) + { + // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-len"); + } + + Node fstr = node[0]; + if (!node[2].isConst() || node[2].getConst().sgn() != 0) + { + fstr = nm->mkNode(kind::STRING_SUBSTR, 
node[0], node[2], len0); + fstr = Rewriter::rewrite(fstr); + } + + Node cmp_conr = checkEntailContains(fstr, node[1]); + Trace("strings-rewrite-debug") << "For " << node << ", check contains(" + << fstr << ", " << node[1] << ")" << std::endl; + Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl; + std::vector children1; + utils::getConcat(node[1], children1); + if (!cmp_conr.isNull()) + { + if (cmp_conr.getConst()) + { + if (node[2].isConst() && node[2].getConst().sgn() == 0) + { + // past the first position in node[0] that contains node[1], we can drop + std::vector nb; + std::vector ne; + int cc = componentContains(children0, children1, nb, ne, true, 1); + if (cc != -1 && !ne.empty()) + { + // For example: + // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0) + Node nn = utils::mkConcat(STRING_CONCAT, children0); + Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, ret, "idof-def-ctn"); + } + + // Strip components from the beginning that are guaranteed not to match + if (stripConstantEndpoints(children0, children1, nb, ne, 1)) + { + // str.indexof(str.++("AB", x, "C"), "C", 0) ---> + // 2 + str.indexof(str.++(x, "C"), "C", 0) + Node ret = + nm->mkNode(kind::PLUS, + nm->mkNode(kind::STRING_LENGTH, + utils::mkConcat(STRING_CONCAT, nb)), + nm->mkNode(kind::STRING_STRIDOF, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + return returnRewrite(node, ret, "idof-strip-cnst-endpts"); + } + } + + // strip symbolic length + Node new_len = node[2]; + std::vector nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // For example: + // z>str.len( x1 ) and str.contains( x2, y )-->true + // implies + // str.indexof( str.++( x1, x2 ), y, z ) ---> + // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) ) + Node nn = utils::mkConcat(STRING_CONCAT, children0); + Node ret = + nm->mkNode(kind::PLUS, + nm->mkNode(kind::MINUS, node[2], new_len), + 
nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len)); + return returnRewrite(node, ret, "idof-strip-sym-len"); + } + } + else + { + // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nctn"); + } + } + else + { + Node new_len = node[2]; + std::vector nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // Normalize the string before the start index. + // + // For example: + // str.indexof(str.++("ABCD", x), y, 3) ---> + // str.indexof(str.++("AAAD", x), y, 3) + Node nodeNr = utils::mkConcat(STRING_CONCAT, nr); + Node normNr = lengthPreserveRewrite(nodeNr); + if (normNr != nodeNr) + { + std::vector normNrChildren; + utils::getConcat(normNr, normNrChildren); + std::vector children(normNrChildren); + children.insert(children.end(), children0.begin(), children0.end()); + Node nn = utils::mkConcat(STRING_CONCAT, children); + Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, res, "idof-norm-prefix"); + } + } + } + + if (node[2].isConst() && node[2].getConst().sgn() == 0) + { + std::vector cb; + std::vector ce; + if (stripConstantEndpoints(children0, children1, cb, ce, -1)) + { + Node ret = utils::mkConcat(STRING_CONCAT, children0); + ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]); + // For example: + // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 ) + return returnRewrite(node, ret, "rpl-pull-endpt"); + } + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplace(Node node) +{ + Assert(node.getKind() == kind::STRING_STRREPL); + NodeManager* nm = NodeManager::currentNM(); + + if (node[1].isConst() && Word::isEmpty(node[1])) + { + Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]); + return returnRewrite(node, ret, "rpl-rpl-empty"); + } + + std::vector children0; + utils::getConcat(node[0], 
children0); + + if (node[1].isConst() && children0[0].isConst()) + { + Node s = children0[0]; + Node t = node[1]; + std::size_t p = Word::find(s, t); + if (p == std::string::npos) + { + if (children0.size() == 1) + { + return returnRewrite(node, node[0], "rpl-const-nfind"); + } + } + else + { + Node s1 = Word::substr(s, 0, p); + Node s3 = Word::substr(s, p + Word::getLength(t)); + std::vector children; + if (!Word::isEmpty(s1)) + { + children.push_back(s1); + } + children.push_back(node[2]); + if (!Word::isEmpty(s3)) + { + children.push_back(s3); + } + children.insert(children.end(), children0.begin() + 1, children0.end()); + Node ret = utils::mkConcat(STRING_CONCAT, children); + return returnRewrite(node, ret, "rpl-const-find"); + } + } + + // rewrites that apply to both replace and replaceall + Node rri = rewriteReplaceInternal(node); + if (!rri.isNull()) + { + // printing of the rewrite managed by the call above + return rri; + } + + if (node[0] == node[2]) + { + // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x + Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); + Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); + if (checkEntailArith(l1, l0)) + { + return returnRewrite(node, node[0], "rpl-rpl-len-id"); + } + + // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x) + // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... 
(= yn "") + if (checkEntailLengthOne(node[0])) + { + Node empty = nm->mkConst(String("")); + Node rn1 = Rewriter::rewrite( + rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty))); + if (rn1 != node[1]) + { + std::vector emptyNodes; + bool allEmptyEqs; + std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1); + + if (allEmptyEqs) + { + Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodes); + if (node[1] != nn1) + { + Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]); + return returnRewrite(node, ret, "rpl-x-y-x-simp"); + } + } + } + } + } + + std::vector children1; + utils::getConcat(node[1], children1); + + // check if contains definitely does (or does not) hold + Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]); + Node cmp_conr = Rewriter::rewrite(cmp_con); + if (!checkEntailContains(node[0], node[1]).isNull()) + { + if (cmp_conr.getConst()) + { + // component-wise containment + std::vector cb; + std::vector ce; + int cc = componentContains(children0, children1, cb, ce, true, 1); + if (cc != -1) + { + if (cc == 0 && children0[0] == children1[0]) + { + // definitely a prefix, can do the replace + // for example, + // str.replace( str.++( x, "ab" ), str.++( x, "a" ), y ) ---> + // str.++( y, "b" ) + std::vector cres; + cres.push_back(node[2]); + cres.insert(cres.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, cres); + return returnRewrite(node, ret, "rpl-cctn-rpl"); + } + else if (!ce.empty()) + { + // we can pull remainder past first definite containment + // for example, + // str.replace( str.++( x, "ab" ), "a", y ) ---> + // str.++( str.replace( str.++( x, "a" ), "a", y ), "b" ) + // this is independent of whether the second argument may be empty + std::vector scc; + scc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + scc.insert(scc.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, scc); + 
return returnRewrite(node, ret, "rpl-cctn"); + } + } + } + else + { + // ~contains( t, s ) => ( replace( t, s, r ) ----> t ) + return returnRewrite(node, node[0], "rpl-nctn"); + } + } + else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND) + { + // Rewriting the str.contains may return equalities of the form (= x ""). + // In that case, we can substitute the variables appearing in those + // equalities with the empty string in the third argument of the + // str.replace. For example: + // + // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "") + // + // This can be done because str.replace changes x iff (str.++ x y) is in x + // but that means that y must be empty in that case. Thus, we can + // substitute y with "" in the third argument. Note that the third argument + // does not matter when the str.replace does not apply. + // + Node empty = nm->mkConst(::CVC4::String("")); + + std::vector emptyNodes; + bool allEmptyEqs; + std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr); + + if (emptyNodes.size() > 0) + { + // Perform the substitutions + std::vector substs(emptyNodes.size(), TNode(empty)); + Node nn2 = node[2].substitute( + emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end()); + + // If the contains rewrites to a conjunction of empty-string equalities + // and we are doing the replacement in an empty string, we can rewrite + // the string-to-replace with a concatenation of all the terms that must + // be empty: + // + // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn) z) + // if (str.contains "" y) ---> (and (= y1 "") ... 
(= yn "")) + if (node[0] == empty && allEmptyEqs) + { + std::vector emptyNodesList(emptyNodes.begin(), emptyNodes.end()); + Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodesList); + if (nn1 != node[1] || nn2 != node[2]) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2); + return returnRewrite(node, res, "rpl-emp-cnts-substs"); + } + } + + if (nn2 != node[2]) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2); + return returnRewrite(node, res, "rpl-cnts-substs"); + } + } + } + + if (cmp_conr != cmp_con) + { + if (checkEntailNonEmpty(node[1])) + { + // pull endpoints that can be stripped + // for example, + // str.replace( str.++( "b", x, "b" ), "a", y ) ---> + // str.++( "b", str.replace( x, "a", y ), "b" ) + std::vector cb; + std::vector ce; + if (stripConstantEndpoints(children0, children1, cb, ce)) + { + std::vector cc; + cc.insert(cc.end(), cb.begin(), cb.end()); + cc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + utils::mkConcat(STRING_CONCAT, children0), + node[1], + node[2])); + cc.insert(cc.end(), ce.begin(), ce.end()); + Node ret = utils::mkConcat(STRING_CONCAT, cc); + return returnRewrite(node, ret, "rpl-pull-endpt"); + } + } + } + + children1.clear(); + utils::getConcat(node[1], children1); + Node lastChild1 = children1[children1.size() - 1]; + if (lastChild1.getKind() == kind::STRING_SUBSTR) + { + // (str.replace x (str.++ t (str.substr y i j)) z) ---> + // (str.replace x (str.++ t + // (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z) + // if j > len(x) + // + // Reasoning: If the string to be replaced is longer than x, then it does + // not matter how much longer it is, the result is always x. Thus, it is + // fine to only look at the prefix of length len(x) + 1 - len(t). 
+ + children1.pop_back(); + // Length of the non-substr components in the second argument + Node partLen1 = nm->mkNode(kind::STRING_LENGTH, + utils::mkConcat(STRING_CONCAT, children1)); + Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]); + + Node zero = nm->mkConst(Rational(0)); + Node one = nm->mkConst(Rational(1)); + Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]); + Node len0_1 = nm->mkNode(kind::PLUS, len0, one); + // Check len(t) + j > len(x) + 1 + if (checkEntailArith(maxLen1, len0_1, true)) + { + children1.push_back(nm->mkNode( + kind::STRING_SUBSTR, + lastChild1[0], + lastChild1[1], + nm->mkNode( + kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1)))); + Node res = nm->mkNode(kind::STRING_STRREPL, + node[0], + utils::mkConcat(STRING_CONCAT, children1), + node[2]); + return returnRewrite(node, res, "repl-subst-idx"); + } + } + + if (node[0].getKind() == STRING_STRREPL) + { + Node x = node[0]; + Node y = node[1]; + Node z = node[2]; + if (x[0] == x[2] && x[0] == y) + { + // (str.replace (str.replace y w y) y z) --> + // (str.replace (str.replace y w z) y z) + // if (str.len w) >= (str.len z) and w != z + // + // Reasoning: There are two cases: (1) w does not appear in y and (2) w + // does appear in y. + // + // Case (1): In this case, the reasoning is trivial. The + // inner replace does not do anything, so we can just replace its third + // argument with any string. + // + // Case (2): After the inner replace, we are guaranteed to have a string + // that contains y at the index of w in the original string y. The outer + // replace then replaces that y with z, so we can short-circuit that + // replace by directly replacing w with z in the inner replace. We can + // only do that if the result of the new inner replace does not contain + // y, otherwise we end up doing two replaces that are different from the + // original expression. 
We enforce that by requiring that the length of w + // has to be greater or equal to the length of z and that w and z have to + // be different. This makes sure that an inner replace changes a string + // to a string that is shorter than y, making it impossible for the outer + // replace to match. + Node w = x[1]; + + // (str.len w) >= (str.len z) + Node wlen = nm->mkNode(kind::STRING_LENGTH, w); + Node zlen = nm->mkNode(kind::STRING_LENGTH, z); + if (checkEntailArith(wlen, zlen)) + { + // w != z + Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z)); + if (wEqZ.isConst() && !wEqZ.getConst()) + { + Node ret = nm->mkNode(kind::STRING_STRREPL, + nm->mkNode(kind::STRING_STRREPL, y, w, z), + y, + z); + return returnRewrite(node, ret, "repl-repl-short-circuit"); + } + } + } + } + + if (node[1].getKind() == STRING_STRREPL) + { + if (node[1][0] == node[0]) + { + if (node[1][0] == node[1][2] && node[1][0] == node[2]) + { + // str.replace( x, str.replace( x, y, x ), x ) ---> x + return returnRewrite(node, node[0], "repl-repl2-inv-id"); + } + bool dualReplIteSuccess = false; + Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + if (!cmp_con2.isNull() && !cmp_con2.getConst()) + { + // str.contains( x, z ) ---> false + // implies + // str.replace( x, str.replace( x, y, z ), w ) ---> + // ite( str.contains( x, y ), x, w ) + dualReplIteSuccess = true; + } + else + { + // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false + // implies + // str.replace( x, str.replace( x, y, z ), w ) ---> + // ite( str.contains( x, y ), x, w ) + cmp_con2 = checkEntailContains(node[1][1], node[1][2]); + if (!cmp_con2.isNull() && !cmp_con2.getConst()) + { + cmp_con2 = checkEntailContains(node[1][2], node[1][1]); + if (!cmp_con2.isNull() && !cmp_con2.getConst()) + { + dualReplIteSuccess = true; + } + } + } + if (dualReplIteSuccess) + { + Node res = nm->mkNode(ITE, + nm->mkNode(STRING_STRCTN, node[0], node[1][1]), + node[0], + node[2]); + return returnRewrite(node, 
res, "repl-dual-repl-ite"); + } + } + + bool invSuccess = false; + if (node[1][1] == node[0]) + { + if (node[1][0] == node[1][2]) + { + // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w) + invSuccess = true; + } + else if (node[1][1] == node[2] || node[1][0] == node[2]) + { + // str.contains(y, z) ----> false and ( y == w or x == w ) implies + // implies + // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w) + Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); + invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); + } + } + else + { + // str.contains(x, z) ----> false and str.contains(x, w) ----> false + // implies + // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u) + Node cmp_con2 = checkEntailContains(node[0], node[1][1]); + if (!cmp_con2.isNull() && !cmp_con2.getConst()) + { + cmp_con2 = checkEntailContains(node[0], node[1][2]); + invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); + } + } + if (invSuccess) + { + Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]); + return returnRewrite(node, res, "repl-repl2-inv"); + } + } + if (node[2].getKind() == STRING_STRREPL) + { + if (node[2][1] == node[0]) + { + // str.contains( z, w ) ----> false implies + // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z ) + Node cmp_con2 = checkEntailContains(node[1], node[2][0]); + if (!cmp_con2.isNull() && !cmp_con2.getConst()) + { + Node res = + nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]); + return returnRewrite(node, res, "repl-repl3-inv"); + } + } + if (node[2][0] == node[1]) + { + bool success = false; + if (node[2][0] == node[2][2] && node[2][1] == node[0]) + { + // str.replace( x, y, str.replace( y, x, y ) ) ---> x + success = true; + } + else + { + // str.contains( x, z ) ----> false implies + // str.replace( x, y, str.replace( y, z, w ) ) ---> x + cmp_con = checkEntailContains(node[0], node[2][1]); + success = !cmp_con.isNull() && 
!cmp_con.getConst(); + } + if (success) + { + return returnRewrite(node, node[0], "repl-repl3-inv-id"); + } + } + } + // miniscope based on components that do not contribute to contains + // for example, + // str.replace( x ++ y ++ x ++ y, "A", z ) --> + // str.replace( x ++ y, "A", z ) ++ x ++ y + // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y. + if (checkEntailLengthOne(node[1])) + { + Node lastLhs; + unsigned lastCheckIndex = 0; + for (unsigned i = 1, iend = children0.size(); i < iend; i++) + { + unsigned checkIndex = children0.size() - i; + std::vector checkLhs; + checkLhs.insert( + checkLhs.end(), children0.begin(), children0.begin() + checkIndex); + Node lhs = utils::mkConcat(STRING_CONCAT, checkLhs); + Node rhs = children0[checkIndex]; + Node ctn = checkEntailContains(lhs, rhs); + if (!ctn.isNull() && ctn.getConst()) + { + lastLhs = lhs; + lastCheckIndex = checkIndex; + } + else + { + break; + } + } + if (!lastLhs.isNull()) + { + std::vector remc(children0.begin() + lastCheckIndex, + children0.end()); + Node rem = utils::mkConcat(STRING_CONCAT, remc); + Node ret = + nm->mkNode(STRING_CONCAT, + nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]), + rem); + // for example: + // str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x + // Since we know that the first occurrence of "A" cannot be in the + // second occurrence of x. Notice this is specific to single characters + // due to complications with finds that span multiple components for + // non-characters. + return returnRewrite(node, ret, "repl-char-ncontrib-find"); + } + } + + // TODO (#1180) incorporate these? 
+ // contains( t, s ) => + // replace( replace( x, t, s ), s, r ) ----> replace( x, t, r ) + // contains( t, s ) => + // contains( replace( t, s, r ), r ) ----> true + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplaceAll(Node node) +{ + Assert(node.getKind() == STRING_STRREPLALL); + + if (node[0].isConst() && node[1].isConst()) + { + std::vector children; + Node s = node[0]; + Node t = node[1]; + if (Word::isEmpty(s) || Word::isEmpty(t)) + { + return returnRewrite(node, node[0], "replall-empty-find"); + } + std::size_t sizeS = Word::getLength(s); + std::size_t sizeT = Word::getLength(t); + std::size_t index = 0; + std::size_t curr = 0; + do + { + curr = Word::find(s, t, index); + if (curr != std::string::npos) + { + if (curr > index) + { + children.push_back(Word::substr(s, index, curr - index)); + } + children.push_back(node[2]); + index = curr + sizeT; + } + else + { + children.push_back(Word::substr(s, index, sizeS - index)); + } + } while (curr != std::string::npos && curr < sizeS); + // constant evaluation + Node res = utils::mkConcat(STRING_CONCAT, children); + return returnRewrite(node, res, "replall-const"); + } + + // rewrites that apply to both replace and replaceall + Node rri = rewriteReplaceInternal(node); + if (!rri.isNull()) + { + // printing of the rewrite managed by the call above + return rri; + } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; + return node; +} + +Node SequencesRewriter::rewriteReplaceInternal(Node node) +{ + Kind nk = node.getKind(); + Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL); + + if (node[1] == node[2]) + { + return returnRewrite(node, node[0], "rpl-id"); + } + + if (node[0] == node[1]) + { + // only holds for replaceall if non-empty + if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1])) + { + return returnRewrite(node, node[2], "rpl-replace"); + } + } + + return Node::null(); +} + +Node 
SequencesRewriter::rewriteStrReverse(Node node) +{ + Assert(node.getKind() == STRING_REV); + NodeManager* nm = NodeManager::currentNM(); + Node x = node[0]; + if (x.isConst()) + { + std::vector nvec = node[0].getConst().getVec(); + std::reverse(nvec.begin(), nvec.end()); + Node retNode = nm->mkConst(String(nvec)); + return returnRewrite(node, retNode, "str-conv-const"); + } + else if (x.getKind() == STRING_CONCAT) + { + std::vector children; + for (const Node& nc : x) + { + children.push_back(nm->mkNode(STRING_REV, nc)); + } + std::reverse(children.begin(), children.end()); + // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 ) + Node retNode = nm->mkNode(STRING_CONCAT, children); + return returnRewrite(node, retNode, "str-rev-minscope-concat"); + } + else if (x.getKind() == STRING_REV) + { + // rev( rev( x ) ) --> x + Node retNode = x[0]; + return returnRewrite(node, retNode, "str-rev-idem"); + } + return node; +} + +Node SequencesRewriter::rewritePrefixSuffix(Node n) +{ + Assert(n.getKind() == kind::STRING_PREFIX + || n.getKind() == kind::STRING_SUFFIX); + bool isPrefix = n.getKind() == kind::STRING_PREFIX; + if (n[0] == n[1]) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-eq"); + } + if (n[0].isConst()) + { + CVC4::String t = n[0].getConst(); + if (t.isEmptyString()) + { + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(n, ret, "suf/prefix-empty-const"); + } + } + if (n[1].isConst()) + { + Node s = n[1]; + size_t lenS = Word::getLength(s); + if (n[0].isConst()) + { + Node ret = NodeManager::currentNM()->mkConst(false); + Node t = n[0]; + size_t lenT = Word::getLength(t); + if (lenS >= lenT) + { + if ((isPrefix && t == Word::prefix(s, lenT)) + || (!isPrefix && t == Word::suffix(s, lenT))) + { + ret = NodeManager::currentNM()->mkConst(true); + } + } + return returnRewrite(n, ret, "suf/prefix-const"); + } + else if (lenS == 0) + { + Node ret = n[0].eqNode(n[1]); + return returnRewrite(n, ret, 
"suf/prefix-empty"); + } + else if (lenS == 1) + { + // (str.prefix x "A") and (str.suffix x "A") are equivalent to + // (str.contains "A" x ) + Node ret = + NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]); + return returnRewrite(n, ret, "suf/prefix-ctn"); + } + } + Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]); + Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]); + Node val; + if (isPrefix) + { + val = NodeManager::currentNM()->mkConst(::CVC4::Rational(0)); + } + else + { + val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens); + } + + // Check if we can turn the prefix/suffix into equalities by showing that the + // prefix/suffix is at least as long as the string + Node eqs = inferEqsFromContains(n[1], n[0]); + if (!eqs.isNull()) + { + return returnRewrite(n, eqs, "suf/prefix-to-eqs"); + } + + // general reduction to equality + substr + Node retNode = n[0].eqNode( + NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens)); + + return retNode; +} + +Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev) +{ + Assert(a.isConst() && b.isConst()); + size_t lenA = Word::getLength(a); + size_t lenB = Word::getLength(b); + index = lenA <= lenB ? 1 : 0; + size_t len_short = index == 1 ? lenA : lenB; + bool cmp = + isRev ? a.getConst().rstrncmp(b.getConst(), len_short) + : a.getConst().strncmp(b.getConst(), len_short); + if (cmp) + { + Node l = index == 0 ? 
a : b; + if (isRev) + { + int new_len = l.getConst().size() - len_short; + return Word::substr(l, 0, new_len); + } + else + { + return Word::substr(l, len_short); + } + } + // not the same prefix/suffix + return Node::null(); +} + +bool SequencesRewriter::canConstantContainConcat(Node c, + Node n, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + CVC4::String t = c.getConst(); + const std::vector& tvec = t.getVec(); + Assert(n.getKind() == kind::STRING_CONCAT); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < n.getNumChildren(); i++) + { + if (n[i].isConst()) + { + firstc = firstc == -1 ? i : firstc; + lastc = i; + CVC4::String s = n[i].getConst(); + size_t new_pos = t.find(s, pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + s.size(); + } + } + else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0])) + { + // find the first occurrence of a digit starting at pos + while (pos < tvec.size() && !String::isDigit(tvec[pos])) + { + pos++; + } + if (pos == tvec.size()) + { + return false; + } + // must consume at least one digit here + pos++; + } + } + return true; +} + +bool SequencesRewriter::canConstantContainList(Node c, + std::vector& l, + int& firstc, + int& lastc) +{ + Assert(c.isConst()); + // must find constant components in order + size_t pos = 0; + firstc = -1; + lastc = -1; + for (unsigned i = 0; i < l.size(); i++) + { + if (l[i].isConst()) + { + firstc = firstc == -1 ? 
i : firstc; + lastc = i; + size_t new_pos = Word::find(c, l[i], pos); + if (new_pos == std::string::npos) + { + return false; + } + else + { + pos = new_pos + Word::getLength(l[i]); + } + } + } + return true; +} + +bool SequencesRewriter::stripSymbolicLength(std::vector& n1, + std::vector& nr, + int dir, + Node& curr) +{ + Assert(dir == 1 || dir == -1); + Assert(nr.empty()); + Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + bool ret = false; + bool success; + unsigned sindex = 0; + do + { + Assert(!curr.isNull()); + success = false; + if (curr != zero && sindex < n1.size()) + { + unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); + if (n1[sindex_use].isConst()) + { + // could strip part of a constant + Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr)); + if (!lowerBound.isNull()) + { + Assert(lowerBound.isConst()); + Rational lbr = lowerBound.getConst(); + if (lbr.sgn() > 0) + { + Assert(checkEntailArith(curr, true)); + CVC4::String s = n1[sindex_use].getConst(); + Node ncl = + NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); + Node next_s = + NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl); + next_s = Rewriter::rewrite(next_s); + Assert(next_s.isConst()); + // we can remove the entire constant + if (next_s.getConst().sgn() >= 0) + { + curr = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl)); + success = true; + sindex++; + } + else + { + // we can remove part of the constant + // lower bound minus the length of a concrete string is negative, + // hence lowerBound cannot be larger than long max + Assert(lbr < Rational(String::maxSize())); + curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::MINUS, curr, lowerBound)); + uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); + Assert(lbsize < s.size()); + if (dir == 1) + { + // strip partially from the front + nr.push_back( + NodeManager::currentNM()->mkConst(s.prefix(lbsize))); + 
n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.suffix(s.size() - lbsize)); + } + else + { + // strip partially from the back + nr.push_back( + NodeManager::currentNM()->mkConst(s.suffix(lbsize))); + n1[sindex_use] = NodeManager::currentNM()->mkConst( + s.prefix(s.size() - lbsize)); + } + ret = true; + } + Assert(checkEntailArith(curr)); + } + else + { + // we cannot remove the constant + } + } + } + else + { + Node next_s = NodeManager::currentNM()->mkNode( + kind::MINUS, + curr, + NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, + n1[sindex_use])); + next_s = Rewriter::rewrite(next_s); + if (checkEntailArith(next_s)) + { + success = true; + curr = next_s; + sindex++; + } + } + } + } while (success); + if (sindex > 0) + { + if (dir == 1) + { + nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); + n1.erase(n1.begin(), n1.begin() + sindex); + } + else + { + nr.insert(nr.end(), n1.end() - sindex, n1.end()); + n1.erase(n1.end() - sindex, n1.end()); + } + ret = true; + } + return ret; +} + +int SequencesRewriter::componentContains(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + bool computeRemainder, + int remainderDir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + // if n2 is a singleton, we can do optimized version here + if (n2.size() == 1) + { + for (unsigned i = 0; i < n1.size(); i++) + { + Node n1rb; + Node n1re; + if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) + { + if (computeRemainder) + { + n1[i] = n2[0]; + if (remainderDir != -1) + { + if (!n1re.isNull()) + { + ne.push_back(n1re); + } + ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); + n1.erase(n1.begin() + i + 1, n1.end()); + } + else if (!n1re.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1[i], n1re)); + } + if (remainderDir != 1) + { + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb.isNull()) + { + nb.push_back(n1rb); + } + 
} + else if (!n1rb.isNull()) + { + n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, n1rb, n1[i])); + } + } + return i; + } + } + } + else if (n1.size() >= n2.size()) + { + unsigned diff = n1.size() - n2.size(); + for (unsigned i = 0; i <= diff; i++) + { + Node n1rb_first; + Node n1re_first; + // first component of n2 must be a suffix + if (componentContainsBase(n1[i], + n2[0], + n1rb_first, + n1re_first, + 1, + computeRemainder && remainderDir != 1)) + { + Assert(n1re_first.isNull()); + for (unsigned j = 1; j < n2.size(); j++) + { + // are we in the last component? + if (j + 1 == n2.size()) + { + Node n1rb_last; + Node n1re_last; + // last component of n2 must be a prefix + if (componentContainsBase(n1[i + j], + n2[j], + n1rb_last, + n1re_last, + -1, + computeRemainder && remainderDir != -1)) + { + Assert(n1rb_last.isNull()); + if (computeRemainder) + { + if (remainderDir != -1) + { + if (!n1re_last.isNull()) + { + ne.push_back(n1re_last); + } + ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); + n1.erase(n1.begin() + i + j + 1, n1.end()); + n1[i + j] = n2[j]; + } + if (remainderDir != 1) + { + n1[i] = n2[0]; + nb.insert(nb.end(), n1.begin(), n1.begin() + i); + n1.erase(n1.begin(), n1.begin() + i); + if (!n1rb_first.isNull()) + { + nb.push_back(n1rb_first); + } + } + } + return i; + } + else + { + break; + } + } + else if (n1[i + j] != n2[j]) + { + break; + } + } + } + } + } + return -1; +} + +bool SequencesRewriter::componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) +{ + Assert(n1rb.isNull()); + Assert(n1re.isNull()); + + NodeManager* nm = NodeManager::currentNM(); + + if (n1 == n2) + { + return true; + } + else + { + if (n1.isConst() && n2.isConst()) + { + size_t len1 = Word::getLength(n1); + size_t len2 = Word::getLength(n2); + if (len2 < len1) + { + if (dir == 1) + { + if (Word::suffix(n1, len2) == n2) + { + if (computeRemainder) + { + n1rb = Word::prefix(n1, len1 - len2); + 
} + return true; + } + } + else if (dir == -1) + { + if (Word::prefix(n1, len2) == n2) + { + if (computeRemainder) + { + n1re = Word::suffix(n1, len1 - len2); + } + return true; + } + } + else + { + size_t f = Word::find(n1, n2); + if (f != std::string::npos) + { + if (computeRemainder) + { + if (f > 0) + { + n1rb = Word::prefix(n1, f); + } + if (len1 > f + len2) + { + n1re = Word::suffix(n1, len1 - (f + len2)); + } + } + return true; + } + } + } + } + else + { + // cases for: + // n1 = x containing n2 = substr( x, n2[1], n2[2] ) + if (n2.getKind() == kind::STRING_SUBSTR) + { + if (n2[0] == n1) + { + bool success = true; + Node start_pos = n2[1]; + Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]); + Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]); + if (dir == 1) + { + // To be a suffix, start + length must be greater than + // or equal to the length of the string. + success = checkEntailArith(end_pos, len_n2s); + } + else if (dir == -1) + { + // To be a prefix, must literally start at 0, since + // if we knew it started at <0, it should be rewritten to "", + // if we knew it started at 0, then n2[1] should be rewritten to + // 0. + success = start_pos.isConst() + && start_pos.getConst().sgn() == 0; + } + if (success) + { + if (computeRemainder) + { + // we can only compute the remainder if start_pos and end_pos + // are known to be non-negative. 
+ if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos)) + { + return false; + } + if (dir != 1) + { + n1rb = nm->mkNode(kind::STRING_SUBSTR, + n2[0], + nm->mkConst(Rational(0)), + start_pos); + } + if (dir != -1) + { + n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s); + } + } + return true; + } + } + } + + if (!computeRemainder && dir == 0) + { + if (n1.getKind() == STRING_STRREPL) + { + // (str.contains (str.replace x y z) w) ---> true + // if (str.contains x w) --> true and (str.contains z w) ---> true + Node xCtnW = checkEntailContains(n1[0], n2); + if (!xCtnW.isNull() && xCtnW.getConst()) + { + Node zCtnW = checkEntailContains(n1[2], n2); + if (!zCtnW.isNull() && zCtnW.getConst()) + { + return true; + } + } + } + } + } + } + return false; +} + +bool SequencesRewriter::stripConstantEndpoints(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + int dir) +{ + Assert(nb.empty()); + Assert(ne.empty()); + + NodeManager* nm = NodeManager::currentNM(); + bool changed = false; + // for ( forwards, backwards ) direction + for (unsigned r = 0; r < 2; r++) + { + if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) + { + unsigned index0 = r == 0 ? 0 : n1.size() - 1; + unsigned index1 = r == 0 ? 0 : n2.size() - 1; + bool removeComponent = false; + Node n1cmp = n1[index0]; + + if (n1cmp.isConst() && n1cmp.getConst().size() == 0) + { + return false; + } + + std::vector sss; + std::vector sls; + n1cmp = decomposeSubstrChain(n1cmp, sss, sls); + Trace("strings-rewrite-debug2") + << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] + << ", dir = " << dir << std::endl; + if (n1cmp.isConst()) + { + CVC4::String s = n1cmp.getConst(); + // overlap is an overapproximation of the number of characters + // n2[index1] can match in s + unsigned overlap = s.size(); + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst(); + std::size_t ret = r == 0 ? 
s.find(t) : s.rfind(t); + if (ret == std::string::npos) + { + if (n1.size() == 1) + { + // can remove everything + // e.g. str.contains( "abc", str.++( "ba", x ) ) --> + // str.contains( "", str.++( "ba", x ) ) + removeComponent = true; + } + else if (sss.empty()) // only if not substr + { + // check how much overlap there is + // This is used to partially strip off the endpoint + // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> + // str.contains( str.++( "c", x ), str.++( "cd", y ) ) + overlap = r == 0 ? s.overlap(t) : t.overlap(s); + } + else + { + // if we are looking at a substring, we can remove the component + // if there is no overlap + // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) + // --> str.contains( x, "a" ) + removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); + } + } + else if (sss.empty()) // only if not substr + { + Assert(ret < s.size()); + // can strip off up to the find position, e.g. + // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> + // str.contains( str.++( "bc", x ), str.++( "b", y ) ), + // and + // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> + // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) + overlap = s.size() - ret; + } + } + else + { + // inconclusive + } + // process the overlap + if (overlap < s.size()) + { + changed = true; + if (overlap == 0) + { + removeComponent = true; + } + else + { + // can drop the prefix (resp. suffix) from the first (resp. 
last) + // component + if (r == 0) + { + nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.suffix(overlap)); + } + else + { + ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); + n1[index0] = nm->mkConst(s.prefix(overlap)); + } + } + } + } + else if (n1cmp.getKind() == kind::STRING_ITOS) + { + if (n2[index1].isConst()) + { + CVC4::String t = n2[index1].getConst(); + + if (n1.size() == 1) + { + // if n1.size()==1, then if n2[index1] is not a number, we can drop + // the entire component + // e.g. str.contains( int.to.str(x), "123a45") --> false + if (!t.isNumber()) + { + removeComponent = true; + } + } + else + { + const std::vector& tvec = t.getVec(); + Assert(tvec.size() > 0); + + // if n1.size()>1, then if the first (resp. last) character of + // n2[index1] + // is not a digit, we can drop the entire component, e.g.: + // str.contains( str.++( int.to.str(x), y ), "a12") --> + // str.contains( y, "a12" ) + // str.contains( str.++( y, int.to.str(x) ), "a0b") --> + // str.contains( y, "a0b" ) + unsigned i = r == 0 ? 0 : (tvec.size() - 1); + if (!String::isDigit(tvec[i])) + { + removeComponent = true; + } + } + } + } + if (removeComponent) + { + // can drop entire first (resp. last) component + if (r == 0) + { + nb.push_back(n1[index0]); + n1.erase(n1.begin(), n1.begin() + 1); + } + else + { + ne.push_back(n1[index0]); + n1.pop_back(); + } + if (n1.empty()) + { + // if we've removed everything, just return (we will rewrite to false) + return true; + } + else + { + changed = true; + } + } + } + } + // TODO (#1180) : computing the maximal overlap in this function may be + // important. + // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> + // false + // ...since str.to.int(x) can contain at most 1 character from "2aaaa", + // leaving 4 characters + // which is larger that the upper bound for length of str.substr(y,0,3), + // which is 3. 
+ return changed; +} + +Node SequencesRewriter::canonicalStrForSymbolicLength(Node len) +{ + NodeManager* nm = NodeManager::currentNM(); + + Node res; + if (len.getKind() == kind::CONST_RATIONAL) + { + // c -> "A" repeated c times + Rational ratLen = len.getConst(); + Assert(ratLen.getDenominator() == 1); + Integer intLen = ratLen.getNumerator(); + res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A'))); + } + else if (len.getKind() == kind::PLUS) + { + // x + y -> norm(x) + norm(y) + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (const auto& n : len) + { + Node sn = canonicalStrForSymbolicLength(n); + if (sn.isNull()) + { + return Node::null(); + } + std::vector snChildren; + utils::getConcat(sn, snChildren); + concatBuilder.append(snChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::MULT && len.getNumChildren() == 2 + && len[0].isConst()) + { + // c * x -> norm(x) repeated c times + Rational ratReps = len[0].getConst(); + Assert(ratReps.getDenominator() == 1); + Integer intReps = ratReps.getNumerator(); + + Node nRep = canonicalStrForSymbolicLength(len[1]); + std::vector nRepChildren; + utils::getConcat(nRep, nRepChildren); + NodeBuilder<> concatBuilder(kind::STRING_CONCAT); + for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++) + { + concatBuilder.append(nRepChildren); + } + res = concatBuilder.constructNode(); + } + else if (len.getKind() == kind::STRING_LENGTH) + { + // len(x) -> x + res = len[0]; + } + return res; +} + +Node SequencesRewriter::lengthPreserveRewrite(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); + Node res = canonicalStrForSymbolicLength(len); + return res.isNull() ? 
n : res; +} + +Node SequencesRewriter::checkEntailContains(Node a, Node b, bool fullRewriter) +{ + NodeManager* nm = NodeManager::currentNM(); + Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b); + + if (fullRewriter) + { + ctn = Rewriter::rewrite(ctn); + } + else + { + Node prev; + do + { + prev = ctn; + ctn = rewriteContains(ctn); + } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN); + } + + Assert(ctn.getType().isBoolean()); + return ctn.isConst() ? ctn : Node::null(); +} + +bool SequencesRewriter::checkEntailNonEmpty(Node a) +{ + Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); + len = Rewriter::rewrite(len); + return checkEntailArith(len, true); +} + +bool SequencesRewriter::checkEntailLengthOne(Node s, bool strict) +{ + NodeManager* nm = NodeManager::currentNM(); + Node one = nm->mkConst(Rational(1)); + Node len = nm->mkNode(STRING_LENGTH, s); + len = Rewriter::rewrite(len); + return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true)); +} + +bool SequencesRewriter::checkEntailArithEq(Node a, Node b) +{ + if (a == b) + { + return true; + } + else + { + Node ar = Rewriter::rewrite(a); + Node br = Rewriter::rewrite(b); + return ar == br; + } +} + +bool SequencesRewriter::checkEntailArith(Node a, Node b, bool strict) +{ + if (a == b) + { + return !strict; + } + else + { + Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); + return checkEntailArith(diff, strict); + } +} + +struct StrCheckEntailArithTag +{ +}; +struct StrCheckEntailArithComputedTag +{ +}; +/** Attribute true for expressions for which checkEntailArith returned true */ +typedef expr::Attribute StrCheckEntailArithAttr; +typedef expr::Attribute + StrCheckEntailArithComputedAttr; + +bool SequencesRewriter::checkEntailArith(Node a, bool strict) +{ + if (a.isConst()) + { + return a.getConst().sgn() >= (strict ? 1 : 0); + } + + Node ar = + strict + ? 
NodeManager::currentNM()->mkNode( + kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1))) + : a; + ar = Rewriter::rewrite(ar); + + if (ar.getAttribute(StrCheckEntailArithComputedAttr())) + { + return ar.getAttribute(StrCheckEntailArithAttr()); + } + + bool ret = checkEntailArithInternal(ar); + if (!ret) + { + // try with approximations + ret = checkEntailArithApprox(ar); + } + // cache the result + ar.setAttribute(StrCheckEntailArithAttr(), ret); + ar.setAttribute(StrCheckEntailArithComputedAttr(), true); + return ret; +} + +bool SequencesRewriter::checkEntailArithApprox(Node ar) +{ + Assert(Rewriter::rewrite(ar) == ar); + NodeManager* nm = NodeManager::currentNM(); + std::map msum; + Trace("strings-ent-approx-debug") + << "Setup arithmetic approximations for " << ar << std::endl; + if (!ArithMSum::getMonomialSum(ar, msum)) + { + Trace("strings-ent-approx-debug") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // for each monomial v*c, mApprox[v] a list of + // possibilities for how the term can be soundly approximated, that is, + // if mApprox[v] contains av, then v*c > av*c. Notice that if c + // is positive, then v > av, otherwise if c is negative, then v < av. + // In other words, av is an under-approximation if c is positive, and an + // over-approximation if c is negative. + bool changed = false; + std::map > mApprox; + // map from approximations to their monomial sums + std::map > approxMsums; + // aarSum stores each monomial that does not have multiple approximations + std::vector aarSum; + for (std::pair& m : msum) + { + Node v = m.first; + Node c = m.second; + Trace("strings-ent-approx-debug") + << "Get approximations " << v << "..." << std::endl; + if (v.isNull()) + { + Node mn = c.isNull() ? 
nm->mkConst(Rational(1)) : c; + aarSum.push_back(mn); + } + else + { + // c.isNull() means c = 1 + bool isOverApprox = !c.isNull() && c.getConst().sgn() == -1; + std::vector& approx = mApprox[v]; + std::unordered_set visited; + std::vector toProcess; + toProcess.push_back(v); + do + { + Node curr = toProcess.back(); + Trace("strings-ent-approx-debug") << " process " << curr << std::endl; + curr = Rewriter::rewrite(curr); + toProcess.pop_back(); + if (visited.find(curr) == visited.end()) + { + visited.insert(curr); + std::vector currApprox; + getArithApproximations(curr, currApprox, isOverApprox); + if (currApprox.empty()) + { + Trace("strings-ent-approx-debug") + << "...approximation: " << curr << std::endl; + // no approximations, thus curr is a possibility + approx.push_back(curr); + } + else + { + toProcess.insert( + toProcess.end(), currApprox.begin(), currApprox.end()); + } + } + } while (!toProcess.empty()); + Assert(!approx.empty()); + // if we have only one approximation, move it to final + if (approx.size() == 1) + { + changed = v != approx[0]; + Node mn = ArithMSum::mkCoeffTerm(c, approx[0]); + aarSum.push_back(mn); + mApprox.erase(v); + } + else + { + // compute monomial sum form for each approximation, used below + for (const Node& aa : approx) + { + if (approxMsums.find(aa) == approxMsums.end()) + { + CVC4_UNUSED bool ret = + ArithMSum::getMonomialSum(aa, approxMsums[aa]); + Assert(ret); + } + } + changed = true; + } + } + } + if (!changed) + { + // approximations had no effect, return + Trace("strings-ent-approx-debug") << "...no approximations" << std::endl; + return false; + } + // get the current "fixed" sum for the abstraction of ar + Node aar = aarSum.empty() + ? nm->mkConst(Rational(0)) + : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum)); + aar = Rewriter::rewrite(aar); + Trace("strings-ent-approx-debug") + << "...processed fixed sum " << aar << " with " << mApprox.size() + << " approximated monomials." 
<< std::endl; + // if we have a choice of how to approximate + if (!mApprox.empty()) + { + // convert aar back to monomial sum + std::map msumAar; + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + return false; + } + if (Trace.isOn("strings-ent-approx")) + { + Trace("strings-ent-approx") + << "---- Check arithmetic entailment by under-approximation " << ar + << " >= 0" << std::endl; + Trace("strings-ent-approx") << "FIXED:" << std::endl; + ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx"); + Trace("strings-ent-approx") << "APPROX:" << std::endl; + for (std::pair >& a : mApprox) + { + Node c = msum[a.first]; + Trace("strings-ent-approx") << " "; + if (!c.isNull()) + { + Trace("strings-ent-approx") << c << " * "; + } + Trace("strings-ent-approx") + << a.second << " ...from " << a.first << std::endl; + } + Trace("strings-ent-approx") << std::endl; + } + Rational one(1); + // incorporate monomials one at a time that have a choice of approximations + while (!mApprox.empty()) + { + Node v; + Node vapprox; + int maxScore = -1; + // Look at each approximation, take the one with the best score. + // Notice that we are in the process of trying to prove + // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0, + // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar) + // and approx_1 ... approx_m are possible approximations. The + // intution here is that we want coefficients c1...cn to be positive. + // This is because arithmetic string terms t1...tn (which may be + // applications of len, indexof, str.to.int) are never entailed to be + // negative. Hence, we add the approx_i that contributes the "most" + // towards making all constants c1...cn positive and cancelling negative + // monomials in approx_i itself. 
+ for (std::pair >& nam : mApprox) + { + Node cr = msum[nam.first]; + for (const Node& aa : nam.second) + { + unsigned helpsCancelCount = 0; + unsigned addsObligationCount = 0; + std::map::iterator it; + // we are processing an approximation cr*( c1*t1 + ... + cn*tn ) + for (std::pair& aam : approxMsums[aa]) + { + // Say aar is of the form t + c*ti, and aam is the monomial ci*ti + // where ci != 0. We say aam: + // (1) helps cancel if c != 0 and c>0 != ci>0 + // (2) adds obligation if c>=0 and c+ci<0 + Node ti = aam.first; + Node ci = aam.second; + if (!cr.isNull()) + { + ci = ci.isNull() ? cr + : Rewriter::rewrite(nm->mkNode(MULT, ci, cr)); + } + Trace("strings-ent-approx-debug") << ci << "*" << ti << " "; + int ciSgn = ci.isNull() ? 1 : ci.getConst().sgn(); + it = msumAar.find(ti); + if (it != msumAar.end()) + { + Node c = it->second; + int cSgn = c.isNull() ? 1 : c.getConst().sgn(); + if (cSgn == 0) + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + else if (cSgn != ciSgn) + { + helpsCancelCount++; + Rational r1 = c.isNull() ? one : c.getConst(); + Rational r2 = ci.isNull() ? one : ci.getConst(); + Rational r12 = r1 + r2; + if (r12.sgn() == -1) + { + addsObligationCount++; + } + } + } + else + { + addsObligationCount += (ciSgn == -1 ? 1 : 0); + } + } + Trace("strings-ent-approx-debug") + << "counts=" << helpsCancelCount << "," << addsObligationCount + << " for " << aa << " into " << aar << std::endl; + int score = (addsObligationCount > 0 ? 0 : 2) + + (helpsCancelCount > 0 ? 
1 : 0); + // if its the best, update v and vapprox + if (v.isNull() || score > maxScore) + { + v = nam.first; + vapprox = aa; + maxScore = score; + } + } + if (!v.isNull()) + { + break; + } + } + Trace("strings-ent-approx") + << "- Decide " << v << " = " << vapprox << std::endl; + // we incorporate v approximated by vapprox into the overall approximation + // for ar + Assert(!v.isNull() && !vapprox.isNull()); + Assert(msum.find(v) != msum.end()); + Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox); + aar = nm->mkNode(PLUS, aar, mn); + // update the msumAar map + aar = Rewriter::rewrite(aar); + msumAar.clear(); + if (!ArithMSum::getMonomialSum(aar, msumAar)) + { + Assert(false); + Trace("strings-ent-approx") + << "...failed to get monomial sum!" << std::endl; + return false; + } + // we have processed the approximation for v + mApprox.erase(v); + } + Trace("strings-ent-approx") << "-----------------" << std::endl; + } + if (aar == ar) + { + Trace("strings-ent-approx-debug") + << "...approximation had no effect" << std::endl; + // this should never happen, but we avoid the infinite loop for sanity here + Assert(false); + return false; + } + // Check entailment on the approximation of ar. + // Notice that this may trigger further reasoning by approximation. For + // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be + // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on + // this call, where in the recursive call we may over-approximate + // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer + // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two + // steps. + if (checkEntailArith(aar)) + { + Trace("strings-ent-approx") + << "*** StrArithApprox: showed " << ar + << " >= 0 using under-approximation!" 
<< std::endl; + Trace("strings-ent-approx") + << "*** StrArithApprox: under-approximation was " << aar << std::endl; + return true; + } + return false; +} + +void SequencesRewriter::getArithApproximations(Node a, + std::vector& approx, + bool isOverApprox) +{ + NodeManager* nm = NodeManager::currentNM(); + // We do not handle PLUS here since this leads to exponential behavior. + // Instead, this is managed, e.g. during checkEntailArithApprox, where + // PLUS terms are expanded "on-demand" during the reasoning. + Trace("strings-ent-approx-debug") + << "Get arith approximations " << a << std::endl; + Kind ak = a.getKind(); + if (ak == MULT) + { + Node c; + Node v; + if (ArithMSum::getMonomial(a, c, v)) + { + bool isNeg = c.getConst().sgn() == -1; + getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox); + for (unsigned i = 0, size = approx.size(); i < size; i++) + { + approx[i] = nm->mkNode(MULT, c, approx[i]); + } + } + } + else if (ak == STRING_LENGTH) + { + Kind aak = a[0].getKind(); + if (aak == STRING_SUBSTR) + { + // over,under-approximations for len( substr( x, n, m ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + if (isOverApprox) + { + // m >= 0 implies + // m >= len( substr( x, n, m ) ) + if (checkEntailArith(a[0][2])) + { + approx.push_back(a[0][2]); + } + if (checkEntailArith(lenx, a[0][1])) + { + // n <= len( x ) implies + // len( x ) - n >= len( substr( x, n, m ) ) + approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); + } + else + { + // len( x ) >= len( substr( x, n, m ) ) + approx.push_back(lenx); + } + } + else + { + // 0 <= n and n+m <= len( x ) implies + // m <= len( substr( x, n, m ) ) + Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]); + if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm)) + { + approx.push_back(a[0][2]); + } + // 0 <= n and n+m >= len( x ) implies + // len(x)-n <= len( substr( x, n, m ) ) + if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx)) + { + approx.push_back(nm->mkNode(MINUS, lenx, 
a[0][1])); + } + } + } + else if (aak == STRING_STRREPL) + { + // over,under-approximations for len( replace( x, y, z ) ) + // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) + Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); + Node leny = nm->mkNode(STRING_LENGTH, a[0][1]); + Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]); + if (isOverApprox) + { + if (checkEntailArith(leny, lenz)) + { + // len( y ) >= len( z ) implies + // len( x ) >= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) + len( z ) >= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(PLUS, lenx, lenz)); + } + } + else + { + if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx)) + { + // len( y ) <= len( z ) or len( x ) <= len( z ) implies + // len( x ) <= len( replace( x, y, z ) ) + approx.push_back(lenx); + } + else + { + // len( x ) - len( y ) <= len( replace( x, y, z ) ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + } + } + else if (aak == STRING_ITOS) + { + // over,under-approximations for len( int.to.str( x ) ) + if (isOverApprox) + { + if (checkEntailArith(a[0][0], false)) + { + if (checkEntailArith(a[0][0], true)) + { + // x > 0 implies + // x >= len( int.to.str( x ) ) + approx.push_back(a[0][0]); + } + else + { + // x >= 0 implies + // x+1 >= len( int.to.str( x ) ) + approx.push_back( + nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0])); + } + } + } + else + { + if (checkEntailArith(a[0][0])) + { + // x >= 0 implies + // len( int.to.str( x ) ) >= 1 + approx.push_back(nm->mkConst(Rational(1))); + } + // other crazy things are possible here, e.g. 
+ // len( int.to.str( len( y ) + 10 ) ) >= 2 + } + } + } + else if (ak == STRING_STRIDOF) + { + // over,under-approximations for indexof( x, y, n ) + if (isOverApprox) + { + Node lenx = nm->mkNode(STRING_LENGTH, a[0]); + Node leny = nm->mkNode(STRING_LENGTH, a[1]); + if (checkEntailArith(lenx, leny)) + { + // len( x ) >= len( y ) implies + // len( x ) - len( y ) >= indexof( x, y, n ) + approx.push_back(nm->mkNode(MINUS, lenx, leny)); + } + else + { + // len( x ) >= indexof( x, y, n ) + approx.push_back(lenx); + } + } + else + { + // TODO?: + // contains( substr( x, n, len( x ) ), y ) implies + // n <= indexof( x, y, n ) + // ...hard to test, runs risk of non-termination + + // -1 <= indexof( x, y, n ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + else if (ak == STRING_STOI) + { + // over,under-approximations for str.to.int( x ) + if (isOverApprox) + { + // TODO?: + // y >= 0 implies + // y >= str.to.int( int.to.str( y ) ) + } + else + { + // -1 <= str.to.int( x ) + approx.push_back(nm->mkConst(Rational(-1))); + } + } + Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl; +} + +bool SequencesRewriter::checkEntailMultisetSubset(Node a, Node b) +{ + NodeManager* nm = NodeManager::currentNM(); + + std::vector avec; + utils::getConcat(getMultisetApproximation(a), avec); + std::vector bvec; + utils::getConcat(b, bvec); + + std::map num_nconst[2]; + std::map num_const[2]; + for (unsigned j = 0; j < 2; j++) + { + std::vector& jvec = j == 0 ? 
avec : bvec; + for (const Node& cc : jvec) + { + if (cc.isConst()) + { + num_const[j][cc]++; + } + else + { + num_nconst[j][cc]++; + } + } + } + bool ms_success = true; + for (std::pair& nncp : num_nconst[0]) + { + if (nncp.second > num_nconst[1][nncp.first]) + { + ms_success = false; + break; + } + } + if (ms_success) + { + // count the number of constant characters in the first argument + std::map count_const[2]; + std::vector chars; + for (unsigned j = 0; j < 2; j++) + { + for (std::pair& ncp : num_const[j]) + { + Node cn = ncp.first; + Assert(cn.isConst()); + std::vector cc_vec; + const std::vector& cvec = cn.getConst().getVec(); + for (unsigned i = 0, size = cvec.size(); i < size; i++) + { + // make the character + cc_vec.clear(); + cc_vec.insert(cc_vec.end(), cvec.begin() + i, cvec.begin() + i + 1); + Node ch = nm->mkConst(String(cc_vec)); + count_const[j][ch] += ncp.second; + if (std::find(chars.begin(), chars.end(), ch) == chars.end()) + { + chars.push_back(ch); + } + } + } + } + Trace("strings-entail-ms-ss") + << "For " << a << " and " << b << " : " << std::endl; + for (const Node& ch : chars) + { + Trace("strings-entail-ms-ss") << " # occurrences of substring "; + Trace("strings-entail-ms-ss") << ch << " in arguments is "; + Trace("strings-entail-ms-ss") + << count_const[0][ch] << " / " << count_const[1][ch] << std::endl; + if (count_const[0][ch] < count_const[1][ch]) + { + return true; + } + } + + // TODO (#1180): count the number of 2,3,4,.. character substrings + // for example: + // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false + // since the second argument contains more occurrences of "bb". + // note this is orthogonal reasoning to inductive reasoning + // via regular membership reduction in Liang et al CAV 2015. 
+ } + return false; +} + +Node SequencesRewriter::checkEntailHomogeneousString(Node a) +{ + NodeManager* nm = NodeManager::currentNM(); + + std::vector avec; + utils::getConcat(getMultisetApproximation(a), avec); + + bool cValid = false; + unsigned c = 0; + for (const Node& ac : avec) + { + if (ac.isConst()) + { + std::vector acv = ac.getConst().getVec(); + for (unsigned cc : acv) + { + if (!cValid) + { + cValid = true; + c = cc; + } + else if (c != cc) + { + // Found a different character + return Node::null(); + } + } + } + else + { + // Could produce a different character + return Node::null(); + } + } + + if (!cValid) + { + return nm->mkConst(String("")); + } + + std::vector cv = {c}; + return nm->mkConst(String(cv)); +} + +Node SequencesRewriter::getMultisetApproximation(Node a) +{ + NodeManager* nm = NodeManager::currentNM(); + if (a.getKind() == STRING_SUBSTR) + { + return a[0]; + } + else if (a.getKind() == STRING_STRREPL) + { + return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); + } + else if (a.getKind() == STRING_CONCAT) + { + NodeBuilder<> nb(STRING_CONCAT); + for (const Node& ac : a) + { + nb << getMultisetApproximation(ac); + } + return nb.constructNode(); + } + else + { + return a; + } +} + +bool SequencesRewriter::checkEntailArithWithEqAssumption(Node assumption, + Node a, + bool strict) +{ + Assert(assumption.getKind() == kind::EQUAL); + Assert(Rewriter::rewrite(assumption) == assumption); + + // Find candidates variables to compute substitutions for + std::unordered_set candVars; + std::vector toVisit = {assumption}; + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT + || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL) + { + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + } + else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH) + { + candVars.insert(curr); + } + else if 
(curr.getKind() == kind::STRING_LENGTH) + { + candVars.insert(curr); + } + } + + // Check if any of the candidate variables occur in a + Node v; + Assert(toVisit.empty()); + toVisit.push_back(a); + while (!toVisit.empty()) + { + Node curr = toVisit.back(); + toVisit.pop_back(); + + for (const auto& currChild : curr) + { + toVisit.push_back(currChild); + } + + if (candVars.find(curr) != candVars.end()) + { + v = curr; + break; + } + } + + if (v.isNull()) + { + // No suitable candidate found + return false; + } + + Node solution = ArithMSum::solveEqualityFor(assumption, v); + if (solution.isNull()) + { + // Could not solve for v + return false; + } + + a = a.substitute(TNode(v), TNode(solution)); + return checkEntailArith(a, strict); +} + +bool SequencesRewriter::checkEntailArithWithAssumption(Node assumption, + Node a, + Node b, + bool strict) +{ + Assert(Rewriter::rewrite(assumption) == assumption); + + NodeManager* nm = NodeManager::currentNM(); + + if (!assumption.isConst() && assumption.getKind() != kind::EQUAL) + { + // We rewrite inequality assumptions from x <= y to x + (str.len s) = y + // where s is some fresh string variable. We use (str.len s) because + // (str.len s) must be non-negative for the equation to hold.
+ Node x, y; + if (assumption.getKind() == kind::GEQ) + { + x = assumption[0]; + y = assumption[1]; + } + else + { + // (not (>= s t)) --> (>= (t - 1) s) + Assert(assumption.getKind() == kind::NOT + && assumption[0].getKind() == kind::GEQ); + x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1))); + y = assumption[0][0]; + } + + Node s = nm->mkBoundVar("slackVal", nm->stringType()); + Node slen = nm->mkNode(kind::STRING_LENGTH, s); + assumption = Rewriter::rewrite( + nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen))); + } + + Node diff = nm->mkNode(kind::MINUS, a, b); + bool res = false; + if (assumption.isConst()) + { + bool assumptionBool = assumption.getConst(); + if (assumptionBool) + { + res = checkEntailArith(diff, strict); + } + else + { + res = true; + } + } + else + { + res = checkEntailArithWithEqAssumption(assumption, diff, strict); + } + return res; +} + +bool SequencesRewriter::checkEntailArithWithAssumptions( + std::vector assumptions, Node a, Node b, bool strict) +{ + // TODO: We currently try to show the entailment with each assumption + // independently. In the future, we should make better use of multiple + // assumptions. 
+ bool res = false; + for (const auto& assumption : assumptions) + { + Assert(Rewriter::rewrite(assumption) == assumption); + + if (checkEntailArithWithAssumption(assumption, a, b, strict)) + { + res = true; + break; + } + } + return res; +} + +Node SequencesRewriter::getConstantArithBound(Node a, bool isLower) +{ + Assert(Rewriter::rewrite(a) == a); + Node ret; + if (a.isConst()) + { + ret = a; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + if (isLower) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + std::vector children; + bool success = true; + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + Node ac = getConstantArithBound(a[i], isLower); + if (ac.isNull()) + { + ret = ac; + success = false; + break; + } + else + { + if (ac.getConst().sgn() == 0) + { + if (a.getKind() == kind::MULT) + { + ret = ac; + success = false; + break; + } + } + else + { + if (a.getKind() == kind::MULT) + { + if ((ac.getConst().sgn() > 0) != isLower) + { + ret = Node::null(); + success = false; + break; + } + } + children.push_back(ac); + } + } + } + if (success) + { + if (children.empty()) + { + ret = NodeManager::currentNM()->mkConst(Rational(0)); + } + else if (children.size() == 1) + { + ret = children[0]; + } + else + { + ret = NodeManager::currentNM()->mkNode(a.getKind(), children); + ret = Rewriter::rewrite(ret); + } + } + } + Trace("strings-rewrite-cbound") + << "Constant " << (isLower ? 
"lower" : "upper") << " bound for " << a + << " is " << ret << std::endl; + Assert(ret.isNull() || ret.isConst()); + // entailment check should be at least as powerful as computing a lower bound + Assert(!isLower || ret.isNull() || ret.getConst().sgn() < 0 + || checkEntailArith(a, false)); + Assert(!isLower || ret.isNull() || ret.getConst().sgn() <= 0 + || checkEntailArith(a, true)); + return ret; +} + +Node SequencesRewriter::getFixedLengthForRegexp(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + if (n.getKind() == STRING_TO_REGEXP) + { + Node ret = nm->mkNode(STRING_LENGTH, n[0]); + ret = Rewriter::rewrite(ret); + if (ret.isConst()) + { + return ret; + } + } + else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) + { + return nm->mkConst(Rational(1)); + } + else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) + { + Node ret; + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull() || (!ret.isNull() && ret != flc)) + { + return Node::null(); + } + else if (ret.isNull()) + { + // first time + ret = flc; + } + } + return ret; + } + else if (n.getKind() == REGEXP_CONCAT) + { + NodeBuilder<> nb(PLUS); + for (const Node& nc : n) + { + Node flc = getFixedLengthForRegexp(nc); + if (flc.isNull()) + { + return flc; + } + nb << flc; + } + Node ret = nb.constructNode(); + ret = Rewriter::rewrite(ret); + return ret; + } + return Node::null(); +} + +bool SequencesRewriter::checkEntailArithInternal(Node a) +{ + Assert(Rewriter::rewrite(a) == a); + // check whether a >= 0 + if (a.isConst()) + { + return a.getConst().sgn() >= 0; + } + else if (a.getKind() == kind::STRING_LENGTH) + { + // str.len( t ) >= 0 + return true; + } + else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) + { + for (unsigned i = 0; i < a.getNumChildren(); i++) + { + if (!checkEntailArithInternal(a[i])) + { + return false; + } + } + // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... 
op tn >= 0 + return true; + } + + return false; +} + +Node SequencesRewriter::decomposeSubstrChain(Node s, + std::vector& ss, + std::vector& ls) +{ + Assert(ss.empty()); + Assert(ls.empty()); + while (s.getKind() == STRING_SUBSTR) + { + ss.push_back(s[1]); + ls.push_back(s[2]); + s = s[0]; + } + std::reverse(ss.begin(), ss.end()); + std::reverse(ls.begin(), ls.end()); + return s; +} + +Node SequencesRewriter::mkSubstrChain(Node base, + const std::vector& ss, + const std::vector& ls) +{ + NodeManager* nm = NodeManager::currentNM(); + for (unsigned i = 0, size = ss.size(); i < size; i++) + { + base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); + } + return base; +} + +Node SequencesRewriter::getStringOrEmpty(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node res; + while (res.isNull()) + { + switch (n.getKind()) + { + case kind::STRING_STRREPL: + { + Node empty = nm->mkConst(::CVC4::String("")); + if (n[0] == empty) + { + // (str.replace "" x y) --> y + n = n[2]; + break; + } + + if (checkEntailLengthOne(n[0]) && n[2] == empty) + { + // (str.replace "A" x "") --> "A" + res = n[0]; + break; + } + + res = n; + break; + } + case kind::STRING_SUBSTR: + { + if (checkEntailLengthOne(n[0])) + { + // (str.substr "A" x y) --> "A" + res = n[0]; + break; + } + res = n; + break; + } + default: + { + res = n; + break; + } + } + } + return res; +} + +bool SequencesRewriter::inferZerosInSumGeq(Node x, + std::vector& ys, + std::vector& zeroYs) +{ + Assert(zeroYs.empty()); + + NodeManager* nm = NodeManager::currentNM(); + + // Check if we can show that y1 + ... + yn >= x + Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0]; + if (!checkEntailArith(sum, x)) + { + return false; + } + + // Try to remove yi one-by-one and check if we can still show: + // + // y1 + ... + yi-1 + yi+1 + ... + yn >= x + // + // If that's the case, we know that yi can be zero and the inequality still + // holds. 
+ size_t i = 0; + while (i < ys.size()) + { + Node yi = ys[i]; + std::vector::iterator pos = ys.erase(ys.begin() + i); + if (ys.size() > 1) + { + sum = nm->mkNode(PLUS, ys); + } + else + { + sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0)); + } + + if (checkEntailArith(sum, x)) + { + zeroYs.push_back(yi); + } + else + { + ys.insert(pos, yi); + i++; + } + } + return true; +} + +Node SequencesRewriter::inferEqsFromContains(Node x, Node y) +{ + NodeManager* nm = NodeManager::currentNM(); + Node emp = nm->mkConst(String("")); + + Node xLen = nm->mkNode(STRING_LENGTH, x); + std::vector yLens; + if (y.getKind() != STRING_CONCAT) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, y)); + } + else + { + for (const Node& yi : y) + { + yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); + } + } + + std::vector zeroLens; + if (x == emp) + { + // If x is the empty string, then all ys must be empty, too, and we can + // skip the expensive checks. Note that this is just a performance + // optimization. + zeroLens.swap(yLens); + } + else + { + // Check if we can infer that str.len(x) <= str.len(y). If that is the + // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + + // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality + // true. The terms that can have length zero without making the inequality + // false must all be empty if (str.contains x y) is true.
+ if (!inferZerosInSumGeq(xLen, yLens, zeroLens)) + { + // We could not prove that the inequality holds + return Node::null(); + } + else if (yLens.size() == y.getNumChildren()) + { + // We could only prove that the inequality holds but not that any of the + // ys must be empty + return nm->mkNode(EQUAL, x, y); + } + } + + if (y.getKind() != STRING_CONCAT) + { + if (zeroLens.size() == 1) + { + // y is not a concatenation and we found that it must be empty, so just + // return (= y "") + Assert(zeroLens[0][0] == y); + return nm->mkNode(EQUAL, y, emp); + } + else + { + Assert(yLens.size() == 1 && yLens[0][0] == y); + return nm->mkNode(EQUAL, x, y); + } + } + + std::vector cs; + for (const Node& yiLen : yLens) + { + Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); + cs.push_back(yiLen[0]); + } + + NodeBuilder<> nb(AND); + // (= x (str.++ y1' ... ym')) + if (!cs.empty()) + { + nb << nm->mkNode(EQUAL, x, utils::mkConcat(STRING_CONCAT, cs)); + } + // (= y1'' "") ... (= yk'' "") + for (const Node& zeroLen : zeroLens) + { + Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); + nb << nm->mkNode(EQUAL, zeroLen[0], emp); + } + + // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... 
(= yk'' "")) + return nb.constructNode(); +} + +std::pair > SequencesRewriter::collectEmptyEqs(Node x) +{ + NodeManager* nm = NodeManager::currentNM(); + Node empty = nm->mkConst(::CVC4::String("")); + + // Collect the equalities of the form (= x "") (sorted) + std::set emptyNodes; + bool allEmptyEqs = true; + if (x.getKind() == kind::EQUAL) + { + if (x[0] == empty) + { + emptyNodes.insert(x[1]); + } + else if (x[1] == empty) + { + emptyNodes.insert(x[0]); + } + else + { + allEmptyEqs = false; + } + } + else if (x.getKind() == kind::AND) + { + for (const Node& c : x) + { + if (c.getKind() == kind::EQUAL) + { + if (c[0] == empty) + { + emptyNodes.insert(c[1]); + } + else if (c[1] == empty) + { + emptyNodes.insert(c[0]); + } + } + else + { + allEmptyEqs = false; + } + } + } + + if (emptyNodes.size() == 0) + { + allEmptyEqs = false; + } + + return std::make_pair( + allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); +} + +Node SequencesRewriter::returnRewrite(Node node, Node ret, const char* c) +{ + Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << c + << "." << std::endl; + + NodeManager* nm = NodeManager::currentNM(); + + // standard post-processing + // We rewrite (string) equalities immediately here. This allows us to forego + // the standard invariant on equality rewrites (that s=t must rewrite to one + // of { s=t, t=s, true, false } ). 
+ Kind retk = ret.getKind(); + if (retk == OR || retk == AND) + { + std::vector children; + bool childChanged = false; + for (const Node& cret : ret) + { + Node creter = cret; + if (cret.getKind() == EQUAL) + { + creter = rewriteEqualityExt(cret); + } + else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL) + { + creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0])); + } + childChanged = childChanged || cret != creter; + children.push_back(creter); + } + if (childChanged) + { + ret = nm->mkNode(retk, children); + } + } + else if (retk == NOT && ret[0].getKind() == EQUAL) + { + ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0])); + } + else if (retk == EQUAL && node.getKind() != EQUAL) + { + Trace("strings-rewrite") + << "Apply extended equality rewrite on " << ret << std::endl; + ret = rewriteEqualityExt(ret); + } + return ret; +} diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h new file mode 100644 index 000000000..5aba4ab6f --- /dev/null +++ b/src/theory/strings/sequences_rewriter.h @@ -0,0 +1,775 @@ +/********************* */ +/*! \file sequences_rewriter.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. 
See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Rewriter for the theory of strings and sequences + ** + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H +#define CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H + +#include +#include +#include + +#include "expr/attribute.h" +#include "theory/theory_rewriter.h" +#include "theory/type_enumerator.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +class SequencesRewriter : public TheoryRewriter +{ + protected: + /** simple regular expression consume + * + * This method is called when we are rewriting a membership of the form + * s1 ++ ... ++ sn in r1 ++ ... ++ rm + * We have that mchildren consists of the strings s1...sn, and children + * consists of the regular expressions r1...rm. + * + * This method tries to strip off parts of the concatenation terms. It updates + * the vectors such that the resulting vectors are such that the membership + * mchildren[n'...n''] in children[m'...m''] is equivalent to the input + * membership. The argument dir indicates the direction to consider, where + * 0 means strip off the front, 1 off the back, and < 0 off of both. + * + * If this method returns the false node, then we have inferred that no + * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or + * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null + * node. + * + * For example, given input + * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0, + * this method updates: + * mchildren = { "b", x }, children = { ("cd")* } + * and returns null. + * + * For example, given input + * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1, + * this method updates: + * { "b" }, { [[y]] } + * where [[.]] denotes str.to.re, and returns null. 
+ * + * Notice that the above requirement for returning false is stronger than + * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false. + * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false + * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb". + * We do not return false even though the above membership is equivalent + * to false. We do this because the function is used e.g. to test whether a + * possible unrolling leads to a conflict. This is demonstrated by the + * following examples: + * + * For example, given input + * { "bb", x }, { "b", ("a")* } and dir=-1, + * this method updates: + * { "b" }, { ("a")* } + * and returns null. + * + * For example, given input + * { "cb", x }, { "b", ("a")* } and dir=-1, + * this method leaves children and mchildren unchanged and returns false. + * + * Notice that based on this, we can determine that: + * "cb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false, whereas we cannot determine that: + * "bb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false. + */ + static Node simpleRegexpConsume(std::vector& mchildren, + std::vector& children, + int dir = -1); + static bool isConstRegExp(TNode t); + static bool testConstStringInRegExp(CVC4::String& s, + unsigned int index_start, + TNode r); + + /** rewrite regular expression concatenation + * + * This is the entry point for post-rewriting applications of re.++. + * Returns the rewritten form of node. + */ + static Node rewriteConcatRegExp(TNode node); + /** rewrite regular expression star + * + * This is the entry point for post-rewriting applications of re.*. + * Returns the rewritten form of node. + */ + static Node rewriteStarRegExp(TNode node); + /** rewrite regular expression intersection/union + * + * This is the entry point for post-rewriting applications of re.inter and + * re.union. Returns the rewritten form of node. 
+ */ + static Node rewriteAndOrRegExp(TNode node); + /** rewrite regular expression loop + * + * This is the entry point for post-rewriting applications of re.loop. + * Returns the rewritten form of node. + */ + static Node rewriteLoopRegExp(TNode node); + /** rewrite regular expression membership + * + * This is the entry point for post-rewriting applications of str.in.re + * Returns the rewritten form of node. + */ + static Node rewriteMembership(TNode node); + + static bool hasEpsilonNode(TNode node); + /** check entail arithmetic internal + * Returns true if we can show a >= 0 always. + * a is in rewritten form. + */ + static bool checkEntailArithInternal(Node a); + /** rewrite string equality extended + * + * This method returns a formula that is equivalent to the equality between + * two strings s = t, given by node. It is called by rewriteEqualityExt. + */ + static Node rewriteStrEqualityExt(Node node); + /** rewrite arithmetic equality extended + * + * This method returns a formula that is equivalent to the equality between + * two arithmetic string terms s = t, given by node. It is called by + * rewriteEqualityExt. + */ + static Node rewriteArithEqualityExt(Node node); + /** + * Called when node rewrites to ret. + * + * The string c indicates the justification for the rewrite, which is printed + * by this function for debugging. + * + * If node is not an equality and ret is an equality, this method applies + * an additional rewrite step (rewriteEqualityExt) that performs + * additional rewrites on ret, after which we return the result of this call. + * Otherwise, this method simply returns ret. + */ + static Node returnRewrite(Node node, Node ret, const char* c); + + public: + RewriteResponse postRewrite(TNode node) override; + RewriteResponse preRewrite(TNode node) override; + + /** rewrite equality + * + * This method returns a formula that is equivalent to the equality between + * two strings s = t, given by node.
The result of rewrite is one of + * { s = t, t = s, true, false }. + */ + static Node rewriteEquality(Node node); + /** rewrite equality extended + * + * This method returns a formula that is equivalent to the equality between + * two terms s = t, given by node, where s and t are terms in the signature + * of the theory of strings. Notice that s and t may be of string type or + * of Int type. + * + * Specifically, this function performs rewrites whose conclusion is not + * necessarily one of { s = t, t = s, true, false }. + */ + static Node rewriteEqualityExt(Node node); + /** rewrite concat + * This is the entry point for post-rewriting terms node of the form + * str.++( t1, .., tn ) + * Returns the rewritten form of node. + */ + static Node rewriteConcat(Node node); + /** rewrite substr + * This is the entry point for post-rewriting terms node of the form + * str.substr( s, i1, i2 ) + * Returns the rewritten form of node. + */ + static Node rewriteSubstr(Node node); + /** rewrite contains + * This is the entry point for post-rewriting terms node of the form + * str.contains( t, s ) + * Returns the rewritten form of node. + * + * For details on some of the basic rewrites done in this function, see Figure + * 7 of Reynolds et al "Scaling Up DPLL(T) String Solvers Using + * Context-Dependent Rewriting", CAV 2017. + */ + static Node rewriteContains(Node node); + /** rewrite indexof + * This is the entry point for post-rewriting terms n of the form + * str.indexof( s, t, n ) + * Returns the rewritten form of node. + */ + static Node rewriteIndexof(Node node); + /** rewrite replace + * This is the entry point for post-rewriting terms n of the form + * str.replace( s, t, r ) + * Returns the rewritten form of node. + */ + static Node rewriteReplace(Node node); + /** rewrite replace all + * This is the entry point for post-rewriting terms n of the form + * str.replaceall( s, t, r ) + * Returns the rewritten form of node. 
+ */ + static Node rewriteReplaceAll(Node node); + /** rewrite replace internal + * + * This method implements rewrite rules that apply to both str.replace and + * str.replaceall. If it returns a non-null ret, then node rewrites to ret. + */ + static Node rewriteReplaceInternal(Node node); + /** rewrite string reverse + * + * This is the entry point for post-rewriting terms n of the form + * str.rev( s ) + * Returns the rewritten form of node. + */ + static Node rewriteStrReverse(Node node); + /** rewrite prefix/suffix + * This is the entry point for post-rewriting terms n of the form + * str.prefixof( s, t ) / str.suffixof( s, t ) + * Returns the rewritten form of node. + */ + static Node rewritePrefixSuffix(Node node); + + /** rewrite str.to_code + * This is the entry point for post-rewriting terms n of the form + * str.to_code( t ) + * Returns the rewritten form of node. + */ + static Node rewriteStringToCode(Node node); + + static Node splitConstant(Node a, Node b, int& index, bool isRev); + /** can constant contain list + * return true if constant c can contain the list l in order + * firstc/lastc store which indices in l were used to determine the return + * value. 
+ * (This is typically used when this function returns false, for minimizing + * explanations) + * + * For example: + * canConstantContainList( "abc", { x, "c", y } ) returns true + * firstc/lastc are updated to 1/1 + * canConstantContainList( "abc", { x, "d", y } ) returns false + * firstc/lastc are updated to 1/1 + * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w } ) + * returns false + * firstc/lastc are updated to 1/3 + * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w } ) + * returns false + * firstc/lastc are updated to 1/5 + */ + static bool canConstantContainList(Node c, + std::vector& l, + int& firstc, + int& lastc); + /** can constant contain concat + * same as above but with n = str.++( l ) instead of l + */ + static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc); + + /** strip symbolic length + * + * This function strips off components of n1 whose length is less than + * or equal to argument curr, and stores them in nr. The direction + * dir determines whether the components are removed from the start + * or end of n1. + * + * In detail, this function updates n1 to n1' such that: + * If dir=1, + * n1 = str.++( nr, n1' ) + * If dir=-1 + * n1 = str.++( n1', nr ) + * It updates curr to curr' such that: + * curr' = curr - str.len( str.++( nr ) ), and + * curr' >= 0 + * where the latter fact is determined by checkEntailArith. + * + * This function returns true if n1 is modified.
+ * + * For example: + * + * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 ) + * returns true + * n1 is updated to { "bc", y } + * nr is updated to { x, "a" } + * curr is updated to 0 * + * + * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 ) + * returns false + * + * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 ) + * returns false + * + * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 ) + * returns true + * n1 is updated to { x } + * nr is updated to { "abc", y } + * curr is updated to str.len(y)+1 + */ + static bool stripSymbolicLength(std::vector& n1, + std::vector& nr, + int dir, + Node& curr); + /** component contains + * This function is used when rewriting str.contains( t1, t2 ), where + * n1 is the vector form of t1 + * n2 is the vector form of t2 + * + * If this function returns n>=0 for some n, then + * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm }, + * n2 = { y1...ys }, + * y1 is a suffix of xn, + * y2...y{s-1} = x{n+1}...x{n+s-1}, and + * ys is a prefix of x{n+s} + * Otherwise it returns -1. + * + * This function may update n1 if computeRemainder = true. + * We maintain the invariant that the resulting value n1' + * of n1 after this function is such that: + * n1 = str.++( nb, n1', ne ) + * The vectors nb and ne have the following properties. + * If computeRemainder = true, then + * If remainderDir != -1, then + * ne is { x{n+s}' x{n+s+1}...xm } + * where x{n+s} = str.++( ys, x{n+s}' ). + * If remainderDir != 1, then + * nb is { x1, ..., x{n-1}, xn' } + * where xn = str.++( xn', y1 ). 
+ * + * For example: + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 0) + * returns 1, + * n1 is updated to { "b" }, + * nb is updated to { x, "a" }, + * ne is updated to { "c", x } + * + * componentContains({ x, "abc", x }, { "b" }, {}, true, 1) + * returns 1, + * n1 is updated to { x, "ab" }, + * ne is updated to { "c", x } + * + * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1) + * returns 2, + * n1 is updated to { y, z, "abc", x, "de" }, + * ne is updated to { "f" } + * + * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1) + * returns 1, + * n1 is updated to { "c", x, "def" }, + * nb is updated to { y, "ab" } + */ + static int componentContains(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + bool computeRemainder = false, + int remainderDir = 0); + /** component contains base + * + * This function is a helper for the above function. + * + * It returns true if n2 is contained in n1 with the following + * restrictions: + * If dir=1, then n2 must be a suffix of n1. + * If dir=-1, then n2 must be a prefix of n1. + * + * If computeRemainder is true, then n1rb and n1re are + * updated such that : + * n1 = str.++( n1rb, n2, n1re ) + * where a null value of n1rb and n1re indicates the + * empty string. + * + * For example: + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false) + * returns false. + * + * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true) + * returns true, + * n1rb is set to "c", + * n1re is set to "e". + * + * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true) + * returns true, + * n1re is set to str.substr(y,5,str.len(y)). + * + * + * Notice that this function may return false when it cannot compute a + * remainder when it otherwise would have returned true. For example: + * + * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false) + * returns true. + * + * Hence, we know that str.substr(y,x,z) is contained in y. 
However: + * + * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true) + * returns false. + * + * The reason is since computeRemainder=true, it must be that + * y = str.++( n1rb, str.substr(y,x,z), n1re ) + * for some n1rb, n1re. However, to construct such n1rb, n1re would require + * e.g. the terms: + * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ), + * str.substr(y,x,z), + * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) ) + * + * Since we do not wish to introduce ITE terms in the rewriter, we instead + * return false, indicating that we cannot compute the remainder. + */ + static bool componentContainsBase( + Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); + /** strip constant endpoints + * This function is used when rewriting str.contains( t1, t2 ), where + * n1 is the vector form of t1 + * n2 is the vector form of t2 + * + * It modifies n1 to a new vector n1' such that: + * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to + * str.contains( str.++( n1' ), str.++( n2 ) ) + * (2) str.++( n1 ) = str.++( nb, n1', ne ) + * + * "dir" is the direction in which we can modify n1: + * if dir = 1, then we allow dropping components from the front of n1, + * if dir = -1, then we allow dropping components from the back of n1, + * if dir = 0, then we allow dropping components from either. + * + * It returns true if n1 is modified. 
+ * + * For example: + * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1) + * returns true, + * n1 is updated to { x, "de" } + * nb is updated to { "ab" } + * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0) + * returns true, + * n1 is updated to { "b", x, "d" } + * nb is updated to { "a" } + * ne is updated to { "e" } + * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1) + * returns true, + * n1 is updated to {"ad"} + * ne is updated to { substr("ccc",x,y) } + */ + static bool stripConstantEndpoints(std::vector& n1, + std::vector& n2, + std::vector& nb, + std::vector& ne, + int dir = 0); + + /** + * Given a symbolic length n, returns the canonical string for that length. + * For example if n is constant, this function returns a string consisting of + * "A" repeated n times. Returns the null node if no such string exists. + */ + static Node canonicalStrForSymbolicLength(Node n); + + /** length preserving rewrite + * + * Given input n, this returns a string n' whose length is equivalent to n. + * We apply certain normalizations to n', such as replacing all constants + * that are not relevant to length by "A". + */ + static Node lengthPreserveRewrite(Node n); + + /** + * Checks whether a string term `a` is entailed to contain or not contain a + * string term `b`. + * + * @param a The string that is checked whether it contains `b` + * @param b The string that is checked whether it is contained in `a` + * @param fullRewriter Determines whether the function can use the full + * rewriter or only `rewriteContains()` (useful for avoiding loops) + * @return true node if it can be shown that `a` contains `b`, false node if + * it can be shown that `a` does not contain `b`, null node otherwise + */ + static Node checkEntailContains(Node a, Node b, bool fullRewriter = true); + + /** entail non-empty + * + * Checks whether string a is entailed to be non-empty. Is equivalent to + * the call checkArithEntail( len( a ), true ). 
+ */ + static bool checkEntailNonEmpty(Node a); + + /** + * Checks whether string has at most/exactly length one. Length one strings + * can be used for more aggressive rewriting because it is guaranteed that + * they cannot overlap multiple components in a string concatenation. + * + * @param s The string to check + * @param strict If true, the string must have exactly length one, otherwise + * at most length one + * @return True if the string has at most/exactly length one, false otherwise + */ + static bool checkEntailLengthOne(Node s, bool strict = false); + + /** check arithmetic entailment equal + * Returns true if it is always the case that a = b. + */ + static bool checkEntailArithEq(Node a, Node b); + /** check arithmetic entailment + * Returns true if it is always the case that a >= b, + * and a>b if strict is true. + */ + static bool checkEntailArith(Node a, Node b, bool strict = false); + /** check arithmetic entailment + * Returns true if it is always the case that a >= 0, + * and a>0 if strict is true. + */ + static bool checkEntailArith(Node a, bool strict = false); + /** check arithmetic entailment with approximations + * + * Returns true if it is always the case that a >= 0. We expect that a is in + * rewritten form. + * + * This function uses "approximation" techniques that under-approximate + * the value of a for the purposes of showing the entailment holds. For + * example, given: + * len( x ) - len( substr( y, 0, len( x ) ) ) + * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above + * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0, + * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0 + * holds. + */ + static bool checkEntailArithApprox(Node a); + /** Get arithmetic approximations + * + * This gets the (set of) arithmetic approximations for term a and stores + * them in approx. If isOverApprox is true, these are over-approximations + * for the value of a, otherwise, they are under-approximations. 
For example, + * an over-approximation for len( substr( y, n, m ) ) is m; an + * under-approximation for indexof( x, y, n ) is -1. + * + * Notice that this function is not generally recursive (although it may make + * a small bounded number of recursive calls). Instead, it returns the shape + * of the approximations for a. For example, an under-approximation + * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this + * function might be len( substr( x, 0, n ) ) - len( y ), where we don't + * consider (recursively) the approximations for len( substr( x, 0, n ) ). + */ + static void getArithApproximations(Node a, + std::vector& approx, + bool isOverApprox = false); + + /** + * Checks whether it is always true that `a` is a strict subset of `b` in the + * multiset domain. + * + * Examples: + * + * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true + * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true + * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false + * + * @param a The term for which it should be checked if it is a strict subset + * of `b` in the multiset domain + * @param b The term for which it should be checked if it is a strict + * superset of `a` in the multiset domain + * @return True if it is always the case that `a` is a strict subset of `b`, + * false otherwise. + */ + static bool checkEntailMultisetSubset(Node a, Node b); + + /** + * Returns a character `c` if it is always the case that str.in.re(a, c*), + * i.e. if all possible values of `a` only consist of `c` characters, and the + * null node otherwise. If `a` is the empty string, the function returns an + * empty string. + * + * @param a The node to check for homogeneity + * @return If `a` is homogeneous, the only character that it may contain, the + * empty string if `a` is empty, and the null node otherwise + */ + static Node checkEntailHomogeneousString(Node a); + + /** + * Simplifies a given node `a` s.t. 
the result is a concatenation of string + * terms that can be interpreted as a multiset and which contains all + * multisets that `a` could form. + * + * Examples: + * + * (str.substr "AA" 0 n) ---> "AA" + * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB") + * + * @param a The node to simplify + * @return A concatenation that can be interpreted as a multiset + */ + static Node getMultisetApproximation(Node a); + + /** + * Checks whether assumption |= a >= 0 (if strict is false) or + * assumption |= a > 0 (if strict is true), where assumption is an equality + * assumption. The assumption must be in rewritten form. + * + * Example: + * + * checkEntailArithWithEqAssumption(x + (str.len y) = 0, -x, false) = true + * + * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true + */ + static bool checkEntailArithWithEqAssumption(Node assumption, + Node a, + bool strict = false); + + /** + * Checks whether assumption |= a >= b (if strict is false) or + * assumption |= a > b (if strict is true). The function returns true if it + * can be shown that the entailment holds and false otherwise. Assumption + * must be in rewritten form. Assumption may be an equality or an inequality. + * + * Example: + * + * checkEntailArithWithAssumption(x + (str.len y) = 0, 0, x, false) = true + * + * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true + */ + static bool checkEntailArithWithAssumption(Node assumption, + Node a, + Node b, + bool strict = false); + + /** + * Checks whether assumptions |= a >= b (if strict is false) or + * assumptions |= a > b (if strict is true). The function returns true if it + * can be shown that the entailment holds and false otherwise. Assumptions + * must be in rewritten form. Assumptions may be equalities or + * inequalities. 
+ * + * Example: + * + * checkEntailArithWithAssumptions([x + (str.len y) = 0], 0, x, false) = true + * + * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true + */ + static bool checkEntailArithWithAssumptions(std::vector assumptions, + Node a, + Node b, + bool strict = false); + + /** get constant arithmetic bound + * If this function returns a non-null Node ret, + * then ret is a rational constant and + * we know that a >= ret always if isLower is true, + * or a <= ret if isLower is false. + * + * Notice the following invariant. + * If getConstantArithBound(a, true) = ret where ret is non-null, then for + * strict = { true, false } : + * ret >= strict ? 1 : 0 + * if and only if + * checkEntailArith( a, strict ) = true. + */ + static Node getConstantArithBound(Node a, bool isLower = true); + /** get length for regular expression + * + * Given regular expression n, if this method returns a non-null value c, then + * x in n entails len( x ) = c. + */ + static Node getFixedLengthForRegexp(Node n); + /** decompose substr chain + * + * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this + * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls. + */ + static Node decomposeSubstrChain(Node s, + std::vector& ss, + std::vector& ls); + /** make substr chain + * + * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term + * substr( ... substr( base, x1, y1 ) ..., xn, yn ). + */ + static Node mkSubstrChain(Node base, + const std::vector& ss, + const std::vector& ls); + + /** + * Overapproximates the possible values of node n. This overapproximation + * assumes that n can return a value x or the empty string and tries to find + * the simplest x such that this holds. In the general case, x is the same as + * the input n. This overapproximation can be used to sort terms with the + * same possible values in string concatenation for example. 
+ * + * Example: + * + * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y) + * either returns y or "" + * + * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y) + * because the function could not compute a simpler term + */ + static Node getStringOrEmpty(Node n); + + /** + * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the + * original inequality still holds. Returns true if the original inequality + * holds and false otherwise. The list of ys is modified to contain a subset + * of the original ys. + * + * Example: + * + * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] ) + * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ] + * (can be used to rewrite the inequality to false) + * + * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] ) + * --> returns false because it is not possible to show + * str.len(y) >= str.len(x) + */ + static bool inferZerosInSumGeq(Node x, + std::vector& ys, + std::vector& zeroYs); + + /** + * Infers a conjunction of equalities that correspond to (str.contains x y) + * if it can show that the length of y is greater than or equal to the length + * of x. If y is a concatenation, we get x = y1 ++ ... ++ yn, and the + * conjunction is of the form: + * + * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) + * + * where each yi'' are yi that must be empty for (= x y) to hold and yi' are + * yi that the function could not infer anything about. Returns a null node + * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x + * y) if the function can infer that str.len(y) >= str.len(x) but cannot + * infer that any of the yi must be empty. + */ + static Node inferEqsFromContains(Node x, Node y); + + /** + * Collects equal-to-empty nodes from a conjunction or a single + * node. 
Returns a list of nodes that are compared to empty nodes + * and a boolean that indicates whether all nodes in the + * conjunction were a comparison with the empty node. The nodes in + * the list are sorted and duplicates removed. + * + * Examples: + * + * collectEmptyEqs( (= "" x) ) = { true, [x] } + * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] } + * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] } + * + * @param x The conjunction of equalities or a single equality + * @return A pair of a boolean that indicates whether the + * conjunction consists only of comparisons to the empty string + * and the list of nodes that are compared to the empty string + */ + static std::pair > collectEmptyEqs(Node x); +}; /* class SequencesRewriter */ + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__SEQUENCES_REWRITER_H */ diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index b4e1c74ea..7396a5013 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -15,7 +15,7 @@ #include "theory/strings/skolem_cache.h" #include "theory/rewriter.h" -#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/sequences_rewriter.h" #include "util/rational.h" using namespace CVC4::kind; @@ -163,8 +163,8 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) a = s; b = m; } - else if (TheoryStringsRewriter::checkEntailArith( - nm->mkNode(PLUS, n, m), nm->mkNode(STRING_LENGTH, s))) + else if (SequencesRewriter::checkEntailArith(nm->mkNode(PLUS, n, m), + nm->mkNode(STRING_LENGTH, s))) { // SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n) // if n + m >= (str.len x) diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp new file mode 100644 index 000000000..75dfe7432 --- /dev/null +++ b/src/theory/strings/strings_rewriter.cpp @@ -0,0 +1,247 @@ 
+/********************* */ +/*! \file strings_rewriter.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of rewrite rules for string-specific operators in + ** theory of strings. + **/ + +#include "theory/strings/strings_rewriter.h" + +#include "expr/node_builder.h" +#include "theory/strings/theory_strings_utils.h" +#include "util/rational.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +Node StringsRewriter::rewriteStrToInt(Node node) +{ + Assert(node.getKind() == STRING_STOI); + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + Node ret; + String s = node[0].getConst(); + if (s.isNumber()) + { + ret = nm->mkConst(s.toNumber()); + } + else + { + ret = nm->mkConst(Rational(-1)); + } + return returnRewrite(node, ret, "stoi-eval"); + } + else if (node[0].getKind() == STRING_CONCAT) + { + for (TNode nc : node[0]) + { + if (nc.isConst()) + { + String t = nc.getConst(); + if (!t.isNumber()) + { + Node ret = nm->mkConst(Rational(-1)); + return returnRewrite(node, ret, "stoi-concat-nonnum"); + } + } + } + } + return node; +} + +Node StringsRewriter::rewriteIntToStr(Node node) +{ + Assert(node.getKind() == STRING_ITOS); + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + Node ret; + if (node[0].getConst().sgn() == -1) + { + ret = nm->mkConst(String("")); + } + else + { + std::string stmp = node[0].getConst().getNumerator().toString(); + Assert(stmp[0] != '-'); + ret = nm->mkConst(String(stmp)); + } + return returnRewrite(node, ret, "itos-eval"); + } + return node; +} + 
+Node StringsRewriter::rewriteStrConvert(Node node) +{ + Kind nk = node.getKind(); + Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + std::vector nvec = node[0].getConst().getVec(); + for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) + { + unsigned newChar = String::convertUnsignedIntToCode(nvec[i]); + // transform it + // upper 65 ... 90 + // lower 97 ... 122 + if (nk == STRING_TOUPPER) + { + if (newChar >= 97 && newChar <= 122) + { + newChar = newChar - 32; + } + } + else if (nk == STRING_TOLOWER) + { + if (newChar >= 65 && newChar <= 90) + { + newChar = newChar + 32; + } + } + newChar = String::convertCodeToUnsignedInt(newChar); + nvec[i] = newChar; + } + Node retNode = nm->mkConst(String(nvec)); + return returnRewrite(node, retNode, "str-conv-const"); + } + else if (node[0].getKind() == STRING_CONCAT) + { + NodeBuilder<> concatBuilder(STRING_CONCAT); + for (const Node& nc : node[0]) + { + concatBuilder << nm->mkNode(nk, nc); + } + // tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) + Node retNode = concatBuilder.constructNode(); + return returnRewrite(node, retNode, "str-conv-minscope-concat"); + } + else if (node[0].getKind() == STRING_TOLOWER + || node[0].getKind() == STRING_TOUPPER) + { + // tolower( tolower( x ) ) --> tolower( x ) + // tolower( toupper( x ) ) --> tolower( x ) + Node retNode = nm->mkNode(nk, node[0][0]); + return returnRewrite(node, retNode, "str-conv-idem"); + } + else if (node[0].getKind() == STRING_ITOS) + { + // tolower( str.from.int( x ) ) --> str.from.int( x ) + return returnRewrite(node, node[0], "str-conv-itos"); + } + return node; +} + +Node StringsRewriter::rewriteStringLeq(Node n) +{ + Assert(n.getKind() == kind::STRING_LEQ); + NodeManager* nm = NodeManager::currentNM(); + if (n[0] == n[1]) + { + Node ret = nm->mkConst(true); + return returnRewrite(n, ret, "str-leq-id"); + } + if (n[0].isConst() && n[1].isConst()) + { + String s = 
n[0].getConst(); + String t = n[1].getConst(); + Node ret = nm->mkConst(s.isLeq(t)); + return returnRewrite(n, ret, "str-leq-eval"); + } + // empty strings + for (unsigned i = 0; i < 2; i++) + { + if (n[i].isConst() && n[i].getConst().isEmptyString()) + { + Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]); + return returnRewrite(n, ret, "str-leq-empty"); + } + } + + std::vector n1; + utils::getConcat(n[0], n1); + std::vector n2; + utils::getConcat(n[1], n2); + Assert(!n1.empty() && !n2.empty()); + + // constant prefixes + if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) + { + String s = n1[0].getConst(); + String t = n2[0].getConst(); + // only need to truncate if s is longer + if (s.size() > t.size()) + { + s = s.prefix(t.size()); + } + // if prefix is not leq, then entire string is not leq + if (!s.isLeq(t)) + { + Node ret = nm->mkConst(false); + return returnRewrite(n, ret, "str-leq-cprefix"); + } + } + return n; +} + +Node StringsRewriter::rewriteStringFromCode(Node n) +{ + Assert(n.getKind() == kind::STRING_FROM_CODE); + NodeManager* nm = NodeManager::currentNM(); + + if (n[0].isConst()) + { + Integer i = n[0].getConst().getNumerator(); + Node ret; + if (i >= 0 && i < strings::utils::getAlphabetCardinality()) + { + std::vector svec = {i.toUnsignedInt()}; + ret = nm->mkConst(String(svec)); + } + else + { + ret = nm->mkConst(String("")); + } + return returnRewrite(n, ret, "from-code-eval"); + } + return n; +} + +Node StringsRewriter::rewriteStringToCode(Node n) +{ + Assert(n.getKind() == kind::STRING_TO_CODE); + if (n[0].isConst()) + { + NodeManager* nm = NodeManager::currentNM(); + String s = n[0].getConst(); + Node ret; + if (s.size() == 1) + { + std::vector vec = s.getVec(); + Assert(vec.size() == 1); + ret = nm->mkConst(Rational(String::convertUnsignedIntToCode(vec[0]))); + } + else + { + ret = nm->mkConst(Rational(-1)); + } + return returnRewrite(n, ret, "to-code-eval"); + } + return n; +} + +} // namespace strings +} // namespace theory 
+} // namespace CVC4 diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h new file mode 100644 index 000000000..e6a6b0693 --- /dev/null +++ b/src/theory/strings/strings_rewriter.h @@ -0,0 +1,88 @@ +/********************* */ +/*! \file strings_rewriter.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Andres Noetzli, Tianyi Liang + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Rewrite rules for string-specific operators in theory of strings + ** + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__STRINGS_REWRITER_H +#define CVC4__THEORY__STRINGS__STRINGS_REWRITER_H + +#include "expr/node.h" +#include "theory/strings/sequences_rewriter.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * An extension of SequencesRewriter that handles operators that + * are specific to strings (and cannot be applied to sequences). + */ +class StringsRewriter : public SequencesRewriter +{ + public: + /** rewrite string to integer + * + * This is the entry point for post-rewriting terms n of the form + * str.to_int( s ) + * Returns the rewritten form of n. + * + * If s is a constant, n rewrites to the number denoted by s, or to -1 if s + * is not a number. If s is a concatenation with a constant component that is + * not a number, n rewrites to -1. + */ + static Node rewriteStrToInt(Node n); + + /** rewrite integer to string + * + * This is the entry point for post-rewriting terms n of the form + * str.from_int( i ) + * Returns the rewritten form of n. + * + * If i is a constant, n rewrites to the empty string when i is negative, and + * to the string form of the numerator of i otherwise. 
+ */ + static Node rewriteIntToStr(Node n); + + /** rewrite string convert + * + * This is the entry point for post-rewriting terms n of the form + * str.tolower( s ) and str.toupper( s ) + * Returns the rewritten form of n. + * + * Constant arguments are evaluated, the conversion is distributed over + * concatenations, an inner str.tolower/str.toupper is dropped, and the + * conversion is removed when s is an application of str.from_int. 
+ */ + static Node rewriteStrConvert(Node n); + + /** rewrite string less than or equal + * + * This is the entry point for post-rewriting terms n of the form + * str.<=( t, s ) + * Returns the rewritten form of n. + * + * n rewrites to true if t and s are identical or t is the empty string, to + * t = s if s is the empty string, and to false if the leading constant + * components of t and s already show that t is not less than or equal to s. + * If t and s are both constants, n is evaluated. + */ + static Node rewriteStringLeq(Node n); + + /** rewrite str.from_code + * + * This is the entry point for post-rewriting terms n of the form + * str.from_code( t ) + * Returns the rewritten form of n. + * + * If t is a constant, n rewrites to the one-character string with code t + * when t is non-negative and less than the alphabet cardinality, and to the + * empty string otherwise. + */ + static Node rewriteStringFromCode(Node n); + + /** rewrite str.to_code + * + * This is the entry point for post-rewriting terms n of the form + * str.to_code( t ) + * Returns the rewritten form of n. + * + * If t is a constant, n rewrites to the code of its single character when t + * has length one, and to -1 otherwise. + */ + static Node rewriteStringToCode(Node n); +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__STRINGS_REWRITER_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index c0dc561f6..e6e0f8557 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -26,7 +26,6 @@ #include "smt/smt_statistics_registry.h" #include "theory/ext_theory.h" #include "theory/rewriter.h" -#include "theory/strings/theory_strings_rewriter.h" #include "theory/strings/theory_strings_utils.h" #include "theory/strings/type_enumerator.h" #include "theory/strings/word.h" diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index a4b0a6705..d4183700d 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -22,7 +22,7 @@ #include "options/strings_options.h" #include "proof/proof_manager.h" #include "smt/logic_exception.h" -#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/sequences_rewriter.h" using namespace CVC4; using namespace CVC4::kind; @@ -71,7 +71,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node sk1 = n == d_zero ? 
d_empty_str : d_sc->mkSkolemCached( s, n, SkolemCache::SK_PREFIX, "sspre"); - Node sk2 = TheoryStringsRewriter::checkEntailArith(t12, lt0) + Node sk2 = SequencesRewriter::checkEntailArith(t12, lt0) ? d_empty_str : d_sc->mkSkolemCached( s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp deleted file mode 100644 index 95f537878..000000000 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ /dev/null @@ -1,5545 +0,0 @@ -/********************* */ -/*! \file theory_strings_rewriter.cpp - ** \verbatim - ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief Implementation of the theory of strings. - ** - ** Implementation of the theory of strings. 
- **/ - -#include "theory/strings/theory_strings_rewriter.h" - -#include -#include - -#include "expr/node_builder.h" -#include "options/strings_options.h" -#include "smt/logic_exception.h" -#include "theory/arith/arith_msum.h" -#include "theory/strings/regexp_operation.h" -#include "theory/strings/theory_strings_utils.h" -#include "theory/strings/word.h" -#include "theory/theory.h" -#include "util/integer.h" -#include "util/rational.h" - -using namespace std; -using namespace CVC4; -using namespace CVC4::kind; -using namespace CVC4::theory; -using namespace CVC4::theory::strings; - -Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir ){ - Trace("regexp-ext-rewrite-debug") - << "Simple reg exp consume, dir=" << dir << ":" << std::endl; - Trace("regexp-ext-rewrite-debug") - << " mchildren : " << mchildren << std::endl; - Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; - NodeManager* nm = NodeManager::currentNM(); - unsigned tmin = dir<0 ? 0 : dir; - unsigned tmax = dir<0 ? 
1 : dir; - //try to remove off front and back - for( unsigned t=0; t<2; t++ ){ - if( tmin<=t && t<=tmax ){ - bool do_next = true; - while( !children.empty() && !mchildren.empty() && do_next ){ - do_next = false; - Node xc = mchildren[mchildren.size()-1]; - Node rc = children[children.size()-1]; - Assert(rc.getKind() != kind::REGEXP_CONCAT); - Assert(xc.getKind() != kind::STRING_CONCAT); - if( rc.getKind() == kind::STRING_TO_REGEXP ){ - if( xc==rc[0] ){ - children.pop_back(); - mchildren.pop_back(); - do_next = true; - Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; - }else if( xc.isConst() && rc[0].isConst() ){ - //split the constant - int index; - Node s = splitConstant( xc, rc[0], index, t==0 ); - Trace("regexp-ext-rewrite-debug") << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl; - if( s.isNull() ){ - Trace("regexp-ext-rewrite-debug") - << "...return false" << std::endl; - return NodeManager::currentNM()->mkConst( false ); - }else{ - Trace("regexp-ext-rewrite-debug") - << "...strip equal const" << std::endl; - children.pop_back(); - mchildren.pop_back(); - if( index==0 ){ - mchildren.push_back( s ); - }else{ - children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); - } - } - do_next = true; - } - }else if( xc.isConst() ){ - //check for constants - CVC4::String s = xc.getConst(); - if (Word::isEmpty(xc)) - { - Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; - // ignore and continue - mchildren.pop_back(); - do_next = true; - } - else if (rc.getKind() == kind::REGEXP_RANGE - || rc.getKind() == kind::REGEXP_SIGMA) - { - std::vector ssVec; - ssVec.push_back(t == 0 ? 
s.back() : s.front()); - CVC4::String ss(ssVec); - if( testConstStringInRegExp( ss, 0, rc ) ){ - //strip off one character - mchildren.pop_back(); - if( s.size()>1 ){ - if( t==0 ){ - mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 0, s.size()-1 )) ); - }else{ - mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 1 )) ); - } - } - children.pop_back(); - do_next = true; - }else{ - return NodeManager::currentNM()->mkConst( false ); - } - }else if( rc.getKind()==kind::REGEXP_INTER || rc.getKind()==kind::REGEXP_UNION ){ - //see if any/each child does not work - bool result_valid = true; - Node result; - Node emp_s = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - for( unsigned i=0; i mchildren_s; - std::vector< Node > children_s; - mchildren_s.push_back( xc ); - utils::getConcat(rc[i], children_s); - Node ret = simpleRegexpConsume( mchildren_s, children_s, t ); - if( !ret.isNull() ){ - // one conjunct cannot be satisfied, return false - if( rc.getKind()==kind::REGEXP_INTER ){ - return ret; - } - }else{ - if( children_s.empty() ){ - //if we were able to fully consume, store the result - Assert(mchildren_s.size() <= 1); - if( mchildren_s.empty() ){ - mchildren_s.push_back( emp_s ); - } - if( result.isNull() ){ - result = mchildren_s[0]; - }else if( result!=mchildren_s[0] ){ - result_valid = false; - } - }else{ - result_valid = false; - } - } - } - if( result_valid ){ - if( result.isNull() ){ - //all disjuncts cannot be satisfied, return false - Assert(rc.getKind() == kind::REGEXP_UNION); - return NodeManager::currentNM()->mkConst( false ); - }else{ - //all branches led to the same result - children.pop_back(); - mchildren.pop_back(); - if( result!=emp_s ){ - mchildren.push_back( result ); - } - do_next = true; - } - } - }else if( rc.getKind()==kind::REGEXP_STAR ){ - //check if there is no way that this star can be unrolled even once - std::vector< Node > mchildren_s; - mchildren_s.insert( mchildren_s.end(), mchildren.begin(), 
mchildren.end() ); - if( t==1 ){ - std::reverse( mchildren_s.begin(), mchildren_s.end() ); - } - std::vector< Node > children_s; - utils::getConcat(rc[0], children_s); - Trace("regexp-ext-rewrite-debug") - << "...recursive call on body of star" << std::endl; - Node ret = simpleRegexpConsume( mchildren_s, children_s, t ); - if( !ret.isNull() ){ - Trace("regexp-ext-rewrite-debug") << "CRE : regexp star infeasable " << xc << " " << rc << std::endl; - children.pop_back(); - if (!children.empty()) - { - Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; - do_next = true; - } - }else{ - if( children_s.empty() ){ - //check if beyond this, we can't do it or there is nothing left, if so, repeat - bool can_skip = false; - if( children.size()>1 ){ - std::vector< Node > mchildren_ss; - mchildren_ss.insert( mchildren_ss.end(), mchildren.begin(), mchildren.end() ); - std::vector< Node > children_ss; - children_ss.insert( children_ss.end(), children.begin(), children.end()-1 ); - if( t==1 ){ - std::reverse( mchildren_ss.begin(), mchildren_ss.end() ); - std::reverse( children_ss.begin(), children_ss.end() ); - } - if (simpleRegexpConsume(mchildren_ss, children_ss, t) - .isNull()) - { - can_skip = true; - } - } - if( !can_skip ){ - Trace("regexp-ext-rewrite-debug") - << "...can't skip" << std::endl; - //take the result of fully consuming once - if( t==1 ){ - std::reverse( mchildren_s.begin(), mchildren_s.end() ); - } - mchildren.clear(); - mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end() ); - do_next = true; - }else{ - Trace("regexp-ext-rewrite-debug") - << "...can skip " << rc << " from " << xc << std::endl; - } - } - } - } - } - if( !do_next ){ - Trace("regexp-ext-rewrite") << "Cannot consume : " << xc << " " << rc << std::endl; - } - } - } - if( dir!=0 ){ - std::reverse( children.begin(), children.end() ); - std::reverse( mchildren.begin(), mchildren.end() ); - } - } - return Node::null(); -} - -Node 
TheoryStringsRewriter::rewriteEquality(Node node) -{ - Assert(node.getKind() == kind::EQUAL); - if (node[0] == node[1]) - { - return NodeManager::currentNM()->mkConst(true); - } - else if (node[0].isConst() && node[1].isConst()) - { - return NodeManager::currentNM()->mkConst(false); - } - - // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false ) - for (unsigned r = 0; r < 2; r++) - { - // must call rewrite contains directly to avoid infinite loop - // we do a fix point since we may rewrite contains terms to simpler - // contains terms. - Node ctn = checkEntailContains(node[r], node[1 - r], false); - if (!ctn.isNull()) - { - if (!ctn.getConst()) - { - return returnRewrite(node, ctn, "eq-nctn"); - } - else - { - // definitely contains but not syntactically equal - // We may be able to simplify, e.g. - // str.++( x, "a" ) == "a" ----> x = "" - } - } - } - - // ( len( s ) != len( t ) ) => ( s == t ---> false ) - // This covers cases like str.++( x, x ) == "a" ---> false - Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); - Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - Node len_eq = len0.eqNode(len1); - len_eq = Rewriter::rewrite(len_eq); - if (len_eq.isConst() && !len_eq.getConst()) - { - return returnRewrite(node, len_eq, "eq-len-deq"); - } - - std::vector c[2]; - for (unsigned i = 0; i < 2; i++) - { - utils::getConcat(node[i], c[i]); - } - - // check if the prefix, suffix mismatches - // For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false - unsigned minsize = std::min(c[0].size(), c[1].size()); - for (unsigned r = 0; r < 2; r++) - { - for (unsigned i = 0; i < minsize; i++) - { - unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i; - unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i; - if (c[0][index1].isConst() && c[1][index2].isConst()) - { - CVC4::String s = c[0][index1].getConst(); - CVC4::String t = c[1][index2].getConst(); - unsigned len_short = s.size() <= t.size() ? 
s.size() : t.size(); - bool isSameFix = - r == 1 ? s.rstrncmp(t, len_short) : s.strncmp(t, len_short); - if (!isSameFix) - { - Node ret = NodeManager::currentNM()->mkConst(false); - return returnRewrite(node, ret, "eq-nfix"); - } - } - if (c[0][index1] != c[1][index2]) - { - break; - } - } - } - - // standard ordering - if (node[0] > node[1]) - { - return NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]); - } - return node; -} - -Node TheoryStringsRewriter::rewriteEqualityExt(Node node) -{ - Assert(node.getKind() == EQUAL); - if (node[0].getType().isInteger()) - { - return rewriteArithEqualityExt(node); - } - if (node[0].getType().isString()) - { - return rewriteStrEqualityExt(node); - } - return node; -} - -Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) -{ - Assert(node.getKind() == EQUAL && node[0].getType().isString()); - - NodeManager* nm = NodeManager::currentNM(); - std::vector c[2]; - Node new_ret; - for (unsigned i = 0; i < 2; i++) - { - utils::getConcat(node[i], c[i]); - } - // ------- equality unification - bool changed = false; - for (unsigned i = 0; i < 2; i++) - { - while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back()) - { - c[0].pop_back(); - c[1].pop_back(); - changed = true; - } - // splice constants - if (!c[0].empty() && !c[1].empty() && c[0].back().isConst() - && c[1].back().isConst()) - { - Node cs[2]; - size_t csl[2]; - for (unsigned j = 0; j < 2; j++) - { - cs[j] = c[j].back(); - csl[j] = Word::getLength(cs[j]); - } - size_t larger = csl[0] > csl[1] ? 0 : 1; - size_t smallerSize = csl[1 - larger]; - if (cs[1 - larger] - == (i == 0 ? Word::suffix(cs[larger], smallerSize) - : Word::prefix(cs[larger], smallerSize))) - { - size_t sizeDiff = csl[larger] - smallerSize; - c[larger][c[larger].size() - 1] = - i == 0 ? 
Word::prefix(cs[larger], sizeDiff) - : Word::suffix(cs[larger], sizeDiff); - c[1 - larger].pop_back(); - changed = true; - } - } - for (unsigned j = 0; j < 2; j++) - { - std::reverse(c[j].begin(), c[j].end()); - } - } - if (changed) - { - // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y - Node s1 = utils::mkConcat(STRING_CONCAT, c[0]); - Node s2 = utils::mkConcat(STRING_CONCAT, c[1]); - new_ret = s1.eqNode(s2); - node = returnRewrite(node, new_ret, "str-eq-unify"); - } - - // ------- homogeneous constants - for (unsigned i = 0; i < 2; i++) - { - Node cn = checkEntailHomogeneousString(node[i]); - if (!cn.isNull() && !Word::isEmpty(cn)) - { - Assert(cn.isConst()); - Assert(Word::getLength(cn) == 1); - unsigned hchar = cn.getConst().front(); - - // The operands of the concat on each side of the equality without - // constant strings - std::vector trimmed[2]; - // Counts the number of `hchar`s on each side - size_t numHChars[2] = {0, 0}; - for (size_t j = 0; j < 2; j++) - { - // Sort the operands of the concats on both sides of the equality - // (since both sides may only contain one char, the order does not - // matter) - std::sort(c[j].begin(), c[j].end()); - for (const Node& cc : c[j]) - { - if (cc.isConst()) - { - // Count the number of `hchar`s in the string constant and make - // sure that all chars are `hchar`s - std::vector veccc = cc.getConst().getVec(); - for (size_t k = 0, size = veccc.size(); k < size; k++) - { - if (veccc[k] != hchar) - { - // This conflict case should mostly should be taken care of by - // multiset reasoning in the strings rewriter, but we recognize - // this conflict just in case. 
- new_ret = nm->mkConst(false); - return returnRewrite( - node, new_ret, "string-eq-const-conflict-non-homog"); - } - numHChars[j]++; - } - } - else - { - trimmed[j].push_back(cc); - } - } - } - - // We have to remove the same number of `hchar`s from both sides, so the - // side with less `hchar`s determines how many we can remove - size_t trimmedConst = std::min(numHChars[0], numHChars[1]); - for (size_t j = 0; j < 2; j++) - { - size_t diff = numHChars[j] - trimmedConst; - if (diff != 0) - { - // Add a constant string to the side with more `hchar`s to restore - // the difference in number of `hchar`s - std::vector vec(diff, hchar); - trimmed[j].push_back(nm->mkConst(String(vec))); - } - } - - Node lhs = utils::mkConcat(STRING_CONCAT, trimmed[i]); - Node ss = utils::mkConcat(STRING_CONCAT, trimmed[1 - i]); - if (lhs != node[i] || ss != node[1 - i]) - { - // e.g. - // "AA" = y ++ x ---> "AA" = x ++ y if x < y - // "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z - new_ret = lhs.eqNode(ss); - node = returnRewrite(node, new_ret, "str-eq-homog-const"); - } - } - } - - // ------- rewrites for (= "" _) - Node empty = nm->mkConst(::CVC4::String("")); - for (size_t i = 0; i < 2; i++) - { - if (node[i] == empty) - { - Node ne = node[1 - i]; - if (ne.getKind() == STRING_STRREPL) - { - // (= "" (str.replace x y x)) ---> (= x "") - if (ne[0] == ne[2]) - { - Node ret = nm->mkNode(EQUAL, ne[0], empty); - return returnRewrite(node, ret, "str-emp-repl-x-y-x"); - } - - // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y ""))) - if (checkEntailNonEmpty(ne[2])) - { - Node ret = - nm->mkNode(AND, - nm->mkNode(EQUAL, ne[0], empty), - nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty))); - return returnRewrite(node, ret, "str-emp-repl-emp"); - } - - // (= "" (str.replace x "A" "")) ---> (str.prefix x "A") - if (checkEntailLengthOne(ne[1]) && ne[2] == empty) - { - Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]); - return returnRewrite(node, ret, "str-emp-repl-emp"); - } - } - else if 
(ne.getKind() == STRING_SUBSTR) - { - Node zero = nm->mkConst(Rational(0)); - - if (checkEntailArith(ne[1], false) && checkEntailArith(ne[2], true)) - { - // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0 - if (ne[1] == zero) - { - Node ret = nm->mkNode(EQUAL, ne[0], empty); - return returnRewrite(node, ret, "str-emp-substr-leq-len"); - } - - // (= "" (str.substr x n m)) ---> (<= (str.len x) n) - // if n >= 0 and m > 0 - Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]); - return returnRewrite(node, ret, "str-emp-substr-leq-len"); - } - - // (= "" (str.substr "A" 0 z)) ---> (<= z 0) - if (checkEntailNonEmpty(ne[0]) && ne[1] == zero) - { - Node ret = nm->mkNode(LEQ, ne[2], zero); - return returnRewrite(node, ret, "str-emp-substr-leq-z"); - } - } - } - } - - // ------- rewrites for (= (str.replace _ _ _) _) - for (size_t i = 0; i < 2; i++) - { - if (node[i].getKind() == STRING_STRREPL) - { - Node repl = node[i]; - Node x = node[1 - i]; - - // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x)) - if (checkEntailNonEmpty(x) && repl[0] == empty) - { - Node ret = nm->mkNode( - EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1])); - return returnRewrite(node, ret, "str-eq-repl-emp"); - } - - // (= x (str.replace y x y)) ---> (= x y) - if (repl[0] == repl[2] && x == repl[1]) - { - Node ret = nm->mkNode(EQUAL, x, repl[0]); - return returnRewrite(node, ret, "str-eq-repl-to-eq"); - } - - // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A")) - if (x == repl[0]) - { - Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2])); - if (eq.isConst() && !eq.getConst()) - { - Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1])); - return returnRewrite(node, ret, "str-eq-repl-not-ctn"); - } - } - - // (= (str.replace x y z) z) --> (or (= x y) (= x z)) - // if (str.len y) = (str.len z) - if (repl[2] == x) - { - Node lenY = nm->mkNode(STRING_LENGTH, repl[1]); - Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]); - if 
(checkEntailArithEq(lenY, lenZ)) - { - Node ret = nm->mkNode(OR, - nm->mkNode(EQUAL, repl[0], repl[1]), - nm->mkNode(EQUAL, repl[0], repl[2])); - return returnRewrite(node, ret, "str-eq-repl-to-dis"); - } - } - } - } - - // Try to rewrite (= x y) into a conjunction of equalities based on length - // entailment. - // - // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) ---> - // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) - // - // where yi' and yi'' correspond to some yj and - // (<= (str.len x) (str.++ y1' ... ym')) - for (unsigned i = 0; i < 2; i++) - { - if (node[1 - i].getKind() == STRING_CONCAT) - { - new_ret = inferEqsFromContains(node[i], node[1 - i]); - if (!new_ret.isNull()) - { - return returnRewrite(node, new_ret, "str-eq-conj-len-entail"); - } - } - } - - if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT) - { - // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n) - // (str.++ y_1 ... y_j y_{j + 1} ... y_m)) ---> - // (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j)) - // (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m))) - // - // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j)) - // - // This rewrite performs length-based equality splitting: If we can show - // that two prefixes have the same length, we can split an equality into - // two equalities, one over the prefixes and another over the suffixes. 
- std::vector v0, v1; - utils::getConcat(node[0], v0); - utils::getConcat(node[1], v1); - size_t startRhs = 0; - for (size_t i = 0, size0 = v0.size(); i <= size0; i++) - { - std::vector pfxv0(v0.begin(), v0.begin() + i); - Node pfx0 = utils::mkConcat(STRING_CONCAT, pfxv0); - for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++) - { - if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size())) - { - std::vector pfxv1(v1.begin(), v1.begin() + j); - Node pfx1 = utils::mkConcat(STRING_CONCAT, pfxv1); - Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0); - Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1); - - if (checkEntailArithEq(lenPfx0, lenPfx1)) - { - std::vector sfxv0(v0.begin() + i, v0.end()); - std::vector sfxv1(v1.begin() + j, v1.end()); - Node ret = - nm->mkNode(kind::AND, - pfx0.eqNode(pfx1), - utils::mkConcat(STRING_CONCAT, sfxv0) - .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); - return returnRewrite(node, ret, "split-eq"); - } - else if (checkEntailArith(lenPfx1, lenPfx0, true)) - { - // The prefix on the right-hand side is strictly longer than the - // prefix on the left-hand side, so we try to strip the right-hand - // prefix by the length of the left-hand prefix - // - // Example: - // (= (str.++ "A" x y) (str.++ x "AB" z)) ---> - // (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z))) - std::vector rpfxv1; - if (stripSymbolicLength(pfxv1, rpfxv1, 1, lenPfx0)) - { - std::vector sfxv0(v0.begin() + i, v0.end()); - pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end()); - Node ret = nm->mkNode( - kind::AND, - pfx0.eqNode(utils::mkConcat(STRING_CONCAT, rpfxv1)), - utils::mkConcat(STRING_CONCAT, sfxv0) - .eqNode(utils::mkConcat(STRING_CONCAT, pfxv1))); - return returnRewrite(node, ret, "split-eq-strip-r"); - } - - // If the prefix of the right-hand side is (strictly) longer than - // the prefix of the left-hand side, we can advance the left-hand - // side (since the length of the right-hand side is only increasing - // in the inner loop) - 
break; - } - else if (checkEntailArith(lenPfx0, lenPfx1, true)) - { - // The prefix on the left-hand side is strictly longer than the - // prefix on the right-hand side, so we try to strip the left-hand - // prefix by the length of the right-hand prefix - // - // Example: - // (= (str.++ x "AB" z) (str.++ "A" x y)) ---> - // (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y)) - std::vector rpfxv0; - if (stripSymbolicLength(pfxv0, rpfxv0, 1, lenPfx1)) - { - pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end()); - std::vector sfxv1(v1.begin() + j, v1.end()); - Node ret = nm->mkNode( - kind::AND, - utils::mkConcat(STRING_CONCAT, rpfxv0).eqNode(pfx1), - utils::mkConcat(STRING_CONCAT, pfxv0) - .eqNode(utils::mkConcat(STRING_CONCAT, sfxv1))); - return returnRewrite(node, ret, "split-eq-strip-l"); - } - - // If the prefix of the left-hand side is (strictly) longer than - // the prefix of the right-hand side, then we don't need to check - // that right-hand prefix for future left-hand prefixes anymore - // (since they are increasing in length) - startRhs = j + 1; - } - } - } - } - } - - return node; -} - -Node TheoryStringsRewriter::rewriteArithEqualityExt(Node node) -{ - Assert(node.getKind() == EQUAL && node[0].getType().isInteger()); - - // cases where we can solve the equality - - // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes. 
- - return node; -} - -// TODO (#1180) add rewrite -// str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) ---> -// str.substr( x, n1, n2+n3 ) -Node TheoryStringsRewriter::rewriteConcat(Node node) -{ - Assert(node.getKind() == kind::STRING_CONCAT); - Trace("strings-rewrite-debug") - << "Strings::rewriteConcat start " << node << std::endl; - NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; - std::vector node_vec; - Node preNode = Node::null(); - for (Node tmpNode : node) - { - if (tmpNode.getKind() == STRING_CONCAT) - { - unsigned j = 0; - // combine the first term with the previous constant if applicable - if (!preNode.isNull()) - { - if (tmpNode[0].isConst()) - { - preNode = nm->mkConst( - preNode.getConst().concat(tmpNode[0].getConst())); - node_vec.push_back(preNode); - } - else - { - node_vec.push_back(preNode); - node_vec.push_back(tmpNode[0]); - } - preNode = Node::null(); - ++j; - } - // insert the middle terms to node_vec - if (j <= tmpNode.getNumChildren() - 1) - { - node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1); - } - // take the last term as the current - tmpNode = tmpNode[tmpNode.getNumChildren() - 1]; - } - if(!tmpNode.isConst()) { - if(!preNode.isNull()) { - if (preNode.isConst() && !Word::isEmpty(preNode)) - { - node_vec.push_back( preNode ); - } - preNode = Node::null(); - } - node_vec.push_back( tmpNode ); - }else{ - if( preNode.isNull() ){ - preNode = tmpNode; - }else{ - std::vector vec; - vec.push_back(preNode); - vec.push_back(tmpNode); - preNode = Word::mkWord(vec); - } - } - } - if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode))) - { - node_vec.push_back( preNode ); - } - - // Sort adjacent operands in str.++ that all result in the same string or the - // empty string. - // - // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) --> - // (str.++ ... [sort those 3 arguments] ... 
) - size_t lastIdx = 0; - Node lastX; - for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++) - { - Node s = getStringOrEmpty(node_vec[i]); - bool nextX = false; - if (s != lastX) - { - nextX = true; - } - - if (nextX) - { - std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i); - lastX = s; - lastIdx = i; - } - } - std::sort(node_vec.begin() + lastIdx, node_vec.end()); - - retNode = utils::mkConcat(STRING_CONCAT, node_vec); - Trace("strings-rewrite-debug") - << "Strings::rewriteConcat end " << retNode << std::endl; - return retNode; -} - -Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node) -{ - Assert(node.getKind() == kind::REGEXP_CONCAT); - NodeManager* nm = NodeManager::currentNM(); - Trace("strings-rewrite-debug") - << "Strings::rewriteConcatRegExp flatten " << node << std::endl; - Node retNode = node; - std::vector vec; - bool changed = false; - Node emptyRe; - for (const Node& c : node) - { - if (c.getKind() == REGEXP_CONCAT) - { - changed = true; - for (const Node& cc : c) - { - vec.push_back(cc); - } - } - else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst() - && Word::isEmpty(c[0])) - { - changed = true; - emptyRe = c; - } - else if (c.getKind() == REGEXP_EMPTY) - { - // re.++( ..., empty, ... ) ---> empty - std::vector nvec; - return nm->mkNode(REGEXP_EMPTY, nvec); - } - else - { - vec.push_back(c); - } - } - if (changed) - { - // flatten - // this handles nested re.++ and elimination or str.to.re(""), e.g.: - // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 ) - if (vec.empty()) - { - Assert(!emptyRe.isNull()); - retNode = emptyRe; - } - else - { - retNode = vec.size() == 1 ? 
vec[0] : nm->mkNode(REGEXP_CONCAT, vec); - } - return returnRewrite(node, retNode, "re.concat-flatten"); - } - Trace("strings-rewrite-debug") - << "Strings::rewriteConcatRegExp start " << node << std::endl; - std::vector cvec; - // the current accumulation of constant strings - std::vector preReStr; - // whether the last component was (_)* - bool lastAllStar = false; - String emptyStr = String(""); - // this loop checks to see if components can be combined or dropped - for (unsigned i = 0, size = vec.size(); i <= size; i++) - { - Node curr; - if (i < size) - { - curr = vec[i]; - Assert(curr.getKind() != REGEXP_CONCAT); - } - // update preReStr - if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP) - { - lastAllStar = false; - preReStr.push_back(curr[0]); - curr = Node::null(); - } - else if (!preReStr.empty()) - { - Assert(!lastAllStar); - // this groups consecutive strings a++b ---> ab - Node acc = nm->mkNode(STRING_TO_REGEXP, - utils::mkConcat(STRING_CONCAT, preReStr)); - cvec.push_back(acc); - preReStr.clear(); - } - else if (!curr.isNull() && lastAllStar) - { - // if empty, drop it - // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)* - if (isConstRegExp(curr) && testConstStringInRegExp(emptyStr, 0, curr)) - { - curr = Node::null(); - } - } - if (!curr.isNull()) - { - lastAllStar = false; - if (curr.getKind() == REGEXP_STAR) - { - // we can group stars (a)* ++ (a)* ---> (a)* - if (!cvec.empty() && cvec.back() == curr) - { - curr = Node::null(); - } - else if (curr[0].getKind() == REGEXP_SIGMA) - { - Assert(!lastAllStar); - lastAllStar = true; - // go back and remove empty ones from back of cvec - // e.g. 
this ensures we rewrite (a)* ++ (_)* ---> (_)* - while (!cvec.empty() && isConstRegExp(cvec.back()) - && testConstStringInRegExp(emptyStr, 0, cvec.back())) - { - cvec.pop_back(); - } - } - } - } - if (!curr.isNull()) - { - cvec.push_back(curr); - } - } - Assert(!cvec.empty()); - retNode = utils::mkConcat(REGEXP_CONCAT, cvec); - if (retNode != node) - { - // handles all cases where consecutive re constants are combined or dropped - // as described in the loop above. - return returnRewrite(node, retNode, "re.concat"); - } - - // flipping adjacent star arguments - changed = false; - for (size_t i = 0, size = cvec.size() - 1; i < size; i++) - { - if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1]) - { - // by convention, flip the order (a*)++a ---> a++(a*) - std::swap(cvec[i], cvec[i+1]); - changed = true; - } - } - if (changed) - { - retNode = utils::mkConcat(REGEXP_CONCAT, cvec); - return returnRewrite(node, retNode, "re.concat.opt"); - } - return node; -} - -Node TheoryStringsRewriter::rewriteStarRegExp(TNode node) -{ - Assert(node.getKind() == REGEXP_STAR); - NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; - if (node[0].getKind() == REGEXP_STAR) - { - // ((R)*)* ---> R* - return returnRewrite(node, node[0], "re-star-nested-star"); - } - else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst() - && Word::isEmpty(node[0][0])) - { - // ("")* ---> "" - return returnRewrite(node, node[0], "re-star-empty-string"); - } - else if (node[0].getKind() == REGEXP_EMPTY) - { - // (empty)* ---> "" - retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))); - return returnRewrite(node, retNode, "re-star-empty"); - } - else if (node[0].getKind() == REGEXP_UNION) - { - // simplification of unions under star - if (hasEpsilonNode(node[0])) - { - bool changed = false; - std::vector node_vec; - for (const Node& nc : node[0]) - { - if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst() - && Word::isEmpty(nc[0])) - { - // can be 
removed - changed = true; - } - else - { - node_vec.push_back(nc); - } - } - if (changed) - { - retNode = node_vec.size() == 1 ? node_vec[0] - : nm->mkNode(REGEXP_UNION, node_vec); - retNode = nm->mkNode(REGEXP_STAR, retNode); - // simplification of union beneath star based on loop above - // for example, ( "" | "a" )* ---> ("a")* - return returnRewrite(node, retNode, "re-star-union"); - } - } - } - return node; -} - -Node TheoryStringsRewriter::rewriteAndOrRegExp(TNode node) -{ - Kind nk = node.getKind(); - Assert(nk == REGEXP_UNION || nk == REGEXP_INTER); - Trace("strings-rewrite-debug") - << "Strings::rewriteAndOrRegExp start " << node << std::endl; - std::vector node_vec; - for (const Node& ni : node) - { - if (ni.getKind() == nk) - { - for (const Node& nic : ni) - { - if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end()) - { - node_vec.push_back(nic); - } - } - } - else if (ni.getKind() == REGEXP_EMPTY) - { - if (nk == REGEXP_INTER) - { - return returnRewrite(node, ni, "re.and-empty"); - } - // otherwise, can ignore - } - else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA) - { - if (nk == REGEXP_UNION) - { - return returnRewrite(node, ni, "re.or-all"); - } - // otherwise, can ignore - } - else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end()) - { - node_vec.push_back(ni); - } - } - NodeManager* nm = NodeManager::currentNM(); - std::vector nvec; - Node retNode; - if (node_vec.empty()) - { - if (nk == REGEXP_INTER) - { - retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, nvec)); - } - else - { - retNode = nm->mkNode(kind::REGEXP_EMPTY, nvec); - } - } - else - { - retNode = node_vec.size() == 1 ? 
node_vec[0] : nm->mkNode(nk, node_vec); - } - if (retNode != node) - { - // flattening and removing children, based on loop above - return returnRewrite(node, retNode, "re.andor-flatten"); - } - return node; -} - -Node TheoryStringsRewriter::rewriteLoopRegExp(TNode node) -{ - Assert(node.getKind() == REGEXP_LOOP); - Node retNode = node; - Node r = node[0]; - if (r.getKind() == REGEXP_STAR) - { - return returnRewrite(node, r, "re.loop-star"); - } - TNode n1 = node[1]; - NodeManager* nm = NodeManager::currentNM(); - CVC4::Rational rMaxInt(String::maxSize()); - AlwaysAssert(n1.isConst()) << "re.loop contains non-constant integer (1)."; - AlwaysAssert(n1.getConst().sgn() >= 0) - << "Negative integer in string REGEXP_LOOP (1)"; - Assert(n1.getConst() <= rMaxInt) - << "Exceeded UINT32_MAX in string REGEXP_LOOP (1)"; - uint32_t l = n1.getConst().getNumerator().toUnsignedInt(); - std::vector vec_nodes; - for (unsigned i = 0; i < l; i++) - { - vec_nodes.push_back(r); - } - if (node.getNumChildren() == 3) - { - TNode n2 = Rewriter::rewrite(node[2]); - Node n = - vec_nodes.size() == 0 - ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))) - : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes); - AlwaysAssert(n2.isConst()) << "re.loop contains non-constant integer (2)."; - AlwaysAssert(n2.getConst().sgn() >= 0) - << "Negative integer in string REGEXP_LOOP (2)"; - Assert(n2.getConst() <= rMaxInt) - << "Exceeded UINT32_MAX in string REGEXP_LOOP (2)"; - uint32_t u = n2.getConst().getNumerator().toUnsignedInt(); - if (u <= l) - { - retNode = n; - } - else - { - std::vector vec2; - vec2.push_back(n); - for (unsigned j = l; j < u; j++) - { - vec_nodes.push_back(r); - n = utils::mkConcat(REGEXP_CONCAT, vec_nodes); - vec2.push_back(n); - } - retNode = nm->mkNode(REGEXP_UNION, vec2); - } - } - else - { - Node rest = nm->mkNode(REGEXP_STAR, r); - retNode = vec_nodes.size() == 0 - ? rest - : vec_nodes.size() == 1 - ? 
nm->mkNode(REGEXP_CONCAT, r, rest) - : nm->mkNode(REGEXP_CONCAT, - nm->mkNode(REGEXP_CONCAT, vec_nodes), - rest); - } - Trace("strings-lp") << "Strings::lp " << node << " => " << retNode - << std::endl; - if (retNode != node) - { - return returnRewrite(node, retNode, "re.loop"); - } - return node; -} - -bool TheoryStringsRewriter::isConstRegExp( TNode t ) { - if( t.getKind()==kind::STRING_TO_REGEXP ) { - return t[0].isConst(); - } - else if (t.isVar()) - { - return false; - }else{ - for( unsigned i = 0; i() ); - } - else - { - Assert(false) << "RegExp contains variables"; - return false; - } - } - case kind::REGEXP_CONCAT: { - if( s.size() != index_start ) { - std::vector vec_k( r.getNumChildren(), -1 ); - int start = 0; - int left = (int) s.size() - index_start; - int i=0; - while( i<(int) r.getNumChildren() ) { - bool flag = true; - if( i == (int) r.getNumChildren() - 1 ) { - if( testConstStringInRegExp( s, index_start + start, r[i] ) ) { - return true; - } - } else if( i == -1 ) { - return false; - } else { - for(vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) { - CVC4::String t = s.substr(index_start + start, vec_k[i]); - if( testConstStringInRegExp( t, 0, r[i] ) ) { - start += vec_k[i]; left -= vec_k[i]; flag = false; - ++i; vec_k[i] = -1; - break; - } - } - } - - if(flag) { - --i; - if(i >= 0) { - start -= vec_k[i]; left += vec_k[i]; - } - } - } - return false; - } else { - for(unsigned i=0; i 0; --i) - { - CVC4::String t = s.substr(index_start, i); - if( testConstStringInRegExp( t, 0, r[0] ) ) { - if (index_start + i == s.size() - || testConstStringInRegExp(s, index_start + i, r)) - { - return true; - } - } - } - return false; - } else { - return true; - } - } - case kind::REGEXP_EMPTY: { - return false; - } - case kind::REGEXP_SIGMA: { - if(s.size() == index_start + 1) { - return true; - } else { - return false; - } - } - case kind::REGEXP_RANGE: { - if(s.size() == index_start + 1) { - unsigned a = r[0].getConst().front(); - a = 
String::convertUnsignedIntToCode(a); - unsigned b = r[1].getConst().front(); - b = String::convertUnsignedIntToCode(b); - unsigned c = s.back(); - c = String::convertUnsignedIntToCode(c); - return (a <= c && c <= b); - } else { - return false; - } - } - case kind::REGEXP_LOOP: { - uint32_t l = r[1].getConst().getNumerator().toUnsignedInt(); - if(s.size() == index_start) { - return l==0? true : testConstStringInRegExp(s, index_start, r[0]); - } else if(l==0 && r[1]==r[2]) { - return false; - } else { - Assert(r.getNumChildren() == 3) - << "String rewriter error: LOOP has 2 children"; - if(l==0) { - //R{0,u} - uint32_t u = r[2].getConst().getNumerator().toUnsignedInt(); - for(unsigned len=s.size() - index_start; len>=1; len--) { - CVC4::String t = s.substr(index_start, len); - if(testConstStringInRegExp(t, 0, r[0])) { - if(len + index_start == s.size()) { - return true; - } else { - Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(u - 1) ); - Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], r[1], num2); - if(testConstStringInRegExp(s, index_start+len, r2)) { - return true; - } - } - } - } - return false; - } else { - //R{l,l} - Assert(r[1] == r[2]) - << "String rewriter error: LOOP nums are not equal"; - if(l>s.size() - index_start) { - if(testConstStringInRegExp(s, s.size(), r[0])) { - l = s.size() - index_start; - } else { - return false; - } - } - for(unsigned len=1; len<=s.size() - index_start; len++) { - CVC4::String t = s.substr(index_start, len); - if(testConstStringInRegExp(t, 0, r[0])) { - Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(l - 1) ); - Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2); - if(testConstStringInRegExp(s, index_start+len, r2)) { - return true; - } - } - } - return false; - } - } - } - case REGEXP_COMPLEMENT: - { - return !testConstStringInRegExp(s, index_start, r[0]); - break; - } - default: { - Assert(!RegExpOpr::isRegExpKind(k)); - return false; - } - } -} 
- -Node TheoryStringsRewriter::rewriteMembership(TNode node) { - NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; - Node x = node[0]; - Node r = node[1]; - - if(r.getKind() == kind::REGEXP_EMPTY) { - retNode = NodeManager::currentNM()->mkConst( false ); - } - else if (x.isConst() && isConstRegExp(r)) - { - //test whether x in node[1] - CVC4::String s = x.getConst(); - retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, r ) ); - } - else if (r.getKind() == kind::REGEXP_SIGMA) - { - Node one = nm->mkConst(Rational(1)); - retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x)); - } - else if (r.getKind() == kind::REGEXP_STAR) - { - if (x.isConst()) - { - String s = x.getConst(); - if (s.size() == 0) - { - retNode = nm->mkConst(true); - // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true - return returnRewrite(node, retNode, "re-empty-in-str-star"); - } - else if (s.size() == 1) - { - if (r[0].getKind() == STRING_TO_REGEXP) - { - retNode = r[0][0].eqNode(x); - // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x - return returnRewrite(node, retNode, "re-char-in-str-star"); - } - } - } - else if (x.getKind() == STRING_CONCAT) - { - // (str.in.re (str.++ x1 ... xn) (re.* R)) --> - // (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R)) - // if the length of all strings in R is one. 
- Node flr = getFixedLengthForRegexp(r[0]); - if (!flr.isNull()) - { - Node one = nm->mkConst(Rational(1)); - if (flr == one) - { - NodeBuilder<> nb(AND); - for (const Node& xc : x) - { - nb << nm->mkNode(STRING_IN_REGEXP, xc, r); - } - return returnRewrite( - node, nb.constructNode(), "re-in-dist-char-star"); - } - } - } - if (r[0].getKind() == kind::REGEXP_SIGMA) - { - retNode = NodeManager::currentNM()->mkConst( true ); - return returnRewrite(node, retNode, "re-in-sigma-star"); - } - } - else if (r.getKind() == kind::REGEXP_CONCAT) - { - bool allSigma = true; - bool allSigmaStrict = true; - unsigned allSigmaMinSize = 0; - Node constStr; - size_t constIdx = 0; - size_t nchildren = r.getNumChildren(); - for (size_t i = 0; i < nchildren; i++) - { - Node rc = r[i]; - Assert(rc.getKind() != kind::REGEXP_EMPTY); - if (rc.getKind() == kind::REGEXP_SIGMA) - { - allSigmaMinSize++; - } - else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA) - { - allSigmaStrict = false; - } - else if (rc.getKind() == STRING_TO_REGEXP) - { - if (constStr.isNull()) - { - constStr = rc[0]; - constIdx = i; - } - else - { - allSigma = false; - break; - } - } - else - { - allSigma = false; - break; - } - } - if (allSigma) - { - if (constStr.isNull()) - { - // x in re.++(_*, _, _) ---> str.len(x) >= 2 - Node num = nm->mkConst(Rational(allSigmaMinSize)); - Node lenx = nm->mkNode(STRING_LENGTH, x); - retNode = nm->mkNode(allSigmaStrict ? 
EQUAL : GEQ, lenx, num); - return returnRewrite(node, retNode, "re-concat-pure-allchar"); - } - else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0 - && constIdx != nchildren - 1) - { - // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc") - retNode = nm->mkNode(STRING_STRCTN, x, constStr); - return returnRewrite(node, retNode, "re-concat-to-contains"); - } - } - } - else if (r.getKind() == kind::REGEXP_INTER - || r.getKind() == kind::REGEXP_UNION) - { - std::vector< Node > mvec; - for( unsigned i=0; imkNode( kind::STRING_IN_REGEXP, x, r[i] ) ); - } - retNode = NodeManager::currentNM()->mkNode( r.getKind()==kind::REGEXP_INTER ? kind::AND : kind::OR, mvec ); - } - else if (r.getKind() == kind::STRING_TO_REGEXP) - { - retNode = x.eqNode(r[0]); - } - else if (r.getKind() == REGEXP_RANGE) - { - // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j - Node xcode = nm->mkNode(STRING_TO_CODE, x); - retNode = - nm->mkNode(AND, - nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), - nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); - } - else if (r.getKind() == REGEXP_COMPLEMENT) - { - retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); - } - else if (x != node[0] || r != node[1]) - { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r ); - } - - //do simple consumes - if( retNode==node ){ - if( r.getKind()==kind::REGEXP_STAR ){ - for( unsigned dir=0; dir<=1; dir++ ){ - std::vector< Node > mchildren; - utils::getConcat(x, mchildren); - bool success = true; - while( success ){ - success = false; - std::vector< Node > children; - utils::getConcat(r[0], children); - Node scn = simpleRegexpConsume( mchildren, children, dir ); - if( !scn.isNull() ){ - Trace("regexp-ext-rewrite") << "Regexp star : const conflict : " << node << std::endl; - return scn; - }else if( children.empty() ){ - //fully consumed one copy of the STAR - if( mchildren.empty() ){ - Trace("regexp-ext-rewrite") << "Regexp star : full consume : " 
<< node << std::endl; - return NodeManager::currentNM()->mkConst( true ); - }else{ - retNode = nm->mkNode(STRING_IN_REGEXP, - utils::mkConcat(STRING_CONCAT, mchildren), - r); - success = true; - } - } - } - if( retNode!=node ){ - Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node << " -> " << retNode << std::endl; - break; - } - } - }else{ - std::vector< Node > children; - utils::getConcat(r, children); - std::vector< Node > mchildren; - utils::getConcat(x, mchildren); - unsigned prevSize = children.size() + mchildren.size(); - Node scn = simpleRegexpConsume( mchildren, children ); - if( !scn.isNull() ){ - Trace("regexp-ext-rewrite") << "Regexp : const conflict : " << node << std::endl; - return scn; - }else{ - if( (children.size() + mchildren.size())!=prevSize ){ - // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm), - // above, we strip components to construct an equivalent membership: - // (str.++ xi .. xj) in (re.++ rk ... rl). - Node xn = utils::mkConcat(STRING_CONCAT, mchildren); - Node emptyStr = nm->mkConst(String("")); - if( children.empty() ){ - // If we stripped all components on the right, then the left is - // equal to the empty string. - // e.g. 
(str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "") - retNode = xn.eqNode(emptyStr); - }else{ - // otherwise, construct the updated regular expression - retNode = nm->mkNode( - STRING_IN_REGEXP, xn, utils::mkConcat(REGEXP_CONCAT, children)); - } - Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " << retNode << std::endl; - return returnRewrite(node, retNode, "re-simple-consume"); - } - } - } - } - return retNode; -} - -RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { - Trace("strings-postrewrite") << "Strings::postRewrite start " << node << std::endl; - NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; - Node orig = retNode; - Kind nk = node.getKind(); - if (nk == kind::STRING_CONCAT) - { - retNode = rewriteConcat(node); - } - else if (nk == kind::EQUAL) - { - retNode = rewriteEquality(node); - } - else if (nk == kind::STRING_LENGTH) - { - Kind nk0 = node[0].getKind(); - if( node[0].isConst() ){ - retNode = nm->mkConst(Rational(Word::getLength(node[0]))); - } - else if (nk0 == kind::STRING_CONCAT) - { - Node tmpNode = node[0]; - if(tmpNode.isConst()) { - retNode = nm->mkConst(Rational(Word::getLength(tmpNode))); - }else if( tmpNode.getKind()==kind::STRING_CONCAT ){ - std::vector node_vec; - for(unsigned int i=0; imkConst(Rational(Word::getLength(tmpNode[i])))); - } else { - node_vec.push_back( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, tmpNode[i]) ); - } - } - retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); - } - } - else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) - { - Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); - Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); - if (len1 == len2) - { - // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x ) - retNode = nm->mkNode(STRING_LENGTH, node[0][0]); - } - } - else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER - || nk0 == STRING_REV) - { - // len( f( x ) ) == 
len( x ) where f is tolower, toupper, or rev. - retNode = nm->mkNode(STRING_LENGTH, node[0][0]); - } - } - else if (nk == kind::STRING_CHARAT) - { - Node one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); - retNode = NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, node[0], node[1], one); - } - else if (nk == kind::STRING_SUBSTR) - { - retNode = rewriteSubstr(node); - } - else if (nk == kind::STRING_STRCTN) - { - retNode = rewriteContains( node ); - } - else if (nk == kind::STRING_LT) - { - // eliminate s < t ---> s != t AND s <= t - retNode = nm->mkNode(AND, - node[0].eqNode(node[1]).negate(), - nm->mkNode(STRING_LEQ, node[0], node[1])); - } - else if (nk == kind::STRING_LEQ) - { - retNode = rewriteStringLeq(node); - } - else if (nk == kind::STRING_STRIDOF) - { - retNode = rewriteIndexof( node ); - } - else if (nk == kind::STRING_STRREPL) - { - retNode = rewriteReplace( node ); - } - else if (nk == kind::STRING_STRREPLALL) - { - retNode = rewriteReplaceAll(node); - } - else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) - { - retNode = rewriteStrConvert(node); - } - else if (nk == STRING_REV) - { - retNode = rewriteStrReverse(node); - } - else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX) - { - retNode = rewritePrefixSuffix(node); - } - else if (nk == STRING_IS_DIGIT) - { - // eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 - Node t = nm->mkNode(STRING_TO_CODE, node[0]); - retNode = nm->mkNode(AND, - nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), - nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); - } - else if (nk == kind::STRING_ITOS) - { - if(node[0].isConst()) { - if( node[0].getConst().sgn()==-1 ){ - retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - }else{ - std::string stmp = node[0].getConst().getNumerator().toString(); - Assert(stmp[0] != '-'); - retNode = NodeManager::currentNM()->mkConst( ::CVC4::String(stmp) ); - } - } - } - else if (nk == kind::STRING_STOI) - { - if(node[0].isConst()) { - 
CVC4::String s = node[0].getConst(); - if(s.isNumber()) { - retNode = nm->mkConst(s.toNumber()); - } else { - retNode = nm->mkConst(Rational(-1)); - } - } else if(node[0].getKind() == kind::STRING_CONCAT) { - for(unsigned i=0; i(); - if(!t.isNumber()) { - retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); - break; - } - } - } - } - } - else if (nk == kind::STRING_IN_REGEXP) - { - retNode = rewriteMembership(node); - } - else if (nk == STRING_TO_CODE) - { - retNode = rewriteStringToCode(node); - } - else if (nk == REGEXP_CONCAT) - { - retNode = rewriteConcatRegExp(node); - } - else if (nk == REGEXP_UNION || nk == REGEXP_INTER) - { - retNode = rewriteAndOrRegExp(node); - } - else if (nk == REGEXP_DIFF) - { - retNode = nm->mkNode(REGEXP_INTER, node[0],nm->mkNode(REGEXP_COMPLEMENT, node[1])); - } - else if (nk == REGEXP_STAR) - { - retNode = rewriteStarRegExp(node); - } - else if (nk == REGEXP_PLUS) - { - retNode = - nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0])); - } - else if (nk == REGEXP_OPT) - { - retNode = nm->mkNode(REGEXP_UNION, - nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))), - node[0]); - } - else if (nk == REGEXP_RANGE) - { - if (node[0] == node[1]) - { - retNode = nm->mkNode(STRING_TO_REGEXP, node[0]); - } - } - else if (nk == REGEXP_LOOP) - { - retNode = rewriteLoopRegExp(node); - } - - Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl; - if( orig!=retNode ){ - Trace("strings-rewrite-debug") << "Strings: post-rewrite " << orig << " to " << retNode << std::endl; - } - return RewriteResponse(orig==retNode ? 
REWRITE_DONE : REWRITE_AGAIN_FULL, retNode); -} - -bool TheoryStringsRewriter::hasEpsilonNode(TNode node) { - for(unsigned int i=0; i() > rMaxInt) - { - // start beyond the maximum size of strings - // thus, it must be beyond the end point of this string - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-const-start-max-oob"); - } - else if (node[1].getConst().sgn() < 0) - { - // start before the beginning of the string - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-const-start-neg"); - } - else - { - start = node[1].getConst().getNumerator().toUnsignedInt(); - if (start >= Word::getLength(node[0])) - { - // start beyond the end of the string - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-const-start-oob"); - } - } - if (node[2].getConst() > rMaxInt) - { - // take up to the end of the string - size_t lenS = Word::getLength(s); - Node ret = Word::suffix(s, lenS - start); - return returnRewrite(node, ret, "ss-const-len-max-oob"); - } - else if (node[2].getConst().sgn() <= 0) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-const-len-non-pos"); - } - else - { - uint32_t len = - node[2].getConst().getNumerator().toUnsignedInt(); - if (start + len > Word::getLength(node[0])) - { - // take up to the end of the string - size_t lenS = Word::getLength(s); - Node ret = Word::suffix(s, lenS - start); - return returnRewrite(node, ret, "ss-const-end-oob"); - } - else - { - // compute the substr using the constant string - Node ret = Word::substr(s, start, len); - return returnRewrite(node, ret, "ss-const-ss"); - } - } - } - } - Node zero = nm->mkConst(CVC4::Rational(0)); - - // if entailed non-positive length or negative start point - if (checkEntailArith(zero, node[1], true)) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-start-neg"); - } - else if (checkEntailArith(zero, node[2])) - 
{ - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-len-non-pos"); - } - - if (node[0].getKind() == STRING_SUBSTR) - { - // (str.substr (str.substr x a b) c d) ---> "" if c >= b - // - // Note that this rewrite can be generalized to: - // - // (str.substr x a b) ---> "" if a >= (str.len x) - // - // This can be done when we generalize our entailment methods to - // accept an optional context. Then we could conjecture that - // (str.substr x a b) rewrites to "" and do a case analysis: - // - // - a < 0 or b < 0 (the result is trivially empty in these cases) - // - a >= (str.len x) assuming that { a >= 0, b >= 0 } - // - // For example, for (str.substr (str.substr x a a) a a), we could - // then deduce that under those assumptions, "a" is an - // over-approximation of the length of (str.substr x a a), which - // then allows us to reason that the result of the whole term must - // be empty. - if (checkEntailArith(node[1], node[0][2])) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-start-geq-len"); - } - } - else if (node[0].getKind() == STRING_STRREPL) - { - // (str.substr (str.replace x y z) 0 n) - // ---> (str.replace (str.substr x 0 n) y z) - // if (str.len y) = 1 and (str.len z) = 1 - if (node[1] == zero) - { - if (checkEntailLengthOne(node[0][1], true) - && checkEntailLengthOne(node[0][2], true)) - { - Node ret = nm->mkNode( - kind::STRING_STRREPL, - nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]), - node[0][1], - node[0][2]); - return returnRewrite(node, ret, "substr-repl-swap"); - } - } - } - - std::vector n1; - utils::getConcat(node[0], n1); - - // definite inclusion - if (node[1] == zero) - { - Node curr = node[2]; - std::vector childrenr; - if (stripSymbolicLength(n1, childrenr, 1, curr)) - { - if (curr != zero && !n1.empty()) - { - childrenr.push_back(nm->mkNode(kind::STRING_SUBSTR, - utils::mkConcat(STRING_CONCAT, n1), - node[1], - curr)); - } - Node ret = 
utils::mkConcat(STRING_CONCAT, childrenr); - return returnRewrite(node, ret, "ss-len-include"); - } - } - - // symbolic length analysis - for (unsigned r = 0; r < 2; r++) - { - // the amount of characters we can strip - Node curr; - if (r == 0) - { - if (node[1] != zero) - { - // strip up to start point off the start of the string - curr = node[1]; - } - } - else if (r == 1) - { - Node tot_len = - Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0])); - Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2])); - if (node[2] != tot_len) - { - if (checkEntailArith(node[2], tot_len)) - { - // end point beyond end point of string, map to tot_len - Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len); - return returnRewrite(node, ret, "ss-end-pt-norm"); - } - else - { - // strip up to ( str.len(node[0]) - end_pt ) off the end of the string - curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt)); - } - } - - // (str.substr s x y) --> "" if x < len(s) |= 0 >= y - Node n1_lt_tot_len = - Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len)); - if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false)) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-start-entails-zero-len"); - } - - // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s) - Node non_zero_len = - Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2])); - if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false)) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-non-zero-len-entails-oob"); - } - - // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s) - Node geq_zero_start = - Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero)); - if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false)) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s"); - } - - // 
(str.substr s x x) ---> "" if (str.len s) <= 1 - if (node[1] == node[2] && checkEntailLengthOne(node[0])) - { - Node ret = Word::mkEmptyWord(node.getType()); - return returnRewrite(node, ret, "ss-len-one-z-z"); - } - } - if (!curr.isNull()) - { - // strip off components while quantity is entailed positive - int dir = r == 0 ? 1 : -1; - std::vector childrenr; - if (stripSymbolicLength(n1, childrenr, dir, curr)) - { - if (r == 0) - { - Node ret = nm->mkNode(kind::STRING_SUBSTR, - utils::mkConcat(STRING_CONCAT, n1), - curr, - node[2]); - return returnRewrite(node, ret, "ss-strip-start-pt"); - } - else - { - Node ret = nm->mkNode(kind::STRING_SUBSTR, - utils::mkConcat(STRING_CONCAT, n1), - node[1], - node[2]); - return returnRewrite(node, ret, "ss-strip-end-pt"); - } - } - } - } - // combine substr - if (node[0].getKind() == kind::STRING_SUBSTR) - { - Node start_inner = node[0][1]; - Node start_outer = node[1]; - if (checkEntailArith(start_outer) && checkEntailArith(start_inner)) - { - // both are positive - // thus, start point is definitely start_inner+start_outer. 
- // We can rewrite if it for certain what the length is - - // the length of a string from the inner substr subtracts the start point - // of the outer substr - Node len_from_inner = - Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer)); - Node len_from_outer = node[2]; - Node new_len; - // take quantity that is for sure smaller than the other - if (len_from_inner == len_from_outer) - { - new_len = len_from_inner; - } - else if (checkEntailArith(len_from_inner, len_from_outer)) - { - new_len = len_from_outer; - } - else if (checkEntailArith(len_from_outer, len_from_inner)) - { - new_len = len_from_inner; - } - if (!new_len.isNull()) - { - Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer); - Node ret = - nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len); - return returnRewrite(node, ret, "ss-combine"); - } - } - } - Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; - return node; -} - -Node TheoryStringsRewriter::rewriteContains( Node node ) { - Assert(node.getKind() == kind::STRING_STRCTN); - NodeManager* nm = NodeManager::currentNM(); - - if( node[0] == node[1] ){ - Node ret = NodeManager::currentNM()->mkConst(true); - return returnRewrite(node, ret, "ctn-eq"); - } - if (node[0].isConst()) - { - CVC4::String s = node[0].getConst(); - if (node[1].isConst()) - { - Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos); - return returnRewrite(node, ret, "ctn-const"); - }else{ - Node t = node[1]; - if (Word::isEmpty(node[0])) - { - Node len1 = - NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(len1, true)) - { - // we handle the false case here since the rewrite for equality - // uses this function, hence we want to conclude false if possible. 
- // len(x)>0 => contains( "", x ) ---> false - Node ret = NodeManager::currentNM()->mkConst(false); - return returnRewrite(node, ret, "ctn-lhs-emptystr"); - } - } - else if (checkEntailLengthOne(t)) - { - const std::vector& vec = s.getVec(); - - NodeBuilder<> nb(OR); - nb << nm->mkConst(String("")).eqNode(t); - for (unsigned c : vec) - { - std::vector sv = {c}; - nb << nm->mkConst(String(sv)).eqNode(t); - } - - // str.contains("ABCabc", t) ---> - // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c" - // if len(t) <= 1 - Node ret = nb; - return returnRewrite(node, ret, "ctn-split"); - } - else if (node[1].getKind() == kind::STRING_CONCAT) - { - int firstc, lastc; - if (!canConstantContainConcat(node[0], node[1], firstc, lastc)) - { - Node ret = NodeManager::currentNM()->mkConst(false); - return returnRewrite(node, ret, "ctn-nconst-ctn-concat"); - } - } - } - } - if (node[1].isConst()) - { - size_t len = Word::getLength(node[1]); - if (len == 0) - { - // contains( x, "" ) ---> true - Node ret = NodeManager::currentNM()->mkConst(true); - return returnRewrite(node, ret, "ctn-rhs-emptystr"); - } - else if (len == 1) - { - // The following rewrites are specific to a single character second - // argument of contains, where we can reason that this character is - // not split over multiple components in the first argument. 
- if (node[0].getKind() == STRING_CONCAT) - { - std::vector nc1; - utils::getConcat(node[0], nc1); - NodeBuilder<> nb(OR); - for (const Node& ncc : nc1) - { - nb << nm->mkNode(STRING_STRCTN, ncc, node[1]); - } - Node ret = nb.constructNode(); - // str.contains( x ++ y, "A" ) ---> - // str.contains( x, "A" ) OR str.contains( y, "A" ) - return returnRewrite(node, ret, "ctn-concat-char"); - } - else if (node[0].getKind() == STRING_STRREPL) - { - Node rplDomain = checkEntailContains(node[0][1], node[1]); - if (!rplDomain.isNull() && !rplDomain.getConst()) - { - Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); - Node d2 = - nm->mkNode(AND, - nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), - nm->mkNode(STRING_STRCTN, node[0][2], node[1])); - Node ret = nm->mkNode(OR, d1, d2); - // If str.contains( y, "A" ) ---> false, then: - // str.contains( str.replace( x, y, z ), "A" ) ---> - // str.contains( x, "A" ) OR - // ( str.contains( x, y ) AND str.contains( z, "A" ) ) - return returnRewrite(node, ret, "ctn-repl-char"); - } - } - } - } - std::vector nc1; - utils::getConcat(node[0], nc1); - std::vector nc2; - utils::getConcat(node[1], nc2); - - // component-wise containment - std::vector nc1rb; - std::vector nc1re; - if (componentContains(nc1, nc2, nc1rb, nc1re) != -1) - { - Node ret = NodeManager::currentNM()->mkConst(true); - return returnRewrite(node, ret, "ctn-component"); - } - - // strip endpoints - std::vector nb; - std::vector ne; - if (stripConstantEndpoints(nc1, nc2, nb, ne)) - { - Node ret = NodeManager::currentNM()->mkNode( - kind::STRING_STRCTN, utils::mkConcat(STRING_CONCAT, nc1), node[1]); - return returnRewrite(node, ret, "ctn-strip-endpt"); - } - - for (const Node& n : nc2) - { - if (n.getKind() == kind::STRING_STRREPL) - { - // (str.contains x (str.replace y z w)) --> false - // if (str.contains x y) = false and (str.contains x w) = false - // - // Reasoning: (str.contains x y) checks that x does not contain y if the - // replacement does not 
change y. (str.contains x w) checks that if the - // replacement changes anything in y, the w makes it impossible for it to - // occur in x. - Node ctnConst = checkEntailContains(node[0], n[0]); - if (!ctnConst.isNull() && !ctnConst.getConst()) - { - Node ctnConst2 = checkEntailContains(node[0], n[2]); - if (!ctnConst2.isNull() && !ctnConst2.getConst()) - { - Node res = nm->mkConst(false); - return returnRewrite(node, res, "ctn-rpl-non-ctn"); - } - } - - // (str.contains x (str.++ w (str.replace x y x) z)) ---> - // (and (= w "") (= x (str.replace x y x)) (= z "")) - // - // TODO: Remove with under-/over-approximation - if (node[0] == n[0] && node[0] == n[2]) - { - Node ret; - if (nc2.size() > 1) - { - Node emp = nm->mkConst(CVC4::String("")); - NodeBuilder<> nb2(kind::AND); - for (const Node& n2 : nc2) - { - if (n2 == n) - { - nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]); - } - else - { - nb2 << nm->mkNode(kind::EQUAL, emp, n2); - } - } - ret = nb2.constructNode(); - } - else - { - ret = nm->mkNode(kind::EQUAL, node[0], node[1]); - } - return returnRewrite(node, ret, "ctn-repl-self"); - } - } - } - - // length entailment - Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); - Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(len_n2, len_n1, true)) - { - // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false - Node ret = NodeManager::currentNM()->mkConst(false); - return returnRewrite(node, ret, "ctn-len-ineq"); - } - - // multi-set reasoning - // For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false - // since the number of a's in the second argument is greater than the number - // of a's in the first argument - if (checkEntailMultisetSubset(node[0], node[1])) - { - Node ret = nm->mkConst(false); - return returnRewrite(node, ret, "ctn-mset-nss"); - } - - if (checkEntailArith(len_n2, len_n1, false)) - { - // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2 
- Node ret = node[0].eqNode(node[1]); - return returnRewrite(node, ret, "ctn-len-ineq-nstrict"); - } - - // splitting - if (node[0].getKind() == kind::STRING_CONCAT) - { - if( node[1].isConst() ){ - CVC4::String t = node[1].getConst(); - // Below, we are looking for a constant component of node[0] - // has no overlap with node[1], which means we can split. - // Notice that if the first or last components had no - // overlap, these would have been removed by strip - // constant endpoints above. - // Hence, we consider only the inner children. - for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++) - { - //constant contains - if( node[0][i].isConst() ){ - CVC4::String s = node[0][i].getConst(); - // if no overlap, we can split into disjunction - if (s.noOverlapWith(t)) - { - std::vector nc0; - utils::getConcat(node[0], nc0); - std::vector spl[2]; - spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i); - Assert(i < nc0.size() - 1); - spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end()); - Node ret = NodeManager::currentNM()->mkNode( - kind::OR, - NodeManager::currentNM()->mkNode( - kind::STRING_STRCTN, - utils::mkConcat(STRING_CONCAT, spl[0]), - node[1]), - NodeManager::currentNM()->mkNode( - kind::STRING_STRCTN, - utils::mkConcat(STRING_CONCAT, spl[1]), - node[1])); - return returnRewrite(node, ret, "ctn-split"); - } - } - } - } - } - else if (node[0].getKind() == kind::STRING_SUBSTR) - { - // (str.contains (str.substr x n (str.len y)) y) ---> - // (= (str.substr x n (str.len y)) y) - // - // TODO: Remove with under-/over-approximation - if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1])) - { - Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]); - return returnRewrite(node, ret, "ctn-substr"); - } - } - else if (node[0].getKind() == kind::STRING_STRREPL) - { - if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst()) - { - if (Word::noOverlapWith(node[1], node[0][1]) - && Word::noOverlapWith(node[1], node[0][2])) - { - // 
(str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3) - // if there is no overlap between c1 and c3 and none between c2 and c3 - Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]); - return returnRewrite(node, ret, "ctn-repl-cnsts-to-ctn"); - } - } - - if (node[0][0] == node[0][2]) - { - // (str.contains (str.replace x y x) y) ---> (str.contains x y) - if (node[0][1] == node[1]) - { - Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); - return returnRewrite(node, ret, "ctn-repl-to-ctn"); - } - - // (str.contains (str.replace x y x) z) ---> (str.contains x z) - // if (str.len z) <= 1 - if (checkEntailLengthOne(node[1])) - { - Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); - return returnRewrite(node, ret, "ctn-repl-len-one-to-ctn"); - } - } - - // (str.contains (str.replace x y z) z) ---> - // (or (str.contains x y) (str.contains x z)) - if (node[0][2] == node[1]) - { - Node ret = nm->mkNode(OR, - nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]), - nm->mkNode(STRING_STRCTN, node[0][0], node[0][2])); - return returnRewrite(node, ret, "ctn-repl-to-ctn-disj"); - } - - // (str.contains (str.replace x y z) w) ---> - // (str.contains (str.replace x y "") w) - // if (str.contains z w) ---> false and (str.len w) = 1 - if (checkEntailLengthOne(node[1])) - { - Node ctn = checkEntailContains(node[1], node[0][2]); - if (!ctn.isNull() && !ctn.getConst()) - { - Node empty = nm->mkConst(String("")); - Node ret = nm->mkNode( - kind::STRING_STRCTN, - nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty), - node[1]); - return returnRewrite(node, ret, "ctn-repl-simp-repl"); - } - } - } - - if (node[1].getKind() == kind::STRING_STRREPL) - { - // (str.contains x (str.replace y x y)) ---> - // (str.contains x y) - if (node[0] == node[1][1] && node[1][0] == node[1][2]) - { - Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]); - return returnRewrite(node, ret, "ctn-repl"); - } - - // (str.contains x (str.replace "" x 
y)) ---> - // (= "" (str.replace "" x y)) - // - // Note: Length-based reasoning is not sufficient to get this rewrite. We - // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0 - // nor str.len(x) - str.len(str.replace("", x, y)) >= 0 - Node emp = nm->mkConst(CVC4::String("")); - if (node[0] == node[1][1] && node[1][0] == emp) - { - Node ret = nm->mkNode(kind::EQUAL, emp, node[1]); - return returnRewrite(node, ret, "ctn-repl-empty"); - } - } - - Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; - return node; -} - -Node TheoryStringsRewriter::rewriteIndexof( Node node ) { - Assert(node.getKind() == kind::STRING_STRIDOF); - NodeManager* nm = NodeManager::currentNM(); - - if (node[2].isConst() && node[2].getConst().sgn() < 0) - { - // z<0 implies str.indexof( x, y, z ) --> -1 - Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-neg"); - } - - // evaluation and simple cases - std::vector children0; - utils::getConcat(node[0], children0); - if (children0[0].isConst() && node[1].isConst() && node[2].isConst()) - { - CVC4::Rational rMaxInt(CVC4::String::maxSize()); - if (node[2].getConst() > rMaxInt) - { - // We know that, due to limitations on the size of string constants - // in our implementation, that accessing a position greater than - // rMaxInt is guaranteed to be out of bounds. 
- Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-max"); - } - Assert(node[2].getConst().sgn() >= 0); - Node s = children0[0]; - Node t = node[1]; - uint32_t start = - node[2].getConst().getNumerator().toUnsignedInt(); - std::size_t ret = Word::find(s, t, start); - if (ret != std::string::npos) - { - Node retv = nm->mkConst(Rational(static_cast(ret))); - return returnRewrite(node, retv, "idof-find"); - } - else if (children0.size() == 1) - { - Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-nfind"); - } - } - - if (node[0] == node[1]) - { - if (node[2].isConst()) - { - if (node[2].getConst().sgn() == 0) - { - // indexof( x, x, 0 ) --> 0 - Node zero = nm->mkConst(Rational(0)); - return returnRewrite(node, zero, "idof-eq-cst-start"); - } - } - if (checkEntailArith(node[2], true)) - { - // y>0 implies indexof( x, x, y ) --> -1 - Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-eq-nstart"); - } - Node emp = nm->mkConst(CVC4::String("")); - if (node[0] != emp) - { - // indexof( x, x, z ) ---> indexof( "", "", z ) - Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]); - return returnRewrite(node, ret, "idof-eq-norm"); - } - } - - Node len0 = nm->mkNode(STRING_LENGTH, node[0]); - Node len1 = nm->mkNode(STRING_LENGTH, node[1]); - Node len0m2 = nm->mkNode(MINUS, len0, node[2]); - - if (node[1].isConst()) - { - if (Word::isEmpty(node[1])) - { - if (checkEntailArith(len0, node[2]) && checkEntailArith(node[2])) - { - // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z - return returnRewrite(node, node[2], "idof-emp-idof"); - } - } - } - - if (checkEntailArith(len1, len0m2, true)) - { - // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1 - Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-len"); - } - - Node fstr = node[0]; - if (!node[2].isConst() || node[2].getConst().sgn() != 0) - { - fstr = nm->mkNode(kind::STRING_SUBSTR, 
node[0], node[2], len0); - fstr = Rewriter::rewrite(fstr); - } - - Node cmp_conr = checkEntailContains(fstr, node[1]); - Trace("strings-rewrite-debug") << "For " << node << ", check contains(" - << fstr << ", " << node[1] << ")" << std::endl; - Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl; - std::vector children1; - utils::getConcat(node[1], children1); - if (!cmp_conr.isNull()) - { - if (cmp_conr.getConst()) - { - if (node[2].isConst() && node[2].getConst().sgn() == 0) - { - // past the first position in node[0] that contains node[1], we can drop - std::vector nb; - std::vector ne; - int cc = componentContains(children0, children1, nb, ne, true, 1); - if (cc != -1 && !ne.empty()) - { - // For example: - // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0) - Node nn = utils::mkConcat(STRING_CONCAT, children0); - Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); - return returnRewrite(node, ret, "idof-def-ctn"); - } - - // Strip components from the beginning that are guaranteed not to match - if (stripConstantEndpoints(children0, children1, nb, ne, 1)) - { - // str.indexof(str.++("AB", x, "C"), "C", 0) ---> - // 2 + str.indexof(str.++(x, "C"), "C", 0) - Node ret = - nm->mkNode(kind::PLUS, - nm->mkNode(kind::STRING_LENGTH, - utils::mkConcat(STRING_CONCAT, nb)), - nm->mkNode(kind::STRING_STRIDOF, - utils::mkConcat(STRING_CONCAT, children0), - node[1], - node[2])); - return returnRewrite(node, ret, "idof-strip-cnst-endpts"); - } - } - - // strip symbolic length - Node new_len = node[2]; - std::vector nr; - if (stripSymbolicLength(children0, nr, 1, new_len)) - { - // For example: - // z>str.len( x1 ) and str.contains( x2, y )-->true - // implies - // str.indexof( str.++( x1, x2 ), y, z ) ---> - // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) ) - Node nn = utils::mkConcat(STRING_CONCAT, children0); - Node ret = - nm->mkNode(kind::PLUS, - nm->mkNode(kind::MINUS, node[2], new_len), - 
nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len)); - return returnRewrite(node, ret, "idof-strip-sym-len"); - } - } - else - { - // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1 - Node negone = nm->mkConst(Rational(-1)); - return returnRewrite(node, negone, "idof-nctn"); - } - } - else - { - Node new_len = node[2]; - std::vector nr; - if (stripSymbolicLength(children0, nr, 1, new_len)) - { - // Normalize the string before the start index. - // - // For example: - // str.indexof(str.++("ABCD", x), y, 3) ---> - // str.indexof(str.++("AAAD", x), y, 3) - Node nodeNr = utils::mkConcat(STRING_CONCAT, nr); - Node normNr = lengthPreserveRewrite(nodeNr); - if (normNr != nodeNr) - { - std::vector normNrChildren; - utils::getConcat(normNr, normNrChildren); - std::vector children(normNrChildren); - children.insert(children.end(), children0.begin(), children0.end()); - Node nn = utils::mkConcat(STRING_CONCAT, children); - Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); - return returnRewrite(node, res, "idof-norm-prefix"); - } - } - } - - if (node[2].isConst() && node[2].getConst().sgn()==0) - { - std::vector cb; - std::vector ce; - if (stripConstantEndpoints(children0, children1, cb, ce, -1)) - { - Node ret = utils::mkConcat(STRING_CONCAT, children0); - ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]); - // For example: - // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 ) - return returnRewrite(node, ret, "rpl-pull-endpt"); - } - } - - Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; - return node; -} - -Node TheoryStringsRewriter::rewriteReplace( Node node ) { - Assert(node.getKind() == kind::STRING_STRREPL); - NodeManager* nm = NodeManager::currentNM(); - - if (node[1].isConst() && Word::isEmpty(node[1])) - { - Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]); - return returnRewrite(node, ret, "rpl-rpl-empty"); - } - - std::vector children0; - utils::getConcat(node[0], 
children0); - - if (node[1].isConst() && children0[0].isConst()) - { - Node s = children0[0]; - Node t = node[1]; - std::size_t p = Word::find(s, t); - if (p == std::string::npos) - { - if (children0.size() == 1) - { - return returnRewrite(node, node[0], "rpl-const-nfind"); - } - } - else - { - Node s1 = Word::substr(s, 0, p); - Node s3 = Word::substr(s, p + Word::getLength(t)); - std::vector children; - if (!Word::isEmpty(s1)) - { - children.push_back(s1); - } - children.push_back(node[2]); - if (!Word::isEmpty(s3)) - { - children.push_back(s3); - } - children.insert(children.end(), children0.begin() + 1, children0.end()); - Node ret = utils::mkConcat(STRING_CONCAT, children); - return returnRewrite(node, ret, "rpl-const-find"); - } - } - - // rewrites that apply to both replace and replaceall - Node rri = rewriteReplaceInternal(node); - if (!rri.isNull()) - { - // printing of the rewrite managed by the call above - return rri; - } - - if (node[0] == node[2]) - { - // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x - Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]); - Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]); - if (checkEntailArith(l1, l0)) - { - return returnRewrite(node, node[0], "rpl-rpl-len-id"); - } - - // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x) - // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... 
(= yn "") - if (checkEntailLengthOne(node[0])) - { - Node empty = nm->mkConst(String("")); - Node rn1 = Rewriter::rewrite( - rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty))); - if (rn1 != node[1]) - { - std::vector emptyNodes; - bool allEmptyEqs; - std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(rn1); - - if (allEmptyEqs) - { - Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodes); - if (node[1] != nn1) - { - Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]); - return returnRewrite(node, ret, "rpl-x-y-x-simp"); - } - } - } - } - } - - std::vector children1; - utils::getConcat(node[1], children1); - - // check if contains definitely does (or does not) hold - Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]); - Node cmp_conr = Rewriter::rewrite(cmp_con); - if (!checkEntailContains(node[0], node[1]).isNull()) - { - if (cmp_conr.getConst()) - { - // component-wise containment - std::vector cb; - std::vector ce; - int cc = componentContains(children0, children1, cb, ce, true, 1); - if (cc != -1) - { - if (cc == 0 && children0[0] == children1[0]) - { - // definitely a prefix, can do the replace - // for example, - // str.replace( str.++( x, "ab" ), str.++( x, "a" ), y ) ---> - // str.++( y, "b" ) - std::vector cres; - cres.push_back(node[2]); - cres.insert(cres.end(), ce.begin(), ce.end()); - Node ret = utils::mkConcat(STRING_CONCAT, cres); - return returnRewrite(node, ret, "rpl-cctn-rpl"); - } - else if (!ce.empty()) - { - // we can pull remainder past first definite containment - // for example, - // str.replace( str.++( x, "ab" ), "a", y ) ---> - // str.++( str.replace( str.++( x, "a" ), "a", y ), "b" ) - // this is independent of whether the second argument may be empty - std::vector scc; - scc.push_back(NodeManager::currentNM()->mkNode( - kind::STRING_STRREPL, - utils::mkConcat(STRING_CONCAT, children0), - node[1], - node[2])); - scc.insert(scc.end(), ce.begin(), ce.end()); - Node ret = utils::mkConcat(STRING_CONCAT, scc); - 
return returnRewrite(node, ret, "rpl-cctn"); - } - } - } - else - { - // ~contains( t, s ) => ( replace( t, s, r ) ----> t ) - return returnRewrite(node, node[0], "rpl-nctn"); - } - } - else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND) - { - // Rewriting the str.contains may return equalities of the form (= x ""). - // In that case, we can substitute the variables appearing in those - // equalities with the empty string in the third argument of the - // str.replace. For example: - // - // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "") - // - // This can be done because str.replace changes x iff (str.++ x y) is in x - // but that means that y must be empty in that case. Thus, we can - // substitute y with "" in the third argument. Note that the third argument - // does not matter when the str.replace does not apply. - // - Node empty = nm->mkConst(::CVC4::String("")); - - std::vector emptyNodes; - bool allEmptyEqs; - std::tie(allEmptyEqs, emptyNodes) = collectEmptyEqs(cmp_conr); - - if (emptyNodes.size() > 0) - { - // Perform the substitutions - std::vector substs(emptyNodes.size(), TNode(empty)); - Node nn2 = node[2].substitute( - emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end()); - - // If the contains rewrites to a conjunction of empty-string equalities - // and we are doing the replacement in an empty string, we can rewrite - // the string-to-replace with a concatenation of all the terms that must - // be empty: - // - // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn) z) - // if (str.contains "" y) ---> (and (= y1 "") ... 
(= yn "")) - if (node[0] == empty && allEmptyEqs) - { - std::vector emptyNodesList(emptyNodes.begin(), emptyNodes.end()); - Node nn1 = utils::mkConcat(STRING_CONCAT, emptyNodesList); - if (nn1 != node[1] || nn2 != node[2]) - { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2); - return returnRewrite(node, res, "rpl-emp-cnts-substs"); - } - } - - if (nn2 != node[2]) - { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2); - return returnRewrite(node, res, "rpl-cnts-substs"); - } - } - } - - if (cmp_conr != cmp_con) - { - if (checkEntailNonEmpty(node[1])) - { - // pull endpoints that can be stripped - // for example, - // str.replace( str.++( "b", x, "b" ), "a", y ) ---> - // str.++( "b", str.replace( x, "a", y ), "b" ) - std::vector cb; - std::vector ce; - if (stripConstantEndpoints(children0, children1, cb, ce)) - { - std::vector cc; - cc.insert(cc.end(), cb.begin(), cb.end()); - cc.push_back(NodeManager::currentNM()->mkNode( - kind::STRING_STRREPL, - utils::mkConcat(STRING_CONCAT, children0), - node[1], - node[2])); - cc.insert(cc.end(), ce.begin(), ce.end()); - Node ret = utils::mkConcat(STRING_CONCAT, cc); - return returnRewrite(node, ret, "rpl-pull-endpt"); - } - } - } - - children1.clear(); - utils::getConcat(node[1], children1); - Node lastChild1 = children1[children1.size() - 1]; - if (lastChild1.getKind() == kind::STRING_SUBSTR) - { - // (str.replace x (str.++ t (str.substr y i j)) z) ---> - // (str.replace x (str.++ t - // (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z) - // if j > len(x) - // - // Reasoning: If the string to be replaced is longer than x, then it does - // not matter how much longer it is, the result is always x. Thus, it is - // fine to only look at the prefix of length len(x) + 1 - len(t). 
- - children1.pop_back(); - // Length of the non-substr components in the second argument - Node partLen1 = nm->mkNode(kind::STRING_LENGTH, - utils::mkConcat(STRING_CONCAT, children1)); - Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]); - - Node zero = nm->mkConst(Rational(0)); - Node one = nm->mkConst(Rational(1)); - Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]); - Node len0_1 = nm->mkNode(kind::PLUS, len0, one); - // Check len(t) + j > len(x) + 1 - if (checkEntailArith(maxLen1, len0_1, true)) - { - children1.push_back(nm->mkNode( - kind::STRING_SUBSTR, - lastChild1[0], - lastChild1[1], - nm->mkNode( - kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1)))); - Node res = nm->mkNode(kind::STRING_STRREPL, - node[0], - utils::mkConcat(STRING_CONCAT, children1), - node[2]); - return returnRewrite(node, res, "repl-subst-idx"); - } - } - - if (node[0].getKind() == STRING_STRREPL) - { - Node x = node[0]; - Node y = node[1]; - Node z = node[2]; - if (x[0] == x[2] && x[0] == y) - { - // (str.replace (str.replace y w y) y z) --> - // (str.replace (str.replace y w z) y z) - // if (str.len w) >= (str.len z) and w != z - // - // Reasoning: There are two cases: (1) w does not appear in y and (2) w - // does appear in y. - // - // Case (1): In this case, the reasoning is trivial. The - // inner replace does not do anything, so we can just replace its third - // argument with any string. - // - // Case (2): After the inner replace, we are guaranteed to have a string - // that contains y at the index of w in the original string y. The outer - // replace then replaces that y with z, so we can short-circuit that - // replace by directly replacing w with z in the inner replace. We can - // only do that if the result of the new inner replace does not contain - // y, otherwise we end up doing two replaces that are different from the - // original expression. 
We enforce that by requiring that the length of w - // has to be greater or equal to the length of z and that w and z have to - // be different. This makes sure that an inner replace changes a string - // to a string that is shorter than y, making it impossible for the outer - // replace to match. - Node w = x[1]; - - // (str.len w) >= (str.len z) - Node wlen = nm->mkNode(kind::STRING_LENGTH, w); - Node zlen = nm->mkNode(kind::STRING_LENGTH, z); - if (checkEntailArith(wlen, zlen)) - { - // w != z - Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z)); - if (wEqZ.isConst() && !wEqZ.getConst()) - { - Node ret = nm->mkNode(kind::STRING_STRREPL, - nm->mkNode(kind::STRING_STRREPL, y, w, z), - y, - z); - return returnRewrite(node, ret, "repl-repl-short-circuit"); - } - } - } - } - - if (node[1].getKind() == STRING_STRREPL) - { - if (node[1][0] == node[0]) - { - if (node[1][0] == node[1][2] && node[1][0] == node[2]) - { - // str.replace( x, str.replace( x, y, x ), x ) ---> x - return returnRewrite(node, node[0], "repl-repl2-inv-id"); - } - bool dualReplIteSuccess = false; - Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); - if (!cmp_con2.isNull() && !cmp_con2.getConst()) - { - // str.contains( x, z ) ---> false - // implies - // str.replace( x, str.replace( x, y, z ), w ) ---> - // ite( str.contains( x, y ), x, w ) - dualReplIteSuccess = true; - } - else - { - // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false - // implies - // str.replace( x, str.replace( x, y, z ), w ) ---> - // ite( str.contains( x, y ), x, w ) - cmp_con2 = checkEntailContains(node[1][1], node[1][2]); - if (!cmp_con2.isNull() && !cmp_con2.getConst()) - { - cmp_con2 = checkEntailContains(node[1][2], node[1][1]); - if (!cmp_con2.isNull() && !cmp_con2.getConst()) - { - dualReplIteSuccess = true; - } - } - } - if (dualReplIteSuccess) - { - Node res = nm->mkNode(ITE, - nm->mkNode(STRING_STRCTN, node[0], node[1][1]), - node[0], - node[2]); - return returnRewrite(node, 
res, "repl-dual-repl-ite"); - } - } - - bool invSuccess = false; - if (node[1][1] == node[0]) - { - if (node[1][0] == node[1][2]) - { - // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w) - invSuccess = true; - } - else if (node[1][1] == node[2] || node[1][0] == node[2]) - { - // str.contains(y, z) ----> false and ( y == w or x == w ) implies - // implies - // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w) - Node cmp_con2 = checkEntailContains(node[1][0], node[1][2]); - invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); - } - } - else - { - // str.contains(x, z) ----> false and str.contains(x, w) ----> false - // implies - // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u) - Node cmp_con2 = checkEntailContains(node[0], node[1][1]); - if (!cmp_con2.isNull() && !cmp_con2.getConst()) - { - cmp_con2 = checkEntailContains(node[0], node[1][2]); - invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst(); - } - } - if (invSuccess) - { - Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]); - return returnRewrite(node, res, "repl-repl2-inv"); - } - } - if (node[2].getKind() == STRING_STRREPL) - { - if (node[2][1] == node[0]) - { - // str.contains( z, w ) ----> false implies - // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z ) - Node cmp_con2 = checkEntailContains(node[1], node[2][0]); - if (!cmp_con2.isNull() && !cmp_con2.getConst()) - { - Node res = - nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]); - return returnRewrite(node, res, "repl-repl3-inv"); - } - } - if (node[2][0] == node[1]) - { - bool success = false; - if (node[2][0] == node[2][2] && node[2][1] == node[0]) - { - // str.replace( x, y, str.replace( y, x, y ) ) ---> x - success = true; - } - else - { - // str.contains( x, z ) ----> false implies - // str.replace( x, y, str.replace( y, z, w ) ) ---> x - cmp_con = checkEntailContains(node[0], node[2][1]); - success = !cmp_con.isNull() && 
!cmp_con.getConst(); - } - if (success) - { - return returnRewrite(node, node[0], "repl-repl3-inv-id"); - } - } - } - // miniscope based on components that do not contribute to contains - // for example, - // str.replace( x ++ y ++ x ++ y, "A", z ) --> - // str.replace( x ++ y, "A", z ) ++ x ++ y - // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y. - if (checkEntailLengthOne(node[1])) - { - Node lastLhs; - unsigned lastCheckIndex = 0; - for (unsigned i = 1, iend = children0.size(); i < iend; i++) - { - unsigned checkIndex = children0.size() - i; - std::vector checkLhs; - checkLhs.insert( - checkLhs.end(), children0.begin(), children0.begin() + checkIndex); - Node lhs = utils::mkConcat(STRING_CONCAT, checkLhs); - Node rhs = children0[checkIndex]; - Node ctn = checkEntailContains(lhs, rhs); - if (!ctn.isNull() && ctn.getConst()) - { - lastLhs = lhs; - lastCheckIndex = checkIndex; - } - else - { - break; - } - } - if (!lastLhs.isNull()) - { - std::vector remc(children0.begin() + lastCheckIndex, - children0.end()); - Node rem = utils::mkConcat(STRING_CONCAT, remc); - Node ret = - nm->mkNode(STRING_CONCAT, - nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]), - rem); - // for example: - // str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x - // Since we know that the first occurrence of "A" cannot be in the - // second occurrence of x. Notice this is specific to single characters - // due to complications with finds that span multiple components for - // non-characters. - return returnRewrite(node, ret, "repl-char-ncontrib-find"); - } - } - - // TODO (#1180) incorporate these? 
- // contains( t, s ) => - // replace( replace( x, t, s ), s, r ) ----> replace( x, t, r ) - // contains( t, s ) => - // contains( replace( t, s, r ), r ) ----> true - - Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; - return node; -} - -Node TheoryStringsRewriter::rewriteReplaceAll(Node node) -{ - Assert(node.getKind() == STRING_STRREPLALL); - - if (node[0].isConst() && node[1].isConst()) - { - std::vector children; - Node s = node[0]; - Node t = node[1]; - if (Word::isEmpty(s) || Word::isEmpty(t)) - { - return returnRewrite(node, node[0], "replall-empty-find"); - } - std::size_t sizeS = Word::getLength(s); - std::size_t sizeT = Word::getLength(t); - std::size_t index = 0; - std::size_t curr = 0; - do - { - curr = Word::find(s, t, index); - if (curr != std::string::npos) - { - if (curr > index) - { - children.push_back(Word::substr(s, index, curr - index)); - } - children.push_back(node[2]); - index = curr + sizeT; - } - else - { - children.push_back(Word::substr(s, index, sizeS - index)); - } - } while (curr != std::string::npos && curr < sizeS); - // constant evaluation - Node res = utils::mkConcat(STRING_CONCAT, children); - return returnRewrite(node, res, "replall-const"); - } - - // rewrites that apply to both replace and replaceall - Node rri = rewriteReplaceInternal(node); - if (!rri.isNull()) - { - // printing of the rewrite managed by the call above - return rri; - } - - Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; - return node; -} - -Node TheoryStringsRewriter::rewriteReplaceInternal(Node node) -{ - Kind nk = node.getKind(); - Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL); - - if (node[1] == node[2]) - { - return returnRewrite(node, node[0], "rpl-id"); - } - - if (node[0] == node[1]) - { - // only holds for replaceall if non-empty - if (nk == STRING_STRREPL || checkEntailNonEmpty(node[1])) - { - return returnRewrite(node, node[2], "rpl-replace"); - } - } - - return Node::null(); -} - 
-Node TheoryStringsRewriter::rewriteStrConvert(Node node) -{ - Kind nk = node.getKind(); - Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); - NodeManager* nm = NodeManager::currentNM(); - if (node[0].isConst()) - { - std::vector nvec = node[0].getConst().getVec(); - for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) - { - unsigned newChar = CVC4::String::convertUnsignedIntToCode(nvec[i]); - // transform it - // upper 65 ... 90 - // lower 97 ... 122 - if (nk == STRING_TOUPPER) - { - if (newChar >= 97 && newChar <= 122) - { - newChar = newChar - 32; - } - } - else if (nk == STRING_TOLOWER) - { - if (newChar >= 65 && newChar <= 90) - { - newChar = newChar + 32; - } - } - newChar = CVC4::String::convertCodeToUnsignedInt(newChar); - nvec[i] = newChar; - } - Node retNode = nm->mkConst(String(nvec)); - return returnRewrite(node, retNode, "str-conv-const"); - } - else if (node[0].getKind() == STRING_CONCAT) - { - NodeBuilder<> concatBuilder(STRING_CONCAT); - for (const Node& nc : node[0]) - { - concatBuilder << nm->mkNode(nk, nc); - } - // tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) - Node retNode = concatBuilder.constructNode(); - return returnRewrite(node, retNode, "str-conv-minscope-concat"); - } - else if (node[0].getKind() == STRING_TOLOWER - || node[0].getKind() == STRING_TOUPPER) - { - // tolower( tolower( x ) ) --> tolower( x ) - // tolower( toupper( x ) ) --> tolower( x ) - Node retNode = nm->mkNode(nk, node[0][0]); - return returnRewrite(node, retNode, "str-conv-idem"); - } - else if (node[0].getKind() == STRING_ITOS) - { - // tolower( str.from.int( x ) ) --> str.from.int( x ) - return returnRewrite(node, node[0], "str-conv-itos"); - } - return node; -} - -Node TheoryStringsRewriter::rewriteStrReverse(Node node) -{ - Assert(node.getKind() == STRING_REV); - NodeManager* nm = NodeManager::currentNM(); - Node x = node[0]; - if (x.isConst()) - { - std::vector nvec = node[0].getConst().getVec(); - std::reverse(nvec.begin(), nvec.end()); - Node 
retNode = nm->mkConst(String(nvec)); - return returnRewrite(node, retNode, "str-conv-const"); - } - else if (x.getKind() == STRING_CONCAT) - { - std::vector children; - for (const Node& nc : x) - { - children.push_back(nm->mkNode(STRING_REV, nc)); - } - std::reverse(children.begin(), children.end()); - // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 ) - Node retNode = nm->mkNode(STRING_CONCAT, children); - return returnRewrite(node, retNode, "str-rev-minscope-concat"); - } - else if (x.getKind() == STRING_REV) - { - // rev( rev( x ) ) --> x - Node retNode = x[0]; - return returnRewrite(node, retNode, "str-rev-idem"); - } - return node; -} - -Node TheoryStringsRewriter::rewriteStringLeq(Node n) -{ - Assert(n.getKind() == kind::STRING_LEQ); - NodeManager* nm = NodeManager::currentNM(); - if (n[0] == n[1]) - { - Node ret = nm->mkConst(true); - return returnRewrite(n, ret, "str-leq-id"); - } - if (n[0].isConst() && n[1].isConst()) - { - String s = n[0].getConst(); - String t = n[1].getConst(); - Node ret = nm->mkConst(s.isLeq(t)); - return returnRewrite(n, ret, "str-leq-eval"); - } - // empty strings - for (unsigned i = 0; i < 2; i++) - { - if (n[i].isConst() && n[i].getConst().isEmptyString()) - { - Node ret = i == 0 ? 
nm->mkConst(true) : n[0].eqNode(n[1]); - return returnRewrite(n, ret, "str-leq-empty"); - } - } - - std::vector n1; - utils::getConcat(n[0], n1); - std::vector n2; - utils::getConcat(n[1], n2); - Assert(!n1.empty() && !n2.empty()); - - // constant prefixes - if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) - { - String s = n1[0].getConst(); - String t = n2[0].getConst(); - // only need to truncate if s is longer - if (s.size() > t.size()) - { - s = s.prefix(t.size()); - } - // if prefix is not leq, then entire string is not leq - if (!s.isLeq(t)) - { - Node ret = nm->mkConst(false); - return returnRewrite(n, ret, "str-leq-cprefix"); - } - } - - Trace("strings-rewrite-nf") << "No rewrites for : " << n << std::endl; - return n; -} - -Node TheoryStringsRewriter::rewritePrefixSuffix(Node n) -{ - Assert(n.getKind() == kind::STRING_PREFIX - || n.getKind() == kind::STRING_SUFFIX); - bool isPrefix = n.getKind() == kind::STRING_PREFIX; - if (n[0] == n[1]) - { - Node ret = NodeManager::currentNM()->mkConst(true); - return returnRewrite(n, ret, "suf/prefix-eq"); - } - if (n[0].isConst()) - { - CVC4::String t = n[0].getConst(); - if (t.isEmptyString()) - { - Node ret = NodeManager::currentNM()->mkConst(true); - return returnRewrite(n, ret, "suf/prefix-empty-const"); - } - } - if (n[1].isConst()) - { - Node s = n[1]; - size_t lenS = Word::getLength(s); - if (n[0].isConst()) - { - Node ret = NodeManager::currentNM()->mkConst(false); - Node t = n[0]; - size_t lenT = Word::getLength(t); - if (lenS >= lenT) - { - if ((isPrefix && t == Word::prefix(s, lenT)) - || (!isPrefix && t == Word::suffix(s, lenT))) - { - ret = NodeManager::currentNM()->mkConst(true); - } - } - return returnRewrite(n, ret, "suf/prefix-const"); - } - else if (lenS == 0) - { - Node ret = n[0].eqNode(n[1]); - return returnRewrite(n, ret, "suf/prefix-empty"); - } - else if (lenS == 1) - { - // (str.prefix x "A") and (str.suffix x "A") are equivalent to - // (str.contains "A" x ) - Node ret = - 
NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]); - return returnRewrite(n, ret, "suf/prefix-ctn"); - } - } - Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]); - Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]); - Node val; - if (isPrefix) - { - val = NodeManager::currentNM()->mkConst(::CVC4::Rational(0)); - } - else - { - val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens); - } - - // Check if we can turn the prefix/suffix into equalities by showing that the - // prefix/suffix is at least as long as the string - Node eqs = inferEqsFromContains(n[1], n[0]); - if (!eqs.isNull()) - { - return returnRewrite(n, eqs, "suf/prefix-to-eqs"); - } - - // general reduction to equality + substr - Node retNode = n[0].eqNode( - NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens)); - - return retNode; -} - -Node TheoryStringsRewriter::rewriteStringFromCode(Node n) -{ - Assert(n.getKind() == kind::STRING_FROM_CODE); - NodeManager* nm = NodeManager::currentNM(); - - if (n[0].isConst()) - { - Integer i = n[0].getConst().getNumerator(); - Node ret; - if (i >= 0 && i < strings::utils::getAlphabetCardinality()) - { - std::vector svec = {i.toUnsignedInt()}; - ret = nm->mkConst(String(svec)); - } - else - { - ret = nm->mkConst(String("")); - } - return returnRewrite(n, ret, "from-code-eval"); - } - - return n; -} - -Node TheoryStringsRewriter::rewriteStringToCode(Node n) -{ - Assert(n.getKind() == kind::STRING_TO_CODE); - if (n[0].isConst()) - { - CVC4::String s = n[0].getConst(); - Node ret; - if (s.size() == 1) - { - std::vector vec = s.getVec(); - Assert(vec.size() == 1); - ret = NodeManager::currentNM()->mkConst( - Rational(CVC4::String::convertUnsignedIntToCode(vec[0]))); - } - else - { - ret = NodeManager::currentNM()->mkConst(Rational(-1)); - } - return returnRewrite(n, ret, "to-code-eval"); - } - - return n; -} - -Node TheoryStringsRewriter::splitConstant( Node a, Node b, int& index, 
bool isRev ) { - Assert(a.isConst() && b.isConst()); - size_t lenA = Word::getLength(a); - size_t lenB = Word::getLength(b); - index = lenA <= lenB ? 1 : 0; - size_t len_short = index == 1 ? lenA : lenB; - bool cmp = isRev ? a.getConst().rstrncmp(b.getConst(), len_short): a.getConst().strncmp(b.getConst(), len_short); - if( cmp ) { - Node l = index==0 ? a : b; - if( isRev ){ - int new_len = l.getConst().size() - len_short; - return Word::substr(l, 0, new_len); - }else{ - return Word::substr(l, len_short); - } - } - // not the same prefix/suffix - return Node::null(); -} - -bool TheoryStringsRewriter::canConstantContainConcat( Node c, Node n, int& firstc, int& lastc ) { - Assert(c.isConst()); - CVC4::String t = c.getConst(); - const std::vector& tvec = t.getVec(); - Assert(n.getKind() == kind::STRING_CONCAT); - //must find constant components in order - size_t pos = 0; - firstc = -1; - lastc = -1; - for(unsigned i=0; i(); - size_t new_pos = t.find(s,pos); - if( new_pos==std::string::npos ) { - return false; - }else{ - pos = new_pos + s.size(); - } - } - else if (n[i].getKind() == kind::STRING_ITOS && checkEntailArith(n[i][0])) - { - // find the first occurrence of a digit starting at pos - while (pos < tvec.size() && !String::isDigit(tvec[pos])) - { - pos++; - } - if (pos == tvec.size()) - { - return false; - } - // must consume at least one digit here - pos++; - } - } - return true; -} - -bool TheoryStringsRewriter::canConstantContainList( Node c, std::vector< Node >& l, int& firstc, int& lastc ) { - Assert(c.isConst()); - //must find constant components in order - size_t pos = 0; - firstc = -1; - lastc = -1; - for(unsigned i=0; i& n1, - std::vector& nr, - int dir, - Node& curr) -{ - Assert(dir == 1 || dir == -1); - Assert(nr.empty()); - Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); - bool ret = false; - bool success; - unsigned sindex = 0; - do - { - Assert(!curr.isNull()); - success = false; - if (curr != zero && sindex < n1.size()) - { - 
unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); - if (n1[sindex_use].isConst()) - { - // could strip part of a constant - Node lowerBound = getConstantArithBound(Rewriter::rewrite(curr)); - if (!lowerBound.isNull()) - { - Assert(lowerBound.isConst()); - Rational lbr = lowerBound.getConst(); - if (lbr.sgn() > 0) - { - Assert(checkEntailArith(curr, true)); - CVC4::String s = n1[sindex_use].getConst(); - Node ncl = - NodeManager::currentNM()->mkConst(CVC4::Rational(s.size())); - Node next_s = - NodeManager::currentNM()->mkNode(kind::MINUS, lowerBound, ncl); - next_s = Rewriter::rewrite(next_s); - Assert(next_s.isConst()); - // we can remove the entire constant - if (next_s.getConst().sgn() >= 0) - { - curr = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(kind::MINUS, curr, ncl)); - success = true; - sindex++; - } - else - { - // we can remove part of the constant - // lower bound minus the length of a concrete string is negative, - // hence lowerBound cannot be larger than long max - Assert(lbr < Rational(String::maxSize())); - curr = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::MINUS, curr, lowerBound)); - uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); - Assert(lbsize < s.size()); - if (dir == 1) - { - // strip partially from the front - nr.push_back( - NodeManager::currentNM()->mkConst(s.prefix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.suffix(s.size() - lbsize)); - } - else - { - // strip partially from the back - nr.push_back( - NodeManager::currentNM()->mkConst(s.suffix(lbsize))); - n1[sindex_use] = NodeManager::currentNM()->mkConst( - s.prefix(s.size() - lbsize)); - } - ret = true; - } - Assert(checkEntailArith(curr)); - } - else - { - // we cannot remove the constant - } - } - } - else - { - Node next_s = NodeManager::currentNM()->mkNode( - kind::MINUS, - curr, - NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, - n1[sindex_use])); - next_s = Rewriter::rewrite(next_s); - if 
(checkEntailArith(next_s)) - { - success = true; - curr = next_s; - sindex++; - } - } - } - } while (success); - if (sindex > 0) - { - if (dir == 1) - { - nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); - n1.erase(n1.begin(), n1.begin() + sindex); - } - else - { - nr.insert(nr.end(), n1.end() - sindex, n1.end()); - n1.erase(n1.end() - sindex, n1.end()); - } - ret = true; - } - return ret; -} - -int TheoryStringsRewriter::componentContains(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - bool computeRemainder, - int remainderDir) -{ - Assert(nb.empty()); - Assert(ne.empty()); - // if n2 is a singleton, we can do optimized version here - if (n2.size() == 1) - { - for (unsigned i = 0; i < n1.size(); i++) - { - Node n1rb; - Node n1re; - if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) - { - if (computeRemainder) - { - n1[i] = n2[0]; - if (remainderDir != -1) - { - if (!n1re.isNull()) - { - ne.push_back(n1re); - } - ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); - n1.erase(n1.begin() + i + 1, n1.end()); - } - else if (!n1re.isNull()) - { - n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::STRING_CONCAT, n1[i], n1re)); - } - if (remainderDir != 1) - { - nb.insert(nb.end(), n1.begin(), n1.begin() + i); - n1.erase(n1.begin(), n1.begin() + i); - if (!n1rb.isNull()) - { - nb.push_back(n1rb); - } - } - else if (!n1rb.isNull()) - { - n1[i] = Rewriter::rewrite(NodeManager::currentNM()->mkNode( - kind::STRING_CONCAT, n1rb, n1[i])); - } - } - return i; - } - } - } - else if (n1.size() >= n2.size()) - { - unsigned diff = n1.size() - n2.size(); - for (unsigned i = 0; i <= diff; i++) - { - Node n1rb_first; - Node n1re_first; - // first component of n2 must be a suffix - if (componentContainsBase(n1[i], - n2[0], - n1rb_first, - n1re_first, - 1, - computeRemainder && remainderDir != 1)) - { - Assert(n1re_first.isNull()); - for (unsigned j = 1; j < n2.size(); j++) - { - // are we in the last component? 
- if (j + 1 == n2.size()) - { - Node n1rb_last; - Node n1re_last; - // last component of n2 must be a prefix - if (componentContainsBase(n1[i + j], - n2[j], - n1rb_last, - n1re_last, - -1, - computeRemainder && remainderDir != -1)) - { - Assert(n1rb_last.isNull()); - if (computeRemainder) - { - if (remainderDir != -1) - { - if (!n1re_last.isNull()) - { - ne.push_back(n1re_last); - } - ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); - n1.erase(n1.begin() + i + j + 1, n1.end()); - n1[i + j] = n2[j]; - } - if (remainderDir != 1) - { - n1[i] = n2[0]; - nb.insert(nb.end(), n1.begin(), n1.begin() + i); - n1.erase(n1.begin(), n1.begin() + i); - if (!n1rb_first.isNull()) - { - nb.push_back(n1rb_first); - } - } - } - return i; - } - else - { - break; - } - } - else if (n1[i + j] != n2[j]) - { - break; - } - } - } - } - } - return -1; -} - -bool TheoryStringsRewriter::componentContainsBase( - Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) -{ - Assert(n1rb.isNull()); - Assert(n1re.isNull()); - - NodeManager* nm = NodeManager::currentNM(); - - if (n1 == n2) - { - return true; - } - else - { - if (n1.isConst() && n2.isConst()) - { - size_t len1 = Word::getLength(n1); - size_t len2 = Word::getLength(n2); - if (len2 < len1) - { - if (dir == 1) - { - if (Word::suffix(n1, len2) == n2) - { - if (computeRemainder) - { - n1rb = Word::prefix(n1, len1 - len2); - } - return true; - } - } - else if (dir == -1) - { - if (Word::prefix(n1, len2) == n2) - { - if (computeRemainder) - { - n1re = Word::suffix(n1, len1 - len2); - } - return true; - } - } - else - { - size_t f = Word::find(n1, n2); - if (f != std::string::npos) - { - if (computeRemainder) - { - if (f > 0) - { - n1rb = Word::prefix(n1, f); - } - if (len1 > f + len2) - { - n1re = Word::suffix(n1, len1 - (f + len2)); - } - } - return true; - } - } - } - } - else - { - // cases for: - // n1 = x containing n2 = substr( x, n2[1], n2[2] ) - if (n2.getKind() == kind::STRING_SUBSTR) - { - if (n2[0] == n1) 
- { - bool success = true; - Node start_pos = n2[1]; - Node end_pos = nm->mkNode(kind::PLUS, n2[1], n2[2]); - Node len_n2s = nm->mkNode(kind::STRING_LENGTH, n2[0]); - if (dir == 1) - { - // To be a suffix, start + length must be greater than - // or equal to the length of the string. - success = checkEntailArith(end_pos, len_n2s); - } - else if (dir == -1) - { - // To be a prefix, must literally start at 0, since - // if we knew it started at <0, it should be rewritten to "", - // if we knew it started at 0, then n2[1] should be rewritten to - // 0. - success = start_pos.isConst() - && start_pos.getConst().sgn() == 0; - } - if (success) - { - if (computeRemainder) - { - // we can only compute the remainder if start_pos and end_pos - // are known to be non-negative. - if (!checkEntailArith(start_pos) || !checkEntailArith(end_pos)) - { - return false; - } - if (dir != 1) - { - n1rb = nm->mkNode(kind::STRING_SUBSTR, - n2[0], - nm->mkConst(Rational(0)), - start_pos); - } - if (dir != -1) - { - n1re = nm->mkNode(kind::STRING_SUBSTR, n2[0], end_pos, len_n2s); - } - } - return true; - } - } - } - - if (!computeRemainder && dir == 0) - { - if (n1.getKind() == STRING_STRREPL) - { - // (str.contains (str.replace x y z) w) ---> true - // if (str.contains x w) --> true and (str.contains z w) ---> true - Node xCtnW = checkEntailContains(n1[0], n2); - if (!xCtnW.isNull() && xCtnW.getConst()) - { - Node zCtnW = checkEntailContains(n1[2], n2); - if (!zCtnW.isNull() && zCtnW.getConst()) - { - return true; - } - } - } - } - } - } - return false; -} - -bool TheoryStringsRewriter::stripConstantEndpoints(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - int dir) -{ - Assert(nb.empty()); - Assert(ne.empty()); - - NodeManager* nm = NodeManager::currentNM(); - bool changed = false; - // for ( forwards, backwards ) direction - for (unsigned r = 0; r < 2; r++) - { - if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) - { - unsigned index0 = r == 0 ? 
0 : n1.size() - 1; - unsigned index1 = r == 0 ? 0 : n2.size() - 1; - bool removeComponent = false; - Node n1cmp = n1[index0]; - - if (n1cmp.isConst() && n1cmp.getConst().size() == 0) - { - return false; - } - - std::vector sss; - std::vector sls; - n1cmp = decomposeSubstrChain(n1cmp, sss, sls); - Trace("strings-rewrite-debug2") - << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] - << ", dir = " << dir << std::endl; - if (n1cmp.isConst()) - { - CVC4::String s = n1cmp.getConst(); - // overlap is an overapproximation of the number of characters - // n2[index1] can match in s - unsigned overlap = s.size(); - if (n2[index1].isConst()) - { - CVC4::String t = n2[index1].getConst(); - std::size_t ret = r == 0 ? s.find(t) : s.rfind(t); - if (ret == std::string::npos) - { - if (n1.size() == 1) - { - // can remove everything - // e.g. str.contains( "abc", str.++( "ba", x ) ) --> - // str.contains( "", str.++( "ba", x ) ) - removeComponent = true; - } - else if (sss.empty()) // only if not substr - { - // check how much overlap there is - // This is used to partially strip off the endpoint - // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> - // str.contains( str.++( "c", x ), str.++( "cd", y ) ) - overlap = r == 0 ? s.overlap(t) : t.overlap(s); - } - else - { - // if we are looking at a substring, we can remove the component - // if there is no overlap - // e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" ) - // --> str.contains( x, "a" ) - removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0); - } - } - else if (sss.empty()) // only if not substr - { - Assert(ret < s.size()); - // can strip off up to the find position, e.g. 
- // str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> - // str.contains( str.++( "bc", x ), str.++( "b", y ) ), - // and - // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> - // str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) - overlap = s.size() - ret; - } - } - else - { - // inconclusive - } - // process the overlap - if (overlap < s.size()) - { - changed = true; - if (overlap == 0) - { - removeComponent = true; - } - else - { - // can drop the prefix (resp. suffix) from the first (resp. last) - // component - if (r == 0) - { - nb.push_back(nm->mkConst(s.prefix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.suffix(overlap)); - } - else - { - ne.push_back(nm->mkConst(s.suffix(s.size() - overlap))); - n1[index0] = nm->mkConst(s.prefix(overlap)); - } - } - } - } - else if (n1cmp.getKind() == kind::STRING_ITOS) - { - if (n2[index1].isConst()) - { - CVC4::String t = n2[index1].getConst(); - - if (n1.size() == 1) - { - // if n1.size()==1, then if n2[index1] is not a number, we can drop - // the entire component - // e.g. str.contains( int.to.str(x), "123a45") --> false - if (!t.isNumber()) - { - removeComponent = true; - } - } - else - { - const std::vector& tvec = t.getVec(); - Assert(tvec.size() > 0); - - // if n1.size()>1, then if the first (resp. last) character of - // n2[index1] - // is not a digit, we can drop the entire component, e.g.: - // str.contains( str.++( int.to.str(x), y ), "a12") --> - // str.contains( y, "a12" ) - // str.contains( str.++( y, int.to.str(x) ), "a0b") --> - // str.contains( y, "a0b" ) - unsigned i = r == 0 ? 0 : (tvec.size() - 1); - if (!String::isDigit(tvec[i])) - { - removeComponent = true; - } - } - } - } - if (removeComponent) - { - // can drop entire first (resp. 
last) component - if (r == 0) - { - nb.push_back(n1[index0]); - n1.erase(n1.begin(), n1.begin() + 1); - } - else - { - ne.push_back(n1[index0]); - n1.pop_back(); - } - if (n1.empty()) - { - // if we've removed everything, just return (we will rewrite to false) - return true; - } - else - { - changed = true; - } - } - } - } - // TODO (#1180) : computing the maximal overlap in this function may be - // important. - // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> - // false - // ...since str.to.int(x) can contain at most 1 character from "2aaaa", - // leaving 4 characters - // which is larger that the upper bound for length of str.substr(y,0,3), - // which is 3. - return changed; -} - -Node TheoryStringsRewriter::canonicalStrForSymbolicLength(Node len) -{ - NodeManager* nm = NodeManager::currentNM(); - - Node res; - if (len.getKind() == kind::CONST_RATIONAL) - { - // c -> "A" repeated c times - Rational ratLen = len.getConst(); - Assert(ratLen.getDenominator() == 1); - Integer intLen = ratLen.getNumerator(); - res = nm->mkConst(String(std::string(intLen.getUnsignedInt(), 'A'))); - } - else if (len.getKind() == kind::PLUS) - { - // x + y -> norm(x) + norm(y) - NodeBuilder<> concatBuilder(kind::STRING_CONCAT); - for (const auto& n : len) - { - Node sn = canonicalStrForSymbolicLength(n); - if (sn.isNull()) - { - return Node::null(); - } - std::vector snChildren; - utils::getConcat(sn, snChildren); - concatBuilder.append(snChildren); - } - res = concatBuilder.constructNode(); - } - else if (len.getKind() == kind::MULT && len.getNumChildren() == 2 - && len[0].isConst()) - { - // c * x -> norm(x) repeated c times - Rational ratReps = len[0].getConst(); - Assert(ratReps.getDenominator() == 1); - Integer intReps = ratReps.getNumerator(); - - Node nRep = canonicalStrForSymbolicLength(len[1]); - std::vector nRepChildren; - utils::getConcat(nRep, nRepChildren); - NodeBuilder<> concatBuilder(kind::STRING_CONCAT); - for (size_t i = 0, reps = 
intReps.getUnsignedInt(); i < reps; i++) - { - concatBuilder.append(nRepChildren); - } - res = concatBuilder.constructNode(); - } - else if (len.getKind() == kind::STRING_LENGTH) - { - // len(x) -> x - res = len[0]; - } - return res; -} - -Node TheoryStringsRewriter::lengthPreserveRewrite(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n)); - Node res = canonicalStrForSymbolicLength(len); - return res.isNull() ? n : res; -} - -Node TheoryStringsRewriter::checkEntailContains(Node a, - Node b, - bool fullRewriter) -{ - NodeManager* nm = NodeManager::currentNM(); - Node ctn = nm->mkNode(kind::STRING_STRCTN, a, b); - - if (fullRewriter) - { - ctn = Rewriter::rewrite(ctn); - } - else - { - Node prev; - do - { - prev = ctn; - ctn = rewriteContains(ctn); - } while (prev != ctn && ctn.getKind() == kind::STRING_STRCTN); - } - - Assert(ctn.getType().isBoolean()); - return ctn.isConst() ? ctn : Node::null(); -} - -bool TheoryStringsRewriter::checkEntailNonEmpty(Node a) -{ - Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); - len = Rewriter::rewrite(len); - return checkEntailArith(len, true); -} - -bool TheoryStringsRewriter::checkEntailLengthOne(Node s, bool strict) -{ - NodeManager* nm = NodeManager::currentNM(); - Node one = nm->mkConst(Rational(1)); - Node len = nm->mkNode(STRING_LENGTH, s); - len = Rewriter::rewrite(len); - return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true)); -} - -bool TheoryStringsRewriter::checkEntailArithEq(Node a, Node b) -{ - if (a == b) - { - return true; - } - else - { - Node ar = Rewriter::rewrite(a); - Node br = Rewriter::rewrite(b); - return ar == br; - } -} - -bool TheoryStringsRewriter::checkEntailArith(Node a, Node b, bool strict) -{ - if (a == b) - { - return !strict; - } - else - { - Node diff = NodeManager::currentNM()->mkNode(kind::MINUS, a, b); - return checkEntailArith(diff, strict); - } -} - -struct StrCheckEntailArithTag -{ 
-}; -struct StrCheckEntailArithComputedTag -{ -}; -/** Attribute true for expressions for which checkEntailArith returned true */ -typedef expr::Attribute StrCheckEntailArithAttr; -typedef expr::Attribute - StrCheckEntailArithComputedAttr; - -bool TheoryStringsRewriter::checkEntailArith(Node a, bool strict) -{ - if (a.isConst()) - { - return a.getConst().sgn() >= (strict ? 1 : 0); - } - - Node ar = - strict - ? NodeManager::currentNM()->mkNode( - kind::MINUS, a, NodeManager::currentNM()->mkConst(Rational(1))) - : a; - ar = Rewriter::rewrite(ar); - - if (ar.getAttribute(StrCheckEntailArithComputedAttr())) - { - return ar.getAttribute(StrCheckEntailArithAttr()); - } - - bool ret = checkEntailArithInternal(ar); - if (!ret) - { - // try with approximations - ret = checkEntailArithApprox(ar); - } - // cache the result - ar.setAttribute(StrCheckEntailArithAttr(), ret); - ar.setAttribute(StrCheckEntailArithComputedAttr(), true); - return ret; -} - -bool TheoryStringsRewriter::checkEntailArithApprox(Node ar) -{ - Assert(Rewriter::rewrite(ar) == ar); - NodeManager* nm = NodeManager::currentNM(); - std::map msum; - Trace("strings-ent-approx-debug") - << "Setup arithmetic approximations for " << ar << std::endl; - if (!ArithMSum::getMonomialSum(ar, msum)) - { - Trace("strings-ent-approx-debug") - << "...failed to get monomial sum!" << std::endl; - return false; - } - // for each monomial v*c, mApprox[v] a list of - // possibilities for how the term can be soundly approximated, that is, - // if mApprox[v] contains av, then v*c > av*c. Notice that if c - // is positive, then v > av, otherwise if c is negative, then v < av. - // In other words, av is an under-approximation if c is positive, and an - // over-approximation if c is negative. 
- bool changed = false; - std::map > mApprox; - // map from approximations to their monomial sums - std::map > approxMsums; - // aarSum stores each monomial that does not have multiple approximations - std::vector aarSum; - for (std::pair& m : msum) - { - Node v = m.first; - Node c = m.second; - Trace("strings-ent-approx-debug") - << "Get approximations " << v << "..." << std::endl; - if (v.isNull()) - { - Node mn = c.isNull() ? nm->mkConst(Rational(1)) : c; - aarSum.push_back(mn); - } - else - { - // c.isNull() means c = 1 - bool isOverApprox = !c.isNull() && c.getConst().sgn() == -1; - std::vector& approx = mApprox[v]; - std::unordered_set visited; - std::vector toProcess; - toProcess.push_back(v); - do - { - Node curr = toProcess.back(); - Trace("strings-ent-approx-debug") << " process " << curr << std::endl; - curr = Rewriter::rewrite(curr); - toProcess.pop_back(); - if (visited.find(curr) == visited.end()) - { - visited.insert(curr); - std::vector currApprox; - getArithApproximations(curr, currApprox, isOverApprox); - if (currApprox.empty()) - { - Trace("strings-ent-approx-debug") - << "...approximation: " << curr << std::endl; - // no approximations, thus curr is a possibility - approx.push_back(curr); - } - else - { - toProcess.insert( - toProcess.end(), currApprox.begin(), currApprox.end()); - } - } - } while (!toProcess.empty()); - Assert(!approx.empty()); - // if we have only one approximation, move it to final - if (approx.size() == 1) - { - changed = v != approx[0]; - Node mn = ArithMSum::mkCoeffTerm(c, approx[0]); - aarSum.push_back(mn); - mApprox.erase(v); - } - else - { - // compute monomial sum form for each approximation, used below - for (const Node& aa : approx) - { - if (approxMsums.find(aa) == approxMsums.end()) - { - CVC4_UNUSED bool ret = - ArithMSum::getMonomialSum(aa, approxMsums[aa]); - Assert(ret); - } - } - changed = true; - } - } - } - if (!changed) - { - // approximations had no effect, return - Trace("strings-ent-approx-debug") << 
"...no approximations" << std::endl; - return false; - } - // get the current "fixed" sum for the abstraction of ar - Node aar = aarSum.empty() - ? nm->mkConst(Rational(0)) - : (aarSum.size() == 1 ? aarSum[0] : nm->mkNode(PLUS, aarSum)); - aar = Rewriter::rewrite(aar); - Trace("strings-ent-approx-debug") - << "...processed fixed sum " << aar << " with " << mApprox.size() - << " approximated monomials." << std::endl; - // if we have a choice of how to approximate - if (!mApprox.empty()) - { - // convert aar back to monomial sum - std::map msumAar; - if (!ArithMSum::getMonomialSum(aar, msumAar)) - { - return false; - } - if (Trace.isOn("strings-ent-approx")) - { - Trace("strings-ent-approx") - << "---- Check arithmetic entailment by under-approximation " << ar - << " >= 0" << std::endl; - Trace("strings-ent-approx") << "FIXED:" << std::endl; - ArithMSum::debugPrintMonomialSum(msumAar, "strings-ent-approx"); - Trace("strings-ent-approx") << "APPROX:" << std::endl; - for (std::pair >& a : mApprox) - { - Node c = msum[a.first]; - Trace("strings-ent-approx") << " "; - if (!c.isNull()) - { - Trace("strings-ent-approx") << c << " * "; - } - Trace("strings-ent-approx") - << a.second << " ...from " << a.first << std::endl; - } - Trace("strings-ent-approx") << std::endl; - } - Rational one(1); - // incorporate monomials one at a time that have a choice of approximations - while (!mApprox.empty()) - { - Node v; - Node vapprox; - int maxScore = -1; - // Look at each approximation, take the one with the best score. - // Notice that we are in the process of trying to prove - // ( c1*t1 + .. + cn*tn ) + ( approx_1 | ... | approx_m ) >= 0, - // where c1*t1 + .. + cn*tn is the "fixed" component of our sum (aar) - // and approx_1 ... approx_m are possible approximations. The - // intution here is that we want coefficients c1...cn to be positive. 
- // This is because arithmetic string terms t1...tn (which may be - // applications of len, indexof, str.to.int) are never entailed to be - // negative. Hence, we add the approx_i that contributes the "most" - // towards making all constants c1...cn positive and cancelling negative - // monomials in approx_i itself. - for (std::pair >& nam : mApprox) - { - Node cr = msum[nam.first]; - for (const Node& aa : nam.second) - { - unsigned helpsCancelCount = 0; - unsigned addsObligationCount = 0; - std::map::iterator it; - // we are processing an approximation cr*( c1*t1 + ... + cn*tn ) - for (std::pair& aam : approxMsums[aa]) - { - // Say aar is of the form t + c*ti, and aam is the monomial ci*ti - // where ci != 0. We say aam: - // (1) helps cancel if c != 0 and c>0 != ci>0 - // (2) adds obligation if c>=0 and c+ci<0 - Node ti = aam.first; - Node ci = aam.second; - if (!cr.isNull()) - { - ci = ci.isNull() ? cr - : Rewriter::rewrite(nm->mkNode(MULT, ci, cr)); - } - Trace("strings-ent-approx-debug") << ci << "*" << ti << " "; - int ciSgn = ci.isNull() ? 1 : ci.getConst().sgn(); - it = msumAar.find(ti); - if (it != msumAar.end()) - { - Node c = it->second; - int cSgn = c.isNull() ? 1 : c.getConst().sgn(); - if (cSgn == 0) - { - addsObligationCount += (ciSgn == -1 ? 1 : 0); - } - else if (cSgn != ciSgn) - { - helpsCancelCount++; - Rational r1 = c.isNull() ? one : c.getConst(); - Rational r2 = ci.isNull() ? one : ci.getConst(); - Rational r12 = r1 + r2; - if (r12.sgn() == -1) - { - addsObligationCount++; - } - } - } - else - { - addsObligationCount += (ciSgn == -1 ? 1 : 0); - } - } - Trace("strings-ent-approx-debug") - << "counts=" << helpsCancelCount << "," << addsObligationCount - << " for " << aa << " into " << aar << std::endl; - int score = (addsObligationCount > 0 ? 0 : 2) - + (helpsCancelCount > 0 ? 
1 : 0); - // if its the best, update v and vapprox - if (v.isNull() || score > maxScore) - { - v = nam.first; - vapprox = aa; - maxScore = score; - } - } - if (!v.isNull()) - { - break; - } - } - Trace("strings-ent-approx") - << "- Decide " << v << " = " << vapprox << std::endl; - // we incorporate v approximated by vapprox into the overall approximation - // for ar - Assert(!v.isNull() && !vapprox.isNull()); - Assert(msum.find(v) != msum.end()); - Node mn = ArithMSum::mkCoeffTerm(msum[v], vapprox); - aar = nm->mkNode(PLUS, aar, mn); - // update the msumAar map - aar = Rewriter::rewrite(aar); - msumAar.clear(); - if (!ArithMSum::getMonomialSum(aar, msumAar)) - { - Assert(false); - Trace("strings-ent-approx") - << "...failed to get monomial sum!" << std::endl; - return false; - } - // we have processed the approximation for v - mApprox.erase(v); - } - Trace("strings-ent-approx") << "-----------------" << std::endl; - } - if (aar == ar) - { - Trace("strings-ent-approx-debug") - << "...approximation had no effect" << std::endl; - // this should never happen, but we avoid the infinite loop for sanity here - Assert(false); - return false; - } - // Check entailment on the approximation of ar. - // Notice that this may trigger further reasoning by approximation. For - // example, len( replace( x ++ y, substr( x, 0, n ), z ) ) may be - // under-approximated as len( x ) + len( y ) - len( substr( x, 0, n ) ) on - // this call, where in the recursive call we may over-approximate - // len( substr( x, 0, n ) ) as len( x ). In this example, we can infer - // that len( replace( x ++ y, substr( x, 0, n ), z ) ) >= len( y ) in two - // steps. - if (checkEntailArith(aar)) - { - Trace("strings-ent-approx") - << "*** StrArithApprox: showed " << ar - << " >= 0 using under-approximation!" 
<< std::endl; - Trace("strings-ent-approx") - << "*** StrArithApprox: under-approximation was " << aar << std::endl; - return true; - } - return false; -} - -void TheoryStringsRewriter::getArithApproximations(Node a, - std::vector& approx, - bool isOverApprox) -{ - NodeManager* nm = NodeManager::currentNM(); - // We do not handle PLUS here since this leads to exponential behavior. - // Instead, this is managed, e.g. during checkEntailArithApprox, where - // PLUS terms are expanded "on-demand" during the reasoning. - Trace("strings-ent-approx-debug") - << "Get arith approximations " << a << std::endl; - Kind ak = a.getKind(); - if (ak == MULT) - { - Node c; - Node v; - if (ArithMSum::getMonomial(a, c, v)) - { - bool isNeg = c.getConst().sgn() == -1; - getArithApproximations(v, approx, isNeg ? !isOverApprox : isOverApprox); - for (unsigned i = 0, size = approx.size(); i < size; i++) - { - approx[i] = nm->mkNode(MULT, c, approx[i]); - } - } - } - else if (ak == STRING_LENGTH) - { - Kind aak = a[0].getKind(); - if (aak == STRING_SUBSTR) - { - // over,under-approximations for len( substr( x, n, m ) ) - Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); - if (isOverApprox) - { - // m >= 0 implies - // m >= len( substr( x, n, m ) ) - if (checkEntailArith(a[0][2])) - { - approx.push_back(a[0][2]); - } - if (checkEntailArith(lenx, a[0][1])) - { - // n <= len( x ) implies - // len( x ) - n >= len( substr( x, n, m ) ) - approx.push_back(nm->mkNode(MINUS, lenx, a[0][1])); - } - else - { - // len( x ) >= len( substr( x, n, m ) ) - approx.push_back(lenx); - } - } - else - { - // 0 <= n and n+m <= len( x ) implies - // m <= len( substr( x, n, m ) ) - Node npm = nm->mkNode(PLUS, a[0][1], a[0][2]); - if (checkEntailArith(a[0][1]) && checkEntailArith(lenx, npm)) - { - approx.push_back(a[0][2]); - } - // 0 <= n and n+m >= len( x ) implies - // len(x)-n <= len( substr( x, n, m ) ) - if (checkEntailArith(a[0][1]) && checkEntailArith(npm, lenx)) - { - approx.push_back(nm->mkNode(MINUS, 
lenx, a[0][1])); - } - } - } - else if (aak == STRING_STRREPL) - { - // over,under-approximations for len( replace( x, y, z ) ) - // notice this is either len( x ) or ( len( x ) + len( z ) - len( y ) ) - Node lenx = nm->mkNode(STRING_LENGTH, a[0][0]); - Node leny = nm->mkNode(STRING_LENGTH, a[0][1]); - Node lenz = nm->mkNode(STRING_LENGTH, a[0][2]); - if (isOverApprox) - { - if (checkEntailArith(leny, lenz)) - { - // len( y ) >= len( z ) implies - // len( x ) >= len( replace( x, y, z ) ) - approx.push_back(lenx); - } - else - { - // len( x ) + len( z ) >= len( replace( x, y, z ) ) - approx.push_back(nm->mkNode(PLUS, lenx, lenz)); - } - } - else - { - if (checkEntailArith(lenz, leny) || checkEntailArith(lenz, lenx)) - { - // len( y ) <= len( z ) or len( x ) <= len( z ) implies - // len( x ) <= len( replace( x, y, z ) ) - approx.push_back(lenx); - } - else - { - // len( x ) - len( y ) <= len( replace( x, y, z ) ) - approx.push_back(nm->mkNode(MINUS, lenx, leny)); - } - } - } - else if (aak == STRING_ITOS) - { - // over,under-approximations for len( int.to.str( x ) ) - if (isOverApprox) - { - if (checkEntailArith(a[0][0], false)) - { - if (checkEntailArith(a[0][0], true)) - { - // x > 0 implies - // x >= len( int.to.str( x ) ) - approx.push_back(a[0][0]); - } - else - { - // x >= 0 implies - // x+1 >= len( int.to.str( x ) ) - approx.push_back( - nm->mkNode(PLUS, nm->mkConst(Rational(1)), a[0][0])); - } - } - } - else - { - if (checkEntailArith(a[0][0])) - { - // x >= 0 implies - // len( int.to.str( x ) ) >= 1 - approx.push_back(nm->mkConst(Rational(1))); - } - // other crazy things are possible here, e.g. 
- // len( int.to.str( len( y ) + 10 ) ) >= 2 - } - } - } - else if (ak == STRING_STRIDOF) - { - // over,under-approximations for indexof( x, y, n ) - if (isOverApprox) - { - Node lenx = nm->mkNode(STRING_LENGTH, a[0]); - Node leny = nm->mkNode(STRING_LENGTH, a[1]); - if (checkEntailArith(lenx, leny)) - { - // len( x ) >= len( y ) implies - // len( x ) - len( y ) >= indexof( x, y, n ) - approx.push_back(nm->mkNode(MINUS, lenx, leny)); - } - else - { - // len( x ) >= indexof( x, y, n ) - approx.push_back(lenx); - } - } - else - { - // TODO?: - // contains( substr( x, n, len( x ) ), y ) implies - // n <= indexof( x, y, n ) - // ...hard to test, runs risk of non-termination - - // -1 <= indexof( x, y, n ) - approx.push_back(nm->mkConst(Rational(-1))); - } - } - else if (ak == STRING_STOI) - { - // over,under-approximations for str.to.int( x ) - if (isOverApprox) - { - // TODO?: - // y >= 0 implies - // y >= str.to.int( int.to.str( y ) ) - } - else - { - // -1 <= str.to.int( x ) - approx.push_back(nm->mkConst(Rational(-1))); - } - } - Trace("strings-ent-approx-debug") << "Return " << approx.size() << std::endl; -} - -bool TheoryStringsRewriter::checkEntailMultisetSubset(Node a, Node b) -{ - NodeManager* nm = NodeManager::currentNM(); - - std::vector avec; - utils::getConcat(getMultisetApproximation(a), avec); - std::vector bvec; - utils::getConcat(b, bvec); - - std::map num_nconst[2]; - std::map num_const[2]; - for (unsigned j = 0; j < 2; j++) - { - std::vector& jvec = j == 0 ? 
avec : bvec; - for (const Node& cc : jvec) - { - if (cc.isConst()) - { - num_const[j][cc]++; - } - else - { - num_nconst[j][cc]++; - } - } - } - bool ms_success = true; - for (std::pair& nncp : num_nconst[0]) - { - if (nncp.second > num_nconst[1][nncp.first]) - { - ms_success = false; - break; - } - } - if (ms_success) - { - // count the number of constant characters in the first argument - std::map count_const[2]; - std::vector chars; - for (unsigned j = 0; j < 2; j++) - { - for (std::pair& ncp : num_const[j]) - { - Node cn = ncp.first; - Assert(cn.isConst()); - std::vector cc_vec; - const std::vector& cvec = cn.getConst().getVec(); - for (unsigned i = 0, size = cvec.size(); i < size; i++) - { - // make the character - cc_vec.clear(); - cc_vec.insert(cc_vec.end(), cvec.begin() + i, cvec.begin() + i + 1); - Node ch = nm->mkConst(String(cc_vec)); - count_const[j][ch] += ncp.second; - if (std::find(chars.begin(), chars.end(), ch) == chars.end()) - { - chars.push_back(ch); - } - } - } - } - Trace("strings-entail-ms-ss") - << "For " << a << " and " << b << " : " << std::endl; - for (const Node& ch : chars) - { - Trace("strings-entail-ms-ss") << " # occurrences of substring "; - Trace("strings-entail-ms-ss") << ch << " in arguments is "; - Trace("strings-entail-ms-ss") - << count_const[0][ch] << " / " << count_const[1][ch] << std::endl; - if (count_const[0][ch] < count_const[1][ch]) - { - return true; - } - } - - // TODO (#1180): count the number of 2,3,4,.. character substrings - // for example: - // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false - // since the second argument contains more occurrences of "bb". - // note this is orthogonal reasoning to inductive reasoning - // via regular membership reduction in Liang et al CAV 2015. 
- } - return false; -} - -Node TheoryStringsRewriter::checkEntailHomogeneousString(Node a) -{ - NodeManager* nm = NodeManager::currentNM(); - - std::vector avec; - utils::getConcat(getMultisetApproximation(a), avec); - - bool cValid = false; - unsigned c = 0; - for (const Node& ac : avec) - { - if (ac.isConst()) - { - std::vector acv = ac.getConst().getVec(); - for (unsigned cc : acv) - { - if (!cValid) - { - cValid = true; - c = cc; - } - else if (c != cc) - { - // Found a different character - return Node::null(); - } - } - } - else - { - // Could produce a different character - return Node::null(); - } - } - - if (!cValid) - { - return nm->mkConst(String("")); - } - - std::vector cv = {c}; - return nm->mkConst(String(cv)); -} - -Node TheoryStringsRewriter::getMultisetApproximation(Node a) -{ - NodeManager* nm = NodeManager::currentNM(); - if (a.getKind() == STRING_SUBSTR) - { - return a[0]; - } - else if (a.getKind() == STRING_STRREPL) - { - return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); - } - else if (a.getKind() == STRING_CONCAT) - { - NodeBuilder<> nb(STRING_CONCAT); - for (const Node& ac : a) - { - nb << getMultisetApproximation(ac); - } - return nb.constructNode(); - } - else - { - return a; - } -} - -bool TheoryStringsRewriter::checkEntailArithWithEqAssumption(Node assumption, - Node a, - bool strict) -{ - Assert(assumption.getKind() == kind::EQUAL); - Assert(Rewriter::rewrite(assumption) == assumption); - - // Find candidates variables to compute substitutions for - std::unordered_set candVars; - std::vector toVisit = {assumption}; - while (!toVisit.empty()) - { - Node curr = toVisit.back(); - toVisit.pop_back(); - - if (curr.getKind() == kind::PLUS || curr.getKind() == kind::MULT - || curr.getKind() == kind::MINUS || curr.getKind() == kind::EQUAL) - { - for (const auto& currChild : curr) - { - toVisit.push_back(currChild); - } - } - else if (curr.isVar() && Theory::theoryOf(curr) == THEORY_ARITH) - { - candVars.insert(curr); - } 
- else if (curr.getKind() == kind::STRING_LENGTH) - { - candVars.insert(curr); - } - } - - // Check if any of the candidate variables are in n - Node v; - Assert(toVisit.empty()); - toVisit.push_back(a); - while (!toVisit.empty()) - { - Node curr = toVisit.back(); - toVisit.pop_back(); - - for (const auto& currChild : curr) - { - toVisit.push_back(currChild); - } - - if (candVars.find(curr) != candVars.end()) - { - v = curr; - break; - } - } - - if (v.isNull()) - { - // No suitable candidate found - return false; - } - - Node solution = ArithMSum::solveEqualityFor(assumption, v); - if (solution.isNull()) - { - // Could not solve for v - return false; - } - - a = a.substitute(TNode(v), TNode(solution)); - return checkEntailArith(a, strict); -} - -bool TheoryStringsRewriter::checkEntailArithWithAssumption(Node assumption, - Node a, - Node b, - bool strict) -{ - Assert(Rewriter::rewrite(assumption) == assumption); - - NodeManager* nm = NodeManager::currentNM(); - - if (!assumption.isConst() && assumption.getKind() != kind::EQUAL) - { - // We rewrite inequality assumptions from x <= y to x + (str.len s) = y - // where s is some fresh string variable. We use (str.len s) because - // (str.len s) must be non-negative for the equation to hold. 
- Node x, y; - if (assumption.getKind() == kind::GEQ) - { - x = assumption[0]; - y = assumption[1]; - } - else - { - // (not (>= s t)) --> (>= (t - 1) s) - Assert(assumption.getKind() == kind::NOT - && assumption[0].getKind() == kind::GEQ); - x = nm->mkNode(kind::MINUS, assumption[0][1], nm->mkConst(Rational(1))); - y = assumption[0][0]; - } - - Node s = nm->mkBoundVar("slackVal", nm->stringType()); - Node slen = nm->mkNode(kind::STRING_LENGTH, s); - assumption = Rewriter::rewrite( - nm->mkNode(kind::EQUAL, x, nm->mkNode(kind::PLUS, y, slen))); - } - - Node diff = nm->mkNode(kind::MINUS, a, b); - bool res = false; - if (assumption.isConst()) - { - bool assumptionBool = assumption.getConst(); - if (assumptionBool) - { - res = checkEntailArith(diff, strict); - } - else - { - res = true; - } - } - else - { - res = checkEntailArithWithEqAssumption(assumption, diff, strict); - } - return res; -} - -bool TheoryStringsRewriter::checkEntailArithWithAssumptions( - std::vector assumptions, Node a, Node b, bool strict) -{ - // TODO: We currently try to show the entailment with each assumption - // independently. In the future, we should make better use of multiple - // assumptions. 
- bool res = false; - for (const auto& assumption : assumptions) - { - Assert(Rewriter::rewrite(assumption) == assumption); - - if (checkEntailArithWithAssumption(assumption, a, b, strict)) - { - res = true; - break; - } - } - return res; -} - -Node TheoryStringsRewriter::getConstantArithBound(Node a, bool isLower) -{ - Assert(Rewriter::rewrite(a) == a); - Node ret; - if (a.isConst()) - { - ret = a; - } - else if (a.getKind() == kind::STRING_LENGTH) - { - if (isLower) - { - ret = NodeManager::currentNM()->mkConst(Rational(0)); - } - } - else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) - { - std::vector children; - bool success = true; - for (unsigned i = 0; i < a.getNumChildren(); i++) - { - Node ac = getConstantArithBound(a[i], isLower); - if (ac.isNull()) - { - ret = ac; - success = false; - break; - } - else - { - if (ac.getConst().sgn() == 0) - { - if (a.getKind() == kind::MULT) - { - ret = ac; - success = false; - break; - } - } - else - { - if (a.getKind() == kind::MULT) - { - if ((ac.getConst().sgn() > 0) != isLower) - { - ret = Node::null(); - success = false; - break; - } - } - children.push_back(ac); - } - } - } - if (success) - { - if (children.empty()) - { - ret = NodeManager::currentNM()->mkConst(Rational(0)); - } - else if (children.size() == 1) - { - ret = children[0]; - } - else - { - ret = NodeManager::currentNM()->mkNode(a.getKind(), children); - ret = Rewriter::rewrite(ret); - } - } - } - Trace("strings-rewrite-cbound") - << "Constant " << (isLower ? 
"lower" : "upper") << " bound for " << a - << " is " << ret << std::endl; - Assert(ret.isNull() || ret.isConst()); - // entailment check should be at least as powerful as computing a lower bound - Assert(!isLower || ret.isNull() || ret.getConst().sgn() < 0 - || checkEntailArith(a, false)); - Assert(!isLower || ret.isNull() || ret.getConst().sgn() <= 0 - || checkEntailArith(a, true)); - return ret; -} - -Node TheoryStringsRewriter::getFixedLengthForRegexp(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - if (n.getKind() == STRING_TO_REGEXP) - { - Node ret = nm->mkNode(STRING_LENGTH, n[0]); - ret = Rewriter::rewrite(ret); - if (ret.isConst()) - { - return ret; - } - } - else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) - { - return nm->mkConst(Rational(1)); - } - else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) - { - Node ret; - for (const Node& nc : n) - { - Node flc = getFixedLengthForRegexp(nc); - if (flc.isNull() || (!ret.isNull() && ret != flc)) - { - return Node::null(); - } - else if (ret.isNull()) - { - // first time - ret = flc; - } - } - return ret; - } - else if (n.getKind() == REGEXP_CONCAT) - { - NodeBuilder<> nb(PLUS); - for (const Node& nc : n) - { - Node flc = getFixedLengthForRegexp(nc); - if (flc.isNull()) - { - return flc; - } - nb << flc; - } - Node ret = nb.constructNode(); - ret = Rewriter::rewrite(ret); - return ret; - } - return Node::null(); -} - -bool TheoryStringsRewriter::checkEntailArithInternal(Node a) -{ - Assert(Rewriter::rewrite(a) == a); - // check whether a >= 0 - if (a.isConst()) - { - return a.getConst().sgn() >= 0; - } - else if (a.getKind() == kind::STRING_LENGTH) - { - // str.len( t ) >= 0 - return true; - } - else if (a.getKind() == kind::PLUS || a.getKind() == kind::MULT) - { - for (unsigned i = 0; i < a.getNumChildren(); i++) - { - if (!checkEntailArithInternal(a[i])) - { - return false; - } - } - // t1 >= 0 ^ ... ^ tn >= 0 => t1 op ... 
op tn >= 0 - return true; - } - - return false; -} - -Node TheoryStringsRewriter::decomposeSubstrChain(Node s, - std::vector& ss, - std::vector& ls) -{ - Assert(ss.empty()); - Assert(ls.empty()); - while (s.getKind() == STRING_SUBSTR) - { - ss.push_back(s[1]); - ls.push_back(s[2]); - s = s[0]; - } - std::reverse(ss.begin(), ss.end()); - std::reverse(ls.begin(), ls.end()); - return s; -} - -Node TheoryStringsRewriter::mkSubstrChain(Node base, - const std::vector& ss, - const std::vector& ls) -{ - NodeManager* nm = NodeManager::currentNM(); - for (unsigned i = 0, size = ss.size(); i < size; i++) - { - base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); - } - return base; -} - -Node TheoryStringsRewriter::getStringOrEmpty(Node n) -{ - NodeManager* nm = NodeManager::currentNM(); - Node res; - while (res.isNull()) - { - switch (n.getKind()) - { - case kind::STRING_STRREPL: - { - Node empty = nm->mkConst(::CVC4::String("")); - if (n[0] == empty) - { - // (str.replace "" x y) --> y - n = n[2]; - break; - } - - if (checkEntailLengthOne(n[0]) && n[2] == empty) - { - // (str.replace "A" x "") --> "A" - res = n[0]; - break; - } - - res = n; - break; - } - case kind::STRING_SUBSTR: - { - if (checkEntailLengthOne(n[0])) - { - // (str.substr "A" x y) --> "A" - res = n[0]; - break; - } - res = n; - break; - } - default: - { - res = n; - break; - } - } - } - return res; -} - -bool TheoryStringsRewriter::inferZerosInSumGeq(Node x, - std::vector& ys, - std::vector& zeroYs) -{ - Assert(zeroYs.empty()); - - NodeManager* nm = NodeManager::currentNM(); - - // Check if we can show that y1 + ... + yn >= x - Node sum = (ys.size() > 1) ? nm->mkNode(PLUS, ys) : ys[0]; - if (!checkEntailArith(sum, x)) - { - return false; - } - - // Try to remove yi one-by-one and check if we can still show: - // - // y1 + ... + yi-1 + yi+1 + ... + yn >= x - // - // If that's the case, we know that yi can be zero and the inequality still - // holds. 
- size_t i = 0; - while (i < ys.size()) - { - Node yi = ys[i]; - std::vector::iterator pos = ys.erase(ys.begin() + i); - if (ys.size() > 1) - { - sum = nm->mkNode(PLUS, ys); - } - else - { - sum = ys.size() == 1 ? ys[0] : nm->mkConst(Rational(0)); - } - - if (checkEntailArith(sum, x)) - { - zeroYs.push_back(yi); - } - else - { - ys.insert(pos, yi); - i++; - } - } - return true; -} - -Node TheoryStringsRewriter::inferEqsFromContains(Node x, Node y) -{ - NodeManager* nm = NodeManager::currentNM(); - Node emp = nm->mkConst(String("")); - - Node xLen = nm->mkNode(STRING_LENGTH, x); - std::vector yLens; - if (y.getKind() != STRING_CONCAT) - { - yLens.push_back(nm->mkNode(STRING_LENGTH, y)); - } - else - { - for (const Node& yi : y) - { - yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); - } - } - - std::vector zeroLens; - if (x == emp) - { - // If x is the empty string, then all ys must be empty, too, and we can - // skip the expensive checks. Note that this is just a performance - // optimization. - zeroLens.swap(yLens); - } - else - { - // Check if we can infer that str.len(x) <= str.len(y). If that is the - // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + - // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality - // true. The terms that can have length zero without making the inequality - // false must be all be empty if (str.contains x y) is true. 
- if (!inferZerosInSumGeq(xLen, yLens, zeroLens)) - { - // We could not prove that the inequality holds - return Node::null(); - } - else if (yLens.size() == y.getNumChildren()) - { - // We could only prove that the inequality holds but not that any of the - // ys must be empty - return nm->mkNode(EQUAL, x, y); - } - } - - if (y.getKind() != STRING_CONCAT) - { - if (zeroLens.size() == 1) - { - // y is not a concatenation and we found that it must be empty, so just - // return (= y "") - Assert(zeroLens[0][0] == y); - return nm->mkNode(EQUAL, y, emp); - } - else - { - Assert(yLens.size() == 1 && yLens[0][0] == y); - return nm->mkNode(EQUAL, x, y); - } - } - - std::vector cs; - for (const Node& yiLen : yLens) - { - Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); - cs.push_back(yiLen[0]); - } - - NodeBuilder<> nb(AND); - // (= x (str.++ y1' ... ym')) - if (!cs.empty()) - { - nb << nm->mkNode(EQUAL, x, utils::mkConcat(STRING_CONCAT, cs)); - } - // (= y1'' "") ... (= yk'' "") - for (const Node& zeroLen : zeroLens) - { - Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); - nb << nm->mkNode(EQUAL, zeroLen[0], emp); - } - - // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... 
(= yk'' "")) - return nb.constructNode(); -} - -std::pair > TheoryStringsRewriter::collectEmptyEqs( - Node x) -{ - NodeManager* nm = NodeManager::currentNM(); - Node empty = nm->mkConst(::CVC4::String("")); - - // Collect the equalities of the form (= x "") (sorted) - std::set emptyNodes; - bool allEmptyEqs = true; - if (x.getKind() == kind::EQUAL) - { - if (x[0] == empty) - { - emptyNodes.insert(x[1]); - } - else if (x[1] == empty) - { - emptyNodes.insert(x[0]); - } - else - { - allEmptyEqs = false; - } - } - else if (x.getKind() == kind::AND) - { - for (const Node& c : x) - { - if (c.getKind() == kind::EQUAL) - { - if (c[0] == empty) - { - emptyNodes.insert(c[1]); - } - else if (c[1] == empty) - { - emptyNodes.insert(c[0]); - } - } - else - { - allEmptyEqs = false; - } - } - } - - if (emptyNodes.size() == 0) - { - allEmptyEqs = false; - } - - return std::make_pair( - allEmptyEqs, std::vector(emptyNodes.begin(), emptyNodes.end())); -} - -Node TheoryStringsRewriter::returnRewrite(Node node, Node ret, const char* c) -{ - Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << c - << "." << std::endl; - - NodeManager* nm = NodeManager::currentNM(); - - // standard post-processing - // We rewrite (string) equalities immediately here. This allows us to forego - // the standard invariant on equality rewrites (that s=t must rewrite to one - // of { s=t, t=s, true, false } ). 
- Kind retk = ret.getKind(); - if (retk == OR || retk == AND) - { - std::vector children; - bool childChanged = false; - for (const Node& cret : ret) - { - Node creter = cret; - if (cret.getKind() == EQUAL) - { - creter = rewriteEqualityExt(cret); - } - else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL) - { - creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0])); - } - childChanged = childChanged || cret != creter; - children.push_back(creter); - } - if (childChanged) - { - ret = nm->mkNode(retk, children); - } - } - else if (retk == NOT && ret[0].getKind() == EQUAL) - { - ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0])); - } - else if (retk == EQUAL && node.getKind() != EQUAL) - { - Trace("strings-rewrite") - << "Apply extended equality rewrite on " << ret << std::endl; - ret = rewriteEqualityExt(ret); - } - return ret; -} diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h deleted file mode 100644 index 4accfca39..000000000 --- a/src/theory/strings/theory_strings_rewriter.h +++ /dev/null @@ -1,788 +0,0 @@ -/********************* */ -/*! \file theory_strings_rewriter.h - ** \verbatim - ** Top contributors (to current version): - ** Andrew Reynolds, Andres Noetzli, Tianyi Liang - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. 
See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief Rewriter for the theory of strings - ** - **/ - -#include "cvc4_private.h" - -#ifndef CVC4__THEORY__STRINGS__THEORY_STRINGS_REWRITER_H -#define CVC4__THEORY__STRINGS__THEORY_STRINGS_REWRITER_H - -#include -#include -#include - -#include "expr/attribute.h" -#include "theory/theory_rewriter.h" -#include "theory/type_enumerator.h" - -namespace CVC4 { -namespace theory { -namespace strings { - -class TheoryStringsRewriter : public TheoryRewriter -{ - private: - /** simple regular expression consume - * - * This method is called when we are rewriting a membership of the form - * s1 ++ ... ++ sn in r1 ++ ... ++ rm - * We have that mchildren consists of the strings s1...sn, and children - * consists of the regular expressions r1...rm. - * - * This method tries to strip off parts of the concatenation terms. It updates - * the vectors such that the resulting vectors are such that the membership - * mchildren[n'...n''] in children[m'...m''] is equivalent to the input - * membership. The argument dir indicates the direction to consider, where - * 0 means strip off the front, 1 off the back, and < 0 off of both. - * - * If this method returns the false node, then we have inferred that no - * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or - * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null - * node. - * - * For example, given input - * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0, - * this method updates: - * mchildren = { "b", x }, children = { ("cd")* } - * and returns null. - * - * For example, given input - * { x, "abb", x }, { [[x]], ["a"..."b"], allchar, [[y]], [[x]]} and dir=-1, - * this method updates: - * { "b" }, { [[y]] } - * where [[.]] denotes str.to.re, and returns null. 
- * - * Notice that the above requirement for returning false is stronger than - * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false. - * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false - * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb". - * We do not return false even though the above membership is equivalent - * to false. We do this because the function is used e.g. to test whether a - * possible unrolling leads to a conflict. This is demonstrated by the - * following examples: - * - * For example, given input - * { "bb", x }, { "b", ("a")* } and dir=-1, - * this method updates: - * { "b" }, { ("a")* } - * and returns null. - * - * For example, given input - * { "cb", x }, { "b", ("a")* } and dir=-1, - * this method leaves children and mchildren unchanged and returns false. - * - * Notice that based on this, we can determine that: - * "cb" ++ x in ( "b" ++ ("a")* )* - * is equivalent to false, whereas we cannot determine that: - * "bb" ++ x in ( "b" ++ ("a")* )* - * is equivalent to false. - */ - static Node simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir = -1 ); - static bool isConstRegExp( TNode t ); - static bool testConstStringInRegExp( CVC4::String &s, unsigned int index_start, TNode r ); - - /** rewrite regular expression concatenation - * - * This is the entry point for post-rewriting applications of re.++. - * Returns the rewritten form of node. - */ - static Node rewriteConcatRegExp(TNode node); - /** rewrite regular expression star - * - * This is the entry point for post-rewriting applications of re.*. - * Returns the rewritten form of node. - */ - static Node rewriteStarRegExp(TNode node); - /** rewrite regular expression intersection/union - * - * This is the entry point for post-rewriting applications of re.inter and - * re.union. Returns the rewritten form of node. 
- */ - static Node rewriteAndOrRegExp(TNode node); - /** rewrite regular expression loop - * - * This is the entry point for post-rewriting applications of re.loop. - * Returns the rewritten form of node. - */ - static Node rewriteLoopRegExp(TNode node); - /** rewrite regular expression membership - * - * This is the entry point for post-rewriting applications of str.in.re - * Returns the rewritten form of node. - */ - static Node rewriteMembership(TNode node); - - static bool hasEpsilonNode(TNode node); - /** check entail arithmetic internal - * Returns true if we can show a >= 0 always. - * a is in rewritten form. - */ - static bool checkEntailArithInternal(Node a); - /** rewrite string equality extended - * - * This method returns a formula that is equivalent to the equality between - * two strings s = t, given by node. It is called by rewriteEqualityExt. - */ - static Node rewriteStrEqualityExt(Node node); - /** rewrite arithmetic equality extended - * - * This method returns a formula that is equivalent to the equality between - * two arithmetic string terms s = t, given by node. t is called by - * rewriteEqualityExt. - */ - static Node rewriteArithEqualityExt(Node node); - /** - * Called when node rewrites to ret. - * - * The string c indicates the justification for the rewrite, which is printed - * by this function for debugging. - * - * If node is not an equality and ret is an equality, this method applies - * an additional rewrite step (rewriteEqualityExt) that performs - * additional rewrites on ret, after which we return the result of this call. - * Otherwise, this method simply returns ret. - */ - static Node returnRewrite(Node node, Node ret, const char* c); - - public: - RewriteResponse postRewrite(TNode node) override; - RewriteResponse preRewrite(TNode node) override; - - /** rewrite equality - * - * This method returns a formula that is equivalent to the equality between - * two strings s = t, given by node. 
The result of rewrite is one of - * { s = t, t = s, true, false }. - */ - static Node rewriteEquality(Node node); - /** rewrite equality extended - * - * This method returns a formula that is equivalent to the equality between - * two terms s = t, given by node, where s and t are terms in the signature - * of the theory of strings. Notice that s and t may be of string type or - * of Int type. - * - * Specifically, this function performs rewrites whose conclusion is not - * necessarily one of { s = t, t = s, true, false }. - */ - static Node rewriteEqualityExt(Node node); - /** rewrite concat - * This is the entry point for post-rewriting terms node of the form - * str.++( t1, .., tn ) - * Returns the rewritten form of node. - */ - static Node rewriteConcat(Node node); - /** rewrite substr - * This is the entry point for post-rewriting terms node of the form - * str.substr( s, i1, i2 ) - * Returns the rewritten form of node. - */ - static Node rewriteSubstr(Node node); - /** rewrite contains - * This is the entry point for post-rewriting terms node of the form - * str.contains( t, s ) - * Returns the rewritten form of node. - * - * For details on some of the basic rewrites done in this function, see Figure - * 7 of Reynolds et al "Scaling Up DPLL(T) String Solvers Using - * Context-Dependent Rewriting", CAV 2017. - */ - static Node rewriteContains(Node node); - /** rewrite indexof - * This is the entry point for post-rewriting terms n of the form - * str.indexof( s, t, n ) - * Returns the rewritten form of node. - */ - static Node rewriteIndexof(Node node); - /** rewrite replace - * This is the entry point for post-rewriting terms n of the form - * str.replace( s, t, r ) - * Returns the rewritten form of node. - */ - static Node rewriteReplace(Node node); - /** rewrite replace all - * This is the entry point for post-rewriting terms n of the form - * str.replaceall( s, t, r ) - * Returns the rewritten form of node. 
- */ - static Node rewriteReplaceAll(Node node); - /** rewrite replace internal - * - * This method implements rewrite rules that apply to both str.replace and - * str.replaceall. If it returns a non-null ret, then node rewrites to ret. - */ - static Node rewriteReplaceInternal(Node node); - /** rewrite string convert - * - * This is the entry point for post-rewriting terms n of the form - * str.tolower( s ) and str.toupper( s ) - * Returns the rewritten form of node. - */ - static Node rewriteStrConvert(Node node); - /** rewrite string reverse - * - * This is the entry point for post-rewriting terms n of the form - * str.rev( s ) - * Returns the rewritten form of node. - */ - static Node rewriteStrReverse(Node node); - /** rewrite string less than or equal - * This is the entry point for post-rewriting terms n of the form - * str.<=( t, s ) - * Returns the rewritten form of n. - */ - static Node rewriteStringLeq(Node n); - /** rewrite prefix/suffix - * This is the entry point for post-rewriting terms n of the form - * str.prefixof( s, t ) / str.suffixof( s, t ) - * Returns the rewritten form of node. - */ - static Node rewritePrefixSuffix(Node node); - - /** rewrite str.from_code - * This is the entry point for post-rewriting terms n of the form - * str.from_code( t ) - * Returns the rewritten form of node. - */ - static Node rewriteStringFromCode(Node node); - - /** rewrite str.to_code - * This is the entry point for post-rewriting terms n of the form - * str.to_code( t ) - * Returns the rewritten form of node. - */ - static Node rewriteStringToCode(Node node); - - static Node splitConstant( Node a, Node b, int& index, bool isRev ); - /** can constant contain list - * return true if constant c can contain the list l in order - * firstc/lastc store which indices in l were used to determine the return - * value. 
- * (This is typically used when this function returns false, for minimizing - * explanations) - * - * For example: - * canConstantContainList( "abc", { x, "c", y } ) returns true - * firstc/lastc are updated to 1/1 - * canConstantContainList( "abc", { x, "d", y } ) returns false - * firstc/lastc are updated to 1/1 - * canConstantContainList( "abcdef", { x, "b", y, "a", z, "c", w } - * returns false - * firstc/lastc are updated to 1/3 - * canConstantContainList( "abcdef", { x, "b", y, "e", z, "c", w } - * returns false - * firstc/lastc are updated to 1/5 - */ - static bool canConstantContainList( Node c, std::vector< Node >& l, int& firstc, int& lastc ); - /** can constant contain concat - * same as above but with n = str.++( l ) instead of l - */ - static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc); - - /** strip symbolic length - * - * This function strips off components of n1 whose length is less than - * or equal to argument curr, and stores them in nr. The direction - * dir determines whether the components are removed from the start - * or end of n1. - * - * In detail, this function updates n1 to n1' such that: - * If dir=1, - * n1 = str.++( nr, n1' ) - * If dir=-1 - * n1 = str.++( n1', nr ) - * It updates curr to curr' such that: - * curr' = curr - str.len( str.++( nr ) ), and - * curr' >= 0 - * where the latter fact is determined by checkArithEntail. - * - * This function returns true if n1 is modified. 
- * - * For example: - * - * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)+1 ) - * returns true - * n1 is updated to { "bc", y } - * nr is updated to { x, "a" } - * curr is updated to 0 * - * - * stripSymbolicLength( { x, "abc", y }, {}, 1, str.len(x)-1 ) - * returns false - * - * stripSymbolicLength( { y, "abc", x }, {}, 1, str.len(x)+1 ) - * returns false - * - * stripSymbolicLength( { x, "abc", y }, {}, -1, 2*str.len(y)+4 ) - * returns true - * n1 is updated to { x } - * nr is updated to { "abc", y } - * curr is updated to str.len(y)+1 - */ - static bool stripSymbolicLength(std::vector& n1, - std::vector& nr, - int dir, - Node& curr); - /** component contains - * This function is used when rewriting str.contains( t1, t2 ), where - * n1 is the vector form of t1 - * n2 is the vector form of t2 - * - * If this function returns n>=0 for some n, then - * n1 = { x1...x{n-1} xn...x{n+s} x{n+s+1}...xm }, - * n2 = { y1...ys }, - * y1 is a suffix of xn, - * y2...y{s-1} = x{n+1}...x{n+s-1}, and - * ys is a prefix of x{n+s} - * Otherwise it returns -1. - * - * This function may update n1 if computeRemainder = true. - * We maintain the invariant that the resulting value n1' - * of n1 after this function is such that: - * n1 = str.++( nb, n1', ne ) - * The vectors nb and ne have the following properties. - * If computeRemainder = true, then - * If remainderDir != -1, then - * ne is { x{n+s}' x{n+s+1}...xm } - * where x{n+s} = str.++( ys, x{n+s}' ). - * If remainderDir != 1, then - * nb is { x1, ..., x{n-1}, xn' } - * where xn = str.++( xn', y1 ). 
- * - * For example: - * - * componentContains({ x, "abc", x }, { "b" }, {}, true, 0) - * returns 1, - * n1 is updated to { "b" }, - * nb is updated to { x, "a" }, - * ne is updated to { "c", x } - * - * componentContains({ x, "abc", x }, { "b" }, {}, true, 1) - * returns 1, - * n1 is updated to { x, "ab" }, - * ne is updated to { "c", x } - * - * componentContains({ y, z, "abc", x, "def" }, { "c", x, "de" }, {}, true, 1) - * returns 2, - * n1 is updated to { y, z, "abc", x, "de" }, - * ne is updated to { "f" } - * - * componentContains({ y, "abc", x, "def" }, { "c", x, "de" }, {}, true, -1) - * returns 1, - * n1 is updated to { "c", x, "def" }, - * nb is updated to { y, "ab" } - */ - static int componentContains(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - bool computeRemainder = false, - int remainderDir = 0); - /** component contains base - * - * This function is a helper for the above function. - * - * It returns true if n2 is contained in n1 with the following - * restrictions: - * If dir=1, then n2 must be a suffix of n1. - * If dir=-1, then n2 must be a prefix of n1. - * - * If computeRemainder is true, then n1rb and n1re are - * updated such that : - * n1 = str.++( n1rb, n2, n1re ) - * where a null value of n1rb and n1re indicates the - * empty string. - * - * For example: - * - * componentContainsBase("cabe", "ab", n1rb, n1re, 1, false) - * returns false. - * - * componentContainsBase("cabe", "ab", n1rb, n1re, 0, true) - * returns true, - * n1rb is set to "c", - * n1re is set to "e". - * - * componentContainsBase(y, str.substr(y,0,5), n1rb, n1re, -1, true) - * returns true, - * n1re is set to str.substr(y,5,str.len(y)). - * - * - * Notice that this function may return false when it cannot compute a - * remainder when it otherwise would have returned true. For example: - * - * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, false) - * returns true. - * - * Hence, we know that str.substr(y,x,z) is contained in y. 
However: - * - * componentContainsBase(y, str.substr(y,x,z), n1rb, n1re, 0, true) - * returns false. - * - * The reason is since computeRemainder=true, it must be that - * y = str.++( n1rb, str.substr(y,x,z), n1re ) - * for some n1rb, n1re. However, to construct such n1rb, n1re would require - * e.g. the terms: - * y = str.++( ite( x+z < 0 OR x < 0, "", str.substr(y,0,x) ), - * str.substr(y,x,z), - * ite( x+z < 0 OR x < 0, y, str.substr(y,x+z,len(y)) ) ) - * - * Since we do not wish to introduce ITE terms in the rewriter, we instead - * return false, indicating that we cannot compute the remainder. - */ - static bool componentContainsBase( - Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder); - /** strip constant endpoints - * This function is used when rewriting str.contains( t1, t2 ), where - * n1 is the vector form of t1 - * n2 is the vector form of t2 - * - * It modifies n1 to a new vector n1' such that: - * (1) str.contains( str.++( n1 ), str.++( n2 ) ) is equivalent to - * str.contains( str.++( n1' ), str.++( n2 ) ) - * (2) str.++( n1 ) = str.++( nb, n1', ne ) - * - * "dir" is the direction in which we can modify n1: - * if dir = 1, then we allow dropping components from the front of n1, - * if dir = -1, then we allow dropping components from the back of n1, - * if dir = 0, then we allow dropping components from either. - * - * It returns true if n1 is modified. 
- * - * For example: - * stripConstantEndpoints({ "ab", x, "de" }, { "c" }, {}, {}, 1) - * returns true, - * n1 is updated to { x, "de" } - * nb is updated to { "ab" } - * stripConstantEndpoints({ "ab", x, "de" }, { "bd" }, {}, {}, 0) - * returns true, - * n1 is updated to { "b", x, "d" } - * nb is updated to { "a" } - * ne is updated to { "e" } - * stripConstantEndpoints({ "ad", substr("ccc",x,y) }, { "d" }, {}, {}, -1) - * returns true, - * n1 is updated to {"ad"} - * ne is updated to { substr("ccc",x,y) } - */ - static bool stripConstantEndpoints(std::vector& n1, - std::vector& n2, - std::vector& nb, - std::vector& ne, - int dir = 0); - - /** - * Given a symbolic length n, returns the canonical string for that length. - * For example if n is constant, this function returns a string consisting of - * "A" repeated n times. Returns the null node if no such string exists. - */ - static Node canonicalStrForSymbolicLength(Node n); - - /** length preserving rewrite - * - * Given input n, this returns a string n' whose length is equivalent to n. - * We apply certain normalizations to n', such as replacing all constants - * that are not relevant to length by "A". - */ - static Node lengthPreserveRewrite(Node n); - - /** - * Checks whether a string term `a` is entailed to contain or not contain a - * string term `b`. - * - * @param a The string that is checked whether it contains `b` - * @param b The string that is checked whether it is contained in `a` - * @param fullRewriter Determines whether the function can use the full - * rewriter or only `rewriteContains()` (useful for avoiding loops) - * @return true node if it can be shown that `a` contains `b`, false node if - * it can be shown that `a` does not contain `b`, null node otherwise - */ - static Node checkEntailContains(Node a, Node b, bool fullRewriter = true); - - /** entail non-empty - * - * Checks whether string a is entailed to be non-empty. Is equivalent to - * the call checkArithEntail( len( a ), true ). 
- */ - static bool checkEntailNonEmpty(Node a); - - /** - * Checks whether string has at most/exactly length one. Length one strings - * can be used for more aggressive rewriting because there is guaranteed that - * it cannot be overlap multiple components in a string concatenation. - * - * @param s The string to check - * @param strict If true, the string must have exactly length one, otherwise - * at most length one - * @return True if the string has at most/exactly length one, false otherwise - */ - static bool checkEntailLengthOne(Node s, bool strict = false); - - /** check arithmetic entailment equal - * Returns true if it is always the case that a = b. - */ - static bool checkEntailArithEq(Node a, Node b); - /** check arithmetic entailment - * Returns true if it is always the case that a >= b, - * and a>b if strict is true. - */ - static bool checkEntailArith(Node a, Node b, bool strict = false); - /** check arithmetic entailment - * Returns true if it is always the case that a >= 0. - */ - static bool checkEntailArith(Node a, bool strict = false); - /** check arithmetic entailment with approximations - * - * Returns true if it is always the case that a >= 0. We expect that a is in - * rewritten form. - * - * This function uses "approximation" techniques that under-approximate - * the value of a for the purposes of showing the entailment holds. For - * example, given: - * len( x ) - len( substr( y, 0, len( x ) ) ) - * Since we know that len( substr( y, 0, len( x ) ) ) <= len( x ), the above - * term can be under-approximated as len( x ) - len( x ) = 0, which is >= 0, - * and thus the entailment len( x ) - len( substr( y, 0, len( x ) ) ) >= 0 - * holds. - */ - static bool checkEntailArithApprox(Node a); - /** Get arithmetic approximations - * - * This gets the (set of) arithmetic approximations for term a and stores - * them in approx. If isOverApprox is true, these are over-approximations - * for the value of a, otherwise, they are underapproximations. 
For example, - * an over-approximation for len( substr( y, n, m ) ) is m; an - * under-approximation for indexof( x, y, n ) is -1. - * - * Notice that this function is not generally recursive (although it may make - * a small bounded of recursive calls). Instead, it returns the shape - * of the approximations for a. For example, an under-approximation - * for the term len( replace( substr( x, 0, n ), y, z ) ) returned by this - * function might be len( substr( x, 0, n ) ) - len( y ), where we don't - * consider (recursively) the approximations for len( substr( x, 0, n ) ). - */ - static void getArithApproximations(Node a, - std::vector& approx, - bool isOverApprox = false); - - /** - * Checks whether it is always true that `a` is a strict subset of `b` in the - * multiset domain. - * - * Examples: - * - * a = (str.++ "A" x), b = (str.++ "A" x "B") ---> true - * a = (str.++ "A" x), b = (str.++ "B" x "AA") ---> true - * a = (str.++ "A" x), b = (str.++ "B" y "AA") ---> false - * - * @param a The term for which it should be checked if it is a strict subset - * of `b` in the multiset domain - * @param b The term for which it should be checked if it is a strict - * superset of `a` in the multiset domain - * @return True if it is always the case that `a` is a strict subset of `b`, - * false otherwise. - */ - static bool checkEntailMultisetSubset(Node a, Node b); - - /** - * Returns a character `c` if it is always the case that str.in.re(a, c*), - * i.e. if all possible values of `a` only consist of `c` characters, and the - * null node otherwise. If `a` is the empty string, the function returns an - * empty string. - * - * @param a The node to check for homogeneity - * @return If `a` is homogeneous, the only character that it may contain, the - * empty string if `a` is empty, and the null node otherwise - */ - static Node checkEntailHomogeneousString(Node a); - - /** - * Simplifies a given node `a` s.t. 
the result is a concatenation of string - * terms that can be interpreted as a multiset and which contains all - * multisets that `a` could form. - * - * Examples: - * - * (str.substr "AA" 0 n) ---> "AA" - * (str.replace "AAA" x "BB") ---> (str.++ "AAA" "BB") - * - * @param a The node to simplify - * @return A concatenation that can be interpreted as a multiset - */ - static Node getMultisetApproximation(Node a); - - /** - * Checks whether assumption |= a >= 0 (if strict is false) or - * assumption |= a > 0 (if strict is true), where assumption is an equality - * assumption. The assumption must be in rewritten form. - * - * Example: - * - * checkEntailArithWithEqAssumption(x + (str.len y) = 0, -x, false) = true - * - * Because: x = -(str.len y), so -x >= 0 --> (str.len y) >= 0 --> true - */ - static bool checkEntailArithWithEqAssumption(Node assumption, - Node a, - bool strict = false); - - /** - * Checks whether assumption |= a >= b (if strict is false) or - * assumption |= a > b (if strict is true). The function returns true if it - * can be shown that the entailment holds and false otherwise. Assumption - * must be in rewritten form. Assumption may be an equality or an inequality. - * - * Example: - * - * checkEntailArithWithAssumption(x + (str.len y) = 0, 0, x, false) = true - * - * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true - */ - static bool checkEntailArithWithAssumption(Node assumption, - Node a, - Node b, - bool strict = false); - - /** - * Checks whether assumptions |= a >= b (if strict is false) or - * assumptions |= a > b (if strict is true). The function returns true if it - * can be shown that the entailment holds and false otherwise. Assumptions - * must be in rewritten form. Assumptions may be an equalities or an - * inequalities. 
- * - * Example: - * - * checkEntailArithWithAssumptions([x + (str.len y) = 0], 0, x, false) = true - * - * Because: x = -(str.len y), so 0 >= x --> 0 >= -(str.len y) --> true - */ - static bool checkEntailArithWithAssumptions(std::vector assumptions, - Node a, - Node b, - bool strict = false); - - /** get arithmetic lower bound - * If this function returns a non-null Node ret, - * then ret is a rational constant and - * we know that n >= ret always if isLower is true, - * or n <= ret if isLower is false. - * - * Notice the following invariant. - * If getConstantArithBound(a, true) = ret where ret is non-null, then for - * strict = { true, false } : - * ret >= strict ? 1 : 0 - * if and only if - * checkEntailArith( a, strict ) = true. - */ - static Node getConstantArithBound(Node a, bool isLower = true); - /** get length for regular expression - * - * Given regular expression n, if this method returns a non-null value c, then - * x in n entails len( x ) = c. - */ - static Node getFixedLengthForRegexp(Node n); - /** decompose substr chain - * - * If s is substr( ... substr( base, x1, y1 ) ..., xn, yn ), then this - * function returns base, adds { x1 ... xn } to ss, and { y1 ... yn } to ls. - */ - static Node decomposeSubstrChain(Node s, - std::vector& ss, - std::vector& ls); - /** make substr chain - * - * If ss is { x1 ... xn } and ls is { y1 ... yn }, this returns the term - * substr( ... substr( base, x1, y1 ) ..., xn, yn ). - */ - static Node mkSubstrChain(Node base, - const std::vector& ss, - const std::vector& ls); - - /** - * Overapproximates the possible values of node n. This overapproximation - * assumes that n can return a value x or the empty string and tries to find - * the simplest x such that this holds. In the general case, x is the same as - * the input n. This overapproximation can be used to sort terms with the - * same possible values in string concatenation for example. 
- * - * Example: - * - * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y) - * either returns y or "" - * - * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y) - * because the function could not compute a simpler - */ - static Node getStringOrEmpty(Node n); - - /** - * Given an inequality y1 + ... + yn >= x, removes operands yi s.t. the - * original inequality still holds. Returns true if the original inequality - * holds and false otherwise. The list of ys is modified to contain a subset - * of the original ys. - * - * Example: - * - * inferZerosInSumGeq( (str.len x), [ (str.len x), (str.len y), 1 ], [] ) - * --> returns true with ys = [ (str.len x) ] and zeroYs = [ (str.len y), 1 ] - * (can be used to rewrite the inequality to false) - * - * inferZerosInSumGeq( (str.len x), [ (str.len y) ], [] ) - * --> returns false because it is not possible to show - * str.len(y) >= str.len(x) - */ - static bool inferZerosInSumGeq(Node x, - std::vector& ys, - std::vector& zeroYs); - - /** - * Infers a conjunction of equalities that correspond to (str.contains x y) - * if it can show that the length of y is greater or equal to the length of - * x. If y is a concatentation, we get x = y1 ++ ... ++ yn, the conjunction - * is of the form: - * - * (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) - * - * where each yi'' are yi that must be empty for (= x y) to hold and yi' are - * yi that the function could not infer anything about. Returns a null node - * if the function cannot infer that str.len(y) >= str.len(x). Returns (= x - * y) if the function can infer that str.len(y) >= str.len(x) but cannot - * infer that any of the yi must be empty. - */ - static Node inferEqsFromContains(Node x, Node y); - - /** - * Collects equal-to-empty nodes from a conjunction or a single - * node. 
Returns a list of nodes that are compared to empty nodes - * and a boolean that indicates whether all nodes in the - * conjunction were a comparison with the empty node. The nodes in - * the list are sorted and duplicates removed. - * - * Examples: - * - * collectEmptyEqs( (= "" x) ) = { true, [x] } - * collectEmptyEqs( (and (= "" x) (= "" y)) ) = { true, [x, y] } - * collectEmptyEqs( (and (= "A" x) (= "" y) (= "" y)) ) = { false, [y] } - * - * @param x The conjunction of equalities or a single equality - * @return A pair of a boolean that indicates whether the - * conjunction consists only of comparisons to the empty string - * and the list of nodes that are compared to the empty string - */ - static std::pair > collectEmptyEqs(Node x); -}; /* class TheoryStringsRewriter */ - -}/* CVC4::theory::strings namespace */ -}/* CVC4::theory namespace */ -}/* CVC4 namespace */ - -#endif /* CVC4__THEORY__STRINGS__THEORY_STRINGS_REWRITER_H */ diff --git a/test/unit/theory/CMakeLists.txt b/test/unit/theory/CMakeLists.txt index 35f2f7bfa..d6a6b701c 100644 --- a/test/unit/theory/CMakeLists.txt +++ b/test/unit/theory/CMakeLists.txt @@ -2,6 +2,7 @@ cvc4_add_unit_test_black(regexp_operation_black theory) cvc4_add_unit_test_black(theory_black theory) cvc4_add_unit_test_white(evaluator_white theory) cvc4_add_unit_test_white(logic_info_white theory) +cvc4_add_unit_test_white(sequences_rewriter_white theory) cvc4_add_unit_test_white(theory_arith_white theory) cvc4_add_unit_test_white(theory_bv_rewriter_white theory) cvc4_add_unit_test_white(theory_bv_white theory) @@ -9,7 +10,6 @@ cvc4_add_unit_test_white(theory_engine_white theory) cvc4_add_unit_test_white(theory_quantifiers_bv_instantiator_white theory) cvc4_add_unit_test_white(theory_quantifiers_bv_inverter_white theory) cvc4_add_unit_test_white(theory_sets_type_enumerator_white theory) -cvc4_add_unit_test_white(theory_strings_rewriter_white theory) cvc4_add_unit_test_white(theory_strings_skolem_cache_black theory) 
cvc4_add_unit_test_white(theory_strings_word_white theory) cvc4_add_unit_test_white(theory_white theory) diff --git a/test/unit/theory/sequences_rewriter_white.h b/test/unit/theory/sequences_rewriter_white.h new file mode 100644 index 000000000..200a36d0b --- /dev/null +++ b/test/unit/theory/sequences_rewriter_white.h @@ -0,0 +1,1578 @@ +/********************* */ +/*! \file sequences_rewriter_white.h + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Unit tests for the strings/sequences rewriter + ** + ** Unit tests for the strings/sequences rewriter. + **/ + +#include "expr/node.h" +#include "expr/node_manager.h" +#include "smt/smt_engine.h" +#include "smt/smt_engine_scope.h" +#include "theory/quantifiers/extended_rewrite.h" +#include "theory/rewriter.h" +#include "theory/strings/sequences_rewriter.h" + +#include +#include +#include +#include + +using namespace CVC4; +using namespace CVC4::smt; +using namespace CVC4::theory; +using namespace CVC4::theory::quantifiers; +using namespace CVC4::theory::strings; + +class SequencesRewriterWhite : public CxxTest::TestSuite +{ + public: + SequencesRewriterWhite() {} + + void setUp() override + { + Options opts; + opts.setOutputLanguage(language::output::LANG_SMTLIB_V2); + d_em = new ExprManager(opts); + d_smt = new SmtEngine(d_em); + d_scope = new SmtScope(d_smt); + d_rewriter = new ExtendedRewriter(true); + + d_nm = NodeManager::currentNM(); + } + + void tearDown() override + { + delete d_rewriter; + delete d_scope; + delete d_smt; + delete d_em; + } + + void inNormalForm(Node t) + { + Node res_t = d_rewriter->extendedRewrite(t); + + std::cout << std::endl; + 
std::cout << t << " ---> " << res_t << std::endl; + TS_ASSERT_EQUALS(t, res_t); + } + + void sameNormalForm(Node t1, Node t2) + { + Node res_t1 = d_rewriter->extendedRewrite(t1); + Node res_t2 = d_rewriter->extendedRewrite(t2); + + std::cout << std::endl; + std::cout << t1 << " ---> " << res_t1 << std::endl; + std::cout << t2 << " ---> " << res_t2 << std::endl; + TS_ASSERT_EQUALS(res_t1, res_t2); + } + + void differentNormalForms(Node t1, Node t2) + { + Node res_t1 = d_rewriter->extendedRewrite(t1); + Node res_t2 = d_rewriter->extendedRewrite(t2); + + std::cout << std::endl; + std::cout << t1 << " ---> " << res_t1 << std::endl; + std::cout << t2 << " ---> " << res_t2 << std::endl; + TS_ASSERT_DIFFERS(res_t1, res_t2); + } + + void testCheckEntailLengthOne() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node a = d_nm->mkConst(::CVC4::String("A")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node negOne = d_nm->mkConst(Rational(-1)); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node two = d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + Node i = d_nm->mkVar("i", intType); + + TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a)); + TS_ASSERT(SequencesRewriter::checkEntailLengthOne(a, true)); + + Node substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one); + TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true)); + + substr = d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_CONCAT, a, x), + zero, + one); + TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(SequencesRewriter::checkEntailLengthOne(substr, true)); + + substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, two); 
+ TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(!SequencesRewriter::checkEntailLengthOne(substr, true)); + } + + void testCheckEntailArith() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node z = d_nm->mkVar("z", strType); + Node n = d_nm->mkVar("n", intType); + Node one = d_nm->mkConst(Rational(1)); + + // 1 >= (str.len (str.substr z n 1)) ---> true + Node substr_z = d_nm->mkNode(kind::STRING_LENGTH, + d_nm->mkNode(kind::STRING_SUBSTR, z, n, one)); + TS_ASSERT(SequencesRewriter::checkEntailArith(one, substr_z)); + + // (str.len (str.substr z n 1)) >= 1 ---> false + TS_ASSERT(!SequencesRewriter::checkEntailArith(substr_z, one)); + } + + void testCheckEntailArithWithAssumption() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node x = d_nm->mkVar("x", intType); + Node y = d_nm->mkVar("y", strType); + Node z = d_nm->mkVar("z", intType); + + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + + Node slen_y = d_nm->mkNode(kind::STRING_LENGTH, y); + Node x_plus_slen_y = d_nm->mkNode(kind::PLUS, x, slen_y); + Node x_plus_slen_y_eq_zero = + Rewriter::rewrite(d_nm->mkNode(kind::EQUAL, x_plus_slen_y, zero)); + + // x + (str.len y) = 0 |= 0 >= x --> true + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + x_plus_slen_y_eq_zero, zero, x, false)); + + // x + (str.len y) = 0 |= 0 > x --> false + TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( + x_plus_slen_y_eq_zero, zero, x, true)); + + Node x_plus_slen_y_plus_z_eq_zero = Rewriter::rewrite(d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, z), zero)); + + // x + (str.len y) + z = 0 |= 0 > x --> false + TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( + x_plus_slen_y_plus_z_eq_zero, zero, x, true)); + + Node 
x_plus_slen_y_plus_slen_y_eq_zero = Rewriter::rewrite(d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, slen_y), zero)); + + // x + (str.len y) + (str.len y) = 0 |= 0 >= x --> true + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + x_plus_slen_y_plus_slen_y_eq_zero, zero, x, false)); + + Node five = d_nm->mkConst(Rational(5)); + Node six = d_nm->mkConst(Rational(6)); + Node x_plus_five = d_nm->mkNode(kind::PLUS, x, five); + Node x_plus_five_lt_six = + Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, six)); + + // x + 5 < 6 |= 0 >= x --> true + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + x_plus_five_lt_six, zero, x, false)); + + // x + 5 < 6 |= 0 > x --> false + TS_ASSERT(!SequencesRewriter::checkEntailArithWithAssumption( + x_plus_five_lt_six, zero, x, true)); + + Node neg_x = d_nm->mkNode(kind::UMINUS, x); + Node x_plus_five_lt_five = + Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, five)); + + // x + 5 < 5 |= -x >= 0 --> true + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + x_plus_five_lt_five, neg_x, zero, false)); + + // x + 5 < 5 |= 0 > x --> true + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + x_plus_five_lt_five, zero, x, false)); + + // 0 < x |= x >= (str.len (int.to.str x)) + Node assm = Rewriter::rewrite(d_nm->mkNode(kind::LT, zero, x)); + TS_ASSERT(SequencesRewriter::checkEntailArithWithAssumption( + assm, + x, + d_nm->mkNode(kind::STRING_LENGTH, d_nm->mkNode(kind::STRING_ITOS, x)), + false)); + } + + void testRewriteSubstr() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node negone = d_nm->mkConst(Rational(-1)); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node two = 
d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + + Node s = d_nm->mkVar("s", strType); + Node s2 = d_nm->mkVar("s2", strType); + Node x = d_nm->mkVar("x", intType); + Node y = d_nm->mkVar("y", intType); + + // (str.substr "A" x x) --> "" + Node n = d_nm->mkNode(kind::STRING_SUBSTR, a, x, x); + Node res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, empty); + + // (str.substr "A" (+ x 1) x) -> "" + n = d_nm->mkNode(kind::STRING_SUBSTR, + a, + d_nm->mkNode(kind::PLUS, x, d_nm->mkConst(Rational(1))), + x); + res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, empty); + + // (str.substr "A" (+ x (str.len s)) x) -> "" + n = d_nm->mkNode( + kind::STRING_SUBSTR, + a, + d_nm->mkNode(kind::PLUS, x, d_nm->mkNode(kind::STRING_LENGTH, s)), + x); + res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, empty); + + // (str.substr "A" x y) -> (str.substr "A" x y) + n = d_nm->mkNode(kind::STRING_SUBSTR, a, x, y); + res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, n); + + // (str.substr "ABCD" (+ x 3) x) -> "" + n = d_nm->mkNode( + kind::STRING_SUBSTR, abcd, d_nm->mkNode(kind::PLUS, x, three), x); + res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, empty); + + // (str.substr "ABCD" (+ x 2) x) -> (str.substr "ABCD" (+ x 2) x) + n = d_nm->mkNode( + kind::STRING_SUBSTR, abcd, d_nm->mkNode(kind::PLUS, x, two), x); + res = SequencesRewriter::rewriteSubstr(n); + TS_ASSERT_EQUALS(res, n); + + // (str.substr (str.substr s x x) x x) -> "" + n = d_nm->mkNode( + kind::STRING_SUBSTR, d_nm->mkNode(kind::STRING_SUBSTR, s, x, x), x, x); + sameNormalForm(n, empty); + + // Same normal form for: + // + // (str.substr (str.replace "" s "B") x x) + // + // (str.replace "" s (str.substr "B" x x)) + Node lhs = d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_STRREPL, empty, s, b), + x, + x); + Node rhs = d_nm->mkNode(kind::STRING_STRREPL, + empty, + s, + d_nm->mkNode(kind::STRING_SUBSTR, 
b, x, x)); + sameNormalForm(lhs, rhs); + + // Same normal form: + // + // (str.substr (str.replace s "A" "B") 0 x) + // + // (str.replace (str.substr s 0 x) "A" "B") + Node substr_repl = d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_STRREPL, s, a, b), + zero, + x); + Node repl_substr = + d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_SUBSTR, s, zero, x), + a, + b); + sameNormalForm(substr_repl, repl_substr); + + // Same normal form: + // + // (str.substr (str.replace s (str.substr (str.++ s2 "A") 0 1) "B") 0 x) + // + // (str.replace (str.substr s 0 x) (str.substr (str.++ s2 "A") 0 1) "B") + Node substr_y = d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_CONCAT, s2, a), + zero, + one); + substr_repl = + d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_STRREPL, s, substr_y, b), + zero, + x); + repl_substr = d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_SUBSTR, s, zero, x), + substr_y, + b); + sameNormalForm(substr_repl, repl_substr); + + // (str.substr (str.int.to.str x) x x) ---> empty + Node substr_itos = d_nm->mkNode( + kind::STRING_SUBSTR, d_nm->mkNode(kind::STRING_ITOS, x), x, x); + sameNormalForm(substr_itos, empty); + + // (str.substr s (* (- 1) (str.len s)) 1) ---> empty + Node substr = d_nm->mkNode( + kind::STRING_SUBSTR, + s, + d_nm->mkNode(kind::MULT, negone, d_nm->mkNode(kind::STRING_LENGTH, s)), + one); + sameNormalForm(substr, empty); + } + + void testRewriteConcat() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node zero = d_nm->mkConst(Rational(0)); + Node three = d_nm->mkConst(Rational(3)); + + Node i = d_nm->mkVar("i", intType); + Node s = d_nm->mkVar("s", strType); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + + // Same normal form for: + // + // (str.++ (str.replace "A" x "") "A") + // + // (str.++ 
"A" (str.replace "A" x "")) + Node repl_a_x_e = d_nm->mkNode(kind::STRING_STRREPL, a, x, empty); + Node repl_a = d_nm->mkNode(kind::STRING_CONCAT, repl_a_x_e, a); + Node a_repl = d_nm->mkNode(kind::STRING_CONCAT, a, repl_a_x_e); + sameNormalForm(repl_a, a_repl); + + // Same normal form for: + // + // (str.++ y (str.replace "" x (str.substr y 0 3)) (str.substr y 0 3) "A" (str.substr y 0 3)) + // + // (str.++ y (str.substr y 0 3) (str.replace "" x (str.substr y 0 3)) "A" (str.substr y 0 3)) + Node z = d_nm->mkNode(kind::STRING_SUBSTR, y, zero, three); + Node repl_e_x_z = d_nm->mkNode(kind::STRING_STRREPL, empty, x, z); + repl_a = d_nm->mkNode(kind::STRING_CONCAT, y, repl_e_x_z, z, a, z); + a_repl = d_nm->mkNode(kind::STRING_CONCAT, y, z, repl_e_x_z, a, z); + sameNormalForm(repl_a, a_repl); + + // Same normal form for: + // + // (str.++ "A" (str.replace "A" x "") (str.substr "A" 0 i)) + // + // (str.++ (str.substr "A" 0 i) (str.replace "A" x "") "A") + Node substr_a = d_nm->mkNode(kind::STRING_SUBSTR, a, zero, i); + Node a_substr_repl = + d_nm->mkNode(kind::STRING_CONCAT, a, substr_a, repl_a_x_e); + Node substr_repl_a = + d_nm->mkNode(kind::STRING_CONCAT, substr_a, repl_a_x_e, a); + sameNormalForm(a_substr_repl, substr_repl_a); + + // Same normal form for: + // + // (str.++ (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i) (str.at "A" i)) + // + // (str.++ (str.at "A" i) (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i)) + Node charat_a = d_nm->mkNode(kind::STRING_CHARAT, a, i); + Node repl_e_x_s = d_nm->mkNode(kind::STRING_STRREPL, empty, x, substr_a); + Node repl_substr_a = + d_nm->mkNode(kind::STRING_CONCAT, repl_e_x_s, substr_a, charat_a); + Node a_repl_substr = + d_nm->mkNode(kind::STRING_CONCAT, charat_a, repl_e_x_s, substr_a); + sameNormalForm(repl_substr_a, a_repl_substr); + } + + void testLengthPreserveRewrite() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = 
d_nm->mkConst(::CVC4::String("")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node f = d_nm->mkConst(::CVC4::String("F")); + Node gh = d_nm->mkConst(::CVC4::String("GH")); + Node ij = d_nm->mkConst(::CVC4::String("IJ")); + + Node i = d_nm->mkVar("i", intType); + Node s = d_nm->mkVar("s", strType); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + + // Same length preserving rewrite for: + // + // (str.++ "ABCD" (str.++ x x)) + // + // (str.++ "GH" x (str.replace x "GH" "IJ") "IJ") + Node concat1 = d_nm->mkNode( + kind::STRING_CONCAT, abcd, d_nm->mkNode(kind::STRING_CONCAT, x, x)); + Node concat2 = d_nm->mkNode(kind::STRING_CONCAT, + gh, + x, + d_nm->mkNode(kind::STRING_STRREPL, x, gh, ij), + ij); + Node res_concat1 = SequencesRewriter::lengthPreserveRewrite(concat1); + Node res_concat2 = SequencesRewriter::lengthPreserveRewrite(concat2); + TS_ASSERT_EQUALS(res_concat1, res_concat2); + } + + void testRewriteIndexOf() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node a = d_nm->mkConst(::CVC4::String("A")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node c = d_nm->mkConst(::CVC4::String("C")); + Node ccc = d_nm->mkConst(::CVC4::String("CCC")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node negOne = d_nm->mkConst(Rational(-1)); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node two = d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + Node i = d_nm->mkVar("i", intType); + Node j = d_nm->mkVar("j", intType); + + // Same normal form for: + // + // (int.to.str (str.indexof "A" x 2)) + // + // (int.to.str (str.indexof "B" x 2)) + Node a_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, a, x, two); + Node itos_a_idof_x = d_nm->mkNode(kind::STRING_ITOS, a_idof_x); + Node b_idof_x = 
d_nm->mkNode(kind::STRING_STRIDOF, b, x, two); + Node itos_b_idof_x = d_nm->mkNode(kind::STRING_ITOS, b_idof_x); + sameNormalForm(itos_a_idof_x, itos_b_idof_x); + + // Same normal form for: + // + // (str.indexof (str.++ "ABCD" x) y 3) + // + // (str.indexof (str.++ "AAAD" x) y 3) + Node idof_abcd = d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, abcd, x), + y, + three); + Node idof_aaad = d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, aaad, x), + y, + three); + sameNormalForm(idof_abcd, idof_aaad); + + // (str.indexof (str.substr x 1 i) "A" i) ---> -1 + Node idof_substr = + d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_SUBSTR, x, one, i), + a, + i); + sameNormalForm(idof_substr, negOne); + + { + // Same normal form for: + // + // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0) + // + // (+ 1 (str.len (str.substr "CCC" i j)) + // (str.indexof (str.++ x "A") "A" 0)) + Node lhs = d_nm->mkNode( + kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, + b, + d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j), + x, + a), + a, + zero); + Node rhs = d_nm->mkNode( + kind::PLUS, + one, + d_nm->mkNode(kind::STRING_LENGTH, + d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j)), + d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, x, a), + a, + zero)); + sameNormalForm(lhs, rhs); + } + + { + // Same normal form for: + // + // (str.indexof (str.++ "B" "C" "A" x y) "A" 0) + // + // (+ 2 (str.indexof (str.++ "A" x y) "A" 0)) + Node lhs = d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, b, c, a, x, y), + a, + zero); + Node rhs = + d_nm->mkNode(kind::PLUS, + two, + d_nm->mkNode(kind::STRING_STRIDOF, + d_nm->mkNode(kind::STRING_CONCAT, a, x, y), + a, + zero)); + sameNormalForm(lhs, rhs); + } + } + + void testRewriteReplace() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = 
d_nm->mkConst(::CVC4::String("A")); + Node ab = d_nm->mkConst(::CVC4::String("AB")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node c = d_nm->mkConst(::CVC4::String("C")); + Node d = d_nm->mkConst(::CVC4::String("D")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node z = d_nm->mkVar("z", strType); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node n = d_nm->mkVar("n", intType); + + // (str.replace (str.replace x "B" x) x "A") --> + // (str.replace (str.replace x "B" "A") x "A") + Node repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_STRREPL, x, b, x), + x, + a); + Node repl_repl_short = + d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_STRREPL, x, b, a), + x, + a); + sameNormalForm(repl_repl, repl_repl_short); + + // (str.replace "A" (str.replace "B", x, "C") "D") --> "A" + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_STRREPL, b, x, c), + d); + sameNormalForm(repl_repl, a); + + // (str.replace "A" (str.replace "B", x, "A") "D") -/-> "A" + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_STRREPL, b, x, a), + d); + differentNormalForms(repl_repl, a); + + // Same normal form for: + // + // (str.replace x (str.++ x y z) y) + // + // (str.replace x (str.++ x y z) z) + Node xyz = d_nm->mkNode(kind::STRING_CONCAT, x, y, z); + Node repl_x_xyz = d_nm->mkNode(kind::STRING_STRREPL, x, xyz, y); + Node repl_x_zyx = d_nm->mkNode(kind::STRING_STRREPL, x, xyz, z); + sameNormalForm(repl_x_xyz, repl_x_zyx); + + // (str.replace "" (str.++ x x) x) --> "" + Node repl_empty_xx = d_nm->mkNode(kind::STRING_STRREPL, + empty, + d_nm->mkNode(kind::STRING_CONCAT, x, x), + x); + sameNormalForm(repl_empty_xx, empty); + + // (str.replace "AB" (str.++ x "A") x) --> (str.replace "AB" (str.++ x "A") + // "") + Node repl_ab_xa_x = d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_CONCAT, a, b), + 
d_nm->mkNode(kind::STRING_CONCAT, x, a), + x); + Node repl_ab_xa_e = d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_CONCAT, a, b), + d_nm->mkNode(kind::STRING_CONCAT, x, a), + empty); + sameNormalForm(repl_ab_xa_x, repl_ab_xa_e); + + // (str.replace "AB" (str.++ x "A") x) -/-> (str.replace "AB" (str.++ "A" x) + // "") + Node repl_ab_ax_e = d_nm->mkNode(kind::STRING_STRREPL, + d_nm->mkNode(kind::STRING_CONCAT, a, b), + d_nm->mkNode(kind::STRING_CONCAT, a, x), + empty); + differentNormalForms(repl_ab_ax_e, repl_ab_xa_e); + + // (str.replace "" (str.replace y x "A") y) ---> "" + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + empty, + d_nm->mkNode(kind::STRING_STRREPL, y, x, a), + y); + sameNormalForm(repl_repl, empty); + + // (str.replace "" (str.replace x y x) x) ---> "" + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + empty, + d_nm->mkNode(kind::STRING_STRREPL, x, y, x), + x); + sameNormalForm(repl_repl, empty); + + // (str.replace "" (str.substr x 0 1) x) ---> "" + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + empty, + d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one), + x); + sameNormalForm(repl_repl, empty); + + // Same normal form for: + // + // (str.replace "" (str.replace x y x) y) + // + // (str.replace "" x y) + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + empty, + d_nm->mkNode(kind::STRING_STRREPL, x, y, x), + y); + Node repl = d_nm->mkNode(kind::STRING_STRREPL, empty, x, y); + sameNormalForm(repl_repl, repl); + + // Same normal form: + // + // (str.replace "B" (str.replace x "A" "B") "B") + // + // (str.replace "B" x "B")) + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + b, + d_nm->mkNode(kind::STRING_STRREPL, x, a, b), + b); + repl = d_nm->mkNode(kind::STRING_STRREPL, b, x, b); + sameNormalForm(repl_repl, repl); + + // Different normal forms for: + // + // (str.replace "B" (str.replace "" x "A") "B") + // + // (str.replace "B" x "B") + repl_repl = d_nm->mkNode(kind::STRING_STRREPL, + b, + d_nm->mkNode(kind::STRING_STRREPL, 
empty, x, a), + b); + repl = d_nm->mkNode(kind::STRING_STRREPL, b, x, b); + differentNormalForms(repl_repl, repl); + + { + // Same normal form: + // + // (str.replace (str.++ "AB" x) "C" y) + // + // (str.++ "AB" (str.replace x "C" y)) + Node lhs = d_nm->mkNode( + kind::STRING_STRREPL, d_nm->mkNode(kind::STRING_CONCAT, ab, x), c, y); + Node rhs = d_nm->mkNode( + kind::STRING_CONCAT, ab, d_nm->mkNode(kind::STRING_STRREPL, x, c, y)); + sameNormalForm(lhs, rhs); + } + } + + void testRewriteContains() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node ab = d_nm->mkConst(::CVC4::String("AB")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node c = d_nm->mkConst(::CVC4::String("C")); + Node e = d_nm->mkConst(::CVC4::String("E")); + Node h = d_nm->mkConst(::CVC4::String("H")); + Node j = d_nm->mkConst(::CVC4::String("J")); + Node p = d_nm->mkConst(::CVC4::String("P")); + Node abc = d_nm->mkConst(::CVC4::String("ABC")); + Node def = d_nm->mkConst(::CVC4::String("DEF")); + Node ghi = d_nm->mkConst(::CVC4::String("GHI")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); + Node yx = d_nm->mkNode(kind::STRING_CONCAT, y, x); + Node z = d_nm->mkVar("z", strType); + Node n = d_nm->mkVar("n", intType); + Node m = d_nm->mkVar("m", intType); + Node one = d_nm->mkConst(Rational(1)); + Node two = d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + Node four = d_nm->mkConst(Rational(4)); + Node t = d_nm->mkConst(true); + Node f = d_nm->mkConst(false); + + // Same normal form for: + // + // (str.replace "A" (str.substr x 1 3) z) + // + // (str.replace "A" (str.substr x 1 4) z) + Node substr_3 = + d_nm->mkNode(kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_SUBSTR, x, one, three), + z); + Node substr_4 = +
d_nm->mkNode(kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_SUBSTR, x, one, four), + z); + sameNormalForm(substr_3, substr_4); + + // Same normal form for: + // + // (str.replace "A" (str.++ y (str.substr x 1 3)) z) + // + // (str.replace "A" (str.++ y (str.substr x 1 4)) z) + Node concat_substr_3 = d_nm->mkNode( + kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_CONCAT, + y, + d_nm->mkNode(kind::STRING_SUBSTR, x, one, three)), + z); + Node concat_substr_4 = d_nm->mkNode( + kind::STRING_STRREPL, + a, + d_nm->mkNode(kind::STRING_CONCAT, + y, + d_nm->mkNode(kind::STRING_SUBSTR, x, one, four)), + z); + sameNormalForm(concat_substr_3, concat_substr_4); + + // (str.contains "A" (str.++ "A" (str.replace "B" x "C"))) --> false + Node ctn_repl = + d_nm->mkNode(kind::STRING_STRCTN, + a, + d_nm->mkNode(kind::STRING_CONCAT, + a, + d_nm->mkNode(kind::STRING_STRREPL, b, x, c))); + sameNormalForm(ctn_repl, f); + + // (str.contains x (str.++ x x)) --> (= x "") + Node x_cnts_x_x = d_nm->mkNode( + kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_CONCAT, x, x)); + sameNormalForm(x_cnts_x_x, d_nm->mkNode(kind::EQUAL, x, empty)); + + // Same normal form for: + // + // (str.contains (str.++ y x) (str.++ x z y)) + // + // (and (str.contains (str.++ y x) (str.++ x y)) (= z "")) + Node yx_cnts_xzy = d_nm->mkNode( + kind::STRING_STRCTN, yx, d_nm->mkNode(kind::STRING_CONCAT, x, z, y)); + Node yx_cnts_xy = d_nm->mkNode(kind::AND, + d_nm->mkNode(kind::EQUAL, z, empty), + d_nm->mkNode(kind::STRING_STRCTN, yx, xy)); + sameNormalForm(yx_cnts_xzy, yx_cnts_xy); + + // Same normal form for: + // + // (str.contains (str.substr x n (str.len y)) y) + // + // (= (str.substr x n (str.len y)) y) + Node ctn_substr = d_nm->mkNode( + kind::STRING_STRCTN, + d_nm->mkNode( + kind::STRING_SUBSTR, x, n, d_nm->mkNode(kind::STRING_LENGTH, y)), + y); + Node substr_eq = d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode( + kind::STRING_SUBSTR, x, n, d_nm->mkNode(kind::STRING_LENGTH, y)), + y); +
sameNormalForm(ctn_substr, substr_eq); + + // Same normal form for: + // + // (str.contains x (str.replace y x y)) + // + // (str.contains x y) -- NOTE(review): the check below compares ctn_repl_y_x_y with itself and never uses ctn_x_y, so it cannot fail; confirm whether ctn_x_y was intended + Node ctn_repl_y_x_y = d_nm->mkNode( + kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_STRREPL, y, x, y)); + Node ctn_x_y = d_nm->mkNode(kind::STRING_STRCTN, x, y); + sameNormalForm(ctn_repl_y_x_y, ctn_repl_y_x_y); + + // Same normal form for: + // + // (str.contains x (str.replace x y x)) + // + // (= x (str.replace x y x)) + Node ctn_repl_self = d_nm->mkNode( + kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_STRREPL, x, y, x)); + Node eq_repl = d_nm->mkNode( + kind::EQUAL, x, d_nm->mkNode(kind::STRING_STRREPL, x, y, x)); + sameNormalForm(ctn_repl_self, eq_repl); + + // (str.contains x (str.++ "A" (str.replace x y x))) ---> false + Node ctn_repl_self_f = + d_nm->mkNode(kind::STRING_STRCTN, + x, + d_nm->mkNode(kind::STRING_CONCAT, + a, + d_nm->mkNode(kind::STRING_STRREPL, x, y, x))); + sameNormalForm(ctn_repl_self_f, f); + + // Same normal form for: + // + // (str.contains x (str.replace "" x y)) + // + // (= "" (str.replace "" x y)) + Node ctn_repl_empty = + d_nm->mkNode(kind::STRING_STRCTN, + x, + d_nm->mkNode(kind::STRING_STRREPL, empty, x, y)); + Node eq_repl_empty = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, empty, x, y)); + sameNormalForm(ctn_repl_empty, eq_repl_empty); + + // Same normal form for: + // + // (str.contains x (str.++ x y)) + // + // (= "" y) + Node ctn_x_x_y = d_nm->mkNode( + kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_CONCAT, x, y)); + Node eq_emp_y = d_nm->mkNode(kind::EQUAL, empty, y); + sameNormalForm(ctn_x_x_y, eq_emp_y); + + // Same normal form for: + // + // (str.contains (str.++ y x) (str.++ x y)) + // + // (= (str.++ y x) (str.++ x y)) + Node ctn_yxxy = d_nm->mkNode(kind::STRING_STRCTN, yx, xy); + Node eq_yxxy = d_nm->mkNode(kind::EQUAL, yx, xy); + sameNormalForm(ctn_yxxy, eq_yxxy); + + // (str.contains (str.replace x y x) x) ---> true + ctn_repl =
d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, x), x); + sameNormalForm(ctn_repl, t); + + // (str.contains (str.replace (str.++ x y) z (str.++ y x)) x) ---> true + ctn_repl = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, xy, z, yx), x); + sameNormalForm(ctn_repl, t); + + // (str.contains (str.++ z (str.replace (str.++ x y) z (str.++ y x))) x) + // ---> true + ctn_repl = d_nm->mkNode( + kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_CONCAT, + z, + d_nm->mkNode(kind::STRING_STRREPL, xy, z, yx)), + x); + sameNormalForm(ctn_repl, t); + + // Same normal form for: + // + // (str.contains (str.replace x y x) y) + // + // (str.contains x y) + Node lhs = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, x), y); + Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, y); + sameNormalForm(lhs, rhs); + + // Same normal form for: + // + // (str.contains (str.replace x y x) "B") + // + // (str.contains x "B") + lhs = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, x), b); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, b); + sameNormalForm(lhs, rhs); + + // Same normal form for: + // + // (str.contains (str.replace x y x) (str.substr z n 1)) + // + // (str.contains x (str.substr z n 1)) + Node substr_z = d_nm->mkNode(kind::STRING_SUBSTR, z, n, one); + lhs = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_STRREPL, x, y, x), + substr_z); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, substr_z); + sameNormalForm(lhs, rhs); + + // Same normal form for: + // + // (str.contains (str.replace x y z) z) + // + // (str.contains (str.replace x z y) y) + lhs = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, z), z); + rhs = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, z, y), y); + sameNormalForm(lhs, rhs); + + // Same normal form for: + // + // (str.contains (str.replace x "A" "B") "A") + // + // (str.contains 
(str.replace x "A" "") "A") + lhs = d_nm->mkNode( + kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), a); + rhs = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_STRREPL, x, a, empty), + a); + sameNormalForm(lhs, rhs); + + { + // (str.contains (str.++ x "A") (str.++ "B" x)) ---> false + Node ctn = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_CONCAT, x, a), + d_nm->mkNode(kind::STRING_CONCAT, b, x)); + sameNormalForm(ctn, f); + } + + { + // Same normal form for: + // + // (str.contains (str.replace x "ABC" "DEF") "GHI") + // + // (str.contains x "GHI") + lhs = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_STRREPL, x, abc, def), + ghi); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, ghi); + sameNormalForm(lhs, rhs); + } + + { + // Different normal forms for: + // + // (str.contains (str.replace x "ABC" "DEF") "B") + // + // (str.contains x "B") + lhs = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_STRREPL, x, abc, def), + b); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, b); + differentNormalForms(lhs, rhs); + } + + { + // Different normal forms for: + // + // (str.contains (str.replace x "B" "DEF") "ABC") + // + // (str.contains x "ABC") + lhs = d_nm->mkNode(kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_STRREPL, x, b, def), + abc); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); + differentNormalForms(lhs, rhs); + } + + { + // Same normal form for: + // + // (str.contains (str.++ (str.substr "DEF" n m) x) "AB") + // + // (str.contains x "AB") + lhs = d_nm->mkNode( + kind::STRING_STRCTN, + d_nm->mkNode(kind::STRING_CONCAT, + d_nm->mkNode(kind::STRING_SUBSTR, def, n, m), + x), + ab); + rhs = d_nm->mkNode(kind::STRING_STRCTN, x, ab); + sameNormalForm(lhs, rhs); + } + + { + // Same normal form for: + // + // (str.contains "ABC" (str.at x n)) + // + // (or (= (str.at x n) "") + // (= (str.at x n) "A") (= (str.at x n) "B") (= (str.at x n) "C")) + Node cat = d_nm->mkNode(kind::STRING_CHARAT, x, n); + lhs =
d_nm->mkNode(kind::STRING_STRCTN, abc, cat); + rhs = d_nm->mkNode(kind::OR, + d_nm->mkNode(kind::EQUAL, cat, empty), + d_nm->mkNode(kind::EQUAL, cat, a), + d_nm->mkNode(kind::EQUAL, cat, b), + d_nm->mkNode(kind::EQUAL, cat, c)); + sameNormalForm(lhs, rhs); + } + } + + void testInferEqsFromContains() + { + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); + Node f = d_nm->mkConst(false); + + // inferEqsFromContains("", (str.++ x y)) returns something equivalent to + // (and (= "" x) (= "" y)) + Node empty_x_y = d_nm->mkNode(kind::AND, + d_nm->mkNode(kind::EQUAL, empty, x), + d_nm->mkNode(kind::EQUAL, empty, y)); + sameNormalForm(SequencesRewriter::inferEqsFromContains(empty, xy), + empty_x_y); + + // inferEqsFromContains(x, (str.++ "B" y x "A")) returns false + Node bxya = d_nm->mkNode(kind::STRING_CONCAT, b, y, x, a); + sameNormalForm(SequencesRewriter::inferEqsFromContains(x, bxya), f); + + // inferEqsFromContains(x, y) returns null + Node n = SequencesRewriter::inferEqsFromContains(x, y); + TS_ASSERT(n.isNull()); + + // inferEqsFromContains(x, x) returns something equivalent to (= x x) + Node eq_x_x = d_nm->mkNode(kind::EQUAL, x, x); + sameNormalForm(SequencesRewriter::inferEqsFromContains(x, x), eq_x_x); + + // inferEqsFromContains((str.replace x "B" "A"), x) returns something + // equivalent to (= (str.replace x "B" "A") x) + Node repl = d_nm->mkNode(kind::STRING_STRREPL, x, b, a); + Node eq_repl_x = d_nm->mkNode(kind::EQUAL, repl, x); + sameNormalForm(SequencesRewriter::inferEqsFromContains(repl, x), + eq_repl_x); + + // inferEqsFromContains(x, (str.replace x "B" "A")) returns something + // equivalent to (= (str.replace x "B" "A") x) + Node eq_x_repl = d_nm->mkNode(kind::EQUAL, x, repl); +
sameNormalForm(SequencesRewriter::inferEqsFromContains(x, repl), + eq_x_repl); + } + + void testRewritePrefixSuffix() + { + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node xx = d_nm->mkNode(kind::STRING_CONCAT, x, x); + Node xxa = d_nm->mkNode(kind::STRING_CONCAT, x, x, a); + Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); + Node f = d_nm->mkConst(false); + + // Same normal form for: + // + // (str.prefix (str.++ x y) x) + // + // (= y "") + Node p_xy = d_nm->mkNode(kind::STRING_PREFIX, xy, x); + Node empty_y = d_nm->mkNode(kind::EQUAL, y, empty); + sameNormalForm(p_xy, empty_y); + + // Same normal form for: + // + // (str.suffix (str.++ x x) x) + // + // (= x "") + Node p_xx = d_nm->mkNode(kind::STRING_SUFFIX, xx, x); + Node empty_x = d_nm->mkNode(kind::EQUAL, x, empty); + sameNormalForm(p_xx, empty_x); + + // (str.suffix (str.++ x x "A") x) ---> false + Node p_xxa = d_nm->mkNode(kind::STRING_SUFFIX, xxa, x); + sameNormalForm(p_xxa, f); + } + + void testRewriteEqualityExt() + { + TypeNode strType = d_nm->stringType(); + TypeNode intType = d_nm->integerType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node aaa = d_nm->mkConst(::CVC4::String("AAA")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node ba = d_nm->mkConst(::CVC4::String("BA")); + Node w = d_nm->mkVar("w", strType); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node z = d_nm->mkVar("z", strType); + Node xxa = d_nm->mkNode(kind::STRING_CONCAT, x, x, a); + Node f = d_nm->mkConst(false); + Node n = d_nm->mkVar("n", intType); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node three = d_nm->mkConst(Rational(3)); + + // Same normal form for: + // + // (= "" (str.replace "" x "B")) + // + // (not (= x ""))
+ Node empty_repl = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, empty, x, b)); + Node empty_x = d_nm->mkNode(kind::NOT, d_nm->mkNode(kind::EQUAL, x, empty)); + sameNormalForm(empty_repl, empty_x); + + // Same normal form for: + // + // (= "" (str.replace x y (str.++ x x "A"))) + // + // (and (= x "") (not (= y ""))) + Node empty_repl_xaa = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, x, y, xxa)); + Node empty_xy = d_nm->mkNode( + kind::AND, + d_nm->mkNode(kind::EQUAL, x, empty), + d_nm->mkNode(kind::NOT, d_nm->mkNode(kind::EQUAL, y, empty))); + sameNormalForm(empty_repl_xaa, empty_xy); + + // (= "" (str.replace (str.++ x x "A") x y)) ---> false + Node empty_repl_xxaxy = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, xxa, x, y)); + Node eq_xxa_repl = d_nm->mkNode( + kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, empty, y, x)); + sameNormalForm(empty_repl_xxaxy, f); + + // Same normal form for: + // + // (= "" (str.replace "A" x y)) + // + // (= "A" (str.replace "" y x)) + Node empty_repl_axy = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, a, x, y)); + Node eq_a_repl = d_nm->mkNode( + kind::EQUAL, a, d_nm->mkNode(kind::STRING_STRREPL, empty, y, x)); + sameNormalForm(empty_repl_axy, eq_a_repl); + + // Same normal form for: + // + // (= "" (str.replace x "A" "")) + // + // (str.prefix x "A") + Node empty_repl_a = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, x, a, empty)); + Node prefix_a = d_nm->mkNode(kind::STRING_PREFIX, x, a); + sameNormalForm(empty_repl_a, prefix_a); + + // Same normal form for: + // + // (= "" (str.substr x 1 3)) + // + // (<= (str.len x) 1) + Node empty_substr = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, x, one, three)); + Node leq_len_x = + d_nm->mkNode(kind::LEQ, d_nm->mkNode(kind::STRING_LENGTH, x), one); + sameNormalForm(empty_substr, leq_len_x); + + // Different normal form for: + //
+ // (= "" (str.substr x 0 n)) + // + // (<= n 0) + Node empty_substr_x = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, x, zero, n)); + Node leq_n = d_nm->mkNode(kind::LEQ, n, zero); + differentNormalForms(empty_substr_x, leq_n); + + // Same normal form for: + // + // (= "" (str.substr "A" 0 n)) + // + // (<= n 0) + Node empty_substr_a = d_nm->mkNode( + kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, a, zero, n)); + sameNormalForm(empty_substr_a, leq_n); + + // Same normal form for: + // + // (= (str.++ x x a) (str.replace y (str.++ x x a) y)) + // + // (= (str.++ x x a) y) + Node eq_xxa_repl_y = d_nm->mkNode( + kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, y, xxa, y)); + Node eq_xxa_y = d_nm->mkNode(kind::EQUAL, xxa, y); + sameNormalForm(eq_xxa_repl_y, eq_xxa_y); + + // (= (str.++ x x a) (str.replace (str.++ x x a) "A" "B")) ---> false + Node eq_xxa_repl_xxa = d_nm->mkNode( + kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, xxa, a, b)); + sameNormalForm(eq_xxa_repl_xxa, f); + + // Same normal form for: + // + // (= (str.replace x "A" "B") "") + // + // (= x "") + Node eq_repl = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), empty); + Node eq_x = d_nm->mkNode(kind::EQUAL, x, empty); + sameNormalForm(eq_repl, eq_x); + + { + // Same normal form for: + // + // (= (str.replace x "A" "B") "B") + // + // (= (str.replace x "B" "A") "A") + Node lhs = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), b); + Node rhs = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, b, a), a); + sameNormalForm(lhs, rhs); + } + + { + // Same normal form for: + // + // (= (str.++ x "A" y) (str.++ "A" "A" (str.substr "AAA" 0 n))) + // + // (= (str.++ x y) (str.++ (str.substr "AAA" 0 n) "A")) + Node lhs = d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, x, a, y), + d_nm->mkNode(kind::STRING_CONCAT, + a, + a, + d_nm->mkNode(kind::STRING_SUBSTR, aaa, zero, n))); + Node rhs
= d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, x, y), + d_nm->mkNode(kind::STRING_CONCAT, + d_nm->mkNode(kind::STRING_SUBSTR, aaa, zero, n), + a)); + sameNormalForm(lhs, rhs); + } + + { + // Same normal form for: + // + // (= (str.++ "A" x) "A") + // + // (= x "") + Node lhs = + d_nm->mkNode(kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, a, x), a); + Node rhs = d_nm->mkNode(kind::EQUAL, x, empty); + sameNormalForm(lhs, rhs); + } + + { + // (= (str.++ x "A") "") ---> false + Node eq = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, a), empty); + sameNormalForm(eq, f); + } + + { + // (= (str.++ x "B") "AAA") ---> false + Node eq = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, b), aaa); + sameNormalForm(eq, f); + } + + { + // (= (str.++ x "AAA") "A") ---> false + Node eq = d_nm->mkNode( + kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, aaa), a); + sameNormalForm(eq, f); + } + + { + // (= (str.++ "AAA" (str.++ "A" "A" (str.substr x 0 n))) (str.++ x "B")) ---> false + Node eq = d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode( + kind::STRING_CONCAT, + aaa, + d_nm->mkNode(kind::STRING_CONCAT, + a, + a, + d_nm->mkNode(kind::STRING_SUBSTR, x, zero, n))), + d_nm->mkNode(kind::STRING_CONCAT, x, b)); + sameNormalForm(eq, f); + } + + { + // (= (str.++ "A" (int.to.str n)) "A") -/-> false + Node eq = d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode( + kind::STRING_CONCAT, a, d_nm->mkNode(kind::STRING_ITOS, n)), + a); + differentNormalForms(eq, f); + } + + { + // (= (str.++ "A" x y) (str.++ x "B" z)) --> false + Node eq = d_nm->mkNode( + kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, a, x, y), + d_nm->mkNode(kind::STRING_CONCAT, x, b, z)); + sameNormalForm(eq, f); + } + + { + // (= (str.++ "B" x y) (str.++ x "AAA" z)) --> false + Node eq = d_nm->mkNode(kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, b, x, y), + d_nm->mkNode(kind::STRING_CONCAT, x, aaa, z)); + sameNormalForm(eq, f); + } + + { + Node xrepl = d_nm->mkNode(kind::STRING_STRREPL,
x, a, b); + + // Same normal form for: + // + // (= (str.++ "B" (str.replace x "A" "B") z y w) + // (str.++ z x "BA" z)) + // + // (and (= (str.++ "B" (str.replace x "A" "B") z) + // (str.++ z x "B")) + // (= (str.++ y w) (str.++ "A" z))) + Node lhs = + d_nm->mkNode(kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, b, xrepl, z, y, w), + d_nm->mkNode(kind::STRING_CONCAT, z, x, ba, z)); + Node rhs = d_nm->mkNode( + kind::AND, + d_nm->mkNode(kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, b, xrepl, z), + d_nm->mkNode(kind::STRING_CONCAT, z, x, b)), + d_nm->mkNode(kind::EQUAL, + d_nm->mkNode(kind::STRING_CONCAT, y, w), + d_nm->mkNode(kind::STRING_CONCAT, a, z))); + sameNormalForm(lhs, rhs); + } + } + + void testStripConstantEndpoints() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node ab = d_nm->mkConst(::CVC4::String("AB")); + Node abc = d_nm->mkConst(::CVC4::String("ABC")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node bc = d_nm->mkConst(::CVC4::String("BC")); + Node c = d_nm->mkConst(::CVC4::String("C")); + Node cd = d_nm->mkConst(::CVC4::String("CD")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node n = d_nm->mkVar("n", intType); + + { + // stripConstantEndpoints({ "" }, { "A" }, {}, {}, 0) ---> false + std::vector n1 = {empty}; + std::vector n2 = {a}; + std::vector nb; + std::vector ne; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); + TS_ASSERT(!res); + } + + { + // stripConstantEndpoints({ "A" }, { "A",
(int.to.str n) }, {}, {}, 0) + // ---> false + std::vector n1 = {a}; + std::vector n2 = {a, d_nm->mkNode(kind::STRING_ITOS, n)}; + std::vector nb; + std::vector ne; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); + TS_ASSERT(!res); + } + + { + // stripConstantEndpoints({ "ABCD" }, { "C" }, {}, {}, 1) + // ---> true + // n1 is updated to { "CD" } + // nb is updated to { "AB" } + std::vector n1 = {abcd}; + std::vector n2 = {c}; + std::vector nb; + std::vector ne; + std::vector n1r = {cd}; + std::vector nbr = {ab}; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); + TS_ASSERT(res); + TS_ASSERT_EQUALS(n1, n1r); + TS_ASSERT_EQUALS(nb, nbr); + } + + { + // stripConstantEndpoints({ "ABC", x }, { "CD" }, {}, {}, 1) + // ---> true + // n1 is updated to { "C", x } + // nb is updated to { "AB" } + std::vector n1 = {abc, x}; + std::vector n2 = {cd}; + std::vector nb; + std::vector ne; + std::vector n1r = {c, x}; + std::vector nbr = {ab}; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); + TS_ASSERT(res); + TS_ASSERT_EQUALS(n1, n1r); + TS_ASSERT_EQUALS(nb, nbr); + } + + { + // stripConstantEndpoints({ "ABC" }, { "A" }, {}, {}, -1) + // ---> true + // n1 is updated to { "A" } + // ne is updated to { "BC" } + std::vector n1 = {abc}; + std::vector n2 = {a}; + std::vector nb; + std::vector ne; + std::vector n1r = {a}; + std::vector ner = {bc}; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); + TS_ASSERT(res); + TS_ASSERT_EQUALS(n1, n1r); + TS_ASSERT_EQUALS(ne, ner); + } + + { + // stripConstantEndpoints({ x, "ABC" }, { y, "A" }, {}, {}, -1) + // ---> true + // n1 is updated to { x, "A" } + // ne is updated to { "BC" } + std::vector n1 = {x, abc}; + std::vector n2 = {y, a}; + std::vector nb; + std::vector ne; + std::vector n1r = {x, a}; + std::vector ner = {bc}; + bool res = + SequencesRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); + TS_ASSERT(res); +
TS_ASSERT_EQUALS(n1, n1r); + TS_ASSERT_EQUALS(ne, ner); + } + } + + void testRewriteMembership() + { + TypeNode strType = d_nm->stringType(); + + std::vector vec_empty; + Node abc = d_nm->mkConst(::CVC4::String("ABC")); + Node re_abc = d_nm->mkNode(kind::STRING_TO_REGEXP, abc); + Node x = d_nm->mkVar("x", strType); + + { + // Same normal form for: + // + // (str.in.re x (re.++ (re.* re.allchar) + // (re.* re.allchar) + // (str.to.re "ABC") + // (re.* re.allchar))) + // + // (str.contains x "ABC") + Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, + d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); + Node lhs = d_nm->mkNode( + kind::STRING_IN_REGEXP, + x, + d_nm->mkNode( + kind::REGEXP_CONCAT, sig_star, sig_star, re_abc, sig_star)); + Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); + sameNormalForm(lhs, rhs); + } + + { + // Different normal forms for: + // + // (str.in.re x (re.++ (re.* re.allchar) (str.to.re "ABC"))) + // + // (str.contains x "ABC") + Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, + d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); + Node lhs = + d_nm->mkNode(kind::STRING_IN_REGEXP, + x, + d_nm->mkNode(kind::REGEXP_CONCAT, sig_star, re_abc)); + Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); + differentNormalForms(lhs, rhs); + } + } + + void testRewriteRegexpConcat() + { + TypeNode strType = d_nm->stringType(); + + std::vector emptyArgs; + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node allStar = d_nm->mkNode(kind::REGEXP_STAR, + d_nm->mkNode(kind::REGEXP_SIGMA, emptyArgs)); + Node xReg = d_nm->mkNode(kind::STRING_TO_REGEXP, x); + Node yReg = d_nm->mkNode(kind::STRING_TO_REGEXP, y); + + { + // In normal form: + // + // (re.++ (re.* re.allchar) (re.union (str.to.re x) (str.to.re y))) + Node n = d_nm->mkNode(kind::REGEXP_CONCAT, + allStar, + d_nm->mkNode(kind::REGEXP_UNION, xReg, yReg)); + inNormalForm(n); + } + + { + // In normal form: + // + // (re.++ (str.to.re x) (re.* re.allchar)) + Node n = 
d_nm->mkNode(kind::REGEXP_CONCAT, xReg, allStar); + inNormalForm(n); + } + } + + private: + ExprManager* d_em; + SmtEngine* d_smt; + SmtScope* d_scope; + ExtendedRewriter* d_rewriter; + + NodeManager* d_nm; +}; diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h deleted file mode 100644 index af8b24a0b..000000000 --- a/test/unit/theory/theory_strings_rewriter_white.h +++ /dev/null @@ -1,1578 +0,0 @@ -/********************* */ -/*! \file theory_strings_rewriter_white.h - ** \verbatim - ** Top contributors (to current version): - ** Andres Noetzli - ** This file is part of the CVC4 project. - ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS - ** in the top-level source directory) and their institutional affiliations. - ** All rights reserved. See the file COPYING in the top-level source - ** directory for licensing information.\endverbatim - ** - ** \brief Unit tests for the strings rewriter - ** - ** Unit tests for the strings rewriter. 
- **/ - -#include "expr/node.h" -#include "expr/node_manager.h" -#include "smt/smt_engine.h" -#include "smt/smt_engine_scope.h" -#include "theory/quantifiers/extended_rewrite.h" -#include "theory/rewriter.h" -#include "theory/strings/theory_strings_rewriter.h" - -#include -#include -#include -#include - -using namespace CVC4; -using namespace CVC4::smt; -using namespace CVC4::theory; -using namespace CVC4::theory::quantifiers; -using namespace CVC4::theory::strings; - -class TheoryStringsRewriterWhite : public CxxTest::TestSuite -{ - public: - TheoryStringsRewriterWhite() {} - - void setUp() override - { - Options opts; - opts.setOutputLanguage(language::output::LANG_SMTLIB_V2); - d_em = new ExprManager(opts); - d_smt = new SmtEngine(d_em); - d_scope = new SmtScope(d_smt); - d_rewriter = new ExtendedRewriter(true); - - d_nm = NodeManager::currentNM(); - } - - void tearDown() override - { - delete d_rewriter; - delete d_scope; - delete d_smt; - delete d_em; - } - - void inNormalForm(Node t) - { - Node res_t = d_rewriter->extendedRewrite(t); - - std::cout << std::endl; - std::cout << t << " ---> " << res_t << std::endl; - TS_ASSERT_EQUALS(t, res_t); - } - - void sameNormalForm(Node t1, Node t2) - { - Node res_t1 = d_rewriter->extendedRewrite(t1); - Node res_t2 = d_rewriter->extendedRewrite(t2); - - std::cout << std::endl; - std::cout << t1 << " ---> " << res_t1 << std::endl; - std::cout << t2 << " ---> " << res_t2 << std::endl; - TS_ASSERT_EQUALS(res_t1, res_t2); - } - - void differentNormalForms(Node t1, Node t2) - { - Node res_t1 = d_rewriter->extendedRewrite(t1); - Node res_t2 = d_rewriter->extendedRewrite(t2); - - std::cout << std::endl; - std::cout << t1 << " ---> " << res_t1 << std::endl; - std::cout << t2 << " ---> " << res_t2 << std::endl; - TS_ASSERT_DIFFERS(res_t1, res_t2); - } - - void testCheckEntailLengthOne() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node a = d_nm->mkConst(::CVC4::String("A")); - Node 
abcd = d_nm->mkConst(::CVC4::String("ABCD")); - Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node negOne = d_nm->mkConst(Rational(-1)); - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - Node two = d_nm->mkConst(Rational(2)); - Node three = d_nm->mkConst(Rational(3)); - Node i = d_nm->mkVar("i", intType); - - TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(a)); - TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(a, true)); - - Node substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one); - TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr, true)); - - substr = d_nm->mkNode(kind::STRING_SUBSTR, - d_nm->mkNode(kind::STRING_CONCAT, a, x), - zero, - one); - TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr, true)); - - substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, two); - TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr)); - TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr, true)); - } - - void testCheckEntailArith() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node z = d_nm->mkVar("z", strType); - Node n = d_nm->mkVar("n", intType); - Node one = d_nm->mkConst(Rational(1)); - - // 1 >= (str.len (str.substr z n 1)) ---> true - Node substr_z = d_nm->mkNode(kind::STRING_LENGTH, - d_nm->mkNode(kind::STRING_SUBSTR, z, n, one)); - TS_ASSERT(TheoryStringsRewriter::checkEntailArith(one, substr_z)); - - // (str.len (str.substr z n 1)) >= 1 ---> false - TS_ASSERT(!TheoryStringsRewriter::checkEntailArith(substr_z, one)); - } - - void testCheckEntailArithWithAssumption() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node x = 
d_nm->mkVar("x", intType); - Node y = d_nm->mkVar("y", strType); - Node z = d_nm->mkVar("z", intType); - - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - - Node slen_y = d_nm->mkNode(kind::STRING_LENGTH, y); - Node x_plus_slen_y = d_nm->mkNode(kind::PLUS, x, slen_y); - Node x_plus_slen_y_eq_zero = - Rewriter::rewrite(d_nm->mkNode(kind::EQUAL, x_plus_slen_y, zero)); - - // x + (str.len y) = 0 |= 0 >= x --> true - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_slen_y_eq_zero, zero, x, false)); - - // x + (str.len y) = 0 |= 0 > x --> false - TS_ASSERT(!TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_slen_y_eq_zero, zero, x, true)); - - Node x_plus_slen_y_plus_z_eq_zero = Rewriter::rewrite(d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, z), zero)); - - // x + (str.len y) + z = 0 |= 0 > x --> false - TS_ASSERT(!TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_slen_y_plus_z_eq_zero, zero, x, true)); - - Node x_plus_slen_y_plus_slen_y_eq_zero = Rewriter::rewrite(d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::PLUS, x_plus_slen_y, slen_y), zero)); - - // x + (str.len y) + (str.len y) = 0 |= 0 >= x --> true - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_slen_y_plus_slen_y_eq_zero, zero, x, false)); - - Node five = d_nm->mkConst(Rational(5)); - Node six = d_nm->mkConst(Rational(6)); - Node x_plus_five = d_nm->mkNode(kind::PLUS, x, five); - Node x_plus_five_lt_six = - Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, six)); - - // x + 5 < 6 |= 0 >= x --> true - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_six, zero, x, false)); - - // x + 5 < 6 |= 0 > x --> false - TS_ASSERT(!TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_six, zero, x, true)); - - Node neg_x = 
d_nm->mkNode(kind::UMINUS, x); - Node x_plus_five_lt_five = - Rewriter::rewrite(d_nm->mkNode(kind::LT, x_plus_five, five)); - - // x + 5 < 5 |= -x >= 0 --> true - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_five, neg_x, zero, false)); - - // x + 5 < 5 |= 0 > x --> true - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - x_plus_five_lt_five, zero, x, false)); - - // 0 < x |= x >= (str.len (int.to.str x)) - Node assm = Rewriter::rewrite(d_nm->mkNode(kind::LT, zero, x)); - TS_ASSERT(TheoryStringsRewriter::checkEntailArithWithAssumption( - assm, - x, - d_nm->mkNode(kind::STRING_LENGTH, d_nm->mkNode(kind::STRING_ITOS, x)), - false)); - } - - void testRewriteSubstr() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); - Node negone = d_nm->mkConst(Rational(-1)); - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - Node two = d_nm->mkConst(Rational(2)); - Node three = d_nm->mkConst(Rational(3)); - - Node s = d_nm->mkVar("s", strType); - Node s2 = d_nm->mkVar("s2", strType); - Node x = d_nm->mkVar("x", intType); - Node y = d_nm->mkVar("y", intType); - - // (str.substr "A" x x) --> "" - Node n = d_nm->mkNode(kind::STRING_SUBSTR, a, x, x); - Node res = TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, empty); - - // (str.substr "A" (+ x 1) x) -> "" - n = d_nm->mkNode(kind::STRING_SUBSTR, - a, - d_nm->mkNode(kind::PLUS, x, d_nm->mkConst(Rational(1))), - x); - res = TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, empty); - - // (str.substr "A" (+ x (str.len s2)) x) -> "" - n = d_nm->mkNode( - kind::STRING_SUBSTR, - a, - d_nm->mkNode(kind::PLUS, x, d_nm->mkNode(kind::STRING_LENGTH, s)), - x); - res = 
TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, empty); - - // (str.substr "A" x y) -> (str.substr "A" x y) - n = d_nm->mkNode(kind::STRING_SUBSTR, a, x, y); - res = TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, n); - - // (str.substr "ABCD" (+ x 3) x) -> "" - n = d_nm->mkNode( - kind::STRING_SUBSTR, abcd, d_nm->mkNode(kind::PLUS, x, three), x); - res = TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, empty); - - // (str.substr "ABCD" (+ x 2) x) -> (str.substr "ABCD" (+ x 2) x) - n = d_nm->mkNode( - kind::STRING_SUBSTR, abcd, d_nm->mkNode(kind::PLUS, x, two), x); - res = TheoryStringsRewriter::rewriteSubstr(n); - TS_ASSERT_EQUALS(res, n); - - // (str.substr (str.substr s x x) x x) -> "" - n = d_nm->mkNode( - kind::STRING_SUBSTR, d_nm->mkNode(kind::STRING_SUBSTR, s, x, x), x, x); - sameNormalForm(n, empty); - - // Same normal form for: - // - // (str.substr (str.replace "" s "B") x x) - // - // (str.replace "" s (str.substr "B" x x))) - Node lhs = d_nm->mkNode(kind::STRING_SUBSTR, - d_nm->mkNode(kind::STRING_STRREPL, empty, s, b), - x, - x); - Node rhs = d_nm->mkNode(kind::STRING_STRREPL, - empty, - s, - d_nm->mkNode(kind::STRING_SUBSTR, b, x, x)); - sameNormalForm(lhs, rhs); - - // Same normal form: - // - // (str.substr (str.replace s "A" "B") 0 x) - // - // (str.replace (str.substr s 0 x) "A" "B") - Node substr_repl = d_nm->mkNode(kind::STRING_SUBSTR, - d_nm->mkNode(kind::STRING_STRREPL, s, a, b), - zero, - x); - Node repl_substr = - d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_SUBSTR, s, zero, x), - a, - b); - sameNormalForm(substr_repl, repl_substr); - - // Same normal form: - // - // (str.substr (str.replace s (str.substr (str.++ s2 "A") 0 1) "B") 0 x) - // - // (str.replace (str.substr s 0 x) (str.substr (str.++ s2 "A") 0 1) "B") - Node substr_y = d_nm->mkNode(kind::STRING_SUBSTR, - d_nm->mkNode(kind::STRING_CONCAT, s2, a), - zero, - one); - substr_repl = - d_nm->mkNode(kind::STRING_SUBSTR, 
- d_nm->mkNode(kind::STRING_STRREPL, s, substr_y, b), - zero, - x); - repl_substr = d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_SUBSTR, s, zero, x), - substr_y, - b); - sameNormalForm(substr_repl, repl_substr); - - // (str.substr (str.int.to.str x) x x) ---> empty - Node substr_itos = d_nm->mkNode( - kind::STRING_SUBSTR, d_nm->mkNode(kind::STRING_ITOS, x), x, x); - sameNormalForm(substr_itos, empty); - - // (str.substr s (* (- 1) (str.len s)) 1) ---> empty - Node substr = d_nm->mkNode( - kind::STRING_SUBSTR, - s, - d_nm->mkNode(kind::MULT, negone, d_nm->mkNode(kind::STRING_LENGTH, s)), - one); - sameNormalForm(substr, empty); - } - - void testRewriteConcat() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node zero = d_nm->mkConst(Rational(0)); - Node three = d_nm->mkConst(Rational(3)); - - Node i = d_nm->mkVar("i", intType); - Node s = d_nm->mkVar("s", strType); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - - // Same normal form for: - // - // (str.++ (str.replace "A" x "") "A") - // - // (str.++ "A" (str.replace "A" x "")) - Node repl_a_x_e = d_nm->mkNode(kind::STRING_STRREPL, a, x, empty); - Node repl_a = d_nm->mkNode(kind::STRING_CONCAT, repl_a_x_e, a); - Node a_repl = d_nm->mkNode(kind::STRING_CONCAT, a, repl_a_x_e); - sameNormalForm(repl_a, a_repl); - - // Same normal form for: - // - // (str.++ y (str.replace "" x (str.substr y 0 3)) (str.substr y 0 3) "A" (str.substr y 0 3)) - // - // (str.++ y (str.substr y 0 3) (str.replace "" x (str.substr y 0 3)) "A" (str.substr y 0 3)) - Node z = d_nm->mkNode(kind::STRING_SUBSTR, y, zero, three); - Node repl_e_x_z = d_nm->mkNode(kind::STRING_STRREPL, empty, x, z); - repl_a = d_nm->mkNode(kind::STRING_CONCAT, y, repl_e_x_z, z, a, z); - a_repl = d_nm->mkNode(kind::STRING_CONCAT, y, z, repl_e_x_z, a, z); - sameNormalForm(repl_a, 
a_repl); - - // Same normal form for: - // - // (str.++ "A" (str.replace "A" x "") (str.substr "A" 0 i)) - // - // (str.++ (str.substr "A" 0 i) (str.replace "A" x "") "A") - Node substr_a = d_nm->mkNode(kind::STRING_SUBSTR, a, zero, i); - Node a_substr_repl = - d_nm->mkNode(kind::STRING_CONCAT, a, substr_a, repl_a_x_e); - Node substr_repl_a = - d_nm->mkNode(kind::STRING_CONCAT, substr_a, repl_a_x_e, a); - sameNormalForm(a_substr_repl, substr_repl_a); - - // Same normal form for: - // - // (str.++ (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i) (str.at "A" i)) - // - // (str.++ (str.at "A" i) (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i)) - Node charat_a = d_nm->mkNode(kind::STRING_CHARAT, a, i); - Node repl_e_x_s = d_nm->mkNode(kind::STRING_STRREPL, empty, x, substr_a); - Node repl_substr_a = - d_nm->mkNode(kind::STRING_CONCAT, repl_e_x_s, substr_a, charat_a); - Node a_repl_substr = - d_nm->mkNode(kind::STRING_CONCAT, charat_a, repl_e_x_s, substr_a); - sameNormalForm(repl_substr_a, a_repl_substr); - } - - void testLengthPreserveRewrite() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); - Node f = d_nm->mkConst(::CVC4::String("F")); - Node gh = d_nm->mkConst(::CVC4::String("GH")); - Node ij = d_nm->mkConst(::CVC4::String("IJ")); - - Node i = d_nm->mkVar("i", intType); - Node s = d_nm->mkVar("s", strType); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - - // Same length preserving rewrite for: - // - // (str.++ "ABCD" (str.++ x x)) - // - // (str.++ "GH" (str.repl "GH" "IJ") "IJ") - Node concat1 = d_nm->mkNode( - kind::STRING_CONCAT, abcd, d_nm->mkNode(kind::STRING_CONCAT, x, x)); - Node concat2 = d_nm->mkNode(kind::STRING_CONCAT, - gh, - x, - d_nm->mkNode(kind::STRING_STRREPL, x, gh, ij), - ij); - Node res_concat1 = 
TheoryStringsRewriter::lengthPreserveRewrite(concat1); - Node res_concat2 = TheoryStringsRewriter::lengthPreserveRewrite(concat2); - TS_ASSERT_EQUALS(res_concat1, res_concat2); - } - - void testRewriteIndexOf() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node a = d_nm->mkConst(::CVC4::String("A")); - Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); - Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node c = d_nm->mkConst(::CVC4::String("C")); - Node ccc = d_nm->mkConst(::CVC4::String("CCC")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node negOne = d_nm->mkConst(Rational(-1)); - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - Node two = d_nm->mkConst(Rational(2)); - Node three = d_nm->mkConst(Rational(3)); - Node i = d_nm->mkVar("i", intType); - Node j = d_nm->mkVar("j", intType); - - // Same normal form for: - // - // (str.to.int (str.indexof "A" x 1)) - // - // (str.to.int (str.indexof "B" x 1)) - Node a_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, a, x, two); - Node itos_a_idof_x = d_nm->mkNode(kind::STRING_ITOS, a_idof_x); - Node b_idof_x = d_nm->mkNode(kind::STRING_STRIDOF, b, x, two); - Node itos_b_idof_x = d_nm->mkNode(kind::STRING_ITOS, b_idof_x); - sameNormalForm(itos_a_idof_x, itos_b_idof_x); - - // Same normal form for: - // - // (str.indexof (str.++ "ABCD" x) y 3) - // - // (str.indexof (str.++ "AAAD" x) y 3) - Node idof_abcd = d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, abcd, x), - y, - three); - Node idof_aaad = d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, aaad, x), - y, - three); - sameNormalForm(idof_abcd, idof_aaad); - - // (str.indexof (str.substr x 1 i) "A" i) ---> -1 - Node idof_substr = - d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_SUBSTR, x, one, i), - a, - i); - sameNormalForm(idof_substr, negOne); - - { - 
// Same normal form for: - // - // (str.indexof (str.++ "B" (str.substr "CCC" i j) x "A") "A" 0) - // - // (+ 1 (str.len (str.substr "CCC" i j)) - // (str.indexof (str.++ "A" x y) "A" 0)) - Node lhs = d_nm->mkNode( - kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, - b, - d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j), - x, - a), - a, - zero); - Node rhs = d_nm->mkNode( - kind::PLUS, - one, - d_nm->mkNode(kind::STRING_LENGTH, - d_nm->mkNode(kind::STRING_SUBSTR, ccc, i, j)), - d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, x, a), - a, - zero)); - sameNormalForm(lhs, rhs); - } - - { - // Same normal form for: - // - // (str.indexof (str.++ "B" "C" "A" x y) "A" 0) - // - // (+ 2 (str.indexof (str.++ "A" x y) "A" 0)) - Node lhs = d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, b, c, a, x, y), - a, - zero); - Node rhs = - d_nm->mkNode(kind::PLUS, - two, - d_nm->mkNode(kind::STRING_STRIDOF, - d_nm->mkNode(kind::STRING_CONCAT, a, x, y), - a, - zero)); - sameNormalForm(lhs, rhs); - } - } - - void testRewriteReplace() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node ab = d_nm->mkConst(::CVC4::String("AB")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node c = d_nm->mkConst(::CVC4::String("C")); - Node d = d_nm->mkConst(::CVC4::String("D")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node z = d_nm->mkVar("z", strType); - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - Node n = d_nm->mkVar("n", intType); - - // (str.replace (str.replace x "B" x) x "A") --> - // (str.replace (str.replace x "B" "A") x "A") - Node repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_STRREPL, x, b, x), - x, - a); - Node repl_repl_short = - d_nm->mkNode(kind::STRING_STRREPL, - 
d_nm->mkNode(kind::STRING_STRREPL, x, b, a), - x, - a); - sameNormalForm(repl_repl, repl_repl_short); - - // (str.replace "A" (str.replace "B", x, "C") "D") --> "A" - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_STRREPL, b, x, c), - d); - sameNormalForm(repl_repl, a); - - // (str.replace "A" (str.replace "B", x, "A") "D") -/-> "A" - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_STRREPL, b, x, a), - d); - differentNormalForms(repl_repl, a); - - // Same normal form for: - // - // (str.replace x (str.++ x y z) y) - // - // (str.replace x (str.++ x y z) z) - Node xyz = d_nm->mkNode(kind::STRING_CONCAT, x, y, z); - Node repl_x_xyz = d_nm->mkNode(kind::STRING_STRREPL, x, xyz, y); - Node repl_x_zyx = d_nm->mkNode(kind::STRING_STRREPL, x, xyz, z); - sameNormalForm(repl_x_xyz, repl_x_zyx); - - // (str.replace "" (str.++ x x) x) --> "" - Node repl_empty_xx = d_nm->mkNode(kind::STRING_STRREPL, - empty, - d_nm->mkNode(kind::STRING_CONCAT, x, x), - x); - sameNormalForm(repl_empty_xx, empty); - - // (str.replace "AB" (str.++ x "A") x) --> (str.replace "AB" (str.++ x "A") - // "") - Node repl_ab_xa_x = d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_CONCAT, a, b), - d_nm->mkNode(kind::STRING_CONCAT, x, a), - x); - Node repl_ab_xa_e = d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_CONCAT, a, b), - d_nm->mkNode(kind::STRING_CONCAT, x, a), - empty); - sameNormalForm(repl_ab_xa_x, repl_ab_xa_e); - - // (str.replace "AB" (str.++ x "A") x) -/-> (str.replace "AB" (str.++ "A" x) - // "") - Node repl_ab_ax_e = d_nm->mkNode(kind::STRING_STRREPL, - d_nm->mkNode(kind::STRING_CONCAT, a, b), - d_nm->mkNode(kind::STRING_CONCAT, a, x), - empty); - differentNormalForms(repl_ab_ax_e, repl_ab_xa_e); - - // (str.replace "" (str.replace y x "A") y) ---> "" - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - empty, - d_nm->mkNode(kind::STRING_STRREPL, y, x, a), - y); - sameNormalForm(repl_repl, empty); - 
- // (str.replace "" (str.replace x y x) x) ---> "" - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - empty, - d_nm->mkNode(kind::STRING_STRREPL, x, y, x), - x); - sameNormalForm(repl_repl, empty); - - // (str.replace "" (str.substr x 0 1) x) ---> "" - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - empty, - d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one), - x); - sameNormalForm(repl_repl, empty); - - // Same normal form for: - // - // (str.replace "" (str.replace x y x) y) - // - // (str.replace "" x y) - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - empty, - d_nm->mkNode(kind::STRING_STRREPL, x, y, x), - y); - Node repl = d_nm->mkNode(kind::STRING_STRREPL, empty, x, y); - sameNormalForm(repl_repl, repl); - - // Same normal form: - // - // (str.replace "B" (str.replace x "A" "B") "B") - // - // (str.replace "B" x "B")) - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - b, - d_nm->mkNode(kind::STRING_STRREPL, x, a, b), - b); - repl = d_nm->mkNode(kind::STRING_STRREPL, b, x, b); - sameNormalForm(repl_repl, repl); - - // Different normal forms for: - // - // (str.replace "B" (str.replace "" x "A") "B") - // - // (str.replace "B" x "B") - repl_repl = d_nm->mkNode(kind::STRING_STRREPL, - b, - d_nm->mkNode(kind::STRING_STRREPL, empty, x, a), - b); - repl = d_nm->mkNode(kind::STRING_STRREPL, b, x, b); - differentNormalForms(repl_repl, repl); - - { - // Same normal form: - // - // (str.replace (str.++ "AB" x) "C" y) - // - // (str.++ "AB" (str.replace x "C" y)) - Node lhs = d_nm->mkNode( - kind::STRING_STRREPL, d_nm->mkNode(kind::STRING_CONCAT, ab, x), c, y); - Node rhs = d_nm->mkNode( - kind::STRING_CONCAT, ab, d_nm->mkNode(kind::STRING_STRREPL, x, c, y)); - sameNormalForm(lhs, rhs); - } - } - - void testRewriteContains() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node ab = d_nm->mkConst(::CVC4::String("AB")); - Node b = 
d_nm->mkConst(::CVC4::String("B")); - Node c = d_nm->mkConst(::CVC4::String("C")); - Node e = d_nm->mkConst(::CVC4::String("E")); - Node h = d_nm->mkConst(::CVC4::String("H")); - Node j = d_nm->mkConst(::CVC4::String("J")); - Node p = d_nm->mkConst(::CVC4::String("P")); - Node abc = d_nm->mkConst(::CVC4::String("ABC")); - Node def = d_nm->mkConst(::CVC4::String("DEF")); - Node ghi = d_nm->mkConst(::CVC4::String("GHI")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); - Node yx = d_nm->mkNode(kind::STRING_CONCAT, y, x); - Node z = d_nm->mkVar("z", strType); - Node n = d_nm->mkVar("n", intType); - Node m = d_nm->mkVar("m", intType); - Node one = d_nm->mkConst(Rational(1)); - Node two = d_nm->mkConst(Rational(2)); - Node three = d_nm->mkConst(Rational(3)); - Node four = d_nm->mkConst(Rational(4)); - Node t = d_nm->mkConst(true); - Node f = d_nm->mkConst(false); - - // Same normal form for: - // - // (str.replace "A" (str.substr x 1 3) y z) - // - // (str.replace "A" (str.substr x 1 4) y z) - Node substr_3 = - d_nm->mkNode(kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_SUBSTR, x, one, three), - z); - Node substr_4 = - d_nm->mkNode(kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_SUBSTR, x, one, four), - z); - sameNormalForm(substr_3, substr_4); - - // Same normal form for: - // - // (str.replace "A" (str.++ y (str.substr x 1 3)) y z) - // - // (str.replace "A" (str.++ y (str.substr x 1 4)) y z) - Node concat_substr_3 = d_nm->mkNode( - kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_CONCAT, - y, - d_nm->mkNode(kind::STRING_SUBSTR, x, one, three)), - z); - Node concat_substr_4 = d_nm->mkNode( - kind::STRING_STRREPL, - a, - d_nm->mkNode(kind::STRING_CONCAT, - y, - d_nm->mkNode(kind::STRING_SUBSTR, x, one, four)), - z); - sameNormalForm(concat_substr_3, concat_substr_4); - - // (str.contains "A" (str.++ a (str.replace "B", x, "C")) --> false - Node ctn_repl = - 
d_nm->mkNode(kind::STRING_STRCTN, - a, - d_nm->mkNode(kind::STRING_CONCAT, - a, - d_nm->mkNode(kind::STRING_STRREPL, b, x, c))); - sameNormalForm(ctn_repl, f); - - // (str.contains x (str.++ x x)) --> (= x "") - Node x_cnts_x_x = d_nm->mkNode( - kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_CONCAT, x, x)); - sameNormalForm(x_cnts_x_x, d_nm->mkNode(kind::EQUAL, x, empty)); - - // Same normal form for: - // - // (str.contains (str.++ y x) (str.++ x z y)) - // - // (and (str.contains (str.++ y x) (str.++ x y)) (= z "")) - Node yx_cnts_xzy = d_nm->mkNode( - kind::STRING_STRCTN, yx, d_nm->mkNode(kind::STRING_CONCAT, x, z, y)); - Node yx_cnts_xy = d_nm->mkNode(kind::AND, - d_nm->mkNode(kind::EQUAL, z, empty), - d_nm->mkNode(kind::STRING_STRCTN, yx, xy)); - sameNormalForm(yx_cnts_xzy, yx_cnts_xy); - - // Same normal form for: - // - // (str.contains (str.substr x n (str.len y)) y) - // - // (= (str.substr x n (str.len y)) y) - Node ctn_substr = d_nm->mkNode( - kind::STRING_STRCTN, - d_nm->mkNode( - kind::STRING_SUBSTR, x, n, d_nm->mkNode(kind::STRING_LENGTH, y)), - y); - Node substr_eq = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode( - kind::STRING_SUBSTR, x, n, d_nm->mkNode(kind::STRING_LENGTH, y)), - y); - sameNormalForm(ctn_substr, substr_eq); - - // Same normal form for: - // - // (str.contains x (str.replace y x y)) - // - // (str.contains x y) - Node ctn_repl_y_x_y = d_nm->mkNode( - kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_STRREPL, y, x, y)); - Node ctn_x_y = d_nm->mkNode(kind::STRING_STRCTN, x, y); - sameNormalForm(ctn_repl_y_x_y, ctn_repl_y_x_y); - - // Same normal form for: - // - // (str.contains x (str.replace x y x)) - // - // (= x (str.replace x y x)) - Node ctn_repl_self = d_nm->mkNode( - kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_STRREPL, x, y, x)); - Node eq_repl = d_nm->mkNode( - kind::EQUAL, x, d_nm->mkNode(kind::STRING_STRREPL, x, y, x)); - sameNormalForm(ctn_repl_self, eq_repl); - - // (str.contains x (str.++ "A" (str.replace x y x))) 
---> false - Node ctn_repl_self_f = - d_nm->mkNode(kind::STRING_STRCTN, - x, - d_nm->mkNode(kind::STRING_CONCAT, - a, - d_nm->mkNode(kind::STRING_STRREPL, x, y, x))); - sameNormalForm(ctn_repl_self_f, f); - - // Same normal form for: - // - // (str.contains x (str.replace "" x y)) - // - // (= "" (str.replace "" x y)) - Node ctn_repl_empty = - d_nm->mkNode(kind::STRING_STRCTN, - x, - d_nm->mkNode(kind::STRING_STRREPL, empty, x, y)); - Node eq_repl_empty = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, empty, x, y)); - sameNormalForm(ctn_repl_empty, eq_repl_empty); - - // Same normal form for: - // - // (str.contains x (str.++ x y)) - // - // (= "" y) - Node ctn_x_x_y = d_nm->mkNode( - kind::STRING_STRCTN, x, d_nm->mkNode(kind::STRING_CONCAT, x, y)); - Node eq_emp_y = d_nm->mkNode(kind::EQUAL, empty, y); - sameNormalForm(ctn_x_x_y, eq_emp_y); - - // Same normal form for: - // - // (str.contains (str.++ y x) (str.++ x y)) - // - // (= (str.++ y x) (str.++ x y)) - Node ctn_yxxy = d_nm->mkNode(kind::STRING_STRCTN, yx, xy); - Node eq_yxxy = d_nm->mkNode(kind::EQUAL, yx, xy); - sameNormalForm(ctn_yxxy, eq_yxxy); - - // (str.contains (str.replace x y x) x) ---> true - ctn_repl = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, x), x); - sameNormalForm(ctn_repl, t); - - // (str.contains (str.replace (str.++ x y) z (str.++ y x)) x) ---> true - ctn_repl = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, xy, z, yx), x); - sameNormalForm(ctn_repl, t); - - // (str.contains (str.++ z (str.replace (str.++ x y) z (str.++ y x))) x) - // ---> true - ctn_repl = d_nm->mkNode( - kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_CONCAT, - z, - d_nm->mkNode(kind::STRING_STRREPL, xy, z, yx)), - x); - sameNormalForm(ctn_repl, t); - - // Same normal form for: - // - // (str.contains (str.replace x y x) y) - // - // (str.contains x y) - Node lhs = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, 
x, y, x), y); - Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, y); - sameNormalForm(lhs, rhs); - - // Same normal form for: - // - // (str.contains (str.replace x y x) "B") - // - // (str.contains x "B") - lhs = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, x), b); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, b); - sameNormalForm(lhs, rhs); - - // Same normal form for: - // - // (str.contains (str.replace x y x) (str.substr z n 1)) - // - // (str.contains x (str.substr z n 1)) - Node substr_z = d_nm->mkNode(kind::STRING_SUBSTR, z, n, one); - lhs = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_STRREPL, x, y, x), - substr_z); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, substr_z); - sameNormalForm(lhs, rhs); - - // Same normal form for: - // - // (str.contains (str.replace x y z) z) - // - // (str.contains (str.replace x z y) y) - lhs = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, y, z), z); - rhs = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, z, y), y); - sameNormalForm(lhs, rhs); - - // Same normal form for: - // - // (str.contains (str.replace x "A" "B") "A") - // - // (str.contains (str.replace x "A" "") "A") - lhs = d_nm->mkNode( - kind::STRING_STRCTN, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), a); - rhs = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_STRREPL, x, a, empty), - a); - sameNormalForm(lhs, rhs); - - { - // (str.contains (str.++ x "A") (str.++ "B" x)) ---> false - Node ctn = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_CONCAT, x, a), - d_nm->mkNode(kind::STRING_CONCAT, b, x)); - sameNormalForm(ctn, f); - } - - { - // Same normal form for: - // - // (str.contains (str.replace x "ABC" "DEF") "GHI") - // - // (str.contains x "GHI") - lhs = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_STRREPL, x, abc, def), - ghi); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, ghi); - sameNormalForm(lhs, rhs); - } 
- - { - // Different normal forms for: - // - // (str.contains (str.replace x "ABC" "DEF") "B") - // - // (str.contains x "B") - lhs = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_STRREPL, x, abc, def), - b); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, b); - differentNormalForms(lhs, rhs); - } - - { - // Different normal forms for: - // - // (str.contains (str.replace x "B" "DEF") "ABC") - // - // (str.contains x "ABC") - lhs = d_nm->mkNode(kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_STRREPL, x, b, def), - abc); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); - differentNormalForms(lhs, rhs); - } - - { - // Same normal form for: - // - // (str.contains (str.++ (str.substr "DEF" n m) x) "AB") - // - // (str.contains x "AB") - lhs = d_nm->mkNode( - kind::STRING_STRCTN, - d_nm->mkNode(kind::STRING_CONCAT, - d_nm->mkNode(kind::STRING_SUBSTR, def, n, m), - x), - ab); - rhs = d_nm->mkNode(kind::STRING_STRCTN, x, ab); - sameNormalForm(lhs, rhs); - } - - { - // Same normal form for: - // - // (str.contains "ABC" (str.at x n)) - // - // (or (= x "") - // (= x "A") (= x "B") (= x "C")) - Node cat = d_nm->mkNode(kind::STRING_CHARAT, x, n); - lhs = d_nm->mkNode(kind::STRING_STRCTN, abc, cat); - rhs = d_nm->mkNode(kind::OR, - d_nm->mkNode(kind::EQUAL, cat, empty), - d_nm->mkNode(kind::EQUAL, cat, a), - d_nm->mkNode(kind::EQUAL, cat, b), - d_nm->mkNode(kind::EQUAL, cat, c)); - sameNormalForm(lhs, rhs); - } - } - - void testInferEqsFromContains() - { - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); - Node f = d_nm->mkConst(false); - - // inferEqsFromContains("", (str.++ x y)) returns something equivalent to - // (= "" y) - Node empty_x_y = d_nm->mkNode(kind::AND, - d_nm->mkNode(kind::EQUAL, 
empty, x), - d_nm->mkNode(kind::EQUAL, empty, y)); - sameNormalForm(TheoryStringsRewriter::inferEqsFromContains(empty, xy), - empty_x_y); - - // inferEqsFromContains(x, (str.++ x y)) returns false - Node bxya = d_nm->mkNode(kind::STRING_CONCAT, b, y, x, a); - sameNormalForm(TheoryStringsRewriter::inferEqsFromContains(x, bxya), f); - - // inferEqsFromContains(x, y) returns null - Node n = TheoryStringsRewriter::inferEqsFromContains(x, y); - TS_ASSERT(n.isNull()); - - // inferEqsFromContains(x, x) returns something equivalent to (= x x) - Node eq_x_x = d_nm->mkNode(kind::EQUAL, x, x); - sameNormalForm(TheoryStringsRewriter::inferEqsFromContains(x, x), eq_x_x); - - // inferEqsFromContains((str.replace x "B" "A"), x) returns something - // equivalent to (= (str.replace x "B" "A") x) - Node repl = d_nm->mkNode(kind::STRING_STRREPL, x, b, a); - Node eq_repl_x = d_nm->mkNode(kind::EQUAL, repl, x); - sameNormalForm(TheoryStringsRewriter::inferEqsFromContains(repl, x), - eq_repl_x); - - // inferEqsFromContains(x, (str.replace x "B" "A")) returns something - // equivalent to (= (str.replace x "B" "A") x) - Node eq_x_repl = d_nm->mkNode(kind::EQUAL, x, repl); - sameNormalForm(TheoryStringsRewriter::inferEqsFromContains(x, repl), - eq_x_repl); - } - - void testRewritePrefixSuffix() - { - TypeNode strType = d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node xx = d_nm->mkNode(kind::STRING_CONCAT, x, x); - Node xxa = d_nm->mkNode(kind::STRING_CONCAT, x, x, a); - Node xy = d_nm->mkNode(kind::STRING_CONCAT, x, y); - Node f = d_nm->mkConst(false); - - // Same normal form for: - // - // (str.prefix (str.++ x y) x) - // - // (= y "") - Node p_xy = d_nm->mkNode(kind::STRING_PREFIX, xy, x); - Node empty_y = d_nm->mkNode(kind::EQUAL, y, empty); - sameNormalForm(p_xy, empty_y); - - // Same normal form for: - // - // (str.suffix (str.++ x x) 
x) - // - // (= x "") - Node p_xx = d_nm->mkNode(kind::STRING_SUFFIX, xx, x); - Node empty_x = d_nm->mkNode(kind::EQUAL, x, empty); - sameNormalForm(p_xx, empty_x); - - // (str.suffix x (str.++ x x "A")) ---> false - Node p_xxa = d_nm->mkNode(kind::STRING_SUFFIX, xxa, x); - sameNormalForm(p_xxa, f); - } - - void testRewriteEqualityExt() - { - TypeNode strType = d_nm->stringType(); - TypeNode intType = d_nm->integerType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node aaa = d_nm->mkConst(::CVC4::String("AAA")); - Node b = d_nm->mkConst(::CVC4::String("B")); - Node ba = d_nm->mkConst(::CVC4::String("BA")); - Node w = d_nm->mkVar("w", strType); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node z = d_nm->mkVar("z", strType); - Node xxa = d_nm->mkNode(kind::STRING_CONCAT, x, x, a); - Node f = d_nm->mkConst(false); - Node n = d_nm->mkVar("n", intType); - Node zero = d_nm->mkConst(Rational(0)); - Node one = d_nm->mkConst(Rational(1)); - Node three = d_nm->mkConst(Rational(3)); - - // Same normal form for: - // - // (= "" (str.replace "" x "B")) - // - // (not (= x "")) - Node empty_repl = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, empty, x, b)); - Node empty_x = d_nm->mkNode(kind::NOT, d_nm->mkNode(kind::EQUAL, x, empty)); - sameNormalForm(empty_repl, empty_x); - - // Same normal form for: - // - // (= "" (str.replace x y (str.++ x x "A"))) - // - // (and (= x "") (not (= y ""))) - Node empty_repl_xaa = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, x, y, xxa)); - Node empty_xy = d_nm->mkNode( - kind::AND, - d_nm->mkNode(kind::EQUAL, x, empty), - d_nm->mkNode(kind::NOT, d_nm->mkNode(kind::EQUAL, y, empty))); - sameNormalForm(empty_repl_xaa, empty_xy); - - // (= "" (str.replace (str.++ x x "A") x y)) ---> false - Node empty_repl_xxaxy = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, xxa, x, y)); - Node 
eq_xxa_repl = d_nm->mkNode( - kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, empty, y, x)); - sameNormalForm(empty_repl_xxaxy, f); - - // Same normal form for: - // - // (= "" (str.replace "A" x y)) - // - // (= "A" (str.replace "" y x)) - Node empty_repl_axy = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, a, x, y)); - Node eq_a_repl = d_nm->mkNode( - kind::EQUAL, a, d_nm->mkNode(kind::STRING_STRREPL, empty, y, x)); - sameNormalForm(empty_repl_axy, eq_a_repl); - - // Same normal form for: - // - // (= "" (str.replace x "A" "")) - // - // (str.prefix x "A") - Node empty_repl_a = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_STRREPL, x, a, empty)); - Node prefix_a = d_nm->mkNode(kind::STRING_PREFIX, x, a); - sameNormalForm(empty_repl_a, prefix_a); - - // Same normal form for: - // - // (= "" (str.substr x 1 2)) - // - // (<= (str.len x) 1) - Node empty_substr = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, x, one, three)); - Node leq_len_x = - d_nm->mkNode(kind::LEQ, d_nm->mkNode(kind::STRING_LENGTH, x), one); - sameNormalForm(empty_substr, leq_len_x); - - // Different normal form for: - // - // (= "" (str.substr x 0 n)) - // - // (<= n 0) - Node empty_substr_x = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, x, zero, n)); - Node leq_n = d_nm->mkNode(kind::LEQ, n, zero); - differentNormalForms(empty_substr_x, leq_n); - - // Same normal form for: - // - // (= "" (str.substr "A" 0 n)) - // - // (<= n 0) - Node empty_substr_a = d_nm->mkNode( - kind::EQUAL, empty, d_nm->mkNode(kind::STRING_SUBSTR, a, zero, n)); - sameNormalForm(empty_substr_a, leq_n); - - // Same normal form for: - // - // (= (str.++ x x a) (str.replace y (str.++ x x a) y)) - // - // (= (str.++ x x a) y) - Node eq_xxa_repl_y = d_nm->mkNode( - kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, y, xxa, y)); - Node eq_xxa_y = d_nm->mkNode(kind::EQUAL, xxa, y); - sameNormalForm(eq_xxa_repl_y, eq_xxa_y); - - // (= 
(str.++ x x a) (str.replace (str.++ x x a) "A" "B")) ---> false - Node eq_xxa_repl_xxa = d_nm->mkNode( - kind::EQUAL, xxa, d_nm->mkNode(kind::STRING_STRREPL, xxa, a, b)); - sameNormalForm(eq_xxa_repl_xxa, f); - - // Same normal form for: - // - // (= (str.replace x "A" "B") "") - // - // (= x "") - Node eq_repl = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), empty); - Node eq_x = d_nm->mkNode(kind::EQUAL, x, empty); - sameNormalForm(eq_repl, eq_x); - - { - // Same normal form for: - // - // (= (str.replace y "A" "B") "B") - // - // (= (str.replace y "B" "A") "A") - Node lhs = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, a, b), b); - Node rhs = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_STRREPL, x, b, a), a); - sameNormalForm(lhs, rhs); - } - - { - // Same normal form for: - // - // (= (str.++ x "A" y) (str.++ "A" "A" (str.substr "AAA" 0 n))) - // - // (= (str.++ y x) (str.++ (str.substr "AAA" 0 n) "A")) - Node lhs = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, x, a, y), - d_nm->mkNode(kind::STRING_CONCAT, - a, - a, - d_nm->mkNode(kind::STRING_SUBSTR, aaa, zero, n))); - Node rhs = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, x, y), - d_nm->mkNode(kind::STRING_CONCAT, - d_nm->mkNode(kind::STRING_SUBSTR, aaa, zero, n), - a)); - sameNormalForm(lhs, rhs); - } - - { - // Same normal form for: - // - // (= (str.++ "A" x) "A") - // - // (= x "") - Node lhs = - d_nm->mkNode(kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, a, x), a); - Node rhs = d_nm->mkNode(kind::EQUAL, x, empty); - sameNormalForm(lhs, rhs); - } - - { - // (= (str.++ x "A") "") ---> false - Node eq = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, a), empty); - sameNormalForm(eq, f); - } - - { - // (= (str.++ x "B") "AAA") ---> false - Node eq = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, b), aaa); - sameNormalForm(eq, f); - } - - { - // (= (str.++ x "AAA") "A") ---> 
false - Node eq = d_nm->mkNode( - kind::EQUAL, d_nm->mkNode(kind::STRING_CONCAT, x, aaa), a); - sameNormalForm(eq, f); - } - - { - // (= (str.++ "AAA" (str.substr "A" 0 n)) (str.++ x "B")) ---> false - Node eq = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode( - kind::STRING_CONCAT, - aaa, - d_nm->mkNode(kind::STRING_CONCAT, - a, - a, - d_nm->mkNode(kind::STRING_SUBSTR, x, zero, n))), - d_nm->mkNode(kind::STRING_CONCAT, x, b)); - sameNormalForm(eq, f); - } - - { - // (= (str.++ "A" (int.to.str n)) "A") -/-> false - Node eq = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode( - kind::STRING_CONCAT, a, d_nm->mkNode(kind::STRING_ITOS, n)), - a); - differentNormalForms(eq, f); - } - - { - // (= (str.++ "A" x y) (str.++ x "B" z)) --> false - Node eq = d_nm->mkNode( - kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, a, x, y), - d_nm->mkNode(kind::STRING_CONCAT, x, b, z)); - sameNormalForm(eq, f); - } - - { - // (= (str.++ "B" x y) (str.++ x "AAA" z)) --> false - Node eq = d_nm->mkNode(kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, b, x, y), - d_nm->mkNode(kind::STRING_CONCAT, x, aaa, z)); - sameNormalForm(eq, f); - } - - { - Node xrepl = d_nm->mkNode(kind::STRING_STRREPL, x, a, b); - - // Same normal form for: - // - // (= (str.++ "B" (str.replace x "A" "B") z y w) - // (str.++ z x "BA" z)) - // - // (and (= (str.++ "B" (str.replace x "A" "B") z) - // (str.++ z x "B")) - // (= (str.++ y w) (str.++ "A" z))) - Node lhs = - d_nm->mkNode(kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, b, xrepl, z, y, w), - d_nm->mkNode(kind::STRING_CONCAT, z, x, ba, z)); - Node rhs = d_nm->mkNode( - kind::AND, - d_nm->mkNode(kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, b, xrepl, z), - d_nm->mkNode(kind::STRING_CONCAT, z, x, b)), - d_nm->mkNode(kind::EQUAL, - d_nm->mkNode(kind::STRING_CONCAT, y, w), - d_nm->mkNode(kind::STRING_CONCAT, a, z))); - sameNormalForm(lhs, rhs); - } - } - - /* Calls TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, <last argument>) on small inputs and asserts its boolean result; in the cases that return true it also asserts the resulting n1 together with nb (last argument 1) or ne (last argument -1). */ void testStripConstantEndpoints() - { - TypeNode intType = d_nm->integerType(); - TypeNode strType = 
d_nm->stringType(); - - Node empty = d_nm->mkConst(::CVC4::String("")); - Node a = d_nm->mkConst(::CVC4::String("A")); - Node ab = d_nm->mkConst(::CVC4::String("AB")); - Node abc = d_nm->mkConst(::CVC4::String("ABC")); - Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); - Node bc = d_nm->mkConst(::CVC4::String("BC")); - Node c = d_nm->mkConst(::CVC4::String("C")); - Node cd = d_nm->mkConst(::CVC4::String("CD")); - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node n = d_nm->mkVar("n", intType); - - { - // stripConstantEndpoints({ "" }, { "A" }, {}, {}, 0) ---> false - std::vector n1 = {empty}; - std::vector n2 = {a}; - std::vector nb; - std::vector ne; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); - TS_ASSERT(!res); - } - - { - // stripConstantEndpoints({ "A" }, { "A", (int.to.str n) }, {}, {}, 0) - // ---> false - std::vector n1 = {a}; - std::vector n2 = {a, d_nm->mkNode(kind::STRING_ITOS, n)}; - std::vector nb; - std::vector ne; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, 0); - TS_ASSERT(!res); - } - - { - // stripConstantEndpoints({ "ABCD" }, { "C" }, {}, {}, 1) - // ---> true - // n1 is updated to { "CD" } - // nb is updated to { "AB" } - std::vector n1 = {abcd}; - std::vector n2 = {c}; - std::vector nb; - std::vector ne; - std::vector n1r = {cd}; - std::vector nbr = {ab}; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); - TS_ASSERT(res); - TS_ASSERT_EQUALS(n1, n1r); - TS_ASSERT_EQUALS(nb, nbr); - } - - { - // stripConstantEndpoints({ "ABC", x }, { "CD" }, {}, {}, 1) - // ---> true - // n1 is updated to { "C", x } - // nb is updated to { "AB" } - std::vector n1 = {abc, x}; - std::vector n2 = {cd}; - std::vector nb; - std::vector ne; - std::vector n1r = {c, x}; - std::vector nbr = {ab}; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, 1); - TS_ASSERT(res); - TS_ASSERT_EQUALS(n1, n1r); - 
TS_ASSERT_EQUALS(nb, nbr); - } - - { - // stripConstantEndpoints({ "ABC" }, { "A" }, {}, {}, -1) - // ---> true - // n1 is updated to { "A" } - // ne is updated to { "BC" } - std::vector n1 = {abc}; - std::vector n2 = {a}; - std::vector nb; - std::vector ne; - std::vector n1r = {a}; - std::vector ner = {bc}; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); - TS_ASSERT(res); - TS_ASSERT_EQUALS(n1, n1r); - TS_ASSERT_EQUALS(ne, ner); - } - - { - // stripConstantEndpoints({ x, "ABC" }, { y, "A" }, {}, {}, -1) - // ---> true - // n1 is updated to { x, "A" } - // ne is updated to { "BC" } - std::vector n1 = {x, abc}; - std::vector n2 = {y, a}; - std::vector nb; - std::vector ne; - std::vector n1r = {x, a}; - std::vector ner = {bc}; - bool res = - TheoryStringsRewriter::stripConstantEndpoints(n1, n2, nb, ne, -1); - TS_ASSERT(res); - TS_ASSERT_EQUALS(n1, n1r); - TS_ASSERT_EQUALS(ne, ner); - } - } - - /* Builds two str.in.re terms over the constant "ABC" and compares each with (str.contains x "ABC"): the first via sameNormalForm, the second via differentNormalForms. */ void testRewriteMembership() - { - TypeNode strType = d_nm->stringType(); - - std::vector vec_empty; - Node abc = d_nm->mkConst(::CVC4::String("ABC")); - Node re_abc = d_nm->mkNode(kind::STRING_TO_REGEXP, abc); - Node x = d_nm->mkVar("x", strType); - - { - // Same normal form for: - // - // (str.in.re x (re.++ (re.* re.allchar) - // (re.* re.allchar) - // (str.to.re "ABC") - // (re.* re.allchar))) - // - // (str.contains x "ABC") - Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, - d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); - Node lhs = d_nm->mkNode( - kind::STRING_IN_REGEXP, - x, - d_nm->mkNode( - kind::REGEXP_CONCAT, sig_star, sig_star, re_abc, sig_star)); - Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); - sameNormalForm(lhs, rhs); - } - - { - // Different normal forms for: - // - // (str.in.re x (re.++ (re.* re.allchar) (str.to.re "ABC"))) - // - // (str.contains x "ABC") - Node sig_star = d_nm->mkNode(kind::REGEXP_STAR, - d_nm->mkNode(kind::REGEXP_SIGMA, vec_empty)); - Node lhs = - d_nm->mkNode(kind::STRING_IN_REGEXP, - x, - 
d_nm->mkNode(kind::REGEXP_CONCAT, sig_star, re_abc)); - Node rhs = d_nm->mkNode(kind::STRING_STRCTN, x, abc); - differentNormalForms(lhs, rhs); - } - } - - /* Builds two re.++ terms from (re.* re.allchar) and str.to.re of the string variables x and y, and passes each to inNormalForm. */ void testRewriteRegexpConcat() - { - TypeNode strType = d_nm->stringType(); - - std::vector emptyArgs; - Node x = d_nm->mkVar("x", strType); - Node y = d_nm->mkVar("y", strType); - Node allStar = d_nm->mkNode(kind::REGEXP_STAR, - d_nm->mkNode(kind::REGEXP_SIGMA, emptyArgs)); - Node xReg = d_nm->mkNode(kind::STRING_TO_REGEXP, x); - Node yReg = d_nm->mkNode(kind::STRING_TO_REGEXP, y); - - { - // In normal form: - // - // (re.++ (re.* re.allchar) (re.union (str.to.re x) (str.to.re y))) - Node n = d_nm->mkNode(kind::REGEXP_CONCAT, - allStar, - d_nm->mkNode(kind::REGEXP_UNION, xReg, yReg)); - inNormalForm(n); - } - - { - // In normal form: - // - // (re.++ (str.to.re x) (re.* re.allchar)) - Node n = d_nm->mkNode(kind::REGEXP_CONCAT, xReg, allStar); - inNormalForm(n); - } - } - - private: - ExprManager* d_em; - SmtEngine* d_smt; - SmtScope* d_scope; - ExtendedRewriter* d_rewriter; - - NodeManager* d_nm; -};