From 25a2af86e7beaa46a8159f87263f605818d14157 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 21 Feb 2020 10:55:04 -0600 Subject: [PATCH] Split extended functions solver in strings (#3768) --- src/CMakeLists.txt | 2 + src/theory/strings/extf_solver.cpp | 673 ++++++++++++++++++++++++++ src/theory/strings/extf_solver.h | 201 ++++++++ src/theory/strings/solver_state.cpp | 2 + src/theory/strings/solver_state.h | 4 +- src/theory/strings/theory_strings.cpp | 587 +--------------------- src/theory/strings/theory_strings.h | 130 +---- 7 files changed, 904 insertions(+), 695 deletions(-) create mode 100644 src/theory/strings/extf_solver.cpp create mode 100644 src/theory/strings/extf_solver.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1063452cb..45912a30d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -672,6 +672,8 @@ libcvc4_add_sources( theory/strings/base_solver.h theory/strings/core_solver.cpp theory/strings/core_solver.h + theory/strings/extf_solver.cpp + theory/strings/extf_solver.h theory/strings/infer_info.cpp theory/strings/infer_info.h theory/strings/inference_manager.cpp diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp new file mode 100644 index 000000000..e22528490 --- /dev/null +++ b/src/theory/strings/extf_solver.cpp @@ -0,0 +1,673 @@ +/********************* */ +/*! \file ext_solver.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of solver for extended functions of theory of strings. + **/ + +#include "theory/strings/extf_solver.h" + +#include "options/strings_options.h" +#include "theory/strings/theory_strings_preprocess.h" +#include "theory/strings/theory_strings_rewriter.h" +#include "theory/strings/theory_strings_utils.h" + +using namespace std; +using namespace CVC4::context; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +ExtfSolver::ExtfSolver(context::Context* c, + context::UserContext* u, + SolverState& s, + InferenceManager& im, + SkolemCache& skc, + BaseSolver& bs, + CoreSolver& cs, + ExtTheory* et) + : d_state(s), + d_im(im), + d_skCache(skc), + d_bsolver(bs), + d_csolver(cs), + d_extt(et), + d_preproc(&skc, u), + d_hasExtf(c, false), + d_extfInferCache(c) +{ + d_extt->addFunctionKind(kind::STRING_SUBSTR); + d_extt->addFunctionKind(kind::STRING_STRIDOF); + d_extt->addFunctionKind(kind::STRING_ITOS); + d_extt->addFunctionKind(kind::STRING_STOI); + d_extt->addFunctionKind(kind::STRING_STRREPL); + d_extt->addFunctionKind(kind::STRING_STRREPLALL); + d_extt->addFunctionKind(kind::STRING_STRCTN); + d_extt->addFunctionKind(kind::STRING_IN_REGEXP); + d_extt->addFunctionKind(kind::STRING_LEQ); + d_extt->addFunctionKind(kind::STRING_CODE); + d_extt->addFunctionKind(kind::STRING_TOLOWER); + d_extt->addFunctionKind(kind::STRING_TOUPPER); + d_extt->addFunctionKind(kind::STRING_REV); + + d_true = NodeManager::currentNM()->mkConst(true); + d_false = NodeManager::currentNM()->mkConst(false); +} + +ExtfSolver::~ExtfSolver() {} + +bool ExtfSolver::doReduction(int effort, Node n, bool& isCd) +{ + Assert(d_extfInfoTmp.find(n) != d_extfInfoTmp.end()); + if (!d_extfInfoTmp[n].d_modelActive) + { + // n is not active in the model, no need to reduce + return false; + } + // determine the effort level to process the extf at + // 0 - at assertion time, 1+ - after no other reduction is applicable + int r_effort = -1; + // polarity : 1 true, -1 false, 0 neither + int pol = 0; + Kind k = n.getKind(); + if (n.getType().isBoolean() && !d_extfInfoTmp[n].d_const.isNull()) + { + pol = d_extfInfoTmp[n].d_const.getConst() ? 1 : -1; + } + if (k == STRING_STRCTN) + { + if (pol == 1) + { + r_effort = 1; + } + else if (pol == -1) + { + if (effort == 2) + { + Node x = n[0]; + Node s = n[1]; + std::vector lexp; + Node lenx = d_state.getLength(x, lexp); + Node lens = d_state.getLength(s, lexp); + if (d_state.areEqual(lenx, lens)) + { + Trace("strings-extf-debug") + << " resolve extf : " << n + << " based on equal lengths disequality." << std::endl; + // We can reduce negative contains to a disequality when lengths are + // equal. In other words, len( x ) = len( s ) implies + // ~contains( x, s ) reduces to x != s. + if (!d_state.areDisequal(x, s)) + { + // len( x ) = len( s ) ^ ~contains( x, s ) => x != s + lexp.push_back(lenx.eqNode(lens)); + lexp.push_back(n.negate()); + Node xneqs = x.eqNode(s).negate(); + d_im.sendInference(lexp, xneqs, "NEG-CTN-EQL", true); + } + // this depends on the current assertions, so we set that this + // inference is context-dependent. + isCd = true; + return true; + } + else + { + r_effort = 2; + } + } + } + } + else + { + if (k == STRING_SUBSTR) + { + r_effort = 1; + } + else if (k != STRING_IN_REGEXP) + { + r_effort = 2; + } + } + if (effort != r_effort) + { + // not the right effort level to reduce + return false; + } + Node c_n = pol == -1 ? n.negate() : n; + Trace("strings-process-debug") + << "Process reduction for " << n << ", pol = " << pol << std::endl; + if (k == STRING_STRCTN && pol == 1) + { + Node x = n[0]; + Node s = n[1]; + // positive contains reduces to a equality + Node sk1 = + d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); + Node sk2 = + d_skCache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); + Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2))); + std::vector exp_vec; + exp_vec.push_back(n); + d_im.sendInference(d_emptyVec, exp_vec, eq, "POS-CTN", true); + Trace("strings-extf-debug") + << " resolve extf : " << n << " based on positive contain reduction." + << std::endl; + Trace("strings-red-lemma") << "Reduction (positive contains) lemma : " << n + << " => " << eq << std::endl; + // context-dependent because it depends on the polarity of n itself + isCd = true; + } + else if (k != kind::STRING_CODE) + { + NodeManager* nm = NodeManager::currentNM(); + Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF + || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL + || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER + || k == STRING_TOUPPER || k == STRING_REV); + std::vector new_nodes; + Node res = d_preproc.simplify(n, new_nodes); + Assert(res != n); + new_nodes.push_back(res.eqNode(n)); + Node nnlem = + new_nodes.size() == 1 ? new_nodes[0] : nm->mkNode(AND, new_nodes); + nnlem = Rewriter::rewrite(nnlem); + Trace("strings-red-lemma") + << "Reduction_" << effort << " lemma : " << nnlem << std::endl; + Trace("strings-red-lemma") << "...from " << n << std::endl; + d_im.sendInference(d_emptyVec, nnlem, "Reduction", true); + Trace("strings-extf-debug") + << " resolve extf : " << n << " based on reduction." << std::endl; + isCd = false; + } + return true; +} + +void ExtfSolver::checkExtfReductions(int effort) +{ + // Notice we don't make a standard call to ExtTheory::doReductions here, + // since certain optimizations like context-dependent reductions and + // stratifying effort levels are done in doReduction below. + std::vector extf = d_extt->getActive(); + Trace("strings-process") << " checking " << extf.size() << " active extf" + << std::endl; + for (const Node& n : extf) + { + Assert(!d_state.isInConflict()); + Trace("strings-process") + << " check " << n + << ", active in model=" << d_extfInfoTmp[n].d_modelActive << std::endl; + // whether the reduction was context-dependent + bool isCd = false; + bool ret = doReduction(effort, n, isCd); + if (ret) + { + d_extt->markReduced(n, isCd); + if (d_im.hasProcessed()) + { + return; + } + } + } +} + +void ExtfSolver::checkExtfEval(int effort) +{ + Trace("strings-extf-list") + << "Active extended functions, effort=" << effort << " : " << std::endl; + d_extfInfoTmp.clear(); + NodeManager* nm = NodeManager::currentNM(); + bool has_nreduce = false; + std::vector terms = d_extt->getActive(); + for (const Node& n : terms) + { + // Setup information about n, including if it is equal to a constant. + ExtfInfoTmp& einfo = d_extfInfoTmp[n]; + Node r = d_state.getRepresentative(n); + einfo.d_const = d_bsolver.getConstantEqc(r); + // Get the current values of the children of n. + // Notice that we look up the value of the direct children of n, and not + // their free variables. In other words, given a term: + // t = (str.replace "B" (str.replace x "A" "B") "C") + // we may build the explanation that: + // ((str.replace x "A" "B") = "B") => t = (str.replace "B" "B" "C") + // instead of basing this on the free variable x: + // (x = "A") => t = (str.replace "B" (str.replace "A" "A" "B") "C") + // Although both allow us to infer t = "C", it is important to use the + // first kind of inference since it ensures that its subterms have the + // expected values. Otherwise, we may in rare cases fail to realize that + // the subterm (str.replace x "A" "B") does not currently have the correct + // value, say in this example that (str.replace x "A" "B") != "B". + std::vector exp; + std::vector schildren; + bool schanged = false; + for (const Node& nc : n) + { + Node sc = getCurrentSubstitutionFor(effort, nc, exp); + schildren.push_back(sc); + schanged = schanged || sc != nc; + } + // If there is information involving the children, attempt to do an + // inference and/or mark n as reduced. + Node to_reduce; + if (schanged) + { + Node sn = nm->mkNode(n.getKind(), schildren); + Trace("strings-extf-debug") + << "Check extf " << n << " == " << sn + << ", constant = " << einfo.d_const << ", effort=" << effort << "..." + << std::endl; + einfo.d_exp.insert(einfo.d_exp.end(), exp.begin(), exp.end()); + // inference is rewriting the substituted node + Node nrc = Rewriter::rewrite(sn); + // if rewrites to a constant, then do the inference and mark as reduced + if (nrc.isConst()) + { + if (effort < 3) + { + d_extt->markReduced(n); + Trace("strings-extf-debug") + << " resolvable by evaluation..." << std::endl; + std::vector exps; + // The following optimization gets the "symbolic definition" of + // an extended term. The symbolic definition of a term t is a term + // t' where constants are replaced by their corresponding proxy + // variables. + // For example, if lsym is a proxy variable for "", then + // str.replace( lsym, lsym, lsym ) is the symbolic definition for + // str.replace( "", "", "" ). It is generally better to use symbolic + // definitions when doing cd-rewriting for the purpose of minimizing + // clauses, e.g. we infer the unit equality: + // str.replace( lsym, lsym, lsym ) == "" + // instead of making this inference multiple times: + // x = "" => str.replace( x, x, x ) == "" + // y = "" => str.replace( y, y, y ) == "" + Trace("strings-extf-debug") + << " get symbolic definition..." << std::endl; + Node nrs; + // only use symbolic definitions if option is set + if (options::stringInferSym()) + { + nrs = d_im.getSymbolicDefinition(sn, exps); + } + if (!nrs.isNull()) + { + Trace("strings-extf-debug") + << " rewrite " << nrs << "..." << std::endl; + Node nrsr = Rewriter::rewrite(nrs); + // ensure the symbolic form is not rewritable + if (nrsr != nrs) + { + // we cannot use the symbolic definition if it rewrites + Trace("strings-extf-debug") + << " symbolic definition is trivial..." << std::endl; + nrs = Node::null(); + } + } + else + { + Trace("strings-extf-debug") + << " could not infer symbolic definition." << std::endl; + } + Node conc; + if (!nrs.isNull()) + { + Trace("strings-extf-debug") + << " symbolic def : " << nrs << std::endl; + if (!d_state.areEqual(nrs, nrc)) + { + // infer symbolic unit + if (n.getType().isBoolean()) + { + conc = nrc == d_true ? nrs : nrs.negate(); + } + else + { + conc = nrs.eqNode(nrc); + } + einfo.d_exp.clear(); + } + } + else + { + if (!d_state.areEqual(n, nrc)) + { + if (n.getType().isBoolean()) + { + if (d_state.areEqual(n, nrc == d_true ? d_false : d_true)) + { + einfo.d_exp.push_back(nrc == d_true ? n.negate() : n); + conc = d_false; + } + else + { + conc = nrc == d_true ? n : n.negate(); + } + } + else + { + conc = n.eqNode(nrc); + } + } + } + if (!conc.isNull()) + { + Trace("strings-extf") + << " resolve extf : " << sn << " -> " << nrc << std::endl; + d_im.sendInference( + einfo.d_exp, conc, effort == 0 ? "EXTF" : "EXTF-N", true); + if (d_state.isInConflict()) + { + Trace("strings-extf-debug") << " conflict, return." << std::endl; + return; + } + } + } + else + { + // check if it is already equal, if so, mark as reduced. Otherwise, do + // nothing. + if (d_state.areEqual(n, nrc)) + { + Trace("strings-extf") + << " resolved extf, since satisfied by model: " << n + << std::endl; + einfo.d_modelActive = false; + } + } + } + else + { + // if this was a predicate which changed after substitution + rewriting + if (!einfo.d_const.isNull() && nrc.getType().isBoolean() && nrc != n) + { + bool pol = einfo.d_const == d_true; + Node nrcAssert = pol ? nrc : nrc.negate(); + Node nAssert = pol ? n : n.negate(); + Assert(effort < 3); + einfo.d_exp.push_back(nAssert); + Trace("strings-extf-debug") << " decomposable..." << std::endl; + Trace("strings-extf") << " resolve extf : " << sn << " -> " << nrc + << ", const = " << einfo.d_const << std::endl; + // We send inferences internal here, which may help show unsat. + // However, we do not make a determination whether n can be marked + // reduced since this argument may be circular: we may infer than n + // can be reduced to something else, but that thing may argue that it + // can be reduced to n, in theory. + d_im.sendInternalInference( + einfo.d_exp, nrcAssert, effort == 0 ? "EXTF_d" : "EXTF_d-N"); + } + to_reduce = nrc; + } + } + else + { + to_reduce = n; + } + // if not reduced + if (!to_reduce.isNull()) + { + Assert(effort < 3); + if (effort == 1) + { + Trace("strings-extf") + << " cannot rewrite extf : " << to_reduce << std::endl; + } + checkExtfInference(n, to_reduce, einfo, effort); + if (Trace.isOn("strings-extf-list")) + { + Trace("strings-extf-list") << " * " << to_reduce; + if (!einfo.d_const.isNull()) + { + Trace("strings-extf-list") << ", const = " << einfo.d_const; + } + if (n != to_reduce) + { + Trace("strings-extf-list") << ", from " << n; + } + Trace("strings-extf-list") << std::endl; + } + if (d_extt->isActive(n) && einfo.d_modelActive) + { + has_nreduce = true; + } + } + } + d_hasExtf = has_nreduce; +} + +void ExtfSolver::checkExtfInference(Node n, + Node nr, + ExtfInfoTmp& in, + int effort) +{ + if (in.d_const.isNull()) + { + return; + } + NodeManager* nm = NodeManager::currentNM(); + Trace("strings-extf-infer") << "checkExtfInference: " << n << " : " << nr + << " == " << in.d_const << std::endl; + + // add original to explanation + if (n.getType().isBoolean()) + { + // if Boolean, it's easy + in.d_exp.push_back(in.d_const.getConst() ? n : n.negate()); + } + else + { + // otherwise, must explain via base node + Node r = d_state.getRepresentative(n); + // explain using the base solver + d_bsolver.explainConstantEqc(n, r, in.d_exp); + } + + // d_extfInferCache stores whether we have made the inferences associated + // with a node n, + // this may need to be generalized if multiple inferences apply + + if (nr.getKind() == STRING_STRCTN) + { + Assert(in.d_const.isConst()); + bool pol = in.d_const.getConst(); + if ((pol && nr[1].getKind() == STRING_CONCAT) + || (!pol && nr[0].getKind() == STRING_CONCAT)) + { + // If str.contains( x, str.++( y1, ..., yn ) ), + // we may infer str.contains( x, y1 ), ..., str.contains( x, yn ) + // The following recognizes two situations related to the above reasoning: + // (1) If ~str.contains( x, yi ) holds for some i, we are in conflict, + // (2) If str.contains( x, yj ) already holds for some j, then the term + // str.contains( x, yj ) is irrelevant since it is satisfied by all models + // for str.contains( x, str.++( y1, ..., yn ) ). + + // Notice that the dual of the above reasoning also holds, i.e. + // If ~str.contains( str.++( x1, ..., xn ), y ), + // we may infer ~str.contains( x1, y ), ..., ~str.contains( xn, y ) + // This is also handled here. + if (d_extfInferCache.find(nr) == d_extfInferCache.end()) + { + d_extfInferCache.insert(nr); + + int index = pol ? 1 : 0; + std::vector children; + children.push_back(nr[0]); + children.push_back(nr[1]); + for (const Node& nrc : nr[index]) + { + children[index] = nrc; + Node conc = nm->mkNode(STRING_STRCTN, children); + conc = Rewriter::rewrite(pol ? conc : conc.negate()); + // check if it already (does not) hold + if (d_state.hasTerm(conc)) + { + if (d_state.areEqual(conc, d_false)) + { + // we are in conflict + d_im.sendInference(in.d_exp, conc, "CTN_Decompose"); + } + else if (d_extt->hasFunctionKind(conc.getKind())) + { + // can mark as reduced, since model for n implies model for conc + d_extt->markReduced(conc); + } + } + } + } + } + else + { + if (std::find(d_extfInfoTmp[nr[0]].d_ctn[pol].begin(), + d_extfInfoTmp[nr[0]].d_ctn[pol].end(), + nr[1]) + == d_extfInfoTmp[nr[0]].d_ctn[pol].end()) + { + Trace("strings-extf-debug") << " store contains info : " << nr[0] + << " " << pol << " " << nr[1] << std::endl; + // Store s (does not) contains t, since nr = (~)contains( s, t ) holds. + d_extfInfoTmp[nr[0]].d_ctn[pol].push_back(nr[1]); + d_extfInfoTmp[nr[0]].d_ctnFrom[pol].push_back(n); + // Do transistive closure on contains, e.g. + // if contains( s, t ) and ~contains( s, r ), then ~contains( t, r ). + + // The following infers new (negative) contains based on the above + // reasoning, provided that ~contains( t, r ) does not + // already hold in the current context. We test this by checking that + // contains( t, r ) is not already asserted false in the current + // context. We also handle the case where contains( t, r ) is equivalent + // to t = r, in which case we check that t != r does not already hold + // in the current context. + + // Notice that form of the above inference is enough to find + // conflicts purely due to contains predicates. For example, if we + // have only positive occurrences of contains, then no conflicts due to + // contains predicates are possible and this schema does nothing. For + // example, note that contains( s, t ) and contains( t, r ) implies + // contains( s, r ), which we could but choose not to infer. Instead, + // we prefer being lazy: only if ~contains( s, r ) appears later do we + // infer ~contains( t, r ), which suffices to show a conflict. + bool opol = !pol; + for (unsigned i = 0, size = d_extfInfoTmp[nr[0]].d_ctn[opol].size(); + i < size; + i++) + { + Node onr = d_extfInfoTmp[nr[0]].d_ctn[opol][i]; + Node concOrig = + nm->mkNode(STRING_STRCTN, pol ? nr[1] : onr, pol ? onr : nr[1]); + Node conc = Rewriter::rewrite(concOrig); + // For termination concerns, we only do the inference if the contains + // does not rewrite (and thus does not introduce new terms). + if (conc == concOrig) + { + bool do_infer = false; + conc = conc.negate(); + bool pol = conc.getKind() != NOT; + Node lit = pol ? conc : conc[0]; + if (lit.getKind() == EQUAL) + { + do_infer = pol ? !d_state.areEqual(lit[0], lit[1]) + : !d_state.areDisequal(lit[0], lit[1]); + } + else + { + do_infer = !d_state.areEqual(lit, pol ? d_true : d_false); + } + if (do_infer) + { + std::vector exp_c; + exp_c.insert(exp_c.end(), in.d_exp.begin(), in.d_exp.end()); + Node ofrom = d_extfInfoTmp[nr[0]].d_ctnFrom[opol][i]; + Assert(d_extfInfoTmp.find(ofrom) != d_extfInfoTmp.end()); + exp_c.insert(exp_c.end(), + d_extfInfoTmp[ofrom].d_exp.begin(), + d_extfInfoTmp[ofrom].d_exp.end()); + d_im.sendInference(exp_c, conc, "CTN_Trans"); + } + } + } + } + else + { + // If we already know that s (does not) contain t, then n is redundant. + // For example, if str.contains( x, y ), str.contains( z, y ), and x=z + // are asserted in the current context, then str.contains( z, y ) is + // satisfied by all models of str.contains( x, y ) ^ x=z and thus can + // be ignored. + Trace("strings-extf-debug") << " redundant." << std::endl; + d_extt->markReduced(n); + } + } + return; + } + + // If it's not a predicate, see if we can solve the equality n = c, where c + // is the constant that extended term n is equal to. + Node inferEq = nr.eqNode(in.d_const); + Node inferEqr = Rewriter::rewrite(inferEq); + Node inferEqrr = inferEqr; + if (inferEqr.getKind() == EQUAL) + { + // try to use the extended rewriter for equalities + inferEqrr = TheoryStringsRewriter::rewriteEqualityExt(inferEqr); + } + if (inferEqrr != inferEqr) + { + inferEqrr = Rewriter::rewrite(inferEqrr); + Trace("strings-extf-infer") << "checkExtfInference: " << inferEq + << " ...reduces to " << inferEqrr << std::endl; + d_im.sendInternalInference(in.d_exp, inferEqrr, "EXTF_equality_rew"); + } +} + +Node ExtfSolver::getCurrentSubstitutionFor(int effort, + Node n, + std::vector& exp) +{ + if (effort >= 3) + { + // model values + Node mv = d_state.getModel()->getRepresentative(n); + Trace("strings-subs") << " model val : " << mv << std::endl; + return mv; + } + Node nr = d_state.getRepresentative(n); + Node c = d_bsolver.explainConstantEqc(n, nr, exp); + if (!c.isNull()) + { + return c; + } + else if (effort >= 1 && n.getType().isString()) + { + Assert(effort < 3); + // normal forms + NormalForm& nfnr = d_csolver.getNormalForm(nr); + Node ns = d_csolver.getNormalString(nfnr.d_base, exp); + Trace("strings-subs") << " normal eqc : " << ns << " " << nfnr.d_base + << " " << nr << std::endl; + if (!nfnr.d_base.isNull()) + { + d_im.addToExplanation(n, nfnr.d_base, exp); + } + return ns; + } + return n; +} + +const std::map& ExtfSolver::getInfo() const +{ + return d_extfInfoTmp; +} +bool ExtfSolver::hasExtendedFunctions() const { return d_hasExtf.get(); } + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h new file mode 100644 index 000000000..e0e41bc3d --- /dev/null +++ b/src/theory/strings/extf_solver.h @@ -0,0 +1,201 @@ +/********************* */ +/*! \file ext_solver.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Solver for extended functions of theory of strings. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__EXTF_SOLVER_H +#define CVC4__THEORY__STRINGS__EXTF_SOLVER_H + +#include +#include + +#include "context/cdo.h" +#include "expr/node.h" +#include "theory/ext_theory.h" +#include "theory/strings/base_solver.h" +#include "theory/strings/core_solver.h" +#include "theory/strings/inference_manager.h" +#include "theory/strings/skolem_cache.h" +#include "theory/strings/solver_state.h" +#include "theory/strings/theory_strings_preprocess.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * Non-static information about an extended function t. This information is + * constructed and used during the check extended function evaluation + * inference schema. + * + * In the following, we refer to the "context-dependent simplified form" + * of a term t to be the result of rewriting t * sigma, where sigma is a + * derivable substitution in the current context. For example, the + * context-depdendent simplified form of contains( x++y, "a" ) given + * sigma = { x -> "" } is contains(y,"a"). + */ +class ExtfInfoTmp +{ + public: + ExtfInfoTmp() : d_modelActive(true) {} + /** + * If s is in d_ctn[true] (resp. d_ctn[false]), then contains( t, s ) + * (resp. ~contains( t, s )) holds in the current context. The vector + * d_ctnFrom is the explanation for why this holds. For example, + * if d_ctn[false][i] is "A", then d_ctnFrom[false][i] might be + * t = x ++ y AND x = "" AND y = "B". + */ + std::map > d_ctn; + std::map > d_ctnFrom; + /** + * The constant that t is entailed to be equal to, or null if none exist. + */ + Node d_const; + /** + * The explanation for why t is equal to its context-dependent simplified + * form. + */ + std::vector d_exp; + /** This flag is false if t is reduced in the model. */ + bool d_modelActive; +}; + +/** + * Extended function solver for the theory of strings. This manages extended + * functions for the theory of strings using a combination of context-dependent + * simplification (Reynolds et al CAV 2017) and lazy reductions. + */ +class ExtfSolver +{ + typedef context::CDHashSet NodeSet; + + public: + ExtfSolver(context::Context* c, + context::UserContext* u, + SolverState& s, + InferenceManager& im, + SkolemCache& skc, + BaseSolver& bs, + CoreSolver& cs, + ExtTheory* et); + ~ExtfSolver(); + + /** check extended functions evaluation + * + * This applies "context-dependent simplification" for all active extended + * function terms in this SAT context. This infers facts of the form: + * x = c => f( t1 ... tn ) = c' + * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c + * is a (tuple of) equalities that are asserted in this SAT context, and + * f( t1 ... tn ) is a term from this SAT context. + * + * For more details, this is steps 4 when effort=0 and step 6 when + * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String + * Solvers using Context-Dependent Simplification", CAV 2017. When called with + * effort=3, we apply context-dependent simplification based on model values. + */ + void checkExtfEval(int effort); + /** check extended function reductions + * + * This adds "reduction" lemmas for each active extended function in this SAT + * context. These are generally lemmas of the form: + * F[t1...tn,k] ^ f( t1 ... tn ) = k + * where f( t1 ... tn ) is an active extended function, k is a fresh constant + * and F is a formula that constrains k based on the definition of f. + * + * For more details, this is step 7 from Strategy 1 in Reynolds et al, + * CAV 2017. We stratify this in practice, where calling this with effort=1 + * reduces some of the "easier" extended functions, and effort=2 reduces + * the rest. + */ + void checkExtfReductions(int effort); + /** get preprocess module */ + StringsPreprocess* getPreprocess() { return &d_preproc; } + + /** + * Get the current substitution for term n. + * + * This method returns a term that n is currently equal to in the current + * context. It updates exp to contain an explanation of why it is currently + * equal to that term. + * + * The argument effort determines what kind of term to return, either + * a constant in the equivalence class of n (effort=0), the normal form of + * n (effort=1,2) or the model value of n (effort>=3). The latter is only + * valid at LAST_CALL effort. If a term of the above form cannot be returned, + * then n itself is returned. + */ + Node getCurrentSubstitutionFor(int effort, Node n, std::vector& exp); + /** get mapping from extended functions to their information + * + * This information is valid during a full effort check after a call to + * checkExtfEval. + */ + const std::map& getInfo() const; + /** Are there any active extended functions? */ + bool hasExtendedFunctions() const; + + private: + /** do reduction + * + * This is called when an extended function application n is not able to be + * simplified by context-depdendent simplification, and we are resorting to + * expanding n to its full semantics via a reduction. This method returns + * true if it successfully reduced n by some reduction and sets isCd to + * true if the reduction was (SAT)-context-dependent, and false otherwise. + * The argument effort has the same meaning as in checkExtfReductions. + */ + bool doReduction(int effort, Node n, bool& isCd); + /** check extended function inferences + * + * This function makes additional inferences for n that do not contribute + * to its reduction, but may help show a refutation. + * + * This function is called when the context-depdendent simplified form of + * n is nr. The argument "in" is the information object for n. The argument + * "effort" has the same meaning as the effort argument of checkExtfEval. + */ + void checkExtfInference(Node n, Node nr, ExtfInfoTmp& in, int effort); + /** The solver state object */ + SolverState& d_state; + /** The (custom) output channel of the theory of strings */ + InferenceManager& d_im; + /** cache of all skolems */ + SkolemCache& d_skCache; + /** reference to the base solver, used for certain queries */ + BaseSolver& d_bsolver; + /** reference to the core solver, used for certain queries */ + CoreSolver& d_csolver; + /** the extended theory object for the theory of strings */ + ExtTheory* d_extt; + /** preprocessing utility, for performing strings reductions */ + StringsPreprocess d_preproc; + /** Common constants */ + Node d_true; + Node d_false; + /** Empty vector */ + std::vector d_emptyVec; + /** map extended functions to the above information */ + std::map d_extfInfoTmp; + /** any non-reduced extended functions exist? */ + context::CDO d_hasExtf; + /** extended functions inferences cache */ + NodeSet d_extfInferCache; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__EXTF_SOLVER_H */ diff --git a/src/theory/strings/solver_state.cpp b/src/theory/strings/solver_state.cpp index 664b56b17..2fc49d8b7 100644 --- a/src/theory/strings/solver_state.cpp +++ b/src/theory/strings/solver_state.cpp @@ -268,6 +268,8 @@ EqcInfo* SolverState::getOrMakeEqcInfo(Node eqc, bool doMake) return nullptr; } +TheoryModel* SolverState::getModel() const { return d_valuation.getModel(); } + void SolverState::addEndpointsToEqcInfo(Node t, Node concat, Node eqc) { Assert(concat.getKind() == STRING_CONCAT diff --git a/src/theory/strings/solver_state.h b/src/theory/strings/solver_state.h index cb17e6d1b..28dfbfd09 100644 --- a/src/theory/strings/solver_state.h +++ b/src/theory/strings/solver_state.h @@ -23,6 +23,7 @@ #include "context/context.h" #include "expr/node.h" #include "options/theory_options.h" +#include "theory/theory_model.h" #include "theory/uf/equality_engine.h" #include "theory/valuation.h" @@ -169,6 +170,8 @@ class SolverState * should currently be a representative of the equality engine of this class. */ EqcInfo* getOrMakeEqcInfo(Node eqc, bool doMake = true); + /** Get pointer to the model object of the Valuation object */ + TheoryModel* getModel() const; /** add endpoints to eqc info * @@ -195,7 +198,6 @@ class SolverState void separateByLength(const std::vector& n, std::vector >& cols, std::vector& lts); - private: /** Pointer to the SAT context object used by the theory of strings. */ context::Context* d_context; diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 23a41a0bb..8a3c32fd8 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -74,18 +74,19 @@ TheoryStrings::TheoryStrings(context::Context* c, d_im(*this, c, u, d_state, d_sk_cache, out), d_pregistered_terms_cache(u), d_registered_terms_cache(u), - d_preproc(&d_sk_cache, u), - d_extf_infer_cache(c), d_functionsTerms(c), - d_has_extf(c, false), d_has_str_code(false), d_bsolver(c, u, d_state, d_im), d_csolver(c, u, d_state, d_im, d_sk_cache, d_bsolver), + d_esolver(nullptr), d_regexp_solver(*this, d_state, d_im, c, u), d_stringsFmf(c, u, valuation, d_sk_cache), d_strategy_init(false) { setupExtTheory(); + ExtTheory* extt = getExtTheory(); + d_esolver.reset(new ExtfSolver( + c, u, d_state, d_im, d_sk_cache, d_bsolver, d_csolver, extt)); getExtTheory()->addFunctionKind(kind::STRING_SUBSTR); getExtTheory()->addFunctionKind(kind::STRING_STRIDOF); getExtTheory()->addFunctionKind(kind::STRING_ITOS); @@ -221,172 +222,12 @@ bool TheoryStrings::getCurrentSubstitution( int effort, std::vector< Node >& var for( unsigned i=0; igetCurrentSubstitutionFor(effort, n, exp[n]); subs.push_back(s); } return true; } -Node TheoryStrings::getCurrentSubstitutionFor(int effort, - Node n, - std::vector& exp) -{ - if (effort >= 3) - { - // model values - Node mv = d_valuation.getModel()->getRepresentative(n); - Trace("strings-subs") << " model val : " << mv << std::endl; - return mv; - } - Node nr = d_state.getRepresentative(n); - Node c = d_bsolver.explainConstantEqc(n, nr, exp); - if (!c.isNull()) - { - return c; - } - else if (effort >= 1 && n.getType().isString()) - { - Assert(effort < 3); - // normal forms - NormalForm& nfnr = d_csolver.getNormalForm(nr); - Node ns = d_csolver.getNormalString(nfnr.d_base, exp); - Trace("strings-subs") << " normal eqc : " << ns << " " << nfnr.d_base - << " " << nr << std::endl; - if (!nfnr.d_base.isNull()) - { - d_im.addToExplanation(n, nfnr.d_base, exp); - } - return ns; - } - return n; -} - -bool TheoryStrings::doReduction(int effort, Node n, bool& isCd) -{ - Assert(d_extf_info_tmp.find(n) != d_extf_info_tmp.end()); - if (!d_extf_info_tmp[n].d_model_active) - { - // n is not active in the model, no need to reduce - return false; - } - //determine the effort level to process the extf at - // 0 - at assertion time, 1+ - after no other reduction is applicable - int r_effort = -1; - // polarity : 1 true, -1 false, 0 neither - int pol = 0; - Kind k = n.getKind(); - if (n.getType().isBoolean() && !d_extf_info_tmp[n].d_const.isNull()) - { - pol = d_extf_info_tmp[n].d_const.getConst() ? 1 : -1; - } - if (k == STRING_STRCTN) - { - if (pol == 1) - { - r_effort = 1; - } - else if (pol == -1) - { - if (effort == 2) - { - Node x = n[0]; - Node s = n[1]; - std::vector lexp; - Node lenx = d_state.getLength(x, lexp); - Node lens = d_state.getLength(s, lexp); - if (d_state.areEqual(lenx, lens)) - { - Trace("strings-extf-debug") - << " resolve extf : " << n - << " based on equal lengths disequality." << std::endl; - // We can reduce negative contains to a disequality when lengths are - // equal. In other words, len( x ) = len( s ) implies - // ~contains( x, s ) reduces to x != s. - if (!d_state.areDisequal(x, s)) - { - // len( x ) = len( s ) ^ ~contains( x, s ) => x != s - lexp.push_back(lenx.eqNode(lens)); - lexp.push_back(n.negate()); - Node xneqs = x.eqNode(s).negate(); - d_im.sendInference(lexp, xneqs, "NEG-CTN-EQL", true); - } - // this depends on the current assertions, so we set that this - // inference is context-dependent. - isCd = true; - return true; - } - else - { - r_effort = 2; - } - } - } - } - else - { - if (k == STRING_SUBSTR) - { - r_effort = 1; - } - else if (k != STRING_IN_REGEXP) - { - r_effort = 2; - } - } - if (effort != r_effort) - { - // not the right effort level to reduce - return false; - } - Node c_n = pol == -1 ? n.negate() : n; - Trace("strings-process-debug") - << "Process reduction for " << n << ", pol = " << pol << std::endl; - if (k == STRING_STRCTN && pol == 1) - { - Node x = n[0]; - Node s = n[1]; - // positive contains reduces to a equality - Node sk1 = - d_sk_cache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_PRE, "sc1"); - Node sk2 = - d_sk_cache.mkSkolemCached(x, s, SkolemCache::SK_FIRST_CTN_POST, "sc2"); - Node eq = Rewriter::rewrite(x.eqNode(utils::mkNConcat(sk1, s, sk2))); - std::vector exp_vec; - exp_vec.push_back(n); - d_im.sendInference(d_empty_vec, exp_vec, eq, "POS-CTN", true); - Trace("strings-extf-debug") - << " resolve extf : " << n << " based on positive contain reduction." - << std::endl; - Trace("strings-red-lemma") << "Reduction (positive contains) lemma : " << n - << " => " << eq << std::endl; - // context-dependent because it depends on the polarity of n itself - isCd = true; - } - else if (k != kind::STRING_CODE) - { - NodeManager* nm = NodeManager::currentNM(); - Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF - || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL - || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER || k == STRING_REV); - std::vector new_nodes; - Node res = d_preproc.simplify(n, new_nodes); - Assert(res != n); - new_nodes.push_back(res.eqNode(n)); - Node nnlem = - new_nodes.size() == 1 ? new_nodes[0] : nm->mkNode(AND, new_nodes); - nnlem = Rewriter::rewrite(nnlem); - Trace("strings-red-lemma") - << "Reduction_" << effort << " lemma : " << nnlem << std::endl; - Trace("strings-red-lemma") << "...from " << n << std::endl; - d_im.sendInference(d_empty_vec, nnlem, "Reduction", true); - Trace("strings-extf-debug") - << " resolve extf : " << n << " based on reduction." << std::endl; - isCd = false; - } - return true; -} - ///////////////////////////////////////////////////////////////////////////// // NOTIFICATIONS ///////////////////////////////////////////////////////////////////////////// @@ -880,36 +721,9 @@ void TheoryStrings::check(Effort e) { bool TheoryStrings::needsCheckLastEffort() { if( options::stringGuessModel() ){ - return d_has_extf.get(); - }else{ - return false; - } -} - -void TheoryStrings::checkExtfReductions( int effort ) { - // Notice we don't make a standard call to ExtTheory::doReductions here, - // since certain optimizations like context-dependent reductions and - // stratifying effort levels are done in doReduction below. - std::vector< Node > extf = getExtTheory()->getActive(); - Trace("strings-process") << " checking " << extf.size() << " active extf" - << std::endl; - for( unsigned i=0; i terms = getExtTheory()->getActive(); - for (const Node& n : terms) - { - // Setup information about n, including if it is equal to a constant. - ExtfInfoTmp& einfo = d_extf_info_tmp[n]; - Node r = d_state.getRepresentative(n); - einfo.d_const = d_bsolver.getConstantEqc(r); - // Get the current values of the children of n. - // Notice that we look up the value of the direct children of n, and not - // their free variables. In other words, given a term: - // t = (str.replace "B" (str.replace x "A" "B") "C") - // we may build the explanation that: - // ((str.replace x "A" "B") = "B") => t = (str.replace "B" "B" "C") - // instead of basing this on the free variable x: - // (x = "A") => t = (str.replace "B" (str.replace "A" "A" "B") "C") - // Although both allow us to infer t = "C", it is important to use the - // first kind of inference since it ensures that its subterms have the - // expected values. Otherwise, we may in rare cases fail to realize that - // the subterm (str.replace x "A" "B") does not currently have the correct - // value, say in this example that (str.replace x "A" "B") != "B". - std::vector exp; - std::vector schildren; - bool schanged = false; - for (const Node& nc : n) - { - Node sc = getCurrentSubstitutionFor(effort, nc, exp); - schildren.push_back(sc); - schanged = schanged || sc != nc; - } - // If there is information involving the children, attempt to do an - // inference and/or mark n as reduced. - Node to_reduce; - if (schanged) - { - Node sn = nm->mkNode(n.getKind(), schildren); - Trace("strings-extf-debug") - << "Check extf " << n << " == " << sn - << ", constant = " << einfo.d_const << ", effort=" << effort << "..." - << std::endl; - einfo.d_exp.insert(einfo.d_exp.end(), exp.begin(), exp.end()); - // inference is rewriting the substituted node - Node nrc = Rewriter::rewrite( sn ); - //if rewrites to a constant, then do the inference and mark as reduced - if( nrc.isConst() ){ - if( effort<3 ){ - getExtTheory()->markReduced( n ); - Trace("strings-extf-debug") << " resolvable by evaluation..." << std::endl; - std::vector< Node > exps; - // The following optimization gets the "symbolic definition" of - // an extended term. The symbolic definition of a term t is a term - // t' where constants are replaced by their corresponding proxy - // variables. - // For example, if lsym is a proxy variable for "", then - // str.replace( lsym, lsym, lsym ) is the symbolic definition for - // str.replace( "", "", "" ). It is generally better to use symbolic - // definitions when doing cd-rewriting for the purpose of minimizing - // clauses, e.g. we infer the unit equality: - // str.replace( lsym, lsym, lsym ) == "" - // instead of making this inference multiple times: - // x = "" => str.replace( x, x, x ) == "" - // y = "" => str.replace( y, y, y ) == "" - Trace("strings-extf-debug") << " get symbolic definition..." << std::endl; - Node nrs; - // only use symbolic definitions if option is set - if (options::stringInferSym()) - { - nrs = d_im.getSymbolicDefinition(sn, exps); - } - if( !nrs.isNull() ){ - Trace("strings-extf-debug") << " rewrite " << nrs << "..." << std::endl; - Node nrsr = Rewriter::rewrite(nrs); - // ensure the symbolic form is not rewritable - if (nrsr != nrs) - { - // we cannot use the symbolic definition if it rewrites - Trace("strings-extf-debug") << " symbolic definition is trivial..." << std::endl; - nrs = Node::null(); - } - }else{ - Trace("strings-extf-debug") << " could not infer symbolic definition." << std::endl; - } - Node conc; - if( !nrs.isNull() ){ - Trace("strings-extf-debug") << " symbolic def : " << nrs << std::endl; - if (!d_state.areEqual(nrs, nrc)) - { - //infer symbolic unit - if( n.getType().isBoolean() ){ - conc = nrc==d_true ? nrs : nrs.negate(); - }else{ - conc = nrs.eqNode( nrc ); - } - einfo.d_exp.clear(); - } - }else{ - if (!d_state.areEqual(n, nrc)) - { - if( n.getType().isBoolean() ){ - if (d_state.areEqual(n, nrc == d_true ? d_false : d_true)) - { - einfo.d_exp.push_back(nrc == d_true ? n.negate() : n); - conc = d_false; - } - else - { - conc = nrc==d_true ? n : n.negate(); - } - }else{ - conc = n.eqNode( nrc ); - } - } - } - if( !conc.isNull() ){ - Trace("strings-extf") << " resolve extf : " << sn << " -> " << nrc << std::endl; - d_im.sendInference( - einfo.d_exp, conc, effort == 0 ? "EXTF" : "EXTF-N", true); - if (d_state.isInConflict()) - { - Trace("strings-extf-debug") << " conflict, return." << std::endl; - return; - } - } - }else{ - //check if it is already equal, if so, mark as reduced. Otherwise, do nothing. - if (d_state.areEqual(n, nrc)) - { - Trace("strings-extf") << " resolved extf, since satisfied by model: " << n << std::endl; - einfo.d_model_active = false; - } - } - } - else - { - // if this was a predicate which changed after substitution + rewriting - if (!einfo.d_const.isNull() && nrc.getType().isBoolean() && nrc != n) - { - bool pol = einfo.d_const == d_true; - Node nrcAssert = pol ? nrc : nrc.negate(); - Node nAssert = pol ? n : n.negate(); - Assert(effort < 3); - einfo.d_exp.push_back(nAssert); - Trace("strings-extf-debug") << " decomposable..." << std::endl; - Trace("strings-extf") << " resolve extf : " << sn << " -> " << nrc - << ", const = " << einfo.d_const << std::endl; - // We send inferences internal here, which may help show unsat. - // However, we do not make a determination whether n can be marked - // reduced since this argument may be circular: we may infer than n - // can be reduced to something else, but that thing may argue that it - // can be reduced to n, in theory. - d_im.sendInternalInference( - einfo.d_exp, nrcAssert, effort == 0 ? "EXTF_d" : "EXTF_d-N"); - } - to_reduce = nrc; - } - } - else - { - to_reduce = n; - } - //if not reduced - if( !to_reduce.isNull() ){ - Assert(effort < 3); - if( effort==1 ){ - Trace("strings-extf") << " cannot rewrite extf : " << to_reduce << std::endl; - } - checkExtfInference(n, to_reduce, einfo, effort); - if( Trace.isOn("strings-extf-list") ){ - Trace("strings-extf-list") << " * " << to_reduce; - if (!einfo.d_const.isNull()) - { - Trace("strings-extf-list") << ", const = " << einfo.d_const; - } - if( n!=to_reduce ){ - Trace("strings-extf-list") << ", from " << n; - } - Trace("strings-extf-list") << std::endl; - } - if (getExtTheory()->isActive(n) && einfo.d_model_active) - { - has_nreduce = true; - } - } - } - d_has_extf = has_nreduce; -} - -void TheoryStrings::checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int effort ){ - if (in.d_const.isNull()) - { - return; - } - NodeManager* nm = NodeManager::currentNM(); - Trace("strings-extf-infer") << "checkExtfInference: " << n << " : " << nr - << " == " << in.d_const << std::endl; - - // add original to explanation - if (n.getType().isBoolean()) - { - // if Boolean, it's easy - in.d_exp.push_back(in.d_const.getConst() ? n : n.negate()); - } - else - { - // otherwise, must explain via base node - Node r = d_state.getRepresentative(n); - // explain using the base solver - d_bsolver.explainConstantEqc(n, r, in.d_exp); - } - - // d_extf_infer_cache stores whether we have made the inferences associated - // with a node n, - // this may need to be generalized if multiple inferences apply - - if (nr.getKind() == STRING_STRCTN) - { - Assert(in.d_const.isConst()); - bool pol = in.d_const.getConst(); - if ((pol && nr[1].getKind() == STRING_CONCAT) - || (!pol && nr[0].getKind() == STRING_CONCAT)) - { - // If str.contains( x, str.++( y1, ..., yn ) ), - // we may infer str.contains( x, y1 ), ..., str.contains( x, yn ) - // The following recognizes two situations related to the above reasoning: - // (1) If ~str.contains( x, yi ) holds for some i, we are in conflict, - // (2) If str.contains( x, yj ) already holds for some j, then the term - // str.contains( x, yj ) is irrelevant since it is satisfied by all models - // for str.contains( x, str.++( y1, ..., yn ) ). - - // Notice that the dual of the above reasoning also holds, i.e. - // If ~str.contains( str.++( x1, ..., xn ), y ), - // we may infer ~str.contains( x1, y ), ..., ~str.contains( xn, y ) - // This is also handled here. - if (d_extf_infer_cache.find(nr) == d_extf_infer_cache.end()) - { - d_extf_infer_cache.insert(nr); - - int index = pol ? 1 : 0; - std::vector children; - children.push_back(nr[0]); - children.push_back(nr[1]); - for (const Node& nrc : nr[index]) - { - children[index] = nrc; - Node conc = nm->mkNode(STRING_STRCTN, children); - conc = Rewriter::rewrite(pol ? conc : conc.negate()); - // check if it already (does not) hold - if (d_state.hasTerm(conc)) - { - if (d_state.areEqual(conc, d_false)) - { - // we are in conflict - d_im.sendInference(in.d_exp, conc, "CTN_Decompose"); - } - else if (getExtTheory()->hasFunctionKind(conc.getKind())) - { - // can mark as reduced, since model for n implies model for conc - getExtTheory()->markReduced(conc); - } - } - } - } - } - else - { - if (std::find(d_extf_info_tmp[nr[0]].d_ctn[pol].begin(), - d_extf_info_tmp[nr[0]].d_ctn[pol].end(), - nr[1]) - == d_extf_info_tmp[nr[0]].d_ctn[pol].end()) - { - Trace("strings-extf-debug") << " store contains info : " << nr[0] - << " " << pol << " " << nr[1] << std::endl; - // Store s (does not) contains t, since nr = (~)contains( s, t ) holds. - d_extf_info_tmp[nr[0]].d_ctn[pol].push_back(nr[1]); - d_extf_info_tmp[nr[0]].d_ctn_from[pol].push_back(n); - // Do transistive closure on contains, e.g. - // if contains( s, t ) and ~contains( s, r ), then ~contains( t, r ). - - // The following infers new (negative) contains based on the above - // reasoning, provided that ~contains( t, r ) does not - // already hold in the current context. We test this by checking that - // contains( t, r ) is not already asserted false in the current - // context. We also handle the case where contains( t, r ) is equivalent - // to t = r, in which case we check that t != r does not already hold - // in the current context. - - // Notice that form of the above inference is enough to find - // conflicts purely due to contains predicates. For example, if we - // have only positive occurrences of contains, then no conflicts due to - // contains predicates are possible and this schema does nothing. For - // example, note that contains( s, t ) and contains( t, r ) implies - // contains( s, r ), which we could but choose not to infer. Instead, - // we prefer being lazy: only if ~contains( s, r ) appears later do we - // infer ~contains( t, r ), which suffices to show a conflict. - bool opol = !pol; - for (unsigned i = 0, size = d_extf_info_tmp[nr[0]].d_ctn[opol].size(); - i < size; - i++) - { - Node onr = d_extf_info_tmp[nr[0]].d_ctn[opol][i]; - Node concOrig = - nm->mkNode(STRING_STRCTN, pol ? nr[1] : onr, pol ? onr : nr[1]); - Node conc = Rewriter::rewrite(concOrig); - // For termination concerns, we only do the inference if the contains - // does not rewrite (and thus does not introduce new terms). - if (conc == concOrig) - { - bool do_infer = false; - conc = conc.negate(); - bool pol = conc.getKind() != NOT; - Node lit = pol ? conc : conc[0]; - if (lit.getKind() == EQUAL) - { - do_infer = pol ? !d_state.areEqual(lit[0], lit[1]) - : !d_state.areDisequal(lit[0], lit[1]); - } - else - { - do_infer = !d_state.areEqual(lit, pol ? d_true : d_false); - } - if (do_infer) - { - std::vector exp_c; - exp_c.insert(exp_c.end(), in.d_exp.begin(), in.d_exp.end()); - Node ofrom = d_extf_info_tmp[nr[0]].d_ctn_from[opol][i]; - Assert(d_extf_info_tmp.find(ofrom) != d_extf_info_tmp.end()); - exp_c.insert(exp_c.end(), - d_extf_info_tmp[ofrom].d_exp.begin(), - d_extf_info_tmp[ofrom].d_exp.end()); - d_im.sendInference(exp_c, conc, "CTN_Trans"); - } - } - } - } - else - { - // If we already know that s (does not) contain t, then n is redundant. - // For example, if str.contains( x, y ), str.contains( z, y ), and x=z - // are asserted in the current context, then str.contains( z, y ) is - // satisfied by all models of str.contains( x, y ) ^ x=z and thus can - // be ignored. - Trace("strings-extf-debug") << " redundant." << std::endl; - getExtTheory()->markReduced(n); - } - } - return; - } - - // If it's not a predicate, see if we can solve the equality n = c, where c - // is the constant that extended term n is equal to. - Node inferEq = nr.eqNode(in.d_const); - Node inferEqr = Rewriter::rewrite(inferEq); - Node inferEqrr = inferEqr; - if (inferEqr.getKind() == EQUAL) - { - // try to use the extended rewriter for equalities - inferEqrr = TheoryStringsRewriter::rewriteEqualityExt(inferEqr); - } - if (inferEqrr != inferEqr) - { - inferEqrr = Rewriter::rewrite(inferEqrr); - Trace("strings-extf-infer") << "checkExtfInference: " << inferEq - << " ...reduces to " << inferEqrr << std::endl; - d_im.sendInternalInference(in.d_exp, inferEqrr, "EXTF_equality_rew"); - } -} - void TheoryStrings::checkRegisterTermsPreNormalForm() { const std::vector& seqc = d_bsolver.getStringEqc(); @@ -1822,7 +1268,8 @@ Node TheoryStrings::ppRewrite(TNode atom) { if( !options::stringLazyPreproc() ){ //eager preprocess here std::vector< Node > new_nodes; - Node ret = d_preproc.processAssertion( atom, new_nodes ); + StringsPreprocess* p = d_esolver->getPreprocess(); + Node ret = p->processAssertion(atom, new_nodes); if( ret!=atom ){ Trace("strings-ppr") << " rewrote " << atom << " -> " << ret << ", with " << new_nodes.size() << " lemmas." << std::endl; for( unsigned i=0; icheckExtfEval(effort); break; case CHECK_CYCLES: d_csolver.checkCycles(); break; case CHECK_FLAT_FORMS: d_csolver.checkFlatForms(); break; case CHECK_REGISTER_TERMS_PRE_NF: checkRegisterTermsPreNormalForm(); break; @@ -1879,7 +1326,7 @@ void TheoryStrings::runInferStep(InferStep s, int effort) case CHECK_CODES: checkCodes(); break; case CHECK_LENGTH_EQC: d_csolver.checkLengthsEqc(); break; case CHECK_REGISTER_TERMS_NF: checkRegisterTermsNormalForms(); break; - case CHECK_EXTF_REDUCTION: checkExtfReductions(effort); break; + case CHECK_EXTF_REDUCTION: d_esolver->checkExtfReductions(effort); break; case CHECK_MEMBERSHIP: checkMemberships(); break; case CHECK_CARDINALITY: checkCardinality(); break; default: Unreachable(); break; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 67b7482ca..55852490f 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -25,6 +25,7 @@ #include "expr/node_trie.h" #include "theory/strings/base_solver.h" #include "theory/strings/core_solver.h" +#include "theory/strings/extf_solver.h" #include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" @@ -34,7 +35,6 @@ #include "theory/strings/skolem_cache.h" #include "theory/strings/solver_state.h" #include "theory/strings/strings_fmf.h" -#include "theory/strings/theory_strings_preprocess.h" #include "theory/theory.h" #include "theory/uf/equality_engine.h" @@ -121,18 +121,6 @@ class TheoryStrings : public Theory { std::vector& vars, std::vector& subs, std::map >& exp) override; - //--------------------------for checkExtfReductions - /** do reduction - * - * This is called when an extended function application n is not able to be - * simplified by context-depdendent simplification, and we are resorting to - * expanding n to its full semantics via a reduction. This method returns - * true if it successfully reduced n by some reduction and sets isCd to - * true if the reduction was (SAT)-context-dependent, and false otherwise. - * The argument effort has the same meaning as in checkExtfReductions. - */ - bool doReduction(int effort, Node n, bool& isCd); - //--------------------------end for checkExtfReductions // NotifyClass for equality engine class NotifyClass : public eq::EqualityEngineNotify { @@ -230,26 +218,8 @@ class TheoryStrings : public Theory { // preReg cache NodeSet d_pregistered_terms_cache; NodeSet d_registered_terms_cache; - /** preprocessing utility, for performing strings reductions */ - StringsPreprocess d_preproc; - // extended functions inferences cache - NodeSet d_extf_infer_cache; std::vector< Node > d_empty_vec; private: - /** - * Get the current substitution for term n. - * - * This method returns a term that n is currently equal to in the current - * context. It updates exp to contain an explanation of why it is currently - * equal to that term. - * - * The argument effort determines what kind of term to return, either - * a constant in the equivalence class of n (effort=0), the normal form of - * n (effort=1,2) or the model value of n (effort>=3). The latter is only - * valid at LAST_CALL effort. If a term of the above form cannot be returned, - * then n itself is returned. - */ - Node getCurrentSubstitutionFor(int effort, Node n, std::vector& exp); std::map< Node, Node > d_eqc_to_len_term; @@ -277,8 +247,6 @@ private: /** All the function terms that the theory has seen */ context::CDList d_functionsTerms; private: - //any non-reduced extended functions exist - context::CDO< bool > d_has_extf; /** have we asserted any str.code terms? */ bool d_has_str_code; // static information about extf @@ -293,57 +261,6 @@ private: /** cache of all skolems */ SkolemCache d_sk_cache; - //--------------------------for checkExtfEval - /** - * Non-static information about an extended function t. This information is - * constructed and used during the check extended function evaluation - * inference schema. - * - * In the following, we refer to the "context-dependent simplified form" - * of a term t to be the result of rewriting t * sigma, where sigma is a - * derivable substitution in the current context. For example, the - * context-depdendent simplified form of contains( x++y, "a" ) given - * sigma = { x -> "" } is contains(y,"a"). - */ - class ExtfInfoTmp - { - public: - ExtfInfoTmp() : d_model_active(true) {} - /** - * If s is in d_ctn[true] (resp. d_ctn[false]), then contains( t, s ) - * (resp. ~contains( t, s )) holds in the current context. The vector - * d_ctn_from is the explanation for why this holds. For example, - * if d_ctn[false][i] is "A", then d_ctn_from[false][i] might be - * t = x ++ y AND x = "" AND y = "B". - */ - std::map > d_ctn; - std::map > d_ctn_from; - /** - * The constant that t is entailed to be equal to, or null if none exist. - */ - Node d_const; - /** - * The explanation for why t is equal to its context-dependent simplified - * form. - */ - std::vector d_exp; - /** This flag is false if t is reduced in the model. */ - bool d_model_active; - }; - /** map extended functions to the above information */ - std::map d_extf_info_tmp; - /** check extended function inferences - * - * This function makes additional inferences for n that do not contribute - * to its reduction, but may help show a refutation. - * - * This function is called when the context-depdendent simplified form of - * n is nr. The argument "in" is the information object for n. The argument - * "effort" has the same meaning as the effort argument of checkExtfEval. - */ - void checkExtfInference(Node n, Node nr, ExtfInfoTmp& in, int effort); - //--------------------------end for checkExtfEval - private: void addCarePairs(TNodeTrie* t1, TNodeTrie* t2, @@ -369,8 +286,6 @@ private: void eqNotifyPostMerge(TNode t1, TNode t2); /** called when two equivalence classes are made disequal */ void eqNotifyDisequal(TNode t1, TNode t2, TNode reason); - /** get preprocess */ - StringsPreprocess* getPreprocess() { return &d_preproc; } protected: /** compute care graph */ @@ -423,6 +338,11 @@ private: * with length constraints. */ CoreSolver d_csolver; + /** + * Extended function solver, responsible for reductions and simplifications + * involving extended string functions. + */ + std::unique_ptr d_esolver; /** regular expression solver module */ RegExpSolver d_regexp_solver; /** regular expression elimination module */ @@ -449,21 +369,6 @@ private: private: //-----------------------inference steps - /** check extended functions evaluation - * - * This applies "context-dependent simplification" for all active extended - * function terms in this SAT context. This infers facts of the form: - * x = c => f( t1 ... tn ) = c' - * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c - * is a (tuple of) equalities that are asserted in this SAT context, and - * f( t1 ... tn ) is a term from this SAT context. - * - * For more details, this is steps 4 when effort=0 and step 6 when - * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String - * Solvers using Context-Dependent Simplification", CAV 2017. When called with - * effort=3, we apply context-dependent simplification based on model values. - */ - void checkExtfEval(int effort); /** check register terms pre-normal forms * * This calls registerTerm(n,2) on all non-congruent strings in the @@ -480,15 +385,6 @@ private: * str.code(x) == -1 V str.code(x) != str.code(y) V x == y */ void checkCodes(); - /** check lengths for equivalence classes - * - * This inference schema adds lemmas of the form: - * E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) ) - * where [r1, ..., rn] is the normal form of the equivalence class containing - * x. This schema is not required for correctness but experimentally has - * shown to be helpful. - */ - void checkLengthsEqc(); /** check register terms for normal forms * * This calls registerTerm(str.++(t1, ..., tn ), 3) on the normal forms @@ -496,20 +392,6 @@ private: * there does not exist a term of the form str.len(si) in the current context. */ void checkRegisterTermsNormalForms(); - /** check extended function reductions - * - * This adds "reduction" lemmas for each active extended function in this SAT - * context. These are generally lemmas of the form: - * F[t1...tn,k] ^ f( t1 ... tn ) = k - * where f( t1 ... tn ) is an active extended function, k is a fresh constant - * and F is a formula that constrains k based on the definition of f. - * - * For more details, this is step 7 from Strategy 1 in Reynolds et al, - * CAV 2017. We stratify this in practice, where calling this with effort=1 - * reduces some of the "easier" extended functions, and effort=2 reduces - * the rest. - */ - void checkExtfReductions(int effort); /** check regular expression memberships * * This checks the satisfiability of all regular expression memberships -- 2.30.2