From 9815b2b391c2fa708188da81b162bb98931c5d14 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 23 Nov 2021 16:47:09 -0600 Subject: [PATCH] Enable model-based reduction technique for strings (#7680) This changes our default strategy for deferring (some) reductions until after the model is constructed. It introduces the option `--strings-mbr` (model-based reduction) which is enabled by default. When using model-based reductions, only *negatIve* contains and negative memberships are deferred for reduction/unfolding until LAST_CALL effort, where a candidate model is available. These steps are performed only if the constraints are not already satisfied in the model. The intuition is that negative contains/membership are the *most* expensive constraints to process and are moreover the *least* likely to be false in the model. It makes a few fixes to the extended/RE solvers: - 2 kinds of inferences in `checkExtfEval` should not be performed for substitutions based on models - The regular expression solver should not use inclusion tests to justify reduction of memberships when the basis for the reduction is an unfolded membership, due to the reasoning being potentially cyclic. This led to a bogus model on a regression when the new strategy was enabled. It also does minor refactoring of those solvers that was required for implementing the new policy. This branch is +446-1 on SMT-LIB, with all gains coming on "sat" benchmarks, mostly from pyex. It also solves 2 previously unsolved challenge Amazon benchmarks quickly. --- src/options/strings_options.toml | 8 +- src/theory/strings/extf_solver.cpp | 134 ++++---- src/theory/strings/extf_solver.h | 6 + src/theory/strings/regexp_solver.cpp | 441 ++++++++++++++------------ src/theory/strings/regexp_solver.h | 11 +- src/theory/strings/strategy.cpp | 11 +- src/theory/strings/theory_strings.cpp | 12 +- 7 files changed, 353 insertions(+), 270 deletions(-) diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index c32c3adbb..caebcec3a 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -115,12 +115,12 @@ name = "Strings Theory" help = "minimize explanations for prefix of normal forms in strings" [[option]] - name = "stringGuessModel" + name = "stringModelBasedReduction" category = "regular" - long = "strings-guess-model" + long = "strings-mbr" type = "bool" - default = "false" - help = "use model guessing to avoid string extended function reductions" + default = "true" + help = "use models to avoid reductions for extended functions that introduce quantified formulas" [[option]] name = "regExpElim" diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index cbd67f232..52b0a6907 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -80,11 +80,12 @@ ExtfSolver::~ExtfSolver() {} bool ExtfSolver::doReduction(int effort, Node n) { - Assert(d_extfInfoTmp.find(n) != d_extfInfoTmp.end()); - if (!d_extfInfoTmp[n].d_modelActive) + Trace("strings-extf-debug") + << "doReduction " << n << ", effort " << effort << std::endl; + if (!isActiveInModel(n)) { // n is not active in the model, no need to reduce - Trace("strings-extf-debug") << "...skip due to model active" << std::endl; + Trace("strings-extf-debug") << "...skip due to model active" << std::endl; return false; } if (d_reduced.find(n)!=d_reduced.end()) @@ -93,9 +94,6 @@ bool ExtfSolver::doReduction(int effort, Node n) Trace("strings-extf-debug") << "...skip due to reduced" << std::endl; return false; } - // determine the effort level to process the extf at - // 0 - at assertion time, 1+ - after no other reduction is applicable - int r_effort = -1; // polarity : 1 true, -1 false, 0 neither int pol = 0; Kind k = n.getKind(); @@ -103,67 +101,65 @@ bool ExtfSolver::doReduction(int effort, Node n) { pol = d_extfInfoTmp[n].d_const.getConst() ? 1 : -1; } - if (k == STRING_CONTAINS) + // determine if it is the right effort + if (k == STRING_SUBSTR || (k == STRING_CONTAINS && pol == 1)) { - if (pol == 1) + // we reduce these semi-eagerly, at effort 1 + if (effort != 1) { - r_effort = 1; + return false; } - else if (pol == -1) + } + else if (k == STRING_CONTAINS && pol == -1) + { + // negative contains reduces at level 2, or 3 if guessing model + int reffort = options().strings.stringModelBasedReduction ? 3 : 2; + if (effort == reffort) { - if (effort == 2) + Node x = n[0]; + Node s = n[1]; + std::vector lexp; + Node lenx = d_state.getLength(x, lexp); + Node lens = d_state.getLength(s, lexp); + if (d_state.areEqual(lenx, lens)) { - Node x = n[0]; - Node s = n[1]; - std::vector lexp; - Node lenx = d_state.getLength(x, lexp); - Node lens = d_state.getLength(s, lexp); - if (d_state.areEqual(lenx, lens)) - { - Trace("strings-extf-debug") - << " resolve extf : " << n - << " based on equal lengths disequality." << std::endl; - // We can reduce negative contains to a disequality when lengths are - // equal. In other words, len( x ) = len( s ) implies - // ~contains( x, s ) reduces to x != s. - if (!d_state.areDisequal(x, s)) - { - // len( x ) = len( s ) ^ ~contains( x, s ) => x != s - lexp.push_back(lenx.eqNode(lens)); - lexp.push_back(n.negate()); - Node xneqs = x.eqNode(s).negate(); - d_im.sendInference( - lexp, xneqs, InferenceId::STRINGS_CTN_NEG_EQUAL, false, true); - } - // this depends on the current assertions, so this - // inference is context-dependent - d_extt.markReduced(n, ExtReducedId::STRINGS_NEG_CTN_DEQ, true); - return true; - } - else + Trace("strings-extf-debug") + << " resolve extf : " << n + << " based on equal lengths disequality." << std::endl; + // We can reduce negative contains to a disequality when lengths are + // equal. In other words, len( x ) = len( s ) implies + // ~contains( x, s ) reduces to x != s. + if (!d_state.areDisequal(x, s)) { - r_effort = 2; + // len( x ) = len( s ) ^ ~contains( x, s ) => x != s + lexp.push_back(lenx.eqNode(lens)); + lexp.push_back(n.negate()); + Node xneqs = x.eqNode(s).negate(); + d_im.sendInference( + lexp, xneqs, InferenceId::STRINGS_CTN_NEG_EQUAL, false, true); } + // this depends on the current assertions, so this + // inference is context-dependent + d_extt.markReduced(n, ExtReducedId::STRINGS_NEG_CTN_DEQ, true); + return true; } } + else + { + return false; + } } - else if (k == STRING_SUBSTR) - { - r_effort = 1; - } - else if (k == SEQ_UNIT) + else if (k == SEQ_UNIT || k == STRING_IN_REGEXP || k == STRING_TO_CODE + || (k == STRING_CONTAINS && pol == 0)) { - // never necessary to reduce seq.unit + // never necessary to reduce seq.unit. str.to_code or str.in_re here. + // also, we do not reduce str.contains that are preregistered but not + // asserted (pol=0). return false; } - else if (k != STRING_IN_REGEXP) - { - r_effort = 2; - } - if (effort != r_effort) + else if (effort != 2) { - Trace("strings-extf-debug") << "...skip due to effort" << std::endl; - // not the right effort level to reduce + // all other operators reduce at level 2 return false; } Node c_n = pol == -1 ? n.negate() : n; @@ -190,7 +186,7 @@ bool ExtfSolver::doReduction(int effort, Node n) // context-dependent because it depends on the polarity of n itself d_extt.markReduced(n, ExtReducedId::STRINGS_POS_CTN, true); } - else if (k != kind::STRING_TO_CODE) + else { NodeManager* nm = NodeManager::currentNM(); Assert(k == STRING_SUBSTR || k == STRING_UPDATE || k == STRING_CONTAINS @@ -198,7 +194,8 @@ bool ExtfSolver::doReduction(int effort, Node n) || k == STRING_STOI || k == STRING_REPLACE || k == STRING_REPLACE_ALL || k == SEQ_NTH || k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL || k == STRING_LEQ - || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV); + || k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV) + << "Unknown reduction: " << k; std::vector new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); @@ -302,6 +299,9 @@ void ExtfSolver::checkExtfEval(int effort) // if rewrites to a constant, then do the inference and mark as reduced if (nrc.isConst()) { + // at effort=3, our substitution is from the model, and we don't do + // inferences based on the model, instead we check whether the + // cosntraint is already equal to its expected value below. if (effort < 3) { d_extt.markReduced(n, ExtReducedId::STRINGS_SR_CONST); @@ -412,15 +412,15 @@ void ExtfSolver::checkExtfEval(int effort) } reduced = true; } - else + else if (effort < 3) { // if this was a predicate which changed after substitution + rewriting + // We only do this before models are constructed (effort<3) if (!einfo.d_const.isNull() && nrc.getType().isBoolean() && nrc != n) { bool pol = einfo.d_const == d_true; Node nrcAssert = pol ? nrc : nrc.negate(); Node nAssert = pol ? n : n.negate(); - Assert(effort < 3); einfo.d_exp.push_back(nAssert); Trace("strings-extf-debug") << " decomposable..." << std::endl; Trace("strings-extf") << " resolve extf : " << sn << " -> " << nrc @@ -446,15 +446,18 @@ void ExtfSolver::checkExtfEval(int effort) && inferProcessed.find(n) == inferProcessed.end()) { inferProcessed.insert(n); - Assert(effort < 3); if (effort == 1) { Trace("strings-extf") << " cannot rewrite extf : " << to_reduce << std::endl; } // we take to_reduce to be the (partially) reduced version of n, which - // is justified by the explanation in einfo. - checkExtfInference(n, to_reduce, einfo, effort); + // is justified by the explanation in einfo. We only do this if we are + // not based on the model (effort<3). + if (effort < 3) + { + checkExtfInference(n, to_reduce, einfo, effort); + } if (Trace.isOn("strings-extf-list")) { Trace("strings-extf-list") << " * " << to_reduce; @@ -719,6 +722,17 @@ std::vector ExtfSolver::getActive(Kind k) const return d_extt.getActive(k); } +bool ExtfSolver::isActiveInModel(Node n) const +{ + std::map::const_iterator it = d_extfInfoTmp.find(n); + if (it == d_extfInfoTmp.end()) + { + Assert(false) << "isActiveInModel: Expected extf info for " << n; + return true; + } + return it->second.d_modelActive; +} + bool StringsExtfCallback::getCurrentSubstitution( int effort, const std::vector& vars, diff --git a/src/theory/strings/extf_solver.h b/src/theory/strings/extf_solver.h index 2a9ca2d45..d8837f2ed 100644 --- a/src/theory/strings/extf_solver.h +++ b/src/theory/strings/extf_solver.h @@ -155,6 +155,12 @@ class ExtfSolver : protected EnvObj * context (see ExtTheory::getActive). */ std::vector getActive(Kind k) const; + /** + * Return true if n is active in the model. If this method returns false, + * then n is already satisfied in the model (its value in the model is + * the same as its representative in the equality engine). + */ + bool isActiveInModel(Node n) const; //---------------------------------- end information about ExtTheory /** * Print the relevant information regarding why we have a model, return as a diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp index 04de0ea44..24ce64842 100644 --- a/src/theory/strings/regexp_solver.cpp +++ b/src/theory/strings/regexp_solver.cpp @@ -62,8 +62,10 @@ Node RegExpSolver::mkAnd(Node c1, Node c2) return NodeManager::currentNM()->mkNode(AND, c1, c2); } -void RegExpSolver::checkMemberships() +void RegExpSolver::checkMemberships(int effort) { + Trace("regexp-process") << "Checking Memberships, effort = " << effort + << " ... " << std::endl; // add the memberships std::vector mems = d_esolver.getActive(STRING_IN_REGEXP); // maps representatives to regular expression memberships in that class @@ -90,236 +92,260 @@ void RegExpSolver::checkMemberships() << " irrelevant (non-asserted) membership : " << n << std::endl; } } - check(assertedMems); + if (effort == 0) + { + // First check for conflict. We do this only if effort is 0, otherwise + // we have already run these checks in this SAT context. + if (checkInclInter(assertedMems)) + { + return; + } + } + checkUnfold(assertedMems, effort); } -void RegExpSolver::check(const std::map >& mems) +bool RegExpSolver::checkInclInter( + const std::map >& mems) { - bool addedLemma = false; - bool changed = false; - std::vector processed; - - Trace("regexp-process") << "Checking Memberships ... " << std::endl; + Trace("regexp-process") << "Checking inclusion/intersection ... " + << std::endl; for (const std::pair >& mr : mems) { + // copy the vector because it is modified in the call below std::vector mems2 = mr.second; Trace("regexp-process") << "Memberships(" << mr.first << ") = " << mr.second << std::endl; if (!checkEqcInclusion(mems2)) { // conflict discovered, return - return; + return true; } if (!checkEqcIntersect(mems2)) { // conflict discovered, return - return; + return true; } } + Trace("regexp-debug") << "... No Intersect Conflict in Memberships" + << std::endl; + return false; +} - Trace("regexp-debug") - << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma - << std::endl; - if (!addedLemma) +void RegExpSolver::checkUnfold(const std::map >& mems, + int effort) +{ + Trace("regexp-process") << "Checking unfold ... " << std::endl; + bool addedLemma = false; + std::vector processed; + + // get all memberships + std::map allMems; + for (const std::pair >& mr : mems) { - // get all memberships - std::map allMems; - for (const std::pair >& mr : mems) + for (const Node& m : mr.second) { - for (const Node& m : mr.second) - { - allMems[m] = mr.first; - } + allMems[m] = mr.first; } + } - NodeManager* nm = NodeManager::currentNM(); - // representatives of strings that are the LHS of positive memberships that - // we unfolded - std::unordered_set repUnfold; - // check positive (e=0), then negative (e=1) memberships - for (unsigned e = 0; e < 2; e++) + NodeManager* nm = NodeManager::currentNM(); + // representatives of strings that are the LHS of positive memberships that + // we unfolded + std::unordered_set repUnfold; + // Check positive (e=0), then negative (e=1) memberships. If we are doing + // model-based reductions, we process positive ones at effort=0, and negative + // ones at effort=3. + bool mbr = options().strings.stringModelBasedReduction; + size_t startE = mbr ? (effort > 0 ? 1 : 0) : 0; + size_t endE = mbr ? (effort > 0 ? 2 : 1) : 2; + for (size_t e = startE; e < endE; e++) + { + for (const std::pair& mp : allMems) { - for (const std::pair& mp : allMems) + Node assertion = mp.first; + Node rep = mp.second; + bool polarity = assertion.getKind() != NOT; + if (polarity != (e == 0)) + { + continue; + } + // check regular expression membership + Trace("regexp-debug") + << "Check : " << assertion << " " + << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) << " " + << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) + << std::endl; + if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() + || d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) + { + continue; + } + Trace("strings-regexp") + << "We have regular expression assertion : " << assertion + << std::endl; + Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; + Assert(atom == rewrite(atom)); + if (effort > 0 && !d_esolver.isActiveInModel(atom)) { - Node assertion = mp.first; - Node rep = mp.second; - // check regular expression membership - Trace("regexp-debug") - << "Check : " << assertion << " " - << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) - << " " - << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) - << std::endl; - if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() - || d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) - { - continue; - } Trace("strings-regexp") - << "We have regular expression assertion : " << assertion - << std::endl; - Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; - Assert(atom == rewrite(atom)); - bool polarity = assertion.getKind() != NOT; - if (polarity != (e == 0)) - { - continue; - } - bool flag = true; - Node x = atom[0]; - Node r = atom[1]; - Assert(rep == d_state.getRepresentative(x)); - // The following code takes normal forms into account for the purposes - // of simplifying a regular expression membership x in R. For example, - // if x = "A" in the current context, then we may be interested in - // reasoning about ( x in R ) * { x -> "A" }. Say we update the - // membership to nx in R', then: - // - nfexp => ( x in R ) <=> nx in R' - // - rnfexp => R = R' - // We use these explanations below as assumptions on inferences when - // appropriate. Notice that for inferring conflicts and tautologies, - // we use the normal form of x always. This is because we always want to - // discover conflicts/tautologies whenever possible. - // For inferences based on regular expression unfolding, we do not use - // the normal form of x. The reason is that it is better to unfold - // regular expression memberships in a context-indepedent manner, - // that is, not taking into account the current normal form of x, since - // this ensures these lemmas are still relevant after backtracking. - std::vector nfexp; - std::vector rnfexp; - // The normal form of x is stored in nx, while x is left unchanged. - Node nx = x; - if (!x.isConst()) - { - nx = d_csolver.getNormalString(x, nfexp); - } - // If r is not a constant regular expression, we update it based on - // normal forms, which may concretize its variables. - if (!d_regexp_opr.checkConstRegExp(r)) - { - r = getNormalSymRegExp(r, rnfexp); - nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); - changed = true; - } - Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " - << nx << " IN " << r << std::endl; - if (nx != x || changed) + << "...ignore since inactive in model" << std::endl; + continue; + } + Node x = atom[0]; + Node r = atom[1]; + Assert(rep == d_state.getRepresentative(x)); + // The following code takes normal forms into account for the purposes + // of simplifying a regular expression membership x in R. For example, + // if x = "A" in the current context, then we may be interested in + // reasoning about ( x in R ) * { x -> "A" }. Say we update the + // membership to nx in R', then: + // - nfexp => ( x in R ) <=> nx in R' + // - rnfexp => R = R' + // We use these explanations below as assumptions on inferences when + // appropriate. Notice that for inferring conflicts and tautologies, + // we use the normal form of x always. This is because we always want to + // discover conflicts/tautologies whenever possible. + // For inferences based on regular expression unfolding, we do not use + // the normal form of x. The reason is that it is better to unfold + // regular expression memberships in a context-indepedent manner, + // that is, not taking into account the current normal form of x, since + // this ensures these lemmas are still relevant after backtracking. + std::vector nfexp; + std::vector rnfexp; + // The normal form of x is stored in nx, while x is left unchanged. + Node nx = x; + if (!x.isConst()) + { + nx = d_csolver.getNormalString(x, nfexp); + } + // If r is not a constant regular expression, we update it based on + // normal forms, which may concretize its variables. + bool changed = false; + if (!d_regexp_opr.checkConstRegExp(r)) + { + r = getNormalSymRegExp(r, rnfexp); + nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); + changed = true; + } + Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " + << nx << " IN " << r << std::endl; + if (nx != x || changed) + { + // We rewrite the membership nx IN r. + Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); + Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; + if (tmp.isConst()) { - // We rewrite the membership nx IN r. - Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); - Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; - if (tmp.isConst()) + if (tmp.getConst() == polarity) { - if (tmp.getConst() == polarity) - { - // it is satisfied in this SAT context - d_regexp_ccached.insert(assertion); - continue; - } - else - { - // we have a conflict - std::vector iexp = nfexp; - std::vector noExplain; - iexp.push_back(assertion); - noExplain.push_back(assertion); - Node conc = Node::null(); - d_im.sendInference( - iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); - addedLemma = true; - break; - } + // it is satisfied in this SAT context + d_regexp_ccached.insert(assertion); + continue; + } + else + { + // we have a conflict + std::vector iexp = nfexp; + std::vector noExplain; + iexp.push_back(assertion); + noExplain.push_back(assertion); + Node conc = Node::null(); + d_im.sendInference( + iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); + addedLemma = true; + break; } } - if (e == 1 && repUnfold.find(rep) != repUnfold.end()) - { - // do not unfold negative memberships of strings that have new - // positive unfoldings. For example: - // x in ("A")* ^ NOT x in ("B")* - // We unfold x = "A" ++ x' only. The intution here is that positive - // unfoldings lead to stronger constraints (equalities are stronger - // than disequalities), and are easier to check. - continue; - } - if (polarity) + } + if (e == 1 && repUnfold.find(rep) != repUnfold.end()) + { + // do not unfold negative memberships of strings that have new + // positive unfoldings. For example: + // x in ("A")* ^ NOT x in ("B")* + // We unfold x = "A" ++ x' only. The intution here is that positive + // unfoldings lead to stronger constraints (equalities are stronger + // than disequalities), and are easier to check. + continue; + } + bool doSimplify = true; + if (polarity) + { + doSimplify = checkPDerivative(x, r, atom, addedLemma, rnfexp); + } + else + { + if (!options().strings.stringExp) { - flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); + throw LogicException( + "Strings Incomplete (due to Negative Membership) by default, " + "try --strings-exp option."); } - else + } + if (doSimplify) + { + // check if the term is atomic + Trace("strings-regexp") + << "Unroll/simplify membership of atomic term " << rep << std::endl; + // if so, do simple unrolling + Trace("strings-regexp") << "Simplify on " << atom << std::endl; + Node conc = d_regexp_opr.simplify(atom, polarity); + Trace("strings-regexp") << "...finished, got " << conc << std::endl; + // if simplifying successfully generated a lemma + if (!conc.isNull()) { - if (!options().strings.stringExp) + std::vector iexp; + std::vector noExplain; + iexp.push_back(assertion); + noExplain.push_back(assertion); + Assert(atom.getKind() == STRING_IN_REGEXP); + if (polarity) { - throw LogicException( - "Strings Incomplete (due to Negative Membership) by default, " - "try --strings-exp option."); + d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); } - } - if (flag) - { - // check if the term is atomic - Trace("strings-regexp") - << "Unroll/simplify membership of atomic term " << rep - << std::endl; - // if so, do simple unrolling - Trace("strings-regexp") << "Simplify on " << atom << std::endl; - Node conc = d_regexp_opr.simplify(atom, polarity); - Trace("strings-regexp") << "...finished, got " << conc << std::endl; - // if simplifying successfully generated a lemma - if (!conc.isNull()) + else { - std::vector iexp; - std::vector noExplain; - iexp.push_back(assertion); - noExplain.push_back(assertion); - Assert(atom.getKind() == STRING_IN_REGEXP); - if (polarity) - { - d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); - } - else - { - d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); - } - InferenceId inf = - polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; - // in very rare cases, we may find out that the unfolding lemma - // for a membership is equivalent to true, in spite of the RE - // not being rewritten to true. - if (d_im.sendInference(iexp, noExplain, conc, inf)) - { - addedLemma = true; - if (e == 0) - { - // Remember that we have unfolded a membership for x - // notice that we only do this here, after we have definitely - // added a lemma. - repUnfold.insert(rep); - } - } - processed.push_back(assertion); + d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); } - else + InferenceId inf = polarity ? InferenceId::STRINGS_RE_UNFOLD_POS + : InferenceId::STRINGS_RE_UNFOLD_NEG; + // in very rare cases, we may find out that the unfolding lemma + // for a membership is equivalent to true, in spite of the RE + // not being rewritten to true. + if (d_im.sendInference(iexp, noExplain, conc, inf)) { - // otherwise we are incomplete - d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); + addedLemma = true; + if (e == 0) + { + // Remember that we have unfolded a membership for x + // notice that we only do this here, after we have definitely + // added a lemma. + repUnfold.insert(rep); + } } + processed.push_back(assertion); } - if (d_state.isInConflict()) + else { - break; + // otherwise we are incomplete + d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); } } + if (d_state.isInConflict()) + { + break; + } } } + if (addedLemma) { if (!d_state.isInConflict()) { - for (unsigned i = 0; i < processed.size(); i++) + for (const Node& p : processed) { Trace("strings-regexp") - << "...add " << processed[i] << " to u-cache." << std::endl; - d_regexp_ucached.insert(processed[i]); + << "...add " << p << " to u-cache." << std::endl; + d_regexp_ucached.insert(p); } } } @@ -350,27 +376,48 @@ bool RegExpSolver::checkEqcInclusion(std::vector& mems) bool m2Neg = m2.getKind() == NOT; Node m2Lit = m2Neg ? m2[0] : m2; - // Both regular expression memberships have the same polarity if (m1Neg == m2Neg) { - if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) + // Check whether the RE in membership m1 contains the one in m2, if + // so then m1 can be marked reduced if positive polarity, m2 if + // negative polarity. + // Notice that we do not do this if the non-reduced membership has + // already been unfolded, since memberships may reduce to other + // memberships that are included in the original, thus making the + // justification for the reduction cyclic. For example, to reduce: + // (not (str.in_re x (re.++ (re.* R1) R2))) + // We may rely on justifying this by the fact that (writing x[i:j] for + // substring) either: + // (not (str.in_re x[:0] (re.* R1))) + // (not (str.in_re x[0:] R2)) + // The first is trivially satisfied, the second is equivalent to + // (not (str.in_re x R2)) + // where R2 is included in (re.++ (re.* R1) R2)). However, we cannot + // mark the latter as reduced. + bool basisUnfolded = + d_regexp_ucached.find(m1Neg ? m1 : m2) != d_regexp_ucached.end(); + if (!basisUnfolded) { - if (m1Neg) - { - // ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> - // mark ~str.in.re(x, R2) as reduced - d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); - remove.insert(m2); - } - else + // Both regular expression memberships have positive polarity + if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) { - // str.in.re(x, R1) includes str.in.re(x, R2) ---> - // mark str.in.re(x, R1) as reduced - d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); - remove.insert(m1); + if (m1Neg) + { + // ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> + // mark ~str.in.re(x, R2) as reduced + d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); + remove.insert(m2); + } + else + { + // str.in.re(x, R1) includes str.in.re(x, R2) ---> + // mark str.in.re(x, R1) as reduced + d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); + remove.insert(m1); - // We don't need to process m1 anymore - break; + // We don't need to process m1 anymore + break; + } } } } diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h index d6c7f517b..f60d104f1 100644 --- a/src/theory/strings/regexp_solver.h +++ b/src/theory/strings/regexp_solver.h @@ -65,7 +65,7 @@ class RegExpSolver : protected EnvObj * for Regular Membership and Length Constraints over Unbounded Strings", * FroCoS 2015. */ - void checkMemberships(); + void checkMemberships(int effort); private: /** check @@ -78,8 +78,15 @@ class RegExpSolver : protected EnvObj * The argument mems maps representative string terms r to memberships of the * form (t in R) or ~(t in R), where t = r currently holds in the equality * engine of the theory of strings. + * + * We check in two phases: + * (1) checkInclInter which checks if there are conflicts due to quick + * inclusion/intersection testing. This method returns true if a conflict is + * discovered. + * (2) checkUnfold, which unfolds regular expression memberships as necessary */ - void check(const std::map>& mems); + bool checkInclInter(const std::map>& mems); + void checkUnfold(const std::map>& mems, int effort); /** * Check memberships in equivalence class for regular expression * inclusion. diff --git a/src/theory/strings/strategy.cpp b/src/theory/strings/strategy.cpp index 3bd3ef6a0..7338b99fd 100644 --- a/src/theory/strings/strategy.cpp +++ b/src/theory/strings/strategy.cpp @@ -130,19 +130,22 @@ void Strategy::initializeStrategy() { addStrategyStep(CHECK_LENGTH_EQC); } - if (options::stringExp() && !options::stringGuessModel()) + if (options::stringExp()) { addStrategyStep(CHECK_EXTF_REDUCTION, 2); } addStrategyStep(CHECK_MEMBERSHIP); addStrategyStep(CHECK_CARDINALITY); step_end[Theory::EFFORT_FULL] = d_infer_steps.size() - 1; - if (options::stringExp() && options::stringGuessModel()) + if (options::stringModelBasedReduction()) { step_begin[Theory::EFFORT_LAST_CALL] = d_infer_steps.size(); - // these two steps are run in parallel - addStrategyStep(CHECK_EXTF_REDUCTION, 2, false); addStrategyStep(CHECK_EXTF_EVAL, 3); + if (options::stringExp()) + { + addStrategyStep(CHECK_EXTF_REDUCTION, 3); + } + addStrategyStep(CHECK_MEMBERSHIP, 3); step_end[Theory::EFFORT_LAST_CALL] = d_infer_steps.size() - 1; } // set the beginning/ending ranges diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index caeb8065e..7e79190e6 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -149,6 +149,9 @@ void TheoryStrings::finishInit() d_equalityEngine->addFunctionKind(kind::STRING_TOLOWER, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_TOUPPER, eagerEval); d_equalityEngine->addFunctionKind(kind::STRING_REV, eagerEval); + + // memberships are not relevant for model building + d_valuation.setIrrelevantKind(kind::STRING_IN_REGEXP); } std::string TheoryStrings::identify() const @@ -723,9 +726,12 @@ void TheoryStrings::postCheck(Effort e) } bool TheoryStrings::needsCheckLastEffort() { - if (options().strings.stringGuessModel) + if (options().strings.stringModelBasedReduction) { - return d_esolver.hasExtendedFunctions(); + bool hasExtf = d_esolver.hasExtendedFunctions(); + Trace("strings-process") + << "needsCheckLastEffort: hasExtf = " << hasExtf << std::endl; + return hasExtf; } return false; } @@ -1115,7 +1121,7 @@ void TheoryStrings::runInferStep(InferStep s, int effort) case CHECK_LENGTH_EQC: d_csolver.checkLengthsEqc(); break; case CHECK_REGISTER_TERMS_NF: checkRegisterTermsNormalForms(); break; case CHECK_EXTF_REDUCTION: d_esolver.checkExtfReductions(effort); break; - case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(); break; + case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(effort); break; case CHECK_CARDINALITY: d_bsolver.checkCardinality(); break; default: Unreachable(); break; } -- 2.30.2