From 30aca1c6289f57ec8178c20736e9a8420fd6f9f1 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 18 Jan 2022 15:03:26 -0600 Subject: [PATCH] Distinguish purification types for strings proof reconstruction (#7521) This improves the success rate for strings proof reconstruction for context-dependent simplifications. It also makes a minor modification to how EXTF inferences work. As a special case, A => B becomes A ^ ~B => false when B is already equal to false. This PR removes this behavior for the sake of consistency. (This could be explored as a more general strategy for turning facts to conflicts from any source if necessary). The strings solver applies context-dependent simplification in various ways. This ensures that the proof reconstruction is faithful for cases of congruence + rewriting instead of substitution+rewriting, which the inferences STRINGS_EXTF and STRINGS_EXTF_N rely on. This fixes 11 cases where proof reconstruction fails for string theory lemmas on Amazon benchmarks. --- src/theory/strings/extf_solver.cpp | 10 +- src/theory/strings/infer_proof_cons.cpp | 182 ++++++++++++++++++------ src/theory/strings/infer_proof_cons.h | 68 +++++++-- 3 files changed, 197 insertions(+), 63 deletions(-) diff --git a/src/theory/strings/extf_solver.cpp b/src/theory/strings/extf_solver.cpp index 07105a311..0b789c1a9 100644 --- a/src/theory/strings/extf_solver.cpp +++ b/src/theory/strings/extf_solver.cpp @@ -392,15 +392,7 @@ void ExtfSolver::checkExtfEval(int effort) { if (n.getType().isBoolean()) { - if (d_state.areEqual(n, nrc == d_true ? d_false : d_true)) - { - einfo.d_exp.push_back(nrc == d_true ? n.negate() : n); - conc = d_false; - } - else - { - conc = nrc == d_true ? n : n.negate(); - } + conc = nrc == d_true ? n : n.negate(); } else { diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp index 81da50062..1bed68dc7 100644 --- a/src/theory/strings/infer_proof_cons.cpp +++ b/src/theory/strings/infer_proof_cons.cpp @@ -164,6 +164,8 @@ void InferProofCons::convert(InferenceId infer, switch (infer) { // ========================== equal by substitution+rewriting + case InferenceId::STRINGS_EXTF: + case InferenceId::STRINGS_EXTF_N: case InferenceId::STRINGS_I_NORM_S: case InferenceId::STRINGS_I_CONST_MERGE: case InferenceId::STRINGS_I_NORM: @@ -171,23 +173,37 @@ void InferProofCons::convert(InferenceId infer, case InferenceId::STRINGS_NORMAL_FORM: case InferenceId::STRINGS_CODE_PROXY: { + PurifyType pt = PurifyType::CORE_EQ; + if (infer == InferenceId::STRINGS_EXTF + || infer == InferenceId::STRINGS_EXTF_N) + { + // since we use congruence+rewriting, and not substitution+rewriting, + // these must purify a substitution over arguments to the left hand + // side of what we are proving. + pt = PurifyType::EXTF; + } std::vector pcs = ps.d_children; Node pconc = conc; // purify core substitution proves conc from pconc if necessary, // we apply MACRO_SR_PRED_INTRO to prove pconc - if (purifyCoreSubstitution(pconc, pcs, psb, false)) + if (purifyCoreSubstitutionAndTarget(pt, pconc, pcs, psb, false)) { + Trace("strings-ipc-core") << "...purified substitution to " << pcs + << ", now apply to " << pconc << std::endl; if (psb.applyPredIntro(pconc, pcs)) { useBuffer = true; } } + else + { + Trace("strings-ipc-core") + << "...failed to purify substitution" << std::endl; + } } break; // ========================== substitution + rewriting case InferenceId::STRINGS_RE_NF_CONFLICT: - case InferenceId::STRINGS_EXTF: - case InferenceId::STRINGS_EXTF_N: case InferenceId::STRINGS_EXTF_D: case InferenceId::STRINGS_EXTF_D_N: case InferenceId::STRINGS_I_CONST_CONFLICT: @@ -196,8 +212,10 @@ void InferProofCons::convert(InferenceId infer, if (!ps.d_children.empty()) { std::vector exps(ps.d_children.begin(), ps.d_children.end() - 1); - Node src = ps.d_children[ps.d_children.size() - 1]; - if (psb.applyPredTransform(src, conc, exps)) + Node psrc = ps.d_children[ps.d_children.size() - 1]; + // we apply the substitution on the purified form to get the + // original conclusion + if (psb.applyPredTransform(psrc, conc, exps)) { useBuffer = true; } @@ -331,7 +349,8 @@ void InferProofCons::convert(InferenceId infer, Node pmainEq = mainEq; // we transform mainEq to pmainEq and then use this as the first // argument to MACRO_SR_PRED_ELIM. - if (!purifyCoreSubstitution(pmainEq, pcsr, psb, true)) + if (!purifyCoreSubstitutionAndTarget( + PurifyType::CORE_EQ, pmainEq, pcsr, psb, true)) { break; } @@ -1164,24 +1183,57 @@ std::string InferProofCons::identify() const return "strings::InferProofCons"; } -bool InferProofCons::purifyCoreSubstitution(Node& tgt, - std::vector& children, - TheoryProofStepBuffer& psb, - bool concludeTgtNew) +bool InferProofCons::purifyCoreSubstitutionAndTarget( + PurifyType pt, + Node& tgt, + std::vector& children, + TheoryProofStepBuffer& psb, + bool concludeTgtNew) { // collect the terms to purify, which are the LHS of the substitution std::unordered_set termsToPurify; + if (!purifyCoreSubstitution(children, psb, termsToPurify)) + { + return false; + } + // no need to purify, e.g. if all LHS of substituion are variables + if (termsToPurify.empty()) + { + return true; + } + // now, purify the target predicate + tgt = purifyPredicate(pt, tgt, concludeTgtNew, psb, termsToPurify); + if (tgt.isNull()) + { + Trace("strings-ipc-pure-subs") + << "...failed to purify target " << tgt << std::endl; + return false; + } + return true; +} + +bool InferProofCons::purifyCoreSubstitution( + std::vector& children, + TheoryProofStepBuffer& psb, + std::unordered_set& termsToPurify) +{ for (const Node& nc : children) { - Assert(nc.getKind() == EQUAL && nc[0].getType().isStringLike()); - termsToPurify.insert(nc[0]); + Assert(nc.getKind() == EQUAL); + if (!nc[0].isVar()) + { + termsToPurify.insert(nc[0]); + } } // now, purify each of the children of the substitution for (size_t i = 0, nchild = children.size(); i < nchild; i++) { - Node pnc = purifyCorePredicate(children[i], true, psb, termsToPurify); + Node pnc = purifyPredicate( + PurifyType::SUBS_EQ, children[i], true, psb, termsToPurify); if (pnc.isNull()) { + Trace("strings-ipc-pure-subs") + << "...failed to purify " << children[i] << std::endl; return false; } if (children[i] != pnc) @@ -1193,44 +1245,72 @@ bool InferProofCons::purifyCoreSubstitution(Node& tgt, // we now should have a substitution with only atomic terms Assert(children[i][0].getNumChildren() == 0); } - // now, purify the target predicate - tgt = purifyCorePredicate(tgt, concludeTgtNew, psb, termsToPurify); - return !tgt.isNull(); + return true; } -Node InferProofCons::purifyCorePredicate( - Node lit, - bool concludeNew, - TheoryProofStepBuffer& psb, - std::unordered_set& termsToPurify) +Node InferProofCons::purifyPredicate(PurifyType pt, + Node lit, + bool concludeNew, + TheoryProofStepBuffer& psb, + std::unordered_set& termsToPurify) { bool pol = lit.getKind() != NOT; Node atom = pol ? lit : lit[0]; - if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike()) + NodeManager* nm = NodeManager::currentNM(); + Node newLit; + if (pt == PurifyType::SUBS_EQ) { - // we only purify string (dis)equalities - return lit; + if (atom.getKind() != EQUAL) + { + Assert(false) << "Expected equality"; + return lit; + } + // purify the LHS directly, purify the RHS as a core term + newLit = nm->mkNode(EQUAL, + maybePurifyTerm(atom[0], termsToPurify), + purifyCoreTerm(atom[1], termsToPurify)); } - // purify both sides of the equality - std::vector pcs; - bool childChanged = false; - for (const Node& lc : atom) + else if (pt == PurifyType::CORE_EQ) { - Node plc = purifyCoreTerm(lc, termsToPurify); - childChanged = childChanged || plc != lc; - pcs.push_back(plc); + if (atom.getKind() != EQUAL || !atom[0].getType().isStringLike()) + { + // we only purify string (dis)equalities + return lit; + } + // purify both sides of the equality + std::vector pcs; + for (const Node& lc : atom) + { + pcs.push_back(purifyCoreTerm(lc, termsToPurify)); + } + newLit = nm->mkNode(EQUAL, pcs); } - if (!childChanged) + else if (pt == PurifyType::EXTF) { - return lit; + if (atom.getKind() == EQUAL) + { + // purify the left hand side, which should be an extended function + newLit = nm->mkNode(EQUAL, purifyApp(atom[0], termsToPurify), atom[1]); + } + else + { + // predicate case, e.g. for inferring contains + newLit = purifyApp(atom, termsToPurify); + } + } + else + { + Assert(false) << "Unknown purify type in InferProofCons::purifyPredicate"; } - NodeManager* nm = NodeManager::currentNM(); - Node newLit = nm->mkNode(EQUAL, pcs); if (!pol) { newLit = newLit.notNode(); } - Assert(lit != newLit); + if (lit == newLit) + { + // no change + return lit; + } // prove by transformation, should always succeed if (!psb.applyPredTransform( concludeNew ? lit : newLit, concludeNew ? newLit : lit, {})) @@ -1244,12 +1324,6 @@ Node InferProofCons::purifyCorePredicate( Node InferProofCons::purifyCoreTerm(Node n, std::unordered_set& termsToPurify) { - Assert(n.getType().isStringLike()); - if (n.getNumChildren() == 0) - { - return n; - } - NodeManager* nm = NodeManager::currentNM(); if (n.getKind() == STRING_CONCAT) { std::vector pcs; @@ -1257,14 +1331,34 @@ Node InferProofCons::purifyCoreTerm(Node n, { pcs.push_back(purifyCoreTerm(nc, termsToPurify)); } - return nm->mkNode(STRING_CONCAT, pcs); + return NodeManager::currentNM()->mkNode(STRING_CONCAT, pcs); + } + return maybePurifyTerm(n, termsToPurify); +} + +Node InferProofCons::purifyApp(Node n, std::unordered_set& termsToPurify) +{ + if (n.getNumChildren() == 0) + { + return n; } + std::vector pcs; + for (const Node& nc : n) + { + pcs.push_back(maybePurifyTerm(nc, termsToPurify)); + } + return NodeManager::currentNM()->mkNode(n.getKind(), pcs); +} + +Node InferProofCons::maybePurifyTerm(Node n, + std::unordered_set& termsToPurify) +{ if (termsToPurify.find(n) == termsToPurify.end()) { // did not need to purify return n; } - SkolemManager* sm = nm->getSkolemManager(); + SkolemManager* sm = NodeManager::currentNM()->getSkolemManager(); Node k = sm->mkPurifySkolem(n, "k"); return k; } diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h index 02b266fd7..6022f5443 100644 --- a/src/theory/strings/infer_proof_cons.h +++ b/src/theory/strings/infer_proof_cons.h @@ -159,6 +159,15 @@ class InferProofCons : public ProofGenerator * conclusion, or null if we were not able to construct a TRANS step. */ static Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb); + enum class PurifyType + { + // we are purifying an equality corresponding to a substitution + SUBS_EQ, + // we are purifying a (dis)equality in the core calculus + CORE_EQ, + // we are purifying a equality or predicate for extended function rewriting + EXTF + }; /** * Purify core substitution. * @@ -188,6 +197,26 @@ class InferProofCons : public ProofGenerator * in psb ensure that a proof step involving the purified substitution will * have the same net effect as a proof step using the original substitution. * + * The argument pt determines which arguments are relevant for purification. + * For core calculus (CORE_EQ) rules, examples of relevant positions are + * non-concatenation terms in positions like: + * (= * (str.++ * (str.++ * *) * *)) + * (not (= (str.++ * *) *)) + * For extended function simplification, examples of relevant positions are: + * (= (str.replace * * *) "") + * (str.contains * *) + * If we are purifying a substitution with equality, the LHS is a relevant + * position to purify, and the RHS is treated like CORE_EQ. + * + * For example, given substitution (= t ""), an example + * inference in the core calculus is: + * (= (str.++ t "A") (str.++ "B" u)) => false + * An example of an extended function inference is: + * (= (str.replace (str.substr t 0 2) t "C") (str.++ "C" f[t])) + * Note for the latter, we do not apply the substitution to + * (str.substr t 0 2). + * + * @param pt Determines the positions that are relevant for purification. * @param tgt The term we were originally going to apply the substitution to. * @param children The premises corresponding to the substitution. * @param psb The proof step buffer @@ -198,22 +227,31 @@ class InferProofCons : public ProofGenerator * children'[i] from children[i] for all i, and tgt' from tgt (or vice versa * based on concludeTgtNew). */ - static bool purifyCoreSubstitution(Node& tgt, - std::vector& children, + static bool purifyCoreSubstitutionAndTarget(PurifyType pt, + Node& tgt, + std::vector& children, + TheoryProofStepBuffer& psb, + bool concludeTgtNew = false); + /** + * Same as above, without a target. This updates termsToPurify with the + * set of LHS of the substitutions, which are terms that must be purified + * when applying the resulting substitution to a target. + */ + static bool purifyCoreSubstitution(std::vector& children, TheoryProofStepBuffer& psb, - bool concludeTgtNew = false); + std::unordered_set& termsToPurify); /** * Return the purified form of the predicate lit with respect to a set of * terms to purify, call the returned literal lit'. * If concludeNew is true, then we add a proof of lit' from lit in psb; - * otherwise we add a proof of lit from lit'. - * Note that string predicates that require purification are string - * (dis)equalities only. + * otherwise we add a proof of lit from lit'. The positions which are purified + * are configurable based on the argument pt. */ - static Node purifyCorePredicate(Node lit, - bool concludeNew, - TheoryProofStepBuffer& psb, - std::unordered_set& termsToPurify); + static Node purifyPredicate(PurifyType pt, + Node lit, + bool concludeNew, + TheoryProofStepBuffer& psb, + std::unordered_set& termsToPurify); /** * Purify term with respect to a set of terms to purify. This replaces * all terms to purify with their purification variables that occur in @@ -221,6 +259,16 @@ class InferProofCons : public ProofGenerator * children of concat or equal). */ static Node purifyCoreTerm(Node n, std::unordered_set& termsToPurify); + /** + * Purify application, which replaces each direct child nc of n with + * maybePurifyTerm(nc, termsToPurify). + */ + static Node purifyApp(Node n, std::unordered_set& termsToPurify); + /** + * Maybe purify term, which returns the skolem variable for n if it occurs + * in termsToPurify. + */ + static Node maybePurifyTerm(Node n, std::unordered_set& termsToPurify); /** the proof node manager */ ProofNodeManager* d_pnm; /** The lazy fact map */ -- 2.30.2