From 1a97c19443833604d57f1453a1bebfe0714d3d8e Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 7 Oct 2020 07:23:53 -0500 Subject: [PATCH] (proof-new) Add the strings proof constructor (#4903) This is the method for converting strings InferInfo into instructions for building ProofNode. Notice that this is done as a standalone module, and thus the theory of strings uses Inferences only, not PfRule. The next step will be to integrate this utility into InferenceManager. --- src/CMakeLists.txt | 2 + src/theory/strings/infer_info.h | 23 + src/theory/strings/infer_proof_cons.cpp | 1027 +++++++++++++++++++++++ src/theory/strings/infer_proof_cons.h | 135 +++ src/theory/strings/sequences_stats.cpp | 3 + src/theory/strings/sequences_stats.h | 5 + 6 files changed, 1195 insertions(+) create mode 100644 src/theory/strings/infer_proof_cons.cpp create mode 100644 src/theory/strings/infer_proof_cons.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7366d6eb2..f70bdde3e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -816,6 +816,8 @@ libcvc4_add_sources( theory/strings/eqc_info.h theory/strings/infer_info.cpp theory/strings/infer_info.h + theory/strings/infer_proof_cons.cpp + theory/strings/infer_proof_cons.h theory/strings/inference_manager.cpp theory/strings/inference_manager.h theory/strings/normal_form.cpp diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h index e5ee204a9..13e9a3f64 100644 --- a/src/theory/strings/infer_info.h +++ b/src/theory/strings/infer_info.h @@ -35,6 +35,13 @@ namespace strings { * Note: The order in this enum matters in certain cases (e.g. inferences * related to normal forms), inferences that come first are generally * preferred. + * + * Notice that an inference is intentionally distinct from PfRule. An + * inference captures *why* we performed a reasoning step, and a PfRule + * rule captures *what* reasoning step was used. For instance, the inference + * LEN_SPLIT translates to PfRule::SPLIT. The use of stats on inferences allows + * us to know that we performed N splits (PfRule::SPLIT) because we wanted + * to split on lengths for string equalities (Inference::LEN_SPLIT). */ enum class Inference : uint32_t { @@ -344,6 +351,22 @@ enum LengthStatus * An inference. This is a class to track an unprocessed call to either * send a fact, lemma, or conflict that is waiting to be asserted to the * equality engine or sent on the output channel. + * + * For the sake of proofs, the antecedants in InferInfo have a particular + * ordering for many of the core strings rules, which is expected by + * InferProofCons for constructing proofs of F_CONST, F_UNIFY, N_CONST, etc. + * which apply to a pair of string terms t and s. At a high level, the ordering + * expected in d_ant is: + * (1) (multiple) literals that explain why t and s have the same prefix/suffix, + * (2) t = s, + * (3) (optionally) a length constraint. + * For example, say we have: + * { x ++ y ++ v1 = z ++ w ++ v2, x = z ++ u, u = "", len(y) = len(w) } + * We can conclude y = w by the N_UNIFY rule from the left side. The antecedant + * has the following form: + * - (prefix up to y/w equal) x = z ++ u, u = "", + * - (main equality) x ++ y ++ v1 = z ++ w ++ v2, + * - (length constraint) len(y) = len(w). */ class InferInfo { diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp new file mode 100644 index 000000000..c5275cc43 --- /dev/null +++ b/src/theory/strings/infer_proof_cons.cpp @@ -0,0 +1,1027 @@ +/********************* */ +/*! 
\file infer_proof_cons.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of inference to proof conversion + **/ + +#include "theory/strings/infer_proof_cons.h" + +#include "expr/skolem_manager.h" +#include "options/smt_options.h" +#include "options/strings_options.h" +#include "theory/builtin/proof_checker.h" +#include "theory/rewriter.h" +#include "theory/strings/regexp_operation.h" +#include "theory/strings/theory_strings_utils.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +InferProofCons::InferProofCons(context::Context* c, + ProofNodeManager* pnm, + SequencesStatistics& statistics) + : d_pnm(pnm), d_lazyFactMap(c), d_statistics(statistics) +{ + Assert(d_pnm != nullptr); +} + +void InferProofCons::notifyFact(const InferInfo& ii) +{ + Node fact = ii.d_conc; + Trace("strings-ipc-debug") + << "InferProofCons::notifyFact: " << ii << std::endl; + if (d_lazyFactMap.find(fact) != d_lazyFactMap.end()) + { + Trace("strings-ipc-debug") << "...duplicate!" << std::endl; + return; + } + Node symFact = CDProof::getSymmFact(fact); + if (!symFact.isNull() && d_lazyFactMap.find(symFact) != d_lazyFactMap.end()) + { + Trace("strings-ipc-debug") << "...duplicate (sym)!" << std::endl; + return; + } + std::shared_ptr iic = std::make_shared(ii); + d_lazyFactMap.insert(ii.d_conc, iic); +} + +void InferProofCons::convert(Inference infer, + bool isRev, + Node conc, + const std::vector& exp, + ProofStep& ps, + TheoryProofStepBuffer& psb, + bool& useBuffer) +{ + // by default, don't use the buffer + useBuffer = false; + // Must flatten children with respect to AND to be ready to explain. + // We store the index where each flattened vector begins, since some + // explanations are grouped together using AND. + std::vector startExpIndex; + for (const Node& ec : exp) + { + // store the index in the flattened vector + startExpIndex.push_back(ps.d_children.size()); + utils::flattenOp(AND, ec, ps.d_children); + } + // debug print + if (Trace.isOn("strings-ipc-debug")) + { + Trace("strings-ipc-debug") << "InferProofCons::convert: " << infer + << (isRev ? 
" :rev " : " ") << conc << std::endl; + for (const Node& ec : exp) + { + Trace("strings-ipc-debug") << " e: " << ec << std::endl; + } + } + // try to find a set of proof steps to incorporate into the buffer + psb.clear(); + NodeManager* nm = NodeManager::currentNM(); + Node nodeIsRev = nm->mkConst(isRev); + switch (infer) + { + // ========================== equal by substitution+rewriting + case Inference::I_NORM_S: + case Inference::I_CONST_MERGE: + case Inference::I_NORM: + case Inference::LEN_NORM: + case Inference::NORMAL_FORM: + case Inference::CODE_PROXY: + { + ps.d_args.push_back(conc); + // will attempt this rule + ps.d_rule = PfRule::MACRO_SR_PRED_INTRO; + } + break; + // ========================== substitution + rewriting + case Inference::RE_NF_CONFLICT: + case Inference::EXTF: + case Inference::EXTF_N: + case Inference::EXTF_D: + case Inference::EXTF_D_N: + case Inference::I_CONST_CONFLICT: + case Inference::UNIT_CONST_CONFLICT: + { + if (!ps.d_children.empty()) + { + std::vector exps(ps.d_children.begin(), ps.d_children.end() - 1); + Node src = ps.d_children[ps.d_children.size() - 1]; + if (psb.applyPredTransform(src, conc, exps)) + { + useBuffer = true; + } + } + if (!useBuffer) + { + // use the predicate version? + ps.d_args.push_back(conc); + ps.d_rule = PfRule::MACRO_SR_PRED_INTRO; + } + } + break; + // ========================== rewrite pred + case Inference::EXTF_EQ_REW: + case Inference::INFER_EMP: + { + // the last child is the predicate we are operating on, move to front + Node src = ps.d_children[ps.d_children.size() - 1]; + std::vector expe(ps.d_children.begin(), ps.d_children.end() - 1); + // start with a default rewrite + Node mainEqSRew = psb.applyPredElim(src, expe); + if (mainEqSRew == conc) + { + useBuffer = true; + break; + } + // may need the "extended equality rewrite" + Node mainEqSRew2 = psb.applyPredElim( + mainEqSRew, {}, MethodId::SB_DEFAULT, MethodId::RW_REWRITE_EQ_EXT); + if (mainEqSRew2 == conc) + { + useBuffer = true; + break; + } + // rewrite again with default rewriter + Node mainEqSRew3 = psb.applyPredElim(mainEqSRew2, {}); + useBuffer = (mainEqSRew3 == conc); + } + break; + // ========================== substitution+rewriting, CONCAT_EQ, ... + case Inference::F_CONST: + case Inference::F_UNIFY: + case Inference::F_ENDPOINT_EMP: + case Inference::F_ENDPOINT_EQ: + case Inference::F_NCTN: + case Inference::N_EQ_CONF: + case Inference::N_CONST: + case Inference::N_UNIFY: + case Inference::N_ENDPOINT_EMP: + case Inference::N_ENDPOINT_EQ: + case Inference::N_NCTN: + case Inference::SSPLIT_CST_PROP: + case Inference::SSPLIT_VAR_PROP: + case Inference::SSPLIT_CST: + case Inference::SSPLIT_VAR: + { + Trace("strings-ipc-core") << "Generate core rule for " << infer + << " (rev=" << isRev << ")" << std::endl; + // All of the above inferences have the form: + // (explanation for why t and s have the same prefix/suffix) ^ + // t = s ^ + // (length constraint)? + // We call t=s the "main equality" below. The length constraint is + // optional, which we split on below. 
+ size_t nchild = ps.d_children.size(); + size_t mainEqIndex = 0; + bool mainEqIndexSet = false; + // the length constraint + std::vector lenConstraint; + // these inferences have a length constraint as the last explain + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY + || infer == Inference::SSPLIT_CST || infer == Inference::SSPLIT_VAR + || infer == Inference::SSPLIT_VAR_PROP + || infer == Inference::SSPLIT_CST_PROP) + { + if (exp.size() >= 2) + { + Assert(exp.size() <= startExpIndex.size()); + // The index of the "main" equality is the last equality before + // the length explanation. + mainEqIndex = startExpIndex[exp.size() - 1] - 1; + mainEqIndexSet = true; + // the remainder is the length constraint + lenConstraint.insert(lenConstraint.end(), + ps.d_children.begin() + mainEqIndex + 1, + ps.d_children.end()); + } + } + else if (nchild >= 1) + { + // The index of the main equality is the last child. + mainEqIndex = nchild - 1; + mainEqIndexSet = true; + } + Node mainEq; + if (mainEqIndexSet) + { + mainEq = ps.d_children[mainEqIndex]; + Trace("strings-ipc-core") << "Main equality " << mainEq << " at index " + << mainEqIndex << std::endl; + } + if (mainEq.isNull() || mainEq.getKind() != EQUAL) + { + Trace("strings-ipc-core") + << "...failed to find main equality" << std::endl; + break; + } + // apply MACRO_SR_PRED_ELIM using equalities up to the main eq + std::vector childrenSRew; + childrenSRew.push_back(mainEq); + childrenSRew.insert(childrenSRew.end(), + ps.d_children.begin(), + ps.d_children.begin() + mainEqIndex); + Node mainEqSRew = + psb.tryStep(PfRule::MACRO_SR_PRED_ELIM, childrenSRew, {}); + if (CDProof::isSame(mainEqSRew, mainEq)) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // the rule added above was not necessary + psb.popStep(); + } + else if (mainEqSRew == conc) + { + Trace("strings-ipc-core") << "...success after rewrite!" << std::endl; + useBuffer = true; + break; + } + Trace("strings-ipc-core") + << "Main equality after subs+rewrite " << mainEqSRew << std::endl; + // now, apply CONCAT_EQ to get the remainder + std::vector childrenCeq; + childrenCeq.push_back(mainEqSRew); + std::vector argsCeq; + argsCeq.push_back(nodeIsRev); + Node mainEqCeq = psb.tryStep(PfRule::CONCAT_EQ, childrenCeq, argsCeq); + Trace("strings-ipc-core") + << "Main equality after CONCAT_EQ " << mainEqCeq << std::endl; + if (mainEqCeq.isNull() || mainEqCeq.getKind() != EQUAL) + { + // fail + break; + } + else if (mainEqCeq == mainEqSRew) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // not necessary, probably first component of equality + psb.popStep(); + } + // Now, mainEqCeq is an equality t ++ ... == s ++ ... where the + // inference involved t and s. + if (infer == Inference::N_ENDPOINT_EQ + || infer == Inference::N_ENDPOINT_EMP + || infer == Inference::F_ENDPOINT_EQ + || infer == Inference::F_ENDPOINT_EMP) + { + // Should be equal to conclusion already, or rewrite to it. + // Notice that this step is necessary to handle the "rproc" + // optimization in processSimpleNEq. Alternatively, this could + // possibly be done by CONCAT_EQ with !isRev. + std::vector cexp; + if (psb.applyPredTransform(mainEqCeq, + conc, + cexp, + MethodId::SB_DEFAULT, + MethodId::RW_REWRITE_EQ_EXT)) + { + Trace("strings-ipc-core") << "Transformed to " << conc + << " via pred transform" << std::endl; + // success + useBuffer = true; + Trace("strings-ipc-core") << "...success!" << std::endl; + } + // Otherwise, note that EMP rules conclude ti = "" where + // t1 ++ ... 
++ tn == "". However, these are very rarely applied, let + // alone for 2+ children. This case is intentionally unhandled here. + } + else if (infer == Inference::N_CONST || infer == Inference::F_CONST + || infer == Inference::N_EQ_CONF) + { + // should be a constant conflict + std::vector childrenC; + childrenC.push_back(mainEqCeq); + std::vector argsC; + argsC.push_back(nodeIsRev); + Node mainEqC = psb.tryStep(PfRule::CONCAT_CONFLICT, childrenC, argsC); + if (mainEqC == conc) + { + useBuffer = true; + Trace("strings-ipc-core") << "...success!" << std::endl; + } + } + else + { + std::vector tvec; + std::vector svec; + utils::getConcat(mainEqCeq[0], tvec); + utils::getConcat(mainEqCeq[1], svec); + Node t0 = tvec[isRev ? tvec.size() - 1 : 0]; + Node s0 = svec[isRev ? svec.size() - 1 : 0]; + bool applySym = false; + // may need to apply symmetry + if ((infer == Inference::SSPLIT_CST + || infer == Inference::SSPLIT_CST_PROP) + && t0.isConst()) + { + Assert(!s0.isConst()); + applySym = true; + std::swap(t0, s0); + } + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY) + { + if (conc.getKind() != EQUAL) + { + break; + } + // one side should match, the other side could be a split constant + if (conc[0] != t0 && conc[1] != s0) + { + applySym = true; + std::swap(t0, s0); + } + Assert(conc[0].isConst() == t0.isConst()); + Assert(conc[1].isConst() == s0.isConst()); + } + PfRule rule = PfRule::UNKNOWN; + // the form of the required length constraint expected by the proof + Node lenReq; + bool lenSuccess = false; + if (infer == Inference::N_UNIFY || infer == Inference::F_UNIFY) + { + // the required premise for unify is always len(x) = len(y), + // however the explanation may not be literally this. Thus, we + // need to reconstruct a proof from the given explanation. + // it should be the case that lenConstraint => lenReq. + // We use terms in the conclusion equality, not t0, s0 here. 
+ lenReq = nm->mkNode(STRING_LENGTH, conc[0]) + .eqNode(nm->mkNode(STRING_LENGTH, conc[1])); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_UNIFY; + } + else if (infer == Inference::SSPLIT_VAR) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkNode(STRING_LENGTH, s0)) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_SPLIT; + } + else if (infer == Inference::SSPLIT_CST) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkConst(Rational(0))) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_CSPLIT; + } + else if (infer == Inference::SSPLIT_VAR_PROP) + { + // it should be the case that lenConstraint => lenReq + for (unsigned r = 0; r < 2; r++) + { + lenReq = nm->mkNode(GT, + nm->mkNode(STRING_LENGTH, t0), + nm->mkNode(STRING_LENGTH, s0)); + if (convertLengthPf(lenReq, lenConstraint, psb)) + { + lenSuccess = true; + break; + } + if (r == 0) + { + // may be the other direction + applySym = true; + std::swap(t0, s0); + } + } + rule = PfRule::CONCAT_LPROP; + } + else if (infer == Inference::SSPLIT_CST_PROP) + { + // it should be the case that lenConstraint => lenReq + lenReq = nm->mkNode(STRING_LENGTH, t0) + .eqNode(nm->mkConst(Rational(0))) + .notNode(); + lenSuccess = convertLengthPf(lenReq, lenConstraint, psb); + rule = PfRule::CONCAT_CPROP; + } + if (!lenSuccess) + { + Trace("strings-ipc-core") + << "...failed due to length constraint" << std::endl; + break; + } + // apply symmetry if necessary + if (applySym) + { + std::vector childrenSymm; + childrenSymm.push_back(mainEqCeq); + // note this explicit step may not be necessary + mainEqCeq = psb.tryStep(PfRule::SYMM, childrenSymm, {}); + Trace("strings-ipc-core") + << "Main equality after SYMM " << mainEqCeq << std::endl; + } + if (rule != PfRule::UNKNOWN) + { + Trace("strings-ipc-core") + << "Core rule length requirement is " << lenReq << std::endl; + // apply the given rule + std::vector childrenMain; + childrenMain.push_back(mainEqCeq); + childrenMain.push_back(lenReq); + std::vector argsMain; + argsMain.push_back(nodeIsRev); + Node mainEqMain = psb.tryStep(rule, childrenMain, argsMain); + Trace("strings-ipc-core") << "Main equality after " << rule << " " + << mainEqMain << std::endl; + if (mainEqMain == mainEqCeq) + { + Trace("strings-ipc-core") << "...undo step" << std::endl; + // not necessary, probably first component of equality + psb.popStep(); + } + // either equal or rewrites to it + std::vector cexp; + if (psb.applyPredTransform(mainEqMain, conc, cexp)) + { + // requires that length success is also true + useBuffer = true; + Trace("strings-ipc-core") << "...success" << std::endl; + } + else + { + Trace("strings-ipc-core") << "...fail" << std::endl; + } + } + else + { + // should always have given a rule to try above + Assert(false) << "No reconstruction rule given for " << infer; + } + } + } + break; + // ========================== Disequalities + case Inference::DEQ_DISL_FIRST_CHAR_STRING_SPLIT: + case Inference::DEQ_DISL_STRINGS_SPLIT: + { + if (conc.getKind() != AND || conc.getNumChildren() != 2 + || conc[0].getKind() != EQUAL || !conc[0][0].getType().isStringLike() + || conc[1].getKind() != EQUAL + || conc[1][0].getKind() != STRING_LENGTH) + { + Trace("strings-ipc-deq") << "malformed application" << std::endl; + Assert(false) << "unexpected conclusion " << conc << " 
for " << infer; + } + else + { + Node lenReq = + nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, conc[0][0]), conc[1][1]); + Trace("strings-ipc-deq") + << "length requirement is " << lenReq << std::endl; + if (convertLengthPf(lenReq, ps.d_children, psb)) + { + Trace("strings-ipc-deq") << "...success length" << std::endl; + // make the proof + std::vector childrenMain; + childrenMain.push_back(lenReq); + std::vector argsMain; + argsMain.push_back(nodeIsRev); + Node mainConc = + psb.tryStep(PfRule::STRING_DECOMPOSE, childrenMain, argsMain); + Trace("strings-ipc-deq") + << "...main conclusion is " << mainConc << std::endl; + useBuffer = (mainConc == conc); + Trace("strings-ipc-deq") + << "...success is " << useBuffer << std::endl; + } + else + { + Trace("strings-ipc-deq") << "...fail length" << std::endl; + } + } + } + break; + // ========================== Boolean split + case Inference::CARD_SP: + case Inference::LEN_SPLIT: + case Inference::LEN_SPLIT_EMP: + case Inference::DEQ_DISL_EMP_SPLIT: + case Inference::DEQ_DISL_FIRST_CHAR_EQ_SPLIT: + case Inference::DEQ_STRINGS_EQ: + case Inference::DEQ_LENS_EQ: + case Inference::DEQ_LENGTH_SP: + { + if (conc.getKind() != OR) + { + // This should never happen. If it does, we resort to using + // STRING_TRUST below (in production mode). + Assert(false) << "Expected OR conclusion for " << infer; + } + else + { + ps.d_rule = PfRule::SPLIT; + Assert(ps.d_children.empty()); + ps.d_args.push_back(conc[0]); + } + } + break; + // ========================== Regular expression unfolding + case Inference::RE_UNFOLD_POS: + case Inference::RE_UNFOLD_NEG: + { + if (infer == Inference::RE_UNFOLD_POS) + { + ps.d_rule = PfRule::RE_UNFOLD_POS; + } + else + { + ps.d_rule = PfRule::RE_UNFOLD_NEG; + // it may be an optimized form of concat simplification + Assert(ps.d_children.size() == 1); + Node mem = ps.d_children[0]; + Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); + if (mem[0][1].getKind() == REGEXP_CONCAT) + { + size_t index; + Node reLen = RegExpOpr::getRegExpConcatFixed(mem[0][1], index); + // if we can find a fixed length for a component, use the optimized + // version + if (!reLen.isNull()) + { + ps.d_rule = PfRule::RE_UNFOLD_NEG_CONCAT_FIXED; + } + } + } + } + break; + // ========================== Reduction + case Inference::CTN_POS: + case Inference::CTN_NEG_EQUAL: + { + if (ps.d_children.size() != 1) + { + break; + } + bool polarity = ps.d_children[0].getKind() != NOT; + Node atom = polarity ? ps.d_children[0] : ps.d_children[0][0]; + std::vector args; + args.push_back(atom); + Node res = psb.tryStep(PfRule::STRING_EAGER_REDUCTION, {}, args); + if (res.isNull()) + { + break; + } + // ite( contains(x,t), x = k1 ++ t ++ k2, x != t ) + std::vector tiChildren; + tiChildren.push_back(ps.d_children[0]); + Node ctnt = psb.tryStep( + polarity ? PfRule::TRUE_INTRO : PfRule::FALSE_INTRO, tiChildren, {}); + if (ctnt.isNull() || ctnt.getKind() != EQUAL) + { + break; + } + std::vector tchildren; + tchildren.push_back(ctnt); + // apply substitution { contains(x,t) -> true|false } and rewrite to get + // conclusion x = k1 ++ t ++ k2 or x != t. 
+ if (psb.applyPredTransform(res, conc, tchildren)) + { + useBuffer = true; + } + } + break; + + case Inference::REDUCTION: + { + size_t nchild = conc.getNumChildren(); + Node mainEq; + if (conc.getKind() == EQUAL) + { + mainEq = conc; + } + else if (conc.getKind() == AND && conc[nchild - 1].getKind() == EQUAL) + { + mainEq = conc[nchild - 1]; + } + if (mainEq.isNull()) + { + Trace("strings-ipc-red") << "Bad Reduction: " << conc << std::endl; + Assert(false) << "Unexpected reduction " << conc; + break; + } + std::vector argsRed; + // the left hand side of the last conjunct is the term we are reducing + argsRed.push_back(mainEq[0]); + Node red = psb.tryStep(PfRule::STRING_REDUCTION, {}, argsRed); + Trace("strings-ipc-red") << "Reduction : " << red << std::endl; + if (!red.isNull()) + { + // either equal or rewrites to it + std::vector cexp; + if (psb.applyPredTransform(red, conc, cexp)) + { + Trace("strings-ipc-red") << "...success!" << std::endl; + useBuffer = true; + } + else + { + Trace("strings-ipc-red") << "...failed to reduce" << std::endl; + } + } + } + break; + // ========================== code injectivity + case Inference::CODE_INJ: + { + ps.d_rule = PfRule::STRING_CODE_INJ; + Assert(conc.getKind() == OR && conc.getNumChildren() == 3 + && conc[2].getKind() == EQUAL); + ps.d_args.push_back(conc[2][0]); + ps.d_args.push_back(conc[2][1]); + } + break; + // ========================== unit injectivity + case Inference::UNIT_INJ: { ps.d_rule = PfRule::STRING_SEQ_UNIT_INJ; + } + break; + // ========================== prefix conflict + case Inference::PREFIX_CONFLICT: + { + Trace("strings-ipc-prefix") << "Prefix conflict..." << std::endl; + std::vector eqs; + for (const Node& e : ps.d_children) + { + Kind ek = e.getKind(); + if (ek == EQUAL) + { + Trace("strings-ipc-prefix") << "- equality : " << e << std::endl; + eqs.push_back(e); + } + else if (ek == STRING_IN_REGEXP) + { + // unfold it and extract the equality + std::vector children; + children.push_back(e); + std::vector args; + Node eunf = psb.tryStep(PfRule::RE_UNFOLD_POS, children, args); + Trace("strings-ipc-prefix") + << "--- " << e << " unfolds to " << eunf << std::endl; + if (eunf.isNull()) + { + continue; + } + else if (eunf.getKind() == AND) + { + // equality is the last conjunct + std::vector childrenAE; + childrenAE.push_back(eunf); + std::vector argsAE; + argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1))); + Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE); + Trace("strings-ipc-prefix") + << "--- and elim to " << eunfAE << std::endl; + if (eunfAE.isNull() || eunfAE.getKind() != EQUAL) + { + Assert(false) + << "Unexpected unfolded premise " << eunf << " for " << infer; + continue; + } + Trace("strings-ipc-prefix") + << "- equality : " << eunfAE << std::endl; + eqs.push_back(eunfAE); + } + else if (eunf.getKind() == EQUAL) + { + Trace("strings-ipc-prefix") << "- equality : " << eunf << std::endl; + eqs.push_back(eunf); + } + } + else + { + // not sure how to use this assumption + Assert(false) << "Unexpected premise " << e << " for " << infer; + } + } + if (eqs.empty()) + { + break; + } + // connect via transitivity + Node curr = eqs[0]; + for (size_t i = 1, esize = eqs.size(); i < esize; i++) + { + Node prev = curr; + curr = convertTrans(curr, eqs[1], psb); + if (curr.isNull()) + { + break; + } + Trace("strings-ipc-prefix") << "- Via trans: " << curr << std::endl; + } + if (curr.isNull()) + { + break; + } + Trace("strings-ipc-prefix") + << "- Possible conflicting equality : " << curr << 
std::endl; + std::vector emp; + Node concE = psb.applyPredElim(curr, emp); + Trace("strings-ipc-prefix") + << "- After pred elim: " << concE << std::endl; + if (concE == conc) + { + Trace("strings-ipc-prefix") << "...success!" << std::endl; + useBuffer = true; + } + } + break; + // ========================== regular expressions + case Inference::RE_INTER_INCLUDE: + case Inference::RE_INTER_CONF: + case Inference::RE_INTER_INFER: + { + std::vector reiExp; + std::vector reis; + std::vector reiChildren; + std::vector reiChildrenOrig; + Node x; + // make the regular expression intersection that summarizes all + // memberships in the explanation + for (const Node& c : ps.d_children) + { + bool polarity = c.getKind() != NOT; + Node catom = polarity ? c : c[0]; + if (catom.getKind() != STRING_IN_REGEXP) + { + Assert(c.getKind() == EQUAL); + if (c.getKind() == EQUAL) + { + reiExp.push_back(c); + } + continue; + } + if (x.isNull()) + { + // just take the first LHS; others should be equated to it by exp + x = catom[0]; + } + Node rcurr = + polarity ? catom[1] : nm->mkNode(REGEXP_COMPLEMENT, catom[1]); + reis.push_back(rcurr); + Node mem = nm->mkNode(STRING_IN_REGEXP, catom[0], rcurr); + reiChildren.push_back(mem); + reiChildrenOrig.push_back(c); + } + // go back and justify each premise + bool successChildren = true; + for (size_t i = 0, nchild = reiChildren.size(); i < nchild; i++) + { + if (!psb.applyPredTransform(reiChildrenOrig[i], reiChildren[i], reiExp)) + { + Trace("strings-ipc-re") + << "... failed to justify child " << reiChildren[i] << " from " + << reiChildrenOrig[i] << std::endl; + successChildren = false; + break; + } + } + if (!successChildren) + { + break; + } + Node mem = psb.tryStep(PfRule::RE_INTER, reiChildren, {}); + Trace("strings-ipc-re") + << "Regular expression summary: " << mem << std::endl; + // the conclusion is rewritable to the premises via rewriting? + if (psb.applyPredTransform(mem, conc, {})) + { + Trace("strings-ipc-re") << "... success!" << std::endl; + useBuffer = true; + } + else + { + Trace("strings-ipc-re") + << "...failed to rewrite to conclusion" << std::endl; + } + } + break; + // ========================== unknown and currently unsupported + case Inference::CARDINALITY: + case Inference::I_CYCLE_E: + case Inference::I_CYCLE: + case Inference::RE_DELTA: + case Inference::RE_DELTA_CONF: + case Inference::RE_DERIVE: + case Inference::FLOOP: + case Inference::FLOOP_CONFLICT: + case Inference::DEQ_NORM_EMP: + case Inference::CTN_TRANS: + case Inference::CTN_DECOMPOSE: + default: + // do nothing, these will be converted to STRING_TRUST below since the + // rule is unknown. + break; + } + + // now see if we would succeed with the checker-to-try + bool success = false; + if (ps.d_rule != PfRule::UNKNOWN) + { + Trace("strings-ipc") << "For " << infer << ", try proof rule " << ps.d_rule + << "..."; + Assert(ps.d_rule != PfRule::UNKNOWN); + Node pconc = psb.tryStep(ps.d_rule, ps.d_children, ps.d_args); + if (pconc.isNull() || pconc != conc) + { + Trace("strings-ipc") << "failed, pconc is " << pconc << " (expected " + << conc << ")" << std::endl; + ps.d_rule = PfRule::UNKNOWN; + } + else + { + // successfully set up a single step proof in ps + success = true; + Trace("strings-ipc") << "success!" 
<< std::endl; + } + } + else if (useBuffer) + { + // successfully set up a multi step proof in psb + success = true; + } + else + { + Trace("strings-ipc") << "For " << infer << " " << conc + << ", no proof rule, failed" << std::endl; + } + if (!success) + { + // debug print + if (Trace.isOn("strings-ipc-fail")) + { + Trace("strings-ipc-fail") + << "InferProofCons::convert: Failed " << infer + << (isRev ? " :rev " : " ") << conc << std::endl; + for (const Node& ec : exp) + { + Trace("strings-ipc-fail") << " e: " << ec << std::endl; + } + } + // untrustworthy conversion, the argument of STRING_TRUST is its conclusion + ps.d_args.clear(); + ps.d_args.push_back(conc); + // use the trust rule + ps.d_rule = PfRule::STRING_TRUST; + // add to stats + d_statistics.d_inferencesNoPf << infer; + if (options::proofNewPedantic() > 0) + { + std::stringstream serr; + serr << "InferProofCons::convert: Failed " << infer + << (isRev ? " :rev " : " ") << conc << std::endl; + for (const Node& ec : exp) + { + serr << " e: " << ec << std::endl; + } + Unhandled() << serr.str(); + } + } + if (Trace.isOn("strings-ipc-debug")) + { + if (useBuffer) + { + Trace("strings-ipc-debug") + << "InferProofCons::convert returned buffer with " + << psb.getNumSteps() << " steps:" << std::endl; + const std::vector>& steps = psb.getSteps(); + for (const std::pair& step : steps) + { + Trace("strings-ipc-debug") + << "- " << step.first << " via " << step.second << std::endl; + } + } + else + { + Trace("strings-ipc-debug") + << "InferProofCons::convert returned " << ps << std::endl; + } + } +} + +bool InferProofCons::convertLengthPf(Node lenReq, + const std::vector& lenExp, + TheoryProofStepBuffer& psb) +{ + for (const Node& le : lenExp) + { + if (lenReq == le) + { + return true; + } + } + Trace("strings-ipc-len") << "Must explain " << lenReq << " by " << lenExp + << std::endl; + for (const Node& le : lenExp) + { + // probably rewrites to it? + std::vector exp; + if (psb.applyPredTransform(le, lenReq, exp)) + { + Trace("strings-ipc-len") << "...success by rewrite" << std::endl; + return true; + } + // maybe x != "" => len(x) != 0 + std::vector children; + children.push_back(le); + std::vector args; + Node res = psb.tryStep(PfRule::STRING_LENGTH_NON_EMPTY, children, args); + if (res == lenReq) + { + Trace("strings-ipc-len") << "...success by LENGTH_NON_EMPTY" << std::endl; + return true; + } + } + Trace("strings-ipc-len") << "...failed" << std::endl; + return false; +} + +Node InferProofCons::convertTrans(Node eqa, + Node eqb, + TheoryProofStepBuffer& psb) +{ + if (eqa.getKind() != EQUAL || eqb.getKind() != EQUAL) + { + return Node::null(); + } + for (uint32_t i = 0; i < 2; i++) + { + Node eqaSym = i == 0 ? eqa[1].eqNode(eqa[0]) : eqa; + for (uint32_t j = 0; j < 2; j++) + { + Node eqbSym = j == 0 ? eqb : eqb[1].eqNode(eqb[1]); + if (eqa[i] == eqb[j]) + { + std::vector children; + children.push_back(eqaSym); + children.push_back(eqbSym); + return psb.tryStep(PfRule::TRANS, children, {}); + } + } + } + return Node::null(); +} + +std::shared_ptr InferProofCons::getProofFor(Node fact) +{ + // temporary proof + CDProof pf(d_pnm); + // get the inference + NodeInferInfoMap::iterator it = d_lazyFactMap.find(fact); + if (it == d_lazyFactMap.end()) + { + Node factSym = CDProof::getSymmFact(fact); + if (!factSym.isNull()) + { + // Use the symmetric fact. There is no need to explictly make a + // SYMM proof, as this is handled by CDProof::getProofFor below. 
+ it = d_lazyFactMap.find(factSym); + } + } + AlwaysAssert(it != d_lazyFactMap.end()); + // now go back and convert it to proof steps and add to proof + bool useBuffer = false; + ProofStep ps; + TheoryProofStepBuffer psb(d_pnm->getChecker()); + std::shared_ptr ii = (*it).second; + // run the conversion + convert(ii->d_id, ii->d_idRev, ii->d_conc, ii->d_ant, ps, psb, useBuffer); + // make the proof based on the step or the buffer + if (useBuffer) + { + if (!pf.addSteps(psb)) + { + return nullptr; + } + } + else + { + if (!pf.addStep(fact, ps)) + { + return nullptr; + } + } + return pf.getProofFor(fact); +} + +std::string InferProofCons::identify() const +{ + return "strings::InferProofCons"; +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/infer_proof_cons.h b/src/theory/strings/infer_proof_cons.h new file mode 100644 index 000000000..63e341dfe --- /dev/null +++ b/src/theory/strings/infer_proof_cons.h @@ -0,0 +1,135 @@ +/********************* */ +/*! \file infer_proof_cons.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Inference to proof conversion + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H +#define CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H + +#include + +#include "expr/node.h" +#include "expr/proof_checker.h" +#include "expr/proof_rule.h" +#include "theory/builtin/proof_checker.h" +#include "theory/strings/infer_info.h" +#include "theory/strings/sequences_stats.h" +#include "theory/theory_proof_step_buffer.h" +#include "theory/uf/proof_equality_engine.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * Converts between the strings-specific (untrustworthy) InferInfo class and + * information about how to construct a trustworthy proof step + * (PfRule, children, args). It acts as a (lazy) proof generator where the + * former is registered via notifyFact and the latter is asked for in + * getProofFor, typically by the proof equality engine. + * + * The main (private) method of this class is convert below, which is + * called when we need to construct a proof node from an InferInfo. + */ +class InferProofCons : public ProofGenerator +{ + typedef context::CDHashMap, NodeHashFunction> + NodeInferInfoMap; + + public: + InferProofCons(context::Context* c, + ProofNodeManager* pnm, + SequencesStatistics& statistics); + ~InferProofCons() {} + /** + * This is called to notify that ii is an inference that may need a proof + * in the future. + * + * In detail, this class should be prepared to respond to a call to: + * getProofFor(ii.d_conc) + * in the remainder of the SAT context. This method copies ii and stores it + * in the context-dependent map d_lazyFactMap below. + * + * This is used for lazy proof construction, where proofs are constructed + * only for facts that are explained. + */ + void notifyFact(const InferInfo& ii); + + /** + * This returns the proof for fact. This is required for using this class as + * a lazy proof generator. + * + * It should be the case that a call was made to notifyFact(ii) where + * ii.d_conc is fact in this SAT context. 
+ */ + std::shared_ptr getProofFor(Node fact) override; + /** Identify this generator (for debugging, etc..) */ + virtual std::string identify() const override; + + private: + /** convert + * + * This method is called when the theory of strings makes an inference + * described by an InferInfo, whose fields are given by the first four + * arguments of this method. + * + * This method converts this call to instructions on what the proof rule + * step(s) are for concluding the conclusion of the inference. This + * information is either: + * + * (A) stored in the argument ps, which consists of: + * - A proof rule identifier (ProofStep::d_rule). + * - The premises of the proof step (ProofStep::d_children). + * - Arguments to the proof step (ProofStep::d_args). + * + * (B) If the proof for the inference cannot be captured by a single + * step, then the d_rule field of ps is not set, and useBuffer is set to + * true. In this case, the argument psb is updated to contain (possibly + * multiple) proof steps for how to construct a proof for the given inference. + * In particular, psb will contain a set of steps that form a proof + * whose conclusion is ii.d_conc and whose free assumptions are ii.d_ant. + */ + void convert(Inference infer, + bool isRev, + Node conc, + const std::vector& exp, + ProofStep& ps, + TheoryProofStepBuffer& psb, + bool& useBuffer); + /** + * Convert length proof. If this method returns true, it adds proof step(s) + * to the buffer psb that conclude lenReq from premises lenExp. + */ + bool convertLengthPf(Node lenReq, + const std::vector& lenExp, + TheoryProofStepBuffer& psb); + /** + * Helper method, adds the proof of (TRANS eqa eqb) into the proof step + * buffer psb, where eqa and eqb are flipped as needed. Returns the + * conclusion, or null if we were not able to construct a TRANS step. + */ + Node convertTrans(Node eqa, Node eqb, TheoryProofStepBuffer& psb); + /** the proof node manager */ + ProofNodeManager* d_pnm; + /** The lazy fact map */ + NodeInferInfoMap d_lazyFactMap; + /** Reference to the statistics for the theory of strings/sequences. 
*/ + SequencesStatistics& d_statistics; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__INFER_PROOF_CONS_H */ diff --git a/src/theory/strings/sequences_stats.cpp b/src/theory/strings/sequences_stats.cpp index f47413783..80221cbcc 100644 --- a/src/theory/strings/sequences_stats.cpp +++ b/src/theory/strings/sequences_stats.cpp @@ -25,6 +25,7 @@ SequencesStatistics::SequencesStatistics() : d_checkRuns("theory::strings::checkRuns", 0), d_strategyRuns("theory::strings::strategyRuns", 0), d_inferences("theory::strings::inferences"), + d_inferencesNoPf("theory::strings::inferencesNoPf"), d_cdSimplifications("theory::strings::cdSimplifications"), d_reductions("theory::strings::reductions"), d_regexpUnfoldingsPos("theory::strings::regexpUnfoldingsPos"), @@ -43,6 +44,7 @@ SequencesStatistics::SequencesStatistics() smtStatisticsRegistry()->registerStat(&d_checkRuns); smtStatisticsRegistry()->registerStat(&d_strategyRuns); smtStatisticsRegistry()->registerStat(&d_inferences); + smtStatisticsRegistry()->registerStat(&d_inferencesNoPf); smtStatisticsRegistry()->registerStat(&d_cdSimplifications); smtStatisticsRegistry()->registerStat(&d_reductions); smtStatisticsRegistry()->registerStat(&d_regexpUnfoldingsPos); @@ -63,6 +65,7 @@ SequencesStatistics::~SequencesStatistics() smtStatisticsRegistry()->unregisterStat(&d_checkRuns); smtStatisticsRegistry()->unregisterStat(&d_strategyRuns); smtStatisticsRegistry()->unregisterStat(&d_inferences); + smtStatisticsRegistry()->unregisterStat(&d_inferencesNoPf); smtStatisticsRegistry()->unregisterStat(&d_cdSimplifications); smtStatisticsRegistry()->unregisterStat(&d_reductions); smtStatisticsRegistry()->unregisterStat(&d_regexpUnfoldingsPos); diff --git a/src/theory/strings/sequences_stats.h b/src/theory/strings/sequences_stats.h index 5f1b80ce9..b205310ed 100644 --- a/src/theory/strings/sequences_stats.h +++ b/src/theory/strings/sequences_stats.h @@ -62,6 +62,11 @@ class SequencesStatistics //--------------- inferences /** Counts the number of applications of each type of inference */ HistogramStat d_inferences; + /** + * Counts the number of applications of each type of inference that were not + * processed as a proof step. This is a subset of d_inferences. + */ + HistogramStat d_inferencesNoPf; /** * Counts the number of applications of each type of context-dependent * simplification. The sum of this map is equal to the number of EXTF or -- 2.30.2
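Editorial usage note (not part of the patch): a minimal sketch of how the new
generator is meant to be driven, based on the class documentation in
infer_proof_cons.h above. The helper exampleUsage and the way the context,
proof node manager and statistics objects are obtained are hypothetical; in
the planned integration these calls are made by InferenceManager and the
proof equality engine.

  #include "theory/strings/infer_proof_cons.h"

  using namespace CVC4;
  using namespace CVC4::theory::strings;

  void exampleUsage(context::Context* c,
                    ProofNodeManager* pnm,
                    SequencesStatistics& stats,
                    const InferInfo& ii)
  {
    InferProofCons ipc(c, pnm, stats);
    // Register the inference when it is asserted as a fact; its conclusion
    // ii.d_conc is the key used for lazy proof construction.
    ipc.notifyFact(ii);
    // ... later in the same SAT context, when the fact must be explained, ask
    // for its proof; this runs convert(), which reconstructs PfRule steps
    // (or falls back to STRING_TRUST) from the Inference identifier.
    std::shared_ptr<ProofNode> pn = ipc.getProofFor(ii.d_conc);
  }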