From 56f0e3c3455cc522bd18f41664591cae2c564be3 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 16 Oct 2020 09:17:24 -0500 Subject: [PATCH] Add inference enumeration to datatypes (#5275) This adds an inference enumeration for datatypes, which is analogous to what is done in strings and non-linear arithmetic. Inferences are informal identifiers that serve the purpose of (1) debugging the code, (2) providing hints on how proofs can be reconstructed. This PR also splits the InferenceManager file in datatypes into 2. --- src/CMakeLists.txt | 2 + src/theory/datatypes/inference.cpp | 111 +++++++++++++++++++ src/theory/datatypes/inference.h | 123 +++++++++++++++++++++ src/theory/datatypes/inference_manager.cpp | 118 +++++++++----------- src/theory/datatypes/inference_manager.h | 59 ++++------ src/theory/datatypes/theory_datatypes.cpp | 13 ++- 6 files changed, 314 insertions(+), 112 deletions(-) create mode 100644 src/theory/datatypes/inference.cpp create mode 100644 src/theory/datatypes/inference.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3896d3111..ec8aaf8dc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -513,6 +513,8 @@ libcvc4_add_sources( theory/combination_engine.h theory/datatypes/datatypes_rewriter.cpp theory/datatypes/datatypes_rewriter.h + theory/datatypes/inference.cpp + theory/datatypes/inference.h theory/datatypes/inference_manager.cpp theory/datatypes/inference_manager.h theory/datatypes/sygus_datatype_utils.cpp diff --git a/src/theory/datatypes/inference.cpp b/src/theory/datatypes/inference.cpp new file mode 100644 index 000000000..f8dbd1153 --- /dev/null +++ b/src/theory/datatypes/inference.cpp @@ -0,0 +1,111 @@ +/********************* */ +/*! \file inference.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Gereon Kremer + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Datatypes inference + **/ + +#include "theory/datatypes/inference.h" + +#include "expr/dtype.h" +#include "options/datatypes_options.h" +#include "theory/datatypes/inference_manager.h" +#include "theory/theory.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace datatypes { + +const char* toString(InferId i) +{ + switch (i) + { + case InferId::NONE: return "NONE"; + case InferId::UNIF: return "UNIF"; + case InferId::INST: return "INST"; + case InferId::SPLIT: return "SPLIT"; + case InferId::LABEL_EXH: return "LABEL_EXH"; + case InferId::COLLAPSE_SEL: return "COLLAPSE_SEL"; + case InferId::CLASH_CONFLICT: return "CLASH_CONFLICT"; + case InferId::TESTER_CONFLICT: return "TESTER_CONFLICT"; + case InferId::TESTER_MERGE_CONFLICT: return "TESTER_MERGE_CONFLICT"; + case InferId::BISIMILAR: return "BISIMILAR"; + case InferId::CYCLE: return "CYCLE"; + default: return "?"; + } +} + +std::ostream& operator<<(std::ostream& out, InferId i) +{ + out << toString(i); + return out; +} + +DatatypesInference::DatatypesInference(InferenceManager* im, + Node conc, + Node exp, + InferId i) + : SimpleTheoryInternalFact(conc, exp, nullptr), d_im(im), d_id(i) +{ + // false is not a valid explanation + Assert(d_exp.isNull() || !d_exp.isConst() || d_exp.getConst()); +} + +bool DatatypesInference::mustCommunicateFact(Node n, Node exp) +{ + Trace("dt-lemma-debug") << "Compute for " << exp << " => " << n << std::endl; + bool addLemma = false; + if (options::dtInferAsLemmas() && !exp.isConst()) + { + // all units are lemmas + addLemma = true; + } + else if (n.getKind() == EQUAL) + { + // Note that equalities due to instantiate are forced as lemmas if + // necessary as they are created. This ensures that terms are shared with + // external theories when necessary. We send the lemma here only if + // the equality is not for datatype terms, which can happen for collapse + // selector / term size or unification. + TypeNode tn = n[0].getType(); + addLemma = !tn.isDatatype(); + } + else if (n.getKind() == LEQ || n.getKind() == OR) + { + addLemma = true; + } + if (addLemma) + { + Trace("dt-lemma-debug") << "Communicate " << n << std::endl; + return true; + } + Trace("dt-lemma-debug") << "Do not need to communicate " << n << std::endl; + return false; +} + +bool DatatypesInference::process(TheoryInferenceManager* im, bool asLemma) +{ + // Check to see if we have to communicate it to the rest of the system. + // The flag asLemma is true when the inference was marked that it must be + // sent as a lemma in addPendingInference below. + if (asLemma || mustCommunicateFact(d_conc, d_exp)) + { + return d_im->processDtInference(d_conc, d_exp, d_id, true); + } + return d_im->processDtInference(d_conc, d_exp, d_id, false); +} + +InferId DatatypesInference::getInferId() const { return d_id; } + +} // namespace datatypes +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/datatypes/inference.h b/src/theory/datatypes/inference.h new file mode 100644 index 000000000..31ede087b --- /dev/null +++ b/src/theory/datatypes/inference.h @@ -0,0 +1,123 @@ +/********************* */ +/*! \file inference.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Datatypes inference + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__DATATYPES__INFERENCE_H +#define CVC4__THEORY__DATATYPES__INFERENCE_H + +#include "context/cdhashmap.h" +#include "expr/node.h" +#include "theory/inference_manager_buffered.h" + +namespace CVC4 { +namespace theory { +namespace datatypes { + +enum class InferId : uint32_t +{ + NONE, + // (= (C t1 ... tn) (C s1 .. sn)) => (= ti si) + UNIF, + // ((_ is Ci) t) => (= t (Ci (sel_1 t) ... (sel_n t))) + INST, + // (or ((_ is C1) t) V ... V ((_ is Cn) t)) + SPLIT, + // (not ((_ is C1) t)) ^ ... [j] ... ^ (not ((_ is Cn) t)) => ((_ is Ci) t) + LABEL_EXH, + // (= t (Ci t1 ... tn)) => (= (sel_j t) rewrite((sel_j (Ci t1 ... tn)))) + COLLAPSE_SEL, + // (= (Ci t1...tn) (Cj t1...tn)) => false + CLASH_CONFLICT, + // ((_ is Ci) t) ^ (= t (Cj t1 ... tn)) => false + TESTER_CONFLICT, + // ((_ is Ci) t) ^ (= t s) ^ ((_ is Cj) s) => false + TESTER_MERGE_CONFLICT, + // bisimilarity for codatatypes + BISIMILAR, + // cycle conflict for datatypes + CYCLE, +}; + +/** + * Converts an inference to a string. Note: This function is also used in + * `safe_print()`. Changing this functions name or signature will result in + * `safe_print()` printing "" instead of the proper strings for + * the enum values. + * + * @param i The inference + * @return The name of the inference + */ +const char* toString(InferId i); + +/** + * Writes an inference name to a stream. + * + * @param out The stream to write to + * @param i The inference to write to the stream + * @return The stream + */ +std::ostream& operator<<(std::ostream& out, InferId i); + +class InferenceManager; + +/** + * A custom inference class. The main feature of this class is that it + * dynamically decides whether to process itself as a fact or as a lemma, + * based on the mustCommunicateFact method below. + */ +class DatatypesInference : public SimpleTheoryInternalFact +{ + public: + DatatypesInference(InferenceManager* im, + Node conc, + Node exp, + InferId i = InferId::NONE); + /** + * Must communicate fact method. + * The datatypes decision procedure makes "internal" inferences : + * (1) Unification : C( t1...tn ) = C( s1...sn ) => ti = si + * (2) Label : ~is_C1(t) ... ~is_C{i-1}(t) ~is_C{i+1}(t) ... ~is_Cn(t) => + * is_Ci( t ) + * (3) Instantiate : is_C( t ) => t = C( sel_1( t ) ... sel_n( t ) ) + * (4) collapse selector : S( C( t1...tn ) ) = t' + * (5) collapse term size : size( C( t1...tn ) ) = 1 + size( t1 ) + ... + + * size( tn ) + * (6) non-negative size : 0 <= size(t) + * This method returns true if the fact must be sent out as a lemma. If it + * returns false, then we assert the fact internally. We may need to + * communicate outwards if the conclusions involve other theories. Also + * communicate (6) and OR conclusions. + */ + static bool mustCommunicateFact(Node n, Node exp); + /** + * Process this fact, possibly as a fact or as a lemma, depending on the + * above method. + */ + bool process(TheoryInferenceManager* im, bool asLemma) override; + /** Get the inference identifier */ + InferId getInferId() const; + + private: + /** Pointer to the inference manager */ + InferenceManager* d_im; + /** The inference */ + InferId d_id; +}; + +} // namespace datatypes +} // namespace theory +} // namespace CVC4 + +#endif diff --git a/src/theory/datatypes/inference_manager.cpp b/src/theory/datatypes/inference_manager.cpp index fc7c93900..df0f38c0b 100644 --- a/src/theory/datatypes/inference_manager.cpp +++ b/src/theory/datatypes/inference_manager.cpp @@ -16,6 +16,7 @@ #include "expr/dtype.h" #include "options/datatypes_options.h" +#include "smt/smt_statistics_registry.h" #include "theory/theory.h" using namespace CVC4::kind; @@ -24,88 +25,35 @@ namespace CVC4 { namespace theory { namespace datatypes { -DatatypesInference::DatatypesInference(Node conc, Node exp, ProofGenerator* pg) - : SimpleTheoryInternalFact(conc, exp, pg) -{ - // false is not a valid explanation - Assert(d_exp.isNull() || !d_exp.isConst() || d_exp.getConst()); -} - -bool DatatypesInference::mustCommunicateFact(Node n, Node exp) -{ - Trace("dt-lemma-debug") << "Compute for " << exp << " => " << n << std::endl; - bool addLemma = false; - if (options::dtInferAsLemmas() && !exp.isConst()) - { - // all units are lemmas - addLemma = true; - } - else if (n.getKind() == EQUAL) - { - // Note that equalities due to instantiate are forced as lemmas if - // necessary as they are created. This ensures that terms are shared with - // external theories when necessary. We send the lemma here only if - // the equality is not for datatype terms, which can happen for collapse - // selector / term size or unification. - TypeNode tn = n[0].getType(); - addLemma = !tn.isDatatype(); - } - else if (n.getKind() == LEQ || n.getKind() == OR) - { - addLemma = true; - } - if (addLemma) - { - Trace("dt-lemma-debug") << "Communicate " << n << std::endl; - return true; - } - Trace("dt-lemma-debug") << "Do not need to communicate " << n << std::endl; - return false; -} - -bool DatatypesInference::process(TheoryInferenceManager* im, bool asLemma) -{ - // Check to see if we have to communicate it to the rest of the system. - // The flag asLemma is true when the inference was marked that it must be - // sent as a lemma in addPendingInference below. - if (asLemma || mustCommunicateFact(d_conc, d_exp)) - { - // send it as an (explained) lemma - std::vector exp; - if (!d_exp.isNull() && !d_exp.isConst()) - { - exp.push_back(d_exp); - } - Trace("dt-lemma-debug") - << "DatatypesInference : " << d_conc << " via " << exp << std::endl; - return im->lemmaExp(d_conc, exp, {}); - } - // assert the internal fact - bool polarity = d_conc.getKind() != NOT; - TNode atom = polarity ? d_conc : d_conc[0]; - im->assertInternalFact(atom, polarity, d_exp); - return true; -} - InferenceManager::InferenceManager(Theory& t, TheoryState& state, ProofNodeManager* pnm) - : InferenceManagerBuffered(t, state, nullptr) + : InferenceManagerBuffered(t, state, nullptr), + d_inferenceLemmas("theory::datatypes::inferenceLemmas"), + d_inferenceFacts("theory::datatypes::inferenceFacts") +{ + smtStatisticsRegistry()->registerStat(&d_inferenceLemmas); + smtStatisticsRegistry()->registerStat(&d_inferenceFacts); +} + +InferenceManager::~InferenceManager() { + smtStatisticsRegistry()->unregisterStat(&d_inferenceLemmas); + smtStatisticsRegistry()->unregisterStat(&d_inferenceFacts); } void InferenceManager::addPendingInference(Node conc, Node exp, - ProofGenerator* pg, - bool forceLemma) + bool forceLemma, + InferId i) { if (forceLemma) { - d_pendingLem.emplace_back(new DatatypesInference(conc, exp, pg)); + d_pendingLem.emplace_back(new DatatypesInference(this, conc, exp, i)); } else { - d_pendingFact.emplace_back(new DatatypesInference(conc, exp, pg)); + d_pendingFact.emplace_back(new DatatypesInference(this, conc, exp, i)); } } @@ -130,6 +78,40 @@ bool InferenceManager::sendLemmas(const std::vector& lemmas) return ret; } +bool InferenceManager::processDtInference(Node conc, + Node exp, + InferId id, + bool asLemma) +{ + Trace("dt-lemma-debug") << "processDtInference : " << conc << " via " << exp + << " by " << id << ", asLemma = " << asLemma + << std::endl; + if (asLemma) + { + // send it as an (explained) lemma + std::vector expv; + if (!exp.isNull() && !exp.isConst()) + { + expv.push_back(exp); + } + if (!lemmaExp(conc, expv, {})) + { + Trace("dt-lemma-debug") << "...duplicate lemma" << std::endl; + return false; + } + d_inferenceLemmas << id; + } + else + { + // assert the internal fact, which has the same issue as above + bool polarity = conc.getKind() != NOT; + TNode atom = polarity ? conc : conc[0]; + assertInternalFact(atom, polarity, exp); + d_inferenceFacts << id; + } + return true; +} + } // namespace datatypes } // namespace theory } // namespace CVC4 diff --git a/src/theory/datatypes/inference_manager.h b/src/theory/datatypes/inference_manager.h index 06c6ff2b5..163a736c4 100644 --- a/src/theory/datatypes/inference_manager.h +++ b/src/theory/datatypes/inference_manager.h @@ -19,71 +19,41 @@ #include "context/cdhashmap.h" #include "expr/node.h" +#include "theory/datatypes/inference.h" #include "theory/inference_manager_buffered.h" +#include "util/statistics_registry.h" namespace CVC4 { namespace theory { namespace datatypes { -/** - * A custom inference class. The main feature of this class is that it - * dynamically decides whether to process itself as a fact or as a lemma, - * based on the mustCommunicateFact method below. - */ -class DatatypesInference : public SimpleTheoryInternalFact -{ - public: - DatatypesInference(Node conc, Node exp, ProofGenerator* pg); - /** - * Must communicate fact method. - * The datatypes decision procedure makes "internal" inferences : - * (1) Unification : C( t1...tn ) = C( s1...sn ) => ti = si - * (2) Label : ~is_C1(t) ... ~is_C{i-1}(t) ~is_C{i+1}(t) ... ~is_Cn(t) => - * is_Ci( t ) - * (3) Instantiate : is_C( t ) => t = C( sel_1( t ) ... sel_n( t ) ) - * (4) collapse selector : S( C( t1...tn ) ) = t' - * (5) collapse term size : size( C( t1...tn ) ) = 1 + size( t1 ) + ... + - * size( tn ) - * (6) non-negative size : 0 <= size(t) - * This method returns true if the fact must be sent out as a lemma. If it - * returns false, then we assert the fact internally. We may need to - * communicate outwards if the conclusions involve other theories. Also - * communicate (6) and OR conclusions. - */ - static bool mustCommunicateFact(Node n, Node exp); - /** - * Process this fact, possibly as a fact or as a lemma, depending on the - * above method. - */ - bool process(TheoryInferenceManager* im, bool asLemma) override; -}; - /** * The datatypes inference manager, which uses the above class for * inferences. */ class InferenceManager : public InferenceManagerBuffered { - typedef context::CDHashSet NodeSet; + friend class DatatypesInference; public: InferenceManager(Theory& t, TheoryState& state, ProofNodeManager* pnm); - ~InferenceManager() {} + ~InferenceManager(); /** * Add pending inference, which may be processed as either a fact or * a lemma based on mustCommunicateFact in DatatypesInference above. * * @param conc The conclusion of the inference * @param exp The explanation of the inference - * @param pg The proof generator who can provide a proof of (conc => exp) * @param forceLemma Whether this inference *must* be processed as a lemma. * Otherwise, it may be processed as a fact or lemma based on * mustCommunicateFact. + * @param i The inference, used for stats and as a hint for constructing + * the proof of (conc => exp) */ void addPendingInference(Node conc, Node exp, - ProofGenerator* pg = nullptr, - bool forceLemma = false); + bool forceLemma = false, + InferId i = InferId::NONE); /** * Process the current lemmas and facts. This is a custom method that can * be seen as overriding the behavior of calling both doPendingLemmas and @@ -96,6 +66,19 @@ class InferenceManager : public InferenceManagerBuffered * Returns true if any lemma was sent. */ bool sendLemmas(const std::vector& lemmas); + + private: + /** + * Process datatype inference. We send a lemma if asLemma is true, and + * send an internal fact if asLemma is false. + */ + bool processDtInference(Node conc, Node exp, InferId id, bool asLemma); + /** + * Counts the number of applications of each type of inference processed by + * the above method as facts and lemmas. + */ + HistogramStat d_inferenceLemmas; + HistogramStat d_inferenceFacts; }; } // namespace datatypes diff --git a/src/theory/datatypes/theory_datatypes.cpp b/src/theory/datatypes/theory_datatypes.cpp index 3266c7ef4..08f3e1626 100644 --- a/src/theory/datatypes/theory_datatypes.cpp +++ b/src/theory/datatypes/theory_datatypes.cpp @@ -300,7 +300,7 @@ void TheoryDatatypes::postCheck(Effort level) //this may not be necessary? //if only one constructor, then this term must be this constructor Node t = utils::mkTester(n, 0, dt); - d_im.addPendingInference(t, d_true); + d_im.addPendingInference(t, d_true, false, InferId::SPLIT); Trace("datatypes-infer") << "DtInfer : 1-cons (full) : " << t << std::endl; }else{ Assert(consIndex != -1 || dt.isSygus()); @@ -667,7 +667,7 @@ void TheoryDatatypes::merge( Node t1, Node t2 ){ for( int i=0; i<(int)cons1.getNumChildren(); i++ ) { if( !areEqual( cons1[i], cons2[i] ) ){ Node eq = cons1[i].eqNode( cons2[i] ); - d_im.addPendingInference(eq, unifEq); + d_im.addPendingInference(eq, unifEq, false, InferId::UNIF); Trace("datatypes-infer") << "DtInfer : cons-inj : " << eq << " by " << unifEq << std::endl; } } @@ -953,7 +953,8 @@ void TheoryDatatypes::addTester( ? NodeManager::currentNM()->mkConst(false) : utils::mkTester(t_arg, testerIndex, dt); Node t_concl_exp = ( nb.getNumChildren() == 1 ) ? nb.getChild( 0 ) : nb; - d_im.addPendingInference(t_concl, t_concl_exp); + d_im.addPendingInference( + t_concl, t_concl_exp, false, InferId::LABEL_EXH); Trace("datatypes-infer") << "DtInfer : label : " << t_concl << " by " << t_concl_exp << std::endl; return; } @@ -1086,7 +1087,7 @@ void TheoryDatatypes::collapseSelector( Node s, Node c ) { Trace("datatypes-infer") << "DtInfer : collapse sel"; //Trace("datatypes-infer") << ( wrong ? " wrong" : ""); Trace("datatypes-infer") << " : " << eq << " by " << peq << std::endl; - d_im.addPendingInference(eq, peq); + d_im.addPendingInference(eq, peq, false, InferId::COLLAPSE_SEL); } } } @@ -1542,7 +1543,7 @@ void TheoryDatatypes::instantiate( EqcInfo* eqc, Node n ){ bool forceLemma = dt[index].hasFiniteExternalArgType(ttn); Trace("datatypes-infer-debug") << "DtInstantiate : " << eqc << " " << eq << " forceLemma = " << forceLemma << std::endl; - d_im.addPendingInference(eq, exp, nullptr, forceLemma); + d_im.addPendingInference(eq, exp, forceLemma, InferId::INST); Trace("datatypes-infer") << "DtInfer : instantiate : " << eq << " by " << exp << std::endl; } @@ -1628,7 +1629,7 @@ void TheoryDatatypes::checkCycles() { Trace("dt-cdt") << std::endl; Node eq = part_out[i][0].eqNode( part_out[i][j] ); Node eqExp = NodeManager::currentNM()->mkAnd(exp); - d_im.addPendingInference(eq, eqExp); + d_im.addPendingInference(eq, eqExp, false, InferId::BISIMILAR); Trace("datatypes-infer") << "DtInfer : cdt-bisimilar : " << eq << " by " << eqExp << std::endl; } } -- 2.30.2