From 18d42bc7f76b5b144ec498d3b4d3e906224e629f Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Thu, 25 Jul 2019 09:43:19 -0500 Subject: [PATCH] Split infer info data structure in strings (#3107) --- src/CMakeLists.txt | 2 + src/theory/strings/infer_info.cpp | 44 ++++++ src/theory/strings/infer_info.h | 153 ++++++++++++++++++++ src/theory/strings/inference_manager.cpp | 13 +- src/theory/strings/inference_manager.h | 13 +- src/theory/strings/theory_strings.cpp | 115 ++++++--------- src/theory/strings/theory_strings.h | 99 ++----------- src/theory/strings/theory_strings_utils.cpp | 43 +++++- src/theory/strings/theory_strings_utils.h | 8 +- 9 files changed, 327 insertions(+), 163 deletions(-) create mode 100644 src/theory/strings/infer_info.cpp create mode 100644 src/theory/strings/infer_info.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 058e848d6..9da9bd603 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -630,6 +630,8 @@ libcvc4_add_sources( theory/shared_terms_database.h theory/sort_inference.cpp theory/sort_inference.h + theory/strings/infer_info.cpp + theory/strings/infer_info.h theory/strings/inference_manager.cpp theory/strings/inference_manager.h theory/strings/normal_form.cpp diff --git a/src/theory/strings/infer_info.cpp b/src/theory/strings/infer_info.cpp new file mode 100644 index 000000000..a20f608a6 --- /dev/null +++ b/src/theory/strings/infer_info.cpp @@ -0,0 +1,44 @@ +/********************* */ +/*! \file infer_info.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of inference information utility. + **/ + +#include "theory/strings/infer_info.h" + +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +std::ostream& operator<<(std::ostream& out, Inference i) +{ + switch (i) + { + case INFER_SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break; + case INFER_SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break; + case INFER_LEN_SPLIT: out << "Len-Split(Len)"; break; + case INFER_LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break; + case INFER_SSPLIT_CST_BINARY: out << "S-Split(CST-P)-binary"; break; + case INFER_SSPLIT_CST: out << "S-Split(CST-P)"; break; + case INFER_SSPLIT_VAR: out << "S-Split(VAR)"; break; + case INFER_FLOOP: out << "F-Loop"; break; + default: out << "?"; break; + } + return out; +} + +InferInfo::InferInfo() : d_id(INFER_NONE), d_index(0), d_rev(false) {} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/infer_info.h b/src/theory/strings/infer_info.h new file mode 100644 index 000000000..9b19eba4b --- /dev/null +++ b/src/theory/strings/infer_info.h @@ -0,0 +1,153 @@ +/********************* */ +/*! \file infer_info.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Inference information utility + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__INFER_INFO_H +#define CVC4__THEORY__STRINGS__INFER_INFO_H + +#include +#include +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** Types of inferences used in the procedure + * + * These are variants of the inference rules in Figures 3-5 of Liang et al. + * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014. + */ +enum Inference +{ + INFER_NONE = 0, + // string split constant propagation, for example: + // x = y, x = "abc", y = y1 ++ "b" ++ y2 + // implies y1 = "a" ++ y1' + INFER_SSPLIT_CST_PROP = 1, + // string split variable propagation, for example: + // x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 ) + // implies x1 = y1 ++ x1' + // This is inspired by Zheng et al CAV 2015. + INFER_SSPLIT_VAR_PROP, + // length split, for example: + // len( x1 ) = len( y1 ) V len( x1 ) != len( y1 ) + // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2. + INFER_LEN_SPLIT, + // length split empty, for example: + // z = "" V z != "" + // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z + INFER_LEN_SPLIT_EMP, + // string split constant binary, for example: + // x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1' + // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2. + // This inference is disabled by default and is enabled by stringBinaryCsp(). + INFER_SSPLIT_CST_BINARY, + // string split constant + // x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != "" + // implies y1 = "c" ++ y1' + // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014. + INFER_SSPLIT_CST, + // string split variable, for example: + // x = y, x = x1 ++ x2, y = y1 ++ y2 + // implies x1 = y1 ++ x1' V y1 = x1 ++ y1' + // This is rule F-Split in Figure 5 of Liang et al CAV 2014. + INFER_SSPLIT_VAR, + // flat form loop, for example: + // x = y, x = x1 ++ z, y = z ++ y2 + // implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1 + // for fresh u, u1, u2. + // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014. + INFER_FLOOP, +}; +std::ostream& operator<<(std::ostream& out, Inference i); + +/** + * Length status, used for indicating the length constraints for Skolems + * introduced by the theory of strings. + */ +enum LengthStatus +{ + // The length of the Skolem is not specified, and should be split on. + LENGTH_SPLIT, + // The length of the Skolem is exactly one. + LENGTH_ONE, + // The length of the Skolem is greater than or equal to one. + LENGTH_GEQ_ONE +}; + +/** + * This data structure encapsulates an inference for strings. This includes + * the form of the inference, as well as the side effects it generates. + */ +class InferInfo +{ + public: + InferInfo(); + /** + * The identifier for the inference, indicating the kind of reasoning used + * for this conclusion. + */ + Inference d_id; + /** The conclusion of the inference */ + Node d_conc; + /** + * The antecedant(s) of the inference, interpreted conjunctively. These are + * literals that currently hold in the equality engine. + */ + std::vector d_ant; + /** + * The "new literal" antecedant(s) of the inference, interpreted + * conjunctively. These are literals that were needed to show the conclusion + * but do not currently hold in the equality engine. + */ + std::vector d_antn; + /** + * A list of new skolems introduced as a result of this inference. They + * are mapped to by a length status, indicating the length constraint that + * can be assumed for them. + */ + std::map > d_new_skolem; + /** + * The pending phase requirements, see InferenceManager::sendPhaseRequirement. + */ + std::map d_pending_phase; + /** + * The index in the normal forms under which this inference is addressing. + * For example, if the inference is inferring x = y from |x|=|y| and + * w ++ x ++ ... = w ++ y ++ ... + * then d_index is 1, since x and y are at index 1 in these concat terms. + */ + unsigned d_index; + /** + * The normal form pair that is cached as a result of this inference. + */ + Node d_nf_pair[2]; + /** for debugging + * + * The base pair of strings d_i/d_j that led to the inference, and whether + * (d_rev) we were processing the normal forms of these strings in reverse + * direction. + */ + Node d_i; + Node d_j; + bool d_rev; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__THEORY_STRINGS_H */ diff --git a/src/theory/strings/inference_manager.cpp b/src/theory/strings/inference_manager.cpp index ec144aa6d..f064a26e2 100644 --- a/src/theory/strings/inference_manager.cpp +++ b/src/theory/strings/inference_manager.cpp @@ -99,8 +99,8 @@ bool InferenceManager::sendInternalInference(std::vector& exp, return true; } -void InferenceManager::sendInference(std::vector& exp, - std::vector& exp_n, +void InferenceManager::sendInference(const std::vector& exp, + const std::vector& exp_n, Node eq, const char* c, bool asLemma) @@ -164,7 +164,7 @@ void InferenceManager::sendInference(std::vector& exp, } } -void InferenceManager::sendInference(std::vector& exp, +void InferenceManager::sendInference(const std::vector& exp, Node eq, const char* c, bool asLemma) @@ -173,6 +173,13 @@ void InferenceManager::sendInference(std::vector& exp, sendInference(exp, exp_n, eq, c, asLemma); } +void InferenceManager::sendInference(const InferInfo& i) +{ + std::stringstream ssi; + ssi << i.d_id; + sendInference(i.d_ant, i.d_antn, i.d_conc, ssi.str().c_str(), true); +} + void InferenceManager::sendLemma(Node ant, Node conc, const char* c) { if (conc.isNull() || conc == d_false) diff --git a/src/theory/strings/inference_manager.h b/src/theory/strings/inference_manager.h index 71651f7df..bb78b4a1a 100644 --- a/src/theory/strings/inference_manager.h +++ b/src/theory/strings/inference_manager.h @@ -24,6 +24,7 @@ #include "context/context.h" #include "expr/node.h" #include "theory/output_channel.h" +#include "theory/strings/infer_info.h" #include "theory/uf/equality_engine.h" namespace CVC4 { @@ -125,16 +126,22 @@ class InferenceManager * If the flag asLemma is true, then this method will send a lemma instead * of an inference whenever applicable. */ - void sendInference(std::vector& exp, - std::vector& exp_n, + void sendInference(const std::vector& exp, + const std::vector& exp_n, Node eq, const char* c, bool asLemma = false); /** same as above, but where exp_n is empty */ - void sendInference(std::vector& exp, + void sendInference(const std::vector& exp, Node eq, const char* c, bool asLemma = false); + /** Send inference + * + * Makes the appropriate call to send inference based on the infer info + * data structure (see sendInference documentation above). + */ + void sendInference(const InferInfo& i); /** Send split * * This requests that ( a = b V a != b ) is sent on the output channel as a diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 5e7061d22..952d82c21 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -39,23 +39,6 @@ namespace CVC4 { namespace theory { namespace strings { -std::ostream& operator<<(std::ostream& out, Inference i) -{ - switch (i) - { - case INFER_SSPLIT_CST_PROP: out << "S-Split(CST-P)-prop"; break; - case INFER_SSPLIT_VAR_PROP: out << "S-Split(VAR)-prop"; break; - case INFER_LEN_SPLIT: out << "Len-Split(Len)"; break; - case INFER_LEN_SPLIT_EMP: out << "Len-Split(Emp)"; break; - case INFER_SSPLIT_CST_BINARY: out << "S-Split(CST-P)-binary"; break; - case INFER_SSPLIT_CST: out << "S-Split(CST-P)"; break; - case INFER_SSPLIT_VAR: out << "S-Split(VAR)"; break; - case INFER_FLOOP: out << "F-Loop"; break; - default: out << "?"; break; - } - return out; -} - std::ostream& operator<<(std::ostream& out, InferStep s) { switch (s) @@ -2947,25 +2930,24 @@ void TheoryStrings::processNEqc(std::vector& normal_forms) set_use_index = true; } } + doInferInfo(pinfer[use_index]); +} + +void TheoryStrings::doInferInfo(const InferInfo& ii) +{ // send the inference - if (!pinfer[use_index].d_nf_pair[0].isNull()) + if (!ii.d_nf_pair[0].isNull()) { - Assert(!pinfer[use_index].d_nf_pair[1].isNull()); - addNormalFormPair(pinfer[use_index].d_nf_pair[0], - pinfer[use_index].d_nf_pair[1]); - } - std::stringstream ssi; - ssi << pinfer[use_index].d_id; - d_im.sendInference(pinfer[use_index].d_ant, - pinfer[use_index].d_antn, - pinfer[use_index].d_conc, - ssi.str().c_str(), - pinfer[use_index].sendAsLemma()); + Assert(!ii.d_nf_pair[1].isNull()); + addNormalFormPair(ii.d_nf_pair[0], ii.d_nf_pair[1]); + } + // send the inference + d_im.sendInference(ii); // Register the new skolems from this inference. We register them here // (lazily), since the code above has now decided to use the inference // at use_index that involves them. for (const std::pair >& sks : - pinfer[use_index].d_new_skolem) + ii.d_new_skolem) { for (const Node& n : sks.second) { @@ -2974,10 +2956,6 @@ void TheoryStrings::processNEqc(std::vector& normal_forms) } } -bool TheoryStrings::InferInfo::sendAsLemma() { - return true; -} - void TheoryStrings::processSimpleNEq(NormalForm& nfi, NormalForm& nfj, unsigned& index, @@ -4110,50 +4088,44 @@ Node TheoryStrings::mkLength( Node t ) { return Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t ) ); } -Node TheoryStrings::mkExplain( std::vector< Node >& a ) { +Node TheoryStrings::mkExplain(const std::vector& a) +{ std::vector< Node > an; return mkExplain( a, an ); } -Node TheoryStrings::mkExplain( std::vector< Node >& a, std::vector< Node >& an ) { +Node TheoryStrings::mkExplain(const std::vector& a, + const std::vector& an) +{ std::vector< TNode > antec_exp; - for( unsigned i=0; i aconj; + for (const Node& ac : a) + { + utils::flattenOp(AND, ac, aconj); + } + for (const Node& apc : aconj) + { + Assert(apc.getKind() != AND); + Debug("strings-explain") << "Add to explanation " << apc << std::endl; + if (apc.getKind() == NOT && apc[0].getKind() == EQUAL) + { + Assert(hasTerm(apc[0][0])); + Assert(hasTerm(apc[0][1])); + // ensure that we are ready to explain the disequality + AlwaysAssert(d_equalityEngine.areDisequal(apc[0][0], apc[0][1], true)); } + Assert(apc.getKind() != EQUAL || d_equalityEngine.areEqual(apc[0], apc[1])); + // now, explain + explain(apc, antec_exp); } - for( unsigned i=0; i& a, std::vector< Node >& an ) } else { ant = NodeManager::currentNM()->mkNode( kind::AND, antec_exp ); } - //ant = Rewriter::rewrite( ant ); return ant; } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index a83a6ad12..e3bb2e719 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -24,6 +24,7 @@ #include "expr/attribute.h" #include "expr/node_trie.h" #include "theory/decision_manager.h" +#include "theory/strings/infer_info.h" #include "theory/strings/inference_manager.h" #include "theory/strings/normal_form.h" #include "theory/strings/regexp_elim.h" @@ -46,55 +47,6 @@ namespace strings { * */ -/** Types of inferences used in the procedure - * - * These are variants of the inference rules in Figures 3-5 of Liang et al. - * "A DPLL(T) Solver for a Theory of Strings and Regular Expressions", CAV 2014. - */ -enum Inference -{ - INFER_NONE, - // string split constant propagation, for example: - // x = y, x = "abc", y = y1 ++ "b" ++ y2 - // implies y1 = "a" ++ y1' - INFER_SSPLIT_CST_PROP, - // string split variable propagation, for example: - // x = y, x = x1 ++ x2, y = y1 ++ y2, len( x1 ) >= len( y1 ) - // implies x1 = y1 ++ x1' - // This is inspired by Zheng et al CAV 2015. - INFER_SSPLIT_VAR_PROP, - // length split, for example: - // len( x1 ) = len( y1 ) V len( x1 ) != len( y1 ) - // This is inferred when e.g. x = y, x = x1 ++ x2, y = y1 ++ y2. - INFER_LEN_SPLIT, - // length split empty, for example: - // z = "" V z != "" - // This is inferred when, e.g. x = y, x = z ++ x1, y = y1 ++ z - INFER_LEN_SPLIT_EMP, - // string split constant binary, for example: - // x1 = "aaaa" ++ x1' V "aaaa" = x1 ++ x1' - // This is inferred when, e.g. x = y, x = x1 ++ x2, y = "aaaaaaaa" ++ y2. - // This inference is disabled by default and is enabled by stringBinaryCsp(). - INFER_SSPLIT_CST_BINARY, - // string split constant - // x = y, x = "c" ++ x2, y = y1 ++ y2, y1 != "" - // implies y1 = "c" ++ y1' - // This is a special case of F-Split in Figure 5 of Liang et al CAV 2014. - INFER_SSPLIT_CST, - // string split variable, for example: - // x = y, x = x1 ++ x2, y = y1 ++ y2 - // implies x1 = y1 ++ x1' V y1 = x1 ++ y1' - // This is rule F-Split in Figure 5 of Liang et al CAV 2014. - INFER_SSPLIT_VAR, - // flat form loop, for example: - // x = y, x = x1 ++ z, y = z ++ y2 - // implies z = u2 ++ u1, u in ( u1 ++ u2 )*, x1 = u2 ++ u, y2 = u ++ u1 - // for fresh u, u1, u2. - // This is the rule F-Loop from Figure 5 of Liang et al CAV 2014. - INFER_FLOOP, -}; -std::ostream& operator<<(std::ostream& out, Inference i); - /** inference steps * * Corresponds to a step in the overall strategy of the strings solver. For @@ -448,13 +400,6 @@ private: }; private: - /** Length status, used for the registerLength function below */ - enum LengthStatus - { - LENGTH_SPLIT, - LENGTH_ONE, - LENGTH_GEQ_ONE - }; /** register length * @@ -473,30 +418,6 @@ private: */ void registerLength(Node n, LengthStatus s); - //------------------------- candidate inferences - class InferInfo - { - public: - /** for debugging - * - * The base pair of strings d_i/d_j that led to the inference, and whether - * (d_rev) we were processing the normal forms of these strings in reverse - * direction. - */ - Node d_i; - Node d_j; - bool d_rev; - std::vector d_ant; - std::vector d_antn; - std::map > d_new_skolem; - Node d_conc; - Inference d_id; - std::map d_pending_phase; - unsigned d_index; - Node d_nf_pair[2]; - bool sendAsLemma(); - }; - //------------------------- end candidate inferences /** cache of all skolems */ SkolemCache d_sk_cache; @@ -738,6 +659,12 @@ private: * of atom, including calls to registerTerm. */ void assertPendingFact(Node atom, bool polarity, Node exp); + /** + * This processes the infer info ii as an inference. In more detail, it calls + * the inference manager to process the inference, it introduces Skolems, and + * updates the set of normal form pairs. + */ + void doInferInfo(const InferInfo& ii); /** * Adds equality a = b to the vector exp if a and b are distinct terms. It * must be the case that areEqual( a, b ) holds in this context. @@ -778,9 +705,15 @@ private: inline Node mkConcat(const std::vector& c); inline Node mkLength(Node n); - /** mkExplain **/ - Node mkExplain(std::vector& a); - Node mkExplain(std::vector& a, std::vector& an); + /** make explanation + * + * This returns a node corresponding to the explanation of formulas in a, + * interpreted conjunctively. The returned node is a conjunction of literals + * that have been asserted to the equality engine. + */ + Node mkExplain(const std::vector& a); + /** Same as above, but the new literals an are append to the result */ + Node mkExplain(const std::vector& a, const std::vector& an); protected: diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index bb2054b0e..c8951e10a 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -23,7 +23,7 @@ namespace theory { namespace strings { namespace utils { -Node mkAnd(std::vector& a) +Node mkAnd(const std::vector& a) { std::vector au; for (const Node& ai : a) @@ -44,6 +44,47 @@ Node mkAnd(std::vector& a) return NodeManager::currentNM()->mkNode(AND, au); } +void flattenOp(Kind k, Node n, std::vector& conj) +{ + if (n.getKind() != k) + { + // easy case, just add to conj if non-duplicate + if (std::find(conj.begin(), conj.end(), n) == conj.end()) + { + conj.push_back(n); + } + return; + } + // otherwise, traverse + std::unordered_set visited; + std::unordered_set::iterator it; + std::vector visit; + TNode cur; + visit.push_back(n); + do + { + cur = visit.back(); + visit.pop_back(); + it = visited.find(cur); + + if (it == visited.end()) + { + visited.insert(cur); + if (cur.getKind() == k) + { + for (const Node& cn : cur) + { + visit.push_back(cn); + } + } + else if (std::find(conj.begin(), conj.end(), cur) == conj.end()) + { + conj.push_back(cur); + } + } + } while (!visit.empty()); +} + void getConcat(Node n, std::vector& c) { Kind k = n.getKind(); diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 296f4e46e..69400d005 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -32,7 +32,13 @@ namespace utils { * Make the conjunction of nodes in a. Removes duplicate conjuncts, returns * true if a is empty, and a single literal if a has size 1. */ -Node mkAnd(std::vector& a); +Node mkAnd(const std::vector& a); + +/** + * Adds all (non-duplicate) children of applications from n to conj. For + * example, given ( ( A B) C A), we add { A, B, C } to conj. + */ +void flattenOp(Kind k, Node n, std::vector& conj); /** * Gets the "vector form" of term n, adds it to c. -- 2.30.2