From b1c9169af9e0a9d853c9be058d05ac98fc11c336 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 11 Mar 2022 21:29:23 -0600 Subject: [PATCH] Improvements for sygus query generation (#8224) Makes the two sygus query generators have a common base class, which makes it so that they both can dump queries. Also fixes the expression miner to use user-level sygus terms, otherwise we can make queries using invalid internal terms, e.g. SEQ_NTH_OOB skolem. The majority of the diff in this PR is moving query_generator to query_generator_sample_sat, and adding a basic version of query generation. --- src/CMakeLists.txt | 2 + src/options/quantifiers_options.toml | 17 +- src/theory/quantifiers/expr_miner.cpp | 6 +- src/theory/quantifiers/expr_miner_manager.cpp | 44 +- src/theory/quantifiers/expr_miner_manager.h | 7 +- src/theory/quantifiers/query_generator.cpp | 386 ++--------------- src/theory/quantifiers/query_generator.h | 94 +---- .../query_generator_sample_sat.cpp | 392 ++++++++++++++++++ .../quantifiers/query_generator_sample_sat.h | 115 +++++ .../quantifiers/query_generator_unsat.cpp | 16 +- .../quantifiers/query_generator_unsat.h | 9 +- 11 files changed, 600 insertions(+), 488 deletions(-) create mode 100644 src/theory/quantifiers/query_generator_sample_sat.cpp create mode 100644 src/theory/quantifiers/query_generator_sample_sat.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e1822991b..b5f34a246 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -857,6 +857,8 @@ libcvc5_add_sources( theory/quantifiers/quantifiers_statistics.h theory/quantifiers/query_generator.cpp theory/quantifiers/query_generator.h + theory/quantifiers/query_generator_sample_sat.cpp + theory/quantifiers/query_generator_sample_sat.h theory/quantifiers/query_generator_unsat.cpp theory/quantifiers/query_generator_unsat.h theory/quantifiers/relevant_domain.cpp diff --git a/src/options/quantifiers_options.toml b/src/options/quantifiers_options.toml index dce3bfa69..6ee27dfb0 100644 --- a/src/options/quantifiers_options.toml +++ b/src/options/quantifiers_options.toml @@ -1474,9 +1474,12 @@ name = "Quantifiers" [[option.mode.NONE]] name = "none" help = "Do not generate queries with SyGuS." -[[option.mode.SAT]] - name = "sat" - help = "Generate interesting SAT queries, for e.g. soundness testing." +[[option.mode.BASIC]] + name = "basic" + help = "Generate all queries using SyGuS enumeration of the given grammar" +[[option.mode.SAMPLE_SAT]] + name = "sample-sat" + help = "Generate interesting SAT queries based on sampling, for e.g. soundness testing." [[option.mode.UNSAT]] name = "unsat" help = "Generate interesting UNSAT queries, for e.g. proof testing." @@ -1489,14 +1492,6 @@ name = "Quantifiers" default = "5" help = "number of points that we allow to be equal for enumerating satisfiable queries with sygus-query-gen" -[[option]] - name = "sygusQueryGenCheck" - category = "regular" - long = "sygus-query-gen-check" - type = "bool" - default = "true" - help = "use interesting satisfiability queries to check soundness of cvc5" - [[option]] name = "sygusQueryGenDumpFiles" category = "regular" diff --git a/src/theory/quantifiers/expr_miner.cpp b/src/theory/quantifiers/expr_miner.cpp index fa4ff7007..eefe447bd 100644 --- a/src/theory/quantifiers/expr_miner.cpp +++ b/src/theory/quantifiers/expr_miner.cpp @@ -15,6 +15,8 @@ #include "theory/quantifiers/expr_miner.h" +#include + #include "expr/skolem_manager.h" #include "options/quantifiers_options.h" #include "theory/quantifiers/term_util.h" @@ -41,7 +43,9 @@ Node ExprMiner::convertToSkolem(Node n) SkolemManager* sm = nm->getSkolemManager(); for (const Node& v : d_vars) { - Node sk = sm->mkDummySkolem("rrck", v.getType()); + std::stringstream ss; + ss << "k_" << v; + Node sk = sm->mkDummySkolem(ss.str(), v.getType()); d_skolems.push_back(sk); d_fv_to_skolem[v] = sk; } diff --git a/src/theory/quantifiers/expr_miner_manager.cpp b/src/theory/quantifiers/expr_miner_manager.cpp index d5fa7fbdf..fe3b4cc8e 100644 --- a/src/theory/quantifiers/expr_miner_manager.cpp +++ b/src/theory/quantifiers/expr_miner_manager.cpp @@ -17,6 +17,9 @@ #include "options/quantifiers_options.h" #include "smt/env.h" +#include "theory/datatypes/sygus_datatype_utils.h" +#include "theory/quantifiers/query_generator_sample_sat.h" +#include "theory/quantifiers/query_generator_unsat.h" namespace cvc5 { namespace theory { @@ -25,7 +28,6 @@ namespace quantifiers { ExpressionMinerManager::ExpressionMinerManager(Env& env) : EnvObj(env), d_doRewSynth(false), - d_doQueryGen(false), d_doFilterLogicalStrength(false), d_use_sygus_type(false), d_tds(nullptr), @@ -33,6 +35,7 @@ ExpressionMinerManager::ExpressionMinerManager(Env& env) options().quantifiers.sygusRewSynthCheck, options().quantifiers.sygusRewSynthAccel, false), + d_qg(nullptr), d_sols(env), d_sampler(env) { @@ -44,7 +47,7 @@ void ExpressionMinerManager::initialize(const std::vector& vars, bool unique_type_ids) { d_doRewSynth = false; - d_doQueryGen = false; + d_qg = nullptr; d_doFilterLogicalStrength = false; d_sygus_fun = Node::null(); d_use_sygus_type = false; @@ -59,7 +62,7 @@ void ExpressionMinerManager::initializeSygus(TermDbSygus* tds, bool useSygusType) { d_doRewSynth = false; - d_doQueryGen = false; + d_qg = nullptr; d_doFilterLogicalStrength = false; d_sygus_fun = f; d_use_sygus_type = useSygusType; @@ -120,16 +123,15 @@ void ExpressionMinerManager::enableRewriteRuleSynth() void ExpressionMinerManager::enableQueryGeneration(unsigned deqThresh) { - if (d_doQueryGen) + if (d_qg != nullptr) { // already enabled return; } - d_doQueryGen = true; options::SygusQueryGenMode mode = options().quantifiers.sygusQueryGen; std::vector vars; d_sampler.getVariables(vars); - if (mode == options::SygusQueryGenMode::SAT) + if (mode == options::SygusQueryGenMode::SAMPLE_SAT) { // must also enable rewrite rule synthesis if (!d_doRewSynth) @@ -138,16 +140,20 @@ void ExpressionMinerManager::enableQueryGeneration(unsigned deqThresh) enableRewriteRuleSynth(); d_crd.setSilent(true); } - d_qg = std::make_unique(d_env); - // initialize the query generator - d_qg->initialize(vars, &d_sampler); - d_qg->setThreshold(deqThresh); + d_qg = std::make_unique(d_env, deqThresh); } else if (mode == options::SygusQueryGenMode::UNSAT) { - d_qgu = std::make_unique(d_env); + d_qg = std::make_unique(d_env); + } + else if (mode == options::SygusQueryGenMode::BASIC) + { + d_qg = std::make_unique(d_env); + } + if (d_qg != nullptr) + { // initialize the query generator - d_qgu->initialize(vars, &d_sampler); + d_qg->initialize(vars, &d_sampler); } } @@ -177,7 +183,7 @@ bool ExpressionMinerManager::addTerm(Node sol, Node solb = sol; if (d_use_sygus_type) { - solb = d_tds->sygusToBuiltin(sol); + solb = datatypes::utils::sygusToBuiltin(sol, true); } // add to the candidate rewrite rule database @@ -190,17 +196,9 @@ bool ExpressionMinerManager::addTerm(Node sol, } // a unique term, let's try the query generator - if (ret && d_doQueryGen) + if (ret && d_qg != nullptr) { - options::SygusQueryGenMode mode = options().quantifiers.sygusQueryGen; - if (mode == options::SygusQueryGenMode::SAT) - { - d_qg->addTerm(solb, out); - } - else if (mode == options::SygusQueryGenMode::UNSAT) - { - d_qgu->addTerm(solb, out); - } + d_qg->addTerm(solb, out); } // filter based on logical strength diff --git a/src/theory/quantifiers/expr_miner_manager.h b/src/theory/quantifiers/expr_miner_manager.h index 9d0352408..463309e4d 100644 --- a/src/theory/quantifiers/expr_miner_manager.h +++ b/src/theory/quantifiers/expr_miner_manager.h @@ -22,7 +22,6 @@ #include "smt/env_obj.h" #include "theory/quantifiers/candidate_rewrite_database.h" #include "theory/quantifiers/query_generator.h" -#include "theory/quantifiers/query_generator_unsat.h" #include "theory/quantifiers/solution_filter.h" #include "theory/quantifiers/sygus_sampler.h" @@ -95,8 +94,6 @@ class ExpressionMinerManager : protected EnvObj void enableFilterWeakSolutions(); /** whether we are doing rewrite synthesis */ bool d_doRewSynth; - /** whether we are doing query generation */ - bool d_doQueryGen; /** whether we are filtering solutions based on logical strength */ bool d_doFilterLogicalStrength; /** the sygus function passed to initializeSygus, if any */ @@ -107,10 +104,8 @@ class ExpressionMinerManager : protected EnvObj TermDbSygus* d_tds; /** candidate rewrite database */ CandidateRewriteDatabase d_crd; - /** query generator */ + /** The query generator we are using */ std::unique_ptr d_qg; - /** query generator */ - std::unique_ptr d_qgu; /** solution filter based on logical strength */ SolutionFilterStrength d_sols; /** sygus sampler object */ diff --git a/src/theory/quantifiers/query_generator.cpp b/src/theory/quantifiers/query_generator.cpp index d9e6954f6..535142227 100644 --- a/src/theory/quantifiers/query_generator.cpp +++ b/src/theory/quantifiers/query_generator.cpp @@ -21,8 +21,8 @@ #include "options/quantifiers_options.h" #include "smt/env.h" +#include "smt/logic_exception.h" #include "smt/print_benchmark.h" -#include "util/random.h" using namespace std; using namespace cvc5::kind; @@ -39,178 +39,25 @@ void QueryGenerator::initialize(const std::vector& vars, SygusSampler* ss) ExprMiner::initialize(vars, ss); } -void QueryGenerator::setThreshold(unsigned deqThresh) +void QueryGenerator::dumpQuery(Node qy, const Result& r) { - d_deqThresh = deqThresh; -} - -bool QueryGenerator::addTerm(Node n, std::ostream& out) -{ - Node nn = n.getKind() == NOT ? n[0] : n; - if (d_terms.find(nn) != d_terms.end()) - { - return false; - } - d_terms.insert(nn); - - Trace("sygus-qgen") << "QueryGenerator::addTerm : " << n << std::endl; - unsigned npts = d_sampler->getNumSamplePoints(); - TypeNode tn = n.getType(); - // TODO : as an optimization, use a shared lazy trie? - - // the queries we generate on this round - std::vector queries; - // For each query in the above vector, this stores the indices of the points - // for which that query evaluated to true on. - std::vector> queriesPtTrue; - // the sample point indices for which the above queries are true - std::unordered_set indices; - - // collect predicate queries (if n is Boolean) - if (tn.isBoolean()) - { - std::map> ev_to_pt; - unsigned index = 0; - // the number of {true,false} for which the #points evaluated to that - // constant is greater than the threshold. - unsigned threshCount = 0; - while (index < npts && threshCount < 2) - { - Node v = d_sampler->evaluate(nn, index); - // it may not evaluate, in which case we ignore the point - if (v.isConst()) - { - ev_to_pt[v].push_back(index); - if (ev_to_pt[v].size() == d_deqThresh + 1) - { - threshCount++; - } - } - index++; - } - if (threshCount < 2) - { - for (const auto& etp : ev_to_pt) - { - if (etp.second.size() < d_deqThresh) - { - indices.insert(etp.second.begin(), etp.second.end()); - Node qy = nn; - Assert(etp.first.isConst()); - if (!etp.first.getConst()) - { - qy = qy.negate(); - } - queries.push_back(qy); - queriesPtTrue.push_back(etp.second); - } - } - } - } - - // collect equality queries - findQueries(nn, queries, queriesPtTrue); - Assert(queries.size() == queriesPtTrue.size()); - if (queries.empty()) - { - return true; - } - Trace("sygus-qgen-debug") - << "query: Check " << queries.size() << " queries..." << std::endl; - // literal queries - for (unsigned i = 0, nqueries = queries.size(); i < nqueries; i++) - { - Node qy = queries[i]; - std::vector& tIndices = queriesPtTrue[i]; - // we have an interesting query - Trace("sygus-qgen-debug") - << "; " << tIndices.size() << "/" << npts << std::endl; - AlwaysAssert(!tIndices.empty()); - checkQuery(qy, tIndices[0], out); - // add information - for (unsigned& ti : tIndices) - { - d_ptToQueries[ti].push_back(qy); - d_qysToPoints[qy].push_back(ti); - indices.insert(ti); - } - } - // for each new index, we may have a new conjunctive query - NodeManager* nm = NodeManager::currentNM(); - for (const unsigned& i : indices) - { - std::vector& qsi = d_ptToQueries[i]; - if (qsi.size() > 1) - { - // take two random queries - size_t rindex = Random::getRandom().pick(0, qsi.size() - 1); - size_t rindex2 = Random::getRandom().pick(0, qsi.size() - 2); - if (rindex2 >= rindex) - { - rindex2 = rindex2 + 1; - } - Node qy = nm->mkNode(AND, qsi[rindex], qsi[rindex2]); - checkQuery(qy, i, out); - } - } - Trace("sygus-qgen-check") << "...finished." << std::endl; - return true; -} - -void QueryGenerator::checkQuery(Node qy, unsigned spIndex, std::ostream& out) -{ - if (d_allQueries.find(qy) != d_allQueries.end()) + d_queryCount++; + // return if we should not dump the query based on the options + if (options().quantifiers.sygusQueryGenDumpFiles + == options::SygusQueryDumpFilesMode::NONE) { return; } - d_allQueries.insert(qy); - out << "(query " << qy << ")" << std::endl; - // external query if (options().quantifiers.sygusQueryGenDumpFiles - == options::SygusQueryDumpFilesMode::ALL) + == options::SygusQueryDumpFilesMode::UNSOLVED) { - dumpQuery(qy); - } - - if (options().quantifiers.sygusQueryGenCheck) - { - Trace("sygus-qgen-check") << " query: check " << qy << "..." << std::endl; - // make the satisfiability query - std::unique_ptr queryChecker; - initializeChecker(queryChecker, qy); - Result r = queryChecker->checkSat(); - Trace("sygus-qgen-check") << " query: ...got : " << r << std::endl; - if (r.asSatisfiabilityResult().isSat() == Result::UNSAT) - { - std::stringstream ss; - ss << "--sygus-rr-query-gen detected unsoundness in cvc5 on input " << qy - << "!" << std::endl; - ss << "This query has a model : " << std::endl; - std::vector pt; - d_sampler->getSamplePoint(spIndex, pt); - Assert(pt.size() == d_vars.size()); - for (unsigned i = 0, size = pt.size(); i < size; i++) - { - ss << " " << d_vars[i] << " -> " << pt[i] << std::endl; - } - ss << "but cvc5 answered unsat!" << std::endl; - AlwaysAssert(false) << ss.str(); - } - if (options().quantifiers.sygusQueryGenDumpFiles - == options::SygusQueryDumpFilesMode::UNSOLVED) + if (r.asSatisfiabilityResult().isSat() == Result::SAT + || r.asSatisfiabilityResult().isSat() == Result::UNSAT) { - if (r.asSatisfiabilityResult().isSat() != Result::SAT) - { - dumpQuery(qy); - } + return; } } - d_queryCount++; -} - -void QueryGenerator::dumpQuery(Node qy) -{ Node kqy = convertToSkolem(qy); // Print the query to to queryN.smt2 std::stringstream fname; @@ -221,203 +68,28 @@ void QueryGenerator::dumpQuery(Node qy) fs.close(); } -void QueryGenerator::findQueries( - Node n, - std::vector& queries, - std::vector>& queriesPtTrue) +void QueryGenerator::ensureBoolean(const Node& n) const { - // At a high level, this method traverses the LazyTrie for the type of n - // and tries to find paths to leafs that contain terms n' such that n = n' - // or n != n' is an interesting query, i.e. satisfied for a small number of - // points. - TypeNode tn = n.getType(); - LazyTrie* lt = &d_qgtTrie[tn]; - // These vectors are the set of indices of sample points for which the current - // node we are considering are { equal, disequal } from n. - std::vector eqIndex[2]; - Trace("sygus-qgen-debug") << "Compute queries for " << n << "...\n"; - - LazyTrieEvaluator* ev = d_sampler; - unsigned ntotal = d_sampler->getNumSamplePoints(); - unsigned index = 0; - bool exact = true; - bool pushEq[2] = {false, false}; - bool pre = true; - // The following parallel vectors describe the state of the locations in the - // trie we are currently visiting. - // Reference to the location in the trie - std::vector visitTr; - // The index of the sample point we are testing - std::vector currIndex; - // Whether the path to this location exactly matches the evaluation of n - std::vector currExact; - // Whether we are adding to the points that are { equal, disequal } by - // traversing to this location. - std::vector pushIndex[2]; - // Whether we are in a pre-traversal for this location. - std::vector preVisit; - visitTr.push_back(lt); - currIndex.push_back(0); - currExact.push_back(true); - pushIndex[0].push_back(false); - pushIndex[1].push_back(false); - preVisit.push_back(true); - do + if (!n.getType().isBoolean()) { - lt = visitTr.back(); - index = currIndex.back(); - exact = currExact.back(); - for (unsigned r = 0; r < 2; r++) - { - pushEq[r] = pushIndex[r].back(); - } - pre = preVisit.back(); - if (!pre) - { - visitTr.pop_back(); - currIndex.pop_back(); - currExact.pop_back(); - preVisit.pop_back(); - // clean up the indices of points that are { equal, disequal } - for (unsigned r = 0; r < 2; r++) - { - if (pushEq[r]) - { - eqIndex[r].pop_back(); - } - pushIndex[r].pop_back(); - } - } - else - { - preVisit[preVisit.size() - 1] = false; - // add to the indices of points that are { equal, disequal } - for (unsigned r = 0; r < 2; r++) - { - if (pushEq[r]) - { - eqIndex[r].push_back(index - 1); - } - } - int eqAllow = d_deqThresh - eqIndex[0].size(); - int deqAllow = d_deqThresh - eqIndex[1].size(); - Trace("sygus-qgen-debug") - << "Find queries " << n << " " << index << "/" << ntotal - << ", deq/eq allow = " << deqAllow << "/" << eqAllow - << ", exact = " << exact << std::endl; - if (index == ntotal) - { - if (exact) - { - // add to the trie - lt->d_lazy_child = n; - } - else - { - Node nAlmostEq = lt->d_lazy_child; - // if made it here, we still should have either a equality or - // a disequality that is allowed. - Assert(deqAllow >= 0 || eqAllow >= 0); - Node query = n.eqNode(nAlmostEq); - std::vector tIndices; - if (eqAllow >= 0) - { - tIndices.insert( - tIndices.end(), eqIndex[0].begin(), eqIndex[0].end()); - } - else if (deqAllow >= 0) - { - query = query.negate(); - tIndices.insert( - tIndices.end(), eqIndex[1].begin(), eqIndex[1].end()); - } - AlwaysAssert(tIndices.size() <= d_deqThresh); - if (!tIndices.empty()) - { - queries.push_back(query); - queriesPtTrue.push_back(tIndices); - } - } - } - else - { - if (!lt->d_lazy_child.isNull()) - { - // if there is a lazy child here, push - Node e_lc = ev->evaluate(lt->d_lazy_child, index); - // store at next level - lt->d_children[e_lc].d_lazy_child = lt->d_lazy_child; - // replace - lt->d_lazy_child = Node::null(); - } - // compute - Node e_this = ev->evaluate(n, index); + std::stringstream ss; + ss << "SyGuS query generation in the current mode requires the grammar to " + "generate Boolean terms only"; + throw LogicException(ss.str()); + } +} - if (deqAllow >= 0) - { - // recursing on disequal points - deqAllow--; - // if there is use continuing - if (deqAllow >= 0 || eqAllow >= 0) - { - for (std::pair& ltc : lt->d_children) - { - if (ltc.first != e_this) - { - visitTr.push_back(<c.second); - currIndex.push_back(index + 1); - currExact.push_back(false); - pushIndex[0].push_back(false); - pushIndex[1].push_back(true); - preVisit.push_back(true); - } - } - } - deqAllow++; - } - bool pushEqNext = false; - if (eqAllow >= 0) - { - // below, we try recursing (if at all) on equal nodes. - eqAllow--; - pushEqNext = true; - } - // if we are on the exact path of n - if (exact) - { - if (lt->d_children.empty()) - { - // if no one has been here before, we are done - lt->d_lazy_child = n; - } - else - { - // otherwise, we recurse on the equal point - visitTr.push_back(&(lt->d_children[e_this])); - currIndex.push_back(index + 1); - currExact.push_back(true); - pushIndex[0].push_back(pushEqNext); - pushIndex[1].push_back(false); - preVisit.push_back(true); - } - } - else if (deqAllow >= 0 || eqAllow >= 0) - { - // recurse on the equal point if it exists - std::map::iterator iteq = lt->d_children.find(e_this); - if (iteq != lt->d_children.end()) - { - visitTr.push_back(&(iteq->second)); - currIndex.push_back(index + 1); - currExact.push_back(false); - pushIndex[0].push_back(pushEqNext); - pushIndex[1].push_back(false); - preVisit.push_back(true); - } - } - } - } - } while (!visitTr.empty()); +QueryGeneratorBasic::QueryGeneratorBasic(Env& env) : QueryGenerator(env) {} + +bool QueryGeneratorBasic::addTerm(Node n, std::ostream& out) +{ + ensureBoolean(n); + out << "(query " << n << ")" << std::endl; + std::unique_ptr queryChecker; + initializeChecker(queryChecker, n); + Result r = queryChecker->checkSat(); + dumpQuery(n, r); + return true; } } // namespace quantifiers diff --git a/src/theory/quantifiers/query_generator.h b/src/theory/quantifiers/query_generator.h index 38c99b6f9..984a5b18f 100644 --- a/src/theory/quantifiers/query_generator.h +++ b/src/theory/quantifiers/query_generator.h @@ -19,12 +19,8 @@ #ifndef CVC5__THEORY__QUANTIFIERS__QUERY_GENERATOR_H #define CVC5__THEORY__QUANTIFIERS__QUERY_GENERATOR_H -#include -#include #include "expr/node.h" #include "theory/quantifiers/expr_miner.h" -#include "theory/quantifiers/lazy_trie.h" -#include "theory/quantifiers/sygus_sampler.h" namespace cvc5 { namespace theory { @@ -32,22 +28,8 @@ namespace quantifiers { /** QueryGenerator * - * This module is used for finding satisfiable queries that are maximally - * likely to trigger an unsound response in an SMT solver. These queries are - * mined from a stream of enumerated expressions. We judge likelihood of - * triggering unsoundness by the frequency at which the query is satisfied. - * - * In detail, given a stream of expressions t_1, ..., t_{n-1}, upon generating - * term t_n, we consider a query (not) t_n = t_i to be an interesting query - * if it is satisfied by at most D points, where D is a predefined threshold - * given by the sygusQueryGenThresh option. If t_n has type Bool, we - * additionally consider the case where t_n is satisfied (or not satisfied) by - * fewer than D points. - * - * In addition to generating single literal queries, this module also generates - * conjunctive queries, for instance, by remembering that literals L1 and L2 - * were both satisfied by the same point, and thus L1 ^ L2 is an interesting - * query as well. + * This is the base class for modules that print queries based on sygus + * enumeration. */ class QueryGenerator : public ExprMiner { @@ -57,63 +39,33 @@ class QueryGenerator : public ExprMiner /** initialize */ void initialize(const std::vector& vars, SygusSampler* ss = nullptr) override; - /** - * Add term to this module. This may trigger the printing and/or checking of - * new queries. - */ - bool addTerm(Node n, std::ostream& out) override; - /** - * Set the threshold value. This is the maximal number of sample points that - * each query we generate is allowed to be satisfied by. - */ - void setThreshold(unsigned deqThresh); - private: - /** cache of all terms registered to this generator */ - std::unordered_set d_terms; - /** the threshold used by this module for maximum number of sat points */ - unsigned d_deqThresh; - /** - * For each type, a lazy trie storing the evaluation of all added terms on - * sample points. - */ - std::map d_qgtTrie; + protected: /** total number of queries generated by this class */ - unsigned d_queryCount; - /** find queries - * - * This function traverses the lazy trie for the type of n, finding equality - * and disequality queries between n and other terms in the trie. The argument - * queries collects the newly generated queries, and the argument - * queriesPtTrue collects the indices of points that each query was satisfied - * by (these indices are the indices of the points in the sampler used by this - * class). - */ - void findQueries(Node n, - std::vector& queries, - std::vector>& queriesPtTrue); - /** - * Maps the index of each sample point to the list of queries that it - * satisfies, and that were generated by the above function. This map is used - * for generating conjunctive queries. - */ - std::map> d_ptToQueries; + size_t d_queryCount; /** - * Map from queries to the indices of the points that satisfy them. - */ - std::map> d_qysToPoints; - /** Set of all queries generated */ - std::unordered_set d_allQueries; - /** - * Check query qy, which is satisfied by (at least) sample point spIndex, - * using a separate copy of the SMT engine. Throws an exception if qy is - * reported to be unsatisfiable. + * Dumps query qy to the a file queryN.smt2 for the current counter N */ - void checkQuery(Node qy, unsigned spIndex, std::ostream& out); + void dumpQuery(Node qy, const Result& r); + /** ensure that node n added to the generator is Boolean */ + void ensureBoolean(const Node& n) const; +}; + +/** + * QueryGeneratorBasic + * + * Generates query based on the sygus enumeration only. + */ +class QueryGeneratorBasic : public QueryGenerator +{ + public: + QueryGeneratorBasic(Env& env); + ~QueryGeneratorBasic() {} /** - * Dumps query qy to the a file queryN.smt2 for the current counter N + * Add term to this module. This may trigger the printing and/or checking of + * new queries. */ - void dumpQuery(Node qy); + bool addTerm(Node n, std::ostream& out) override; }; } // namespace quantifiers diff --git a/src/theory/quantifiers/query_generator_sample_sat.cpp b/src/theory/quantifiers/query_generator_sample_sat.cpp new file mode 100644 index 000000000..e259a875f --- /dev/null +++ b/src/theory/quantifiers/query_generator_sample_sat.cpp @@ -0,0 +1,392 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds, Mathias Preiner, Gereon Kremer + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Implementation of a class for mining interesting satisfiability + * queries from a stream of generated expressions. + */ + +#include "theory/quantifiers/query_generator_sample_sat.h" + +#include +#include + +#include "options/quantifiers_options.h" +#include "smt/env.h" +#include "smt/print_benchmark.h" +#include "util/random.h" + +using namespace std; +using namespace cvc5::kind; + +namespace cvc5 { +namespace theory { +namespace quantifiers { + +QueryGeneratorSampleSat::QueryGeneratorSampleSat(Env& env, unsigned deqThresh) + : QueryGenerator(env), d_deqThresh(deqThresh) +{ +} + +bool QueryGeneratorSampleSat::addTerm(Node n, std::ostream& out) +{ + Node nn = n.getKind() == NOT ? n[0] : n; + if (d_terms.find(nn) != d_terms.end()) + { + return false; + } + d_terms.insert(nn); + + Trace("sygus-qgen") << "QueryGeneratorSampleSat::addTerm : " << n + << std::endl; + unsigned npts = d_sampler->getNumSamplePoints(); + TypeNode tn = n.getType(); + // TODO : as an optimization, use a shared lazy trie? + + // the queries we generate on this round + std::vector queries; + // For each query in the above vector, this stores the indices of the points + // for which that query evaluated to true on. + std::vector> queriesPtTrue; + // the sample point indices for which the above queries are true + std::unordered_set indices; + + // collect predicate queries (if n is Boolean) + if (tn.isBoolean()) + { + std::map> ev_to_pt; + unsigned index = 0; + // the number of {true,false} for which the #points evaluated to that + // constant is greater than the threshold. + unsigned threshCount = 0; + while (index < npts && threshCount < 2) + { + Node v = d_sampler->evaluate(nn, index); + // it may not evaluate, in which case we ignore the point + if (v.isConst()) + { + ev_to_pt[v].push_back(index); + if (ev_to_pt[v].size() == d_deqThresh + 1) + { + threshCount++; + } + } + index++; + } + if (threshCount < 2) + { + for (const auto& etp : ev_to_pt) + { + if (etp.second.size() < d_deqThresh) + { + indices.insert(etp.second.begin(), etp.second.end()); + Node qy = nn; + Assert(etp.first.isConst()); + if (!etp.first.getConst()) + { + qy = qy.negate(); + } + queries.push_back(qy); + queriesPtTrue.push_back(etp.second); + } + } + } + } + + // collect equality queries + findQueries(nn, queries, queriesPtTrue); + Assert(queries.size() == queriesPtTrue.size()); + if (queries.empty()) + { + return true; + } + Trace("sygus-qgen-debug") + << "query: Check " << queries.size() << " queries..." << std::endl; + // literal queries + for (unsigned i = 0, nqueries = queries.size(); i < nqueries; i++) + { + Node qy = queries[i]; + std::vector& tIndices = queriesPtTrue[i]; + // we have an interesting query + Trace("sygus-qgen-debug") + << "; " << tIndices.size() << "/" << npts << std::endl; + AlwaysAssert(!tIndices.empty()); + checkQuery(qy, tIndices[0], out); + // add information + for (unsigned& ti : tIndices) + { + d_ptToQueries[ti].push_back(qy); + d_qysToPoints[qy].push_back(ti); + indices.insert(ti); + } + } + // for each new index, we may have a new conjunctive query + NodeManager* nm = NodeManager::currentNM(); + for (const unsigned& i : indices) + { + std::vector& qsi = d_ptToQueries[i]; + if (qsi.size() > 1) + { + // take two random queries + size_t rindex = Random::getRandom().pick(0, qsi.size() - 1); + size_t rindex2 = Random::getRandom().pick(0, qsi.size() - 2); + if (rindex2 >= rindex) + { + rindex2 = rindex2 + 1; + } + Node qy = nm->mkNode(AND, qsi[rindex], qsi[rindex2]); + checkQuery(qy, i, out); + } + } + Trace("sygus-qgen-check") << "...finished." << std::endl; + return true; +} + +void QueryGeneratorSampleSat::checkQuery(Node qy, + unsigned spIndex, + std::ostream& out) +{ + if (d_allQueries.find(qy) != d_allQueries.end()) + { + return; + } + d_allQueries.insert(qy); + out << "(query " << qy << ")" << std::endl; + // external query + + Result r; + Trace("sygus-qgen-check") << " query: check " << qy << "..." << std::endl; + // make the satisfiability query + std::unique_ptr queryChecker; + initializeChecker(queryChecker, qy); + r = queryChecker->checkSat(); + Trace("sygus-qgen-check") << " query: ...got : " << r << std::endl; + if (r.asSatisfiabilityResult().isSat() == Result::UNSAT) + { + std::stringstream ss; + ss << "--sygus-rr-query-gen detected unsoundness in cvc5 on input " << qy + << "!" << std::endl; + ss << "This query has a model : " << std::endl; + std::vector pt; + d_sampler->getSamplePoint(spIndex, pt); + Assert(pt.size() == d_vars.size()); + for (unsigned i = 0, size = pt.size(); i < size; i++) + { + ss << " " << d_vars[i] << " -> " << pt[i] << std::endl; + } + ss << "but cvc5 answered unsat!" << std::endl; + AlwaysAssert(false) << ss.str(); + } + dumpQuery(qy, r); +} + +void QueryGeneratorSampleSat::findQueries( + Node n, + std::vector& queries, + std::vector>& queriesPtTrue) +{ + // At a high level, this method traverses the LazyTrie for the type of n + // and tries to find paths to leafs that contain terms n' such that n = n' + // or n != n' is an interesting query, i.e. satisfied for a small number of + // points. + TypeNode tn = n.getType(); + LazyTrie* lt = &d_qgtTrie[tn]; + // These vectors are the set of indices of sample points for which the current + // node we are considering are { equal, disequal } from n. + std::vector eqIndex[2]; + Trace("sygus-qgen-debug") << "Compute queries for " << n << "...\n"; + + LazyTrieEvaluator* ev = d_sampler; + unsigned ntotal = d_sampler->getNumSamplePoints(); + unsigned index = 0; + bool exact = true; + bool pushEq[2] = {false, false}; + bool pre = true; + // The following parallel vectors describe the state of the locations in the + // trie we are currently visiting. + // Reference to the location in the trie + std::vector visitTr; + // The index of the sample point we are testing + std::vector currIndex; + // Whether the path to this location exactly matches the evaluation of n + std::vector currExact; + // Whether we are adding to the points that are { equal, disequal } by + // traversing to this location. + std::vector pushIndex[2]; + // Whether we are in a pre-traversal for this location. + std::vector preVisit; + visitTr.push_back(lt); + currIndex.push_back(0); + currExact.push_back(true); + pushIndex[0].push_back(false); + pushIndex[1].push_back(false); + preVisit.push_back(true); + do + { + lt = visitTr.back(); + index = currIndex.back(); + exact = currExact.back(); + for (unsigned r = 0; r < 2; r++) + { + pushEq[r] = pushIndex[r].back(); + } + pre = preVisit.back(); + if (!pre) + { + visitTr.pop_back(); + currIndex.pop_back(); + currExact.pop_back(); + preVisit.pop_back(); + // clean up the indices of points that are { equal, disequal } + for (unsigned r = 0; r < 2; r++) + { + if (pushEq[r]) + { + eqIndex[r].pop_back(); + } + pushIndex[r].pop_back(); + } + } + else + { + preVisit[preVisit.size() - 1] = false; + // add to the indices of points that are { equal, disequal } + for (unsigned r = 0; r < 2; r++) + { + if (pushEq[r]) + { + eqIndex[r].push_back(index - 1); + } + } + int eqAllow = d_deqThresh - eqIndex[0].size(); + int deqAllow = d_deqThresh - eqIndex[1].size(); + Trace("sygus-qgen-debug") + << "Find queries " << n << " " << index << "/" << ntotal + << ", deq/eq allow = " << deqAllow << "/" << eqAllow + << ", exact = " << exact << std::endl; + if (index == ntotal) + { + if (exact) + { + // add to the trie + lt->d_lazy_child = n; + } + else + { + Node nAlmostEq = lt->d_lazy_child; + // if made it here, we still should have either a equality or + // a disequality that is allowed. + Assert(deqAllow >= 0 || eqAllow >= 0); + Node query = n.eqNode(nAlmostEq); + std::vector tIndices; + if (eqAllow >= 0) + { + tIndices.insert( + tIndices.end(), eqIndex[0].begin(), eqIndex[0].end()); + } + else if (deqAllow >= 0) + { + query = query.negate(); + tIndices.insert( + tIndices.end(), eqIndex[1].begin(), eqIndex[1].end()); + } + AlwaysAssert(tIndices.size() <= d_deqThresh); + if (!tIndices.empty()) + { + queries.push_back(query); + queriesPtTrue.push_back(tIndices); + } + } + } + else + { + if (!lt->d_lazy_child.isNull()) + { + // if there is a lazy child here, push + Node e_lc = ev->evaluate(lt->d_lazy_child, index); + // store at next level + lt->d_children[e_lc].d_lazy_child = lt->d_lazy_child; + // replace + lt->d_lazy_child = Node::null(); + } + // compute + Node e_this = ev->evaluate(n, index); + + if (deqAllow >= 0) + { + // recursing on disequal points + deqAllow--; + // if there is use continuing + if (deqAllow >= 0 || eqAllow >= 0) + { + for (std::pair& ltc : lt->d_children) + { + if (ltc.first != e_this) + { + visitTr.push_back(<c.second); + currIndex.push_back(index + 1); + currExact.push_back(false); + pushIndex[0].push_back(false); + pushIndex[1].push_back(true); + preVisit.push_back(true); + } + } + } + deqAllow++; + } + bool pushEqNext = false; + if (eqAllow >= 0) + { + // below, we try recursing (if at all) on equal nodes. + eqAllow--; + pushEqNext = true; + } + // if we are on the exact path of n + if (exact) + { + if (lt->d_children.empty()) + { + // if no one has been here before, we are done + lt->d_lazy_child = n; + } + else + { + // otherwise, we recurse on the equal point + visitTr.push_back(&(lt->d_children[e_this])); + currIndex.push_back(index + 1); + currExact.push_back(true); + pushIndex[0].push_back(pushEqNext); + pushIndex[1].push_back(false); + preVisit.push_back(true); + } + } + else if (deqAllow >= 0 || eqAllow >= 0) + { + // recurse on the equal point if it exists + std::map::iterator iteq = lt->d_children.find(e_this); + if (iteq != lt->d_children.end()) + { + visitTr.push_back(&(iteq->second)); + currIndex.push_back(index + 1); + currExact.push_back(false); + pushIndex[0].push_back(pushEqNext); + pushIndex[1].push_back(false); + preVisit.push_back(true); + } + } + } + } + } while (!visitTr.empty()); +} + +} // namespace quantifiers +} // namespace theory +} // namespace cvc5 diff --git a/src/theory/quantifiers/query_generator_sample_sat.h b/src/theory/quantifiers/query_generator_sample_sat.h new file mode 100644 index 000000000..7238a4c23 --- /dev/null +++ b/src/theory/quantifiers/query_generator_sample_sat.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds, Mathias Preiner + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * A class for mining interesting satisfiability queries from a stream + * of generated expressions. + */ + +#include "cvc5_private.h" + +#ifndef CVC5__THEORY__QUANTIFIERS__QUERY_GENERATOR_SAMPLE_SAT_H +#define CVC5__THEORY__QUANTIFIERS__QUERY_GENERATOR_SAMPLE_SAT_H + +#include +#include + +#include "expr/node.h" +#include "theory/quantifiers/lazy_trie.h" +#include "theory/quantifiers/query_generator.h" +#include "theory/quantifiers/sygus_sampler.h" + +namespace cvc5 { +namespace theory { +namespace quantifiers { + +/** QueryGeneratorSampleSat + * + * This module is used for finding satisfiable queries that are maximally + * likely to trigger an unsound response in an SMT solver. These queries are + * mined from a stream of enumerated expressions. We judge likelihood of + * triggering unsoundness by the frequency at which the query is satisfied. + * + * In detail, given a stream of expressions t_1, ..., t_{n-1}, upon generating + * term t_n, we consider a query (not) t_n = t_i to be an interesting query + * if it is satisfied by at most D points, where D is a predefined threshold + * given by the sygusQueryGenThresh option. If t_n has type Bool, we + * additionally consider the case where t_n is satisfied (or not satisfied) by + * fewer than D points. + * + * In addition to generating single literal queries, this module also generates + * conjunctive queries, for instance, by remembering that literals L1 and L2 + * were both satisfied by the same point, and thus L1 ^ L2 is an interesting + * query as well. + */ +class QueryGeneratorSampleSat : public QueryGenerator +{ + public: + /** + * @param env reference to the environment + * @param deqThresh the maximal number of sample points that each query we + * generate is allowed to be satisfied by. + */ + QueryGeneratorSampleSat(Env& env, unsigned deqThresh); + ~QueryGeneratorSampleSat() {} + /** + * Add term to this module. This may trigger the printing and/or checking of + * new queries. + */ + bool addTerm(Node n, std::ostream& out) override; + + private: + /** find queries + * + * This function traverses the lazy trie for the type of n, finding equality + * and disequality queries between n and other terms in the trie. The argument + * queries collects the newly generated queries, and the argument + * queriesPtTrue collects the indices of points that each query was satisfied + * by (these indices are the indices of the points in the sampler used by this + * class). + */ + void findQueries(Node n, + std::vector& queries, + std::vector>& queriesPtTrue); + /** + * Check query qy, which is satisfied by (at least) sample point spIndex, + * using a separate copy of the SMT engine. Throws an exception if qy is + * reported to be unsatisfiable. + */ + void checkQuery(Node qy, unsigned spIndex, std::ostream& out); + /** cache of all terms registered to this generator */ + std::unordered_set d_terms; + /** the threshold used by this module for maximum number of sat points */ + unsigned d_deqThresh; + /** + * For each type, a lazy trie storing the evaluation of all added terms on + * sample points. + */ + std::map d_qgtTrie; + /** + * Maps the index of each sample point to the list of queries that it + * satisfies, and that were generated by the above function. This map is used + * for generating conjunctive queries. + */ + std::map> d_ptToQueries; + /** + * Map from queries to the indices of the points that satisfy them. + */ + std::map> d_qysToPoints; + /** Set of all queries generated */ + std::unordered_set d_allQueries; +}; + +} // namespace quantifiers +} // namespace theory +} // namespace cvc5 + +#endif /* CVC5__THEORY__QUANTIFIERS__QUERY_GENERATOR_SAMPLE_SAT_H */ diff --git a/src/theory/quantifiers/query_generator_unsat.cpp b/src/theory/quantifiers/query_generator_unsat.cpp index 40131ffaa..90ac3bcd9 100644 --- a/src/theory/quantifiers/query_generator_unsat.cpp +++ b/src/theory/quantifiers/query_generator_unsat.cpp @@ -24,8 +24,7 @@ namespace cvc5 { namespace theory { namespace quantifiers { -QueryGeneratorUnsat::QueryGeneratorUnsat(Env& env) - : ExprMiner(env), d_queryCount(0) +QueryGeneratorUnsat::QueryGeneratorUnsat(Env& env) : QueryGenerator(env) { d_true = NodeManager::currentNM()->mkConst(true); d_false = NodeManager::currentNM()->mkConst(false); @@ -38,19 +37,11 @@ QueryGeneratorUnsat::QueryGeneratorUnsat(Env& env) d_subOptions.smt.checkModels = true; } -void QueryGeneratorUnsat::initialize(const std::vector& vars, - SygusSampler* ss) -{ - Assert(ss != nullptr); - d_queryCount = 0; - ExprMiner::initialize(vars, ss); -} - bool QueryGeneratorUnsat::addTerm(Node n, std::ostream& out) { - d_terms.push_back(n); Trace("sygus-qgen") << "Add term: " << n << std::endl; - Assert(n.getType().isBoolean()); + ensureBoolean(n); + d_terms.push_back(n); // the loop below conjoins a random subset of predicates we have enumerated // so far C1 ^ ... ^ Cn such that no subset of C1 ... Cn is an unsat core @@ -154,6 +145,7 @@ Result QueryGeneratorUnsat::checkCurrent(const std::vector& activeTerms, getModelFromSubsolver(*queryChecker.get(), d_skolems, currModel); Trace("sygus-qgen-check") << "...model: " << currModel << std::endl; } + dumpQuery(qy, r); return r; } diff --git a/src/theory/quantifiers/query_generator_unsat.h b/src/theory/quantifiers/query_generator_unsat.h index d03d8efb9..128b6253e 100644 --- a/src/theory/quantifiers/query_generator_unsat.h +++ b/src/theory/quantifiers/query_generator_unsat.h @@ -25,8 +25,8 @@ #include "expr/node.h" #include "expr/variadic_trie.h" #include "options/options.h" -#include "theory/quantifiers/expr_miner.h" #include "theory/quantifiers/lazy_trie.h" +#include "theory/quantifiers/query_generator.h" #include "theory/quantifiers/sygus_sampler.h" namespace cvc5 { @@ -40,14 +40,11 @@ namespace quantifiers { * enumeration. At a high level, this is based on conjoining predicates that * refine models and avoid previously encountered unsat cores. */ -class QueryGeneratorUnsat : public ExprMiner +class QueryGeneratorUnsat : public QueryGenerator { public: QueryGeneratorUnsat(Env& env); ~QueryGeneratorUnsat() {} - /** initialize */ - void initialize(const std::vector& vars, - SygusSampler* ss = nullptr) override; /** * Add term to this module. This may trigger the printing and/or checking of * new queries. @@ -74,8 +71,6 @@ class QueryGeneratorUnsat : public ExprMiner Node d_false; /** cache of all terms registered to this generator */ std::vector d_terms; - /** total number of queries generated by this class */ - size_t d_queryCount; /** containing the unsat cores */ VariadicTrie d_cores; /** The options for subsolver calls */ -- 2.30.2