From: Andrew Reynolds Date: Wed, 28 Nov 2018 21:49:56 +0000 (-0600) Subject: Information gain heuristic for PBE (#2719) X-Git-Tag: cvc5-1.0.0~4344 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=64624a5ff72c132b87b885780ad9c39f06e3cdbc;p=cvc5.git Information gain heuristic for PBE (#2719) --- diff --git a/src/theory/quantifiers/sygus/sygus_unif.cpp b/src/theory/quantifiers/sygus/sygus_unif.cpp index c262c77e5..a4f276792 100644 --- a/src/theory/quantifiers/sygus/sygus_unif.cpp +++ b/src/theory/quantifiers/sygus/sygus_unif.cpp @@ -42,25 +42,14 @@ void SygusUnif::initializeCandidate( d_strategy[f].initialize(qe, f, enums); } -Node SygusUnif::constructBestSolvedTerm(const std::vector& solved) +Node SygusUnif::constructBestSolvedTerm(Node e, const std::vector& solved) { Assert(!solved.empty()); return solved[0]; } -Node SygusUnif::constructBestStringSolvedTerm(const std::vector& solved) -{ - Assert(!solved.empty()); - return solved[0]; -} - -Node SygusUnif::constructBestSolvedConditional(const std::vector& solved) -{ - Assert(!solved.empty()); - return solved[0]; -} - -Node SygusUnif::constructBestConditional(const std::vector& conds) +Node SygusUnif::constructBestConditional(Node ce, + const std::vector& conds) { Assert(!conds.empty()); double r = Random::getRandom().pickDouble(0.0, 1.0); diff --git a/src/theory/quantifiers/sygus/sygus_unif.h b/src/theory/quantifiers/sygus/sygus_unif.h index 614a29d1c..67e798854 100644 --- a/src/theory/quantifiers/sygus/sygus_unif.h +++ b/src/theory/quantifiers/sygus/sygus_unif.h @@ -156,18 +156,20 @@ class SygusUnif */ virtual Node constructSol( Node f, Node e, NodeRole nrole, int ind, std::vector& lemmas) = 0; - /** Heuristically choose the best solved term from solved in context x, - * currently return the first. */ - virtual Node constructBestSolvedTerm(const std::vector& solved); - /** Heuristically choose the best solved string term from solved in context - * x, currently return the first. */ - virtual Node constructBestStringSolvedTerm(const std::vector& solved); - /** Heuristically choose the best solved conditional term from solved in - * context x, currently random */ - virtual Node constructBestSolvedConditional(const std::vector& solved); - /** Heuristically choose the best conditional term from conds in context x, - * currently random */ - virtual Node constructBestConditional(const std::vector& conds); + /** + * Heuristically choose the best solved term for enumerator e, + * currently return the first by default. A solved term is one that + * suffices to form part of the solution for the given context. For example, + * x is a solved term in the context "ite(x>0, _, 0)" for PBE problem + * with I/O pairs { 1 -> 1, 4 -> 4, -1 -> 0 }. + */ + virtual Node constructBestSolvedTerm(Node e, const std::vector& solved); + /** + * Heuristically choose the best conditional term from conds for condition + * enumerator ce, random by default. + */ + virtual Node constructBestConditional(Node ce, + const std::vector& conds); /** Heuristically choose the best string to concatenate from strs to the * solution in context x, currently random * incr stores the vector of indices that are incremented by this solution in diff --git a/src/theory/quantifiers/sygus/sygus_unif_io.cpp b/src/theory/quantifiers/sygus/sygus_unif_io.cpp index 3d26deeaa..6daeb1706 100644 --- a/src/theory/quantifiers/sygus/sygus_unif_io.cpp +++ b/src/theory/quantifiers/sygus/sygus_unif_io.cpp @@ -21,6 +21,8 @@ #include "theory/quantifiers/term_util.h" #include "util/random.h" +#include + using namespace CVC4::kind; namespace CVC4 { @@ -89,7 +91,6 @@ void UnifContextIo::initialize(SygusUnifIo* sui) d_str_pos.clear(); d_curr_role = role_equal; d_visit_role.clear(); - d_uinfo.clear(); // initialize with #examples unsigned sz = sui->d_examples.size(); @@ -467,7 +468,10 @@ void SubsumeTrie::getLeaves(const std::vector& vals, } SygusUnifIo::SygusUnifIo() - : d_check_sol(false), d_cond_count(0), d_sol_cons_nondet(false) + : d_check_sol(false), + d_cond_count(0), + d_sol_cons_nondet(false), + d_solConsUsingInfoGain(false) { d_true = NodeManager::currentNM()->mkConst(true); d_false = NodeManager::currentNM()->mkConst(false); @@ -778,6 +782,7 @@ Node SygusUnifIo::constructSolutionNode(std::vector& lemmas) Trace("sygus-pbe") << "Construct solution, #iterations = " << d_cond_count << std::endl; d_check_sol = false; + d_solConsUsingInfoGain = false; // try multiple times if we have done multiple conditions, due to // non-determinism unsigned sol_term_size = 0; @@ -802,6 +807,14 @@ Node SygusUnifIo::constructSolutionNode(std::vector& lemmas) Trace("sygus-pbe") << "...solved at iteration " << i << std::endl; d_solution = vcc; sol_term_size = d_tds->getSygusTermSize(vcc); + // We've determined its feasible, now, enable information gain and + // retry. We do this since information gain comes with an overhead, + // and we want testing feasibility to be fast. + if (!d_solConsUsingInfoGain) + { + d_solConsUsingInfoGain = true; + i = 0; + } } else if (!d_sol_cons_nondet) { @@ -991,7 +1004,7 @@ Node SygusUnifIo::constructSol( ecache.d_term_trie.getSubsumedBy(x.d_vals, true, subsumed_by); if (!subsumed_by.empty()) { - ret_dt = constructBestSolvedTerm(subsumed_by); + ret_dt = constructBestSolvedTerm(e, subsumed_by); indent("sygus-sui-dt", ind); Trace("sygus-sui-dt") << "return PBE: success : conditionally solved" << d_tds->sygusToBuiltin(ret_dt) << std::endl; @@ -1033,7 +1046,7 @@ Node SygusUnifIo::constructSol( } if (!str_solved.empty()) { - ret_dt = constructBestStringSolvedTerm(str_solved); + ret_dt = constructBestSolvedTerm(e, str_solved); indent("sygus-sui-dt", ind); Trace("sygus-sui-dt") << "return PBE: success : string solved " << d_tds->sygusToBuiltin(ret_dt) << std::endl; @@ -1256,54 +1269,6 @@ Node SygusUnifIo::constructSol( EnumCache& ecache_child = d_ecache[ce]; - // only used if the return value is not modified - if (!x.isReturnValueModified()) - { - if (x.d_uinfo.find(ce) == x.d_uinfo.end()) - { - x.d_uinfo[ce].clear(); - Trace("sygus-sui-dt-debug2") - << " reg : PBE: Look for direct solutions for conditional " - "enumerator " - << ce << " ... " << std::endl; - Assert(ecache_child.d_enum_vals.size() - == ecache_child.d_enum_vals_res.size()); - for (unsigned i = 1; i <= 2; i++) - { - std::pair& te_pair = etis->d_cenum[i]; - Node te = te_pair.first; - EnumCache& ecache_te = d_ecache[te]; - bool branch_pol = (i == 1); - // for each condition, get terms that satisfy it in this - // branch - for (unsigned k = 0, size = ecache_child.d_enum_vals.size(); - k < size; - k++) - { - Node cond = ecache_child.d_enum_vals[k]; - std::vector solved; - ecache_te.d_term_trie.getSubsumedBy( - ecache_child.d_enum_vals_res[k], branch_pol, solved); - Trace("sygus-sui-dt-debug2") - << " reg : PBE: " << d_tds->sygusToBuiltin(cond) - << " has " << solved.size() << " solutions in branch " - << i << std::endl; - if (!solved.empty()) - { - Node slv = constructBestSolvedTerm(solved); - Trace("sygus-sui-dt-debug2") - << " reg : PBE: ..." << d_tds->sygusToBuiltin(slv) - << " is a solution under branch " << i; - Trace("sygus-sui-dt-debug2") - << " of condition " << d_tds->sygusToBuiltin(cond) - << std::endl; - x.d_uinfo[ce].d_look_ahead_sols[cond][i] = slv; - } - } - } - } - } - // get the conditionals in the current context : they must be // distinguishable std::map > possible_cond; @@ -1328,56 +1293,14 @@ Node SygusUnifIo::constructSol( } } - // static look ahead conditional : choose conditionals that have - // solved terms in at least one branch - // only applicable if we have not modified the return value - std::map > solved_cond; - if (!x.isReturnValueModified() && !x.d_uinfo[ce].empty()) - { - int solve_max = 0; - for (Node& cond : itpc->second) - { - std::map >::iterator itla = - x.d_uinfo[ce].d_look_ahead_sols.find(cond); - if (itla != x.d_uinfo[ce].d_look_ahead_sols.end()) - { - int nsolved = itla->second.size(); - solve_max = nsolved > solve_max ? nsolved : solve_max; - solved_cond[nsolved].push_back(cond); - } - } - int n = solve_max; - while (n > 0) - { - if (!solved_cond[n].empty()) - { - rec_c = constructBestSolvedConditional(solved_cond[n]); - indent("sygus-sui-dt", ind + 1); - Trace("sygus-sui-dt") - << "PBE: ITE strategy : choose solved conditional " - << d_tds->sygusToBuiltin(rec_c) << " with " << n - << " solved children..." << std::endl; - std::map >::iterator itla = - x.d_uinfo[ce].d_look_ahead_sols.find(rec_c); - Assert(itla != x.d_uinfo[ce].d_look_ahead_sols.end()); - for (std::pair& las : itla->second) - { - look_ahead_solved_children[las.first] = las.second; - } - break; - } - n--; - } - } - // otherwise, guess a conditional if (rec_c.isNull()) { - rec_c = constructBestConditional(itpc->second); + rec_c = constructBestConditional(ce, itpc->second); Assert(!rec_c.isNull()); indent("sygus-sui-dt", ind); Trace("sygus-sui-dt") - << "PBE: ITE strategy : choose random conditional " + << "PBE: ITE strategy : choose best conditional " << d_tds->sygusToBuiltin(rec_c) << std::endl; } } @@ -1454,6 +1377,100 @@ Node SygusUnifIo::constructSol( return ret_dt; } +Node SygusUnifIo::constructBestConditional(Node ce, + const std::vector& conds) +{ + if (!d_solConsUsingInfoGain) + { + return SygusUnif::constructBestConditional(ce, conds); + } + UnifContextIo& x = d_context; + // use information gain heuristic + Trace("sygus-sui-dt-igain") << "Best information gain in context "; + print_val("sygus-sui-dt-igain", x.d_vals); + Trace("sygus-sui-dt-igain") << std::endl; + // set of indices that are active in this branch, i.e. x.d_vals[i] is true + std::vector activeIndices; + // map (j,t,s) -> n, such that the j^th condition in the vector conds + // evaluates to t (typically true/false) on n active I/O pairs with output s. + std::map>> eval; + // map (j,t) -> m, such that the j^th condition in the vector conds + // evaluates to t (typically true/false) for m active I/O pairs. + std::map> evalCount; + unsigned nconds = conds.size(); + EnumCache& ecache = d_ecache[ce]; + // Get the index of conds[j] in the enumerator cache, this is to look up + // its evaluation on each point. + std::vector eindex; + for (unsigned j = 0; j < nconds; j++) + { + eindex.push_back(ecache.d_enum_val_to_index[conds[j]]); + } + unsigned activePoints = 0; + for (unsigned i = 0, npoints = x.d_vals.size(); i < npoints; i++) + { + if (x.d_vals[i].getConst()) + { + activePoints++; + Node eo = d_examples_out[i]; + for (unsigned j = 0; j < nconds; j++) + { + Node resn = ecache.d_enum_vals_res[eindex[j]][i]; + Assert(resn.isConst()); + eval[j][resn][eo]++; + evalCount[j][resn]++; + } + } + } + AlwaysAssert(activePoints > 0); + // find the condition that leads to the lowest entropy + // initially set minEntropy to > 1.0. + double minEntropy = 2.0; + unsigned bestIndex = 0; + for (unsigned j = 0; j < nconds; j++) + { + // To compute the entropy for a condition C, for pair of terms (s, t), let + // prob(t) be the probability C evaluates to t on an active point, + // prob(s|t) be the probability that an active point on which C + // evaluates to t has output s. + // Then, the entropy of C is: + // sum{t}. prob(t)*( sum{s}. -prob(s|t)*log2(prob(s|t)) ) + // where notice this is always between 0 and 1. + double entropySum = 0.0; + Trace("sygus-sui-dt-igain") << j << " : "; + for (std::pair>& ej : eval[j]) + { + unsigned ecount = evalCount[j][ej.first]; + if (ecount > 0) + { + double probBranch = double(ecount) / double(activePoints); + Trace("sygus-sui-dt-igain") << ej.first << " -> ( "; + for (std::pair& eej : ej.second) + { + if (eej.second > 0) + { + double probVal = double(eej.second) / double(ecount); + Trace("sygus-sui-dt-igain") + << eej.first << ":" << eej.second << " "; + double factor = -probVal * log2(probVal); + entropySum += probBranch * factor; + } + } + Trace("sygus-sui-dt-igain") << ") "; + } + } + Trace("sygus-sui-dt-igain") << "..." << entropySum << std::endl; + if (entropySum < minEntropy) + { + minEntropy = entropySum; + bestIndex = j; + } + } + + Assert(!conds.empty()); + return conds[bestIndex]; +} + } /* CVC4::theory::quantifiers namespace */ } /* CVC4::theory namespace */ } /* CVC4 namespace */ diff --git a/src/theory/quantifiers/sygus/sygus_unif_io.h b/src/theory/quantifiers/sygus/sygus_unif_io.h index 2a4ac91c8..8fa8b95e1 100644 --- a/src/theory/quantifiers/sygus/sygus_unif_io.h +++ b/src/theory/quantifiers/sygus/sygus_unif_io.h @@ -124,33 +124,6 @@ class UnifContextIo : public UnifContext */ std::map> d_visit_role; - /** unif context enumerator information */ - class UEnumInfo - { - public: - UEnumInfo() {} - /** map from conditions and branch positions to a solved node - * - * For example, if we have: - * f( 1 ) = 2 ^ f( 3 ) = 4 ^ f( -1 ) = 1 - * Then, valid entries in this map is: - * d_look_ahead_sols[x>0][1] = x+1 - * d_look_ahead_sols[x>0][2] = 1 - * For the first entry, notice that for all input examples such that x>0 - * evaluates to true, which are (1) and (3), we have that their output - * values for x+1 under the substitution that maps x to the input value, - * resulting in 2 and 4, are equal to the output value for the respective - * pairs. - */ - std::map> d_look_ahead_sols; - /** clear */ - void clear() { d_look_ahead_sols.clear(); } - /** is empty */ - bool empty() { return d_look_ahead_sols.empty(); } - }; - /** map from enumerators to the above info class */ - std::map d_uinfo; - private: /** true and false nodes */ Node d_true; @@ -346,6 +319,11 @@ class SygusUnifIo : public SygusUnif * which can be closed with "B", giving us (x ++ "B") as a solution. */ bool d_sol_cons_nondet; + /** + * Whether we are using information gain heuristic during solution + * construction. + */ + bool d_solConsUsingInfoGain; /** true and false nodes */ Node d_true; Node d_false; @@ -460,6 +438,14 @@ class SygusUnifIo : public SygusUnif NodeRole nrole, int ind, std::vector& lemmas) override; + /** construct best conditional + * + * This returns the condition in conds that maximizes information gain with + * respect to the current active points in d_context. For example, see + * Alur et al. TACAS 2017 for an example of information gain. + */ + Node constructBestConditional(Node ce, + const std::vector& conds) override; }; } /* CVC4::theory::quantifiers namespace */