Eliminate calls to currentSmtEngine (#7060)
[cvc5.git] / src / theory / quantifiers / sygus_sampler.h
1 /******************************************************************************
2 * Top contributors (to current version):
3 * Andrew Reynolds, Mathias Preiner, Fabian Wolff
4 *
5 * This file is part of the cvc5 project.
6 *
7 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8 * in the top-level source directory and their institutional affiliations.
9 * All rights reserved. See the file COPYING in the top-level source
10 * directory for licensing information.
11 * ****************************************************************************
12 *
13 * sygus_sampler
14 */
15
16 #include "cvc5_private.h"
17
18 #ifndef CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
19 #define CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H
20
21 #include <map>
22 #include "theory/evaluator.h"
23 #include "theory/quantifiers/lazy_trie.h"
24 #include "theory/quantifiers/sygus/term_database_sygus.h"
25 #include "theory/quantifiers/term_enumeration.h"
26
27 namespace cvc5 {
28 namespace theory {
29 namespace quantifiers {
30
31 /** SygusSampler
32 *
33 * This class can be used to test whether two expressions are equivalent
34 * by random sampling. We use this class for the following options:
35 *
36 * sygus-rr-synth: synthesize candidate rewrite rules by finding two terms
37 * t1 and t2 do not rewrite to the same term, but are identical when considering
38 * a set of sample points, and
39 *
40 * sygus-rr-verify: detect unsound rewrite rules by finding two terms t1 and
41 * t2 that rewrite to the same term, but not identical when considering a set
42 * of sample points.
43 *
44 * To use this class:
45 * (1) Call initialize( tds, f, nsamples) where f is sygus datatype term.
46 * (2) For terms n1....nm enumerated that correspond to builtin analog of sygus
47 * term f, we call registerTerm( n1 )....registerTerm( nm ). It may be the case
48 * that registerTerm( ni ) returns nj for some j < i. In this case, we have that
49 * ni and nj are equivalent under the sample points in this class.
50 *
51 * For example, say the grammar for f is:
52 * A = 0 | 1 | x | y | A+A | ite( B, A, A )
53 * B = A <= A
54 * If we call initialize( tds, f, 5 ), this class will generate 5 random sample
55 * points for (x,y), say (0,0), (1,1), (0,1), (1,0), (2,2). The return values
56 * of successive calls to registerTerm are listed below.
57 * registerTerm( 0 ) -> 0
58 * registerTerm( x ) -> x
59 * registerTerm( x+y ) -> x+y
60 * registerTerm( y+x ) -> x+y
61 * registerTerm( x+ite(x <= 1+1, 0, y ) ) -> x
62 * Notice that the number of sample points can be configured for the above
63 * options using sygus-samples=N.
64 */
65 class SygusSampler : public LazyTrieEvaluator
66 {
67 public:
68 SygusSampler();
69 ~SygusSampler() override {}
70
71 /** initialize
72 *
73 * tn : the return type of terms we will be testing with this class,
74 * vars : the variables we are testing substitutions for,
75 * nsamples : number of sample points this class will test,
76 * unique_type_ids : if this is set to true, then we consider each variable
77 * in vars to have a unique "type id". A type id is a finer-grained notion of
78 * type that is used to determine when a rewrite rule is redundant.
79 */
80 virtual void initialize(TypeNode tn,
81 const std::vector<Node>& vars,
82 unsigned nsamples,
83 bool unique_type_ids = false);
84 /** initialize sygus
85 *
86 * qe : pointer to quantifiers engine,
87 * f : a term of some SyGuS datatype type whose values we will be
88 * testing under the free variables in the grammar of f,
89 * nsamples : number of sample points this class will test,
90 * useSygusType : whether we will register terms with this sampler that have
91 * the same type as f. If this flag is false, then we will be registering
92 * terms of the analog of the type of f, that is, the builtin type that
93 * f's type encodes in the deep embedding.
94 */
95 virtual void initializeSygus(TermDbSygus* tds,
96 Node f,
97 unsigned nsamples,
98 bool useSygusType);
99 /** register term n with this sampler database
100 *
101 * forceKeep is whether we wish to force that n is chosen as a representative
102 * value in the trie.
103 */
104 virtual Node registerTerm(Node n, bool forceKeep = false);
105 /** get number of sample points */
106 unsigned getNumSamplePoints() const { return d_samples.size(); }
107 /** get variables, adds d_vars to vars */
108 void getVariables(std::vector<Node>& vars) const;
109 /** get sample point
110 *
111 * Appends sample point #index to the vector pt, d_vars to vars.
112 */
113 void getSamplePoint(unsigned index,
114 std::vector<Node>& pt);
115 /** Add pt to the set of sample points considered by this sampler */
116 void addSamplePoint(std::vector<Node>& pt);
117 /** evaluate n on sample point index */
118 Node evaluate(Node n, unsigned index) override;
119 /**
120 * Compute the variables from the domain of d_var_index that occur in n,
121 * store these in the vector fvs.
122 */
123 void computeFreeVariables(Node n, std::vector<Node>& fvs);
124 /**
125 * Returns the index of a sample point such that the evaluation of a and b
126 * diverge, or -1 if no such sample point exists.
127 */
128 int getDiffSamplePointIndex(Node a, Node b);
129
130 //--------------------------queries about terms
131 /** is contiguous
132 *
133 * This returns whether n's free variables (terms occurring in the range of
134 * d_type_vars) are a prefix of the list of variables in d_type_vars for each
135 * type id. For instance, if d_type_vars[id] = { x, y } for some id, then
136 * 0, x, x+y, y+x are contiguous but y is not. This is useful for excluding
137 * terms from consideration that are alpha-equivalent to others.
138 */
139 bool isContiguous(Node n);
140 /** is ordered
141 *
142 * This returns whether n's free variables are in order with respect to
143 * variables in d_type_vars for each type. For instance, if
144 * d_type_vars[id] = { x, y } for some id, then 0, x, x+y are ordered but
145 * y and y+x are not.
146 */
147 bool isOrdered(Node n);
148 /** is linear
149 *
150 * This returns whether n contains at most one occurrence of each free
151 * variable. For example, x, x+y are linear, but x+x, (x-y)+y, (x+0)+(x+0) are
152 * non-linear.
153 */
154 bool isLinear(Node n);
155 /** check variables
156 *
157 * This returns false if !isOrdered(n) and checkOrder is true or !isLinear(n)
158 * if checkLinear is true, or false otherwise.
159 */
160 bool checkVariables(Node n, bool checkOrder, bool checkLinear);
161 /** contains free variables
162 *
163 * Returns true if the free variables of b are a subset of those in a, where
164 * we require a strict subset if strict is true. Free variables are those that
165 * occur in the range d_type_vars.
166 */
167 bool containsFreeVariables(Node a, Node b, bool strict = false);
168 //--------------------------end queries about terms
169 /** check equivalent
170 *
171 * Check whether bv and bvr are equivalent on all sample points, print
172 * an error if not. Used with --sygus-rr-verify.
173 *
174 * @param bv The original term
175 * @param bvr The rewritten form of bvr
176 * @param out The output stream to write if the rewrite was unsound.
177 */
178 void checkEquivalent(Node bv, Node bvr, std::ostream& out);
179
180 protected:
181 /** sygus term database of d_qe */
182 TermDbSygus* d_tds;
183 /** term enumerator object (used for random sampling) */
184 TermEnumeration d_tenum;
185 /** samples */
186 std::vector<std::vector<Node> > d_samples;
187 /** evaluator class */
188 Evaluator d_eval;
189 /** data structure to check duplication of sample points */
190 class PtTrie
191 {
192 public:
193 /** add pt to this trie, returns true if pt is not a duplicate. */
194 bool add(std::vector<Node>& pt);
195
196 private:
197 /** the children of this node */
198 std::map<Node, PtTrie> d_children;
199 };
200 /** a trie for samples */
201 PtTrie d_samples_trie;
202 /** the sygus type for this sampler (if applicable). */
203 TypeNode d_ftn;
204 /** whether we are registering terms of sygus types with this sampler */
205 bool d_use_sygus_type;
206 /**
207 * For each (sygus) type, a map from builtin terms to the sygus term they
208 * correspond to.
209 */
210 std::map<TypeNode, std::map<Node, Node> > d_builtin_to_sygus;
211 /** all variables we are sampling values for */
212 std::vector<Node> d_vars;
213 /** type variables
214 *
215 * We group variables according to "type ids". Two variables have the same
216 * type id if they have indistinguishable status according to this sampler.
217 * This is a finer-grained grouping than types. For example, two variables
218 * of the same type may have different type ids if they occur as constructors
219 * of a different set of sygus types in the grammar we are considering.
220 * For instance, we assign x and y different type ids in this grammar:
221 * A -> B + C
222 * B -> x | 0 | 1
223 * C -> y
224 * Type ids are computed for each variable in d_vars during initialize(...).
225 *
226 * For each type id, a list of variables in the grammar we are considering,
227 * for that type. These typically correspond to the arguments of the
228 * function-to-synthesize whose grammar we are considering.
229 */
230 std::map<unsigned, std::vector<Node> > d_type_vars;
231 /**
232 * A map all variables in the grammar we are considering to their index in
233 * d_type_vars.
234 */
235 std::map<Node, unsigned> d_var_index;
236 /** Map from variables to the id (the domain of d_type_vars). */
237 std::map<Node, unsigned> d_type_ids;
238 /** constants
239 *
240 * For each type, a list of constants in the grammar we are considering, for
241 * that type.
242 */
243 std::map<TypeNode, std::vector<Node> > d_type_consts;
244 /** a lazy trie for each type
245 *
246 * This stores the evaluation of all terms registered to this class.
247 *
248 * Notice if we are registering sygus terms with this class, then terms
249 * are grouped into this trie according to their sygus type, and not their
250 * builtin type. For example, for grammar:
251 * A -> x | B+1
252 * B -> x | 0 | 1 | B+B
253 * If we register C^B_+( C^B_x(), C^B_0() ) and C^A_x() with this class,
254 * then x+0 is registered to d_trie[B] and x is registered to d_trie[A],
255 * and no rewrite rule is reported. The reason for this is that otherwise
256 * we would end up reporting many useless rewrites since the same builtin
257 * term can be generated by multiple sygus types (e.g. C^B_x() and C^A_x()).
258 */
259 std::map<TypeNode, LazyTrie> d_trie;
260 /** is this sampler valid?
261 *
262 * A sampler can be invalid if sample points cannot be generated for a type
263 * of an argument to function f.
264 */
265 bool d_is_valid;
266 /** initialize samples
267 *
268 * Adds nsamples sample points to d_samples.
269 */
270 void initializeSamples(unsigned nsamples);
271 /** get random value for a type
272 *
273 * Returns a random value for the given type based on the random number
274 * generator. Currently, supported types:
275 *
276 * Bool, Bitvector : returns a random value in the range of that type.
277 * Int, String : returns a random string of values in (base 10) of random
278 * length, currently by a repeated coin flip.
279 * Real : returns the division of two random integers, where the denominator
280 * is omitted if it is zero.
281 */
282 Node getRandomValue(TypeNode tn);
283 /** get sygus random value
284 *
285 * Returns a random value based on the sygus type tn. The return value is
286 * a constant in the analog type of tn. This function chooses either to
287 * return a random value, or otherwise will construct a constant based on
288 * a random constructor of tn whose builtin operator is not a variable.
289 *
290 * rchance: the chance that the call to this function will be forbidden
291 * from making recursive calls and instead must return a value based on
292 * a nullary constructor of tn or based on getRandomValue above.
293 * rinc: the percentage to increment rchance on recursive calls.
294 *
295 * For example, consider the grammar:
296 * A -> x | y | 0 | 1 | +( A, A ) | ite( B, A, A )
297 * B -> A = A
298 * If we call this function on A and rchance is 0.0, there are five evenly
299 * chosen possibilities, either we return a random value via getRandomValue
300 * above, or we choose one of the four non-variable constructors of A.
301 * Say we choose ite, then we recursively call this function for
302 * B, A, and A, which return constants c1, c2, and c3. Then, this function
303 * returns the rewritten form of ite( c1, c2, c3 ).
304 * If on the other hand, rchance was 0.5 and rand() < 0.5. Then, we force
305 * this call to terminate by either selecting a random value via
306 * getRandomValue, 0 or 1.
307 */
308 Node getSygusRandomValue(TypeNode tn,
309 double rchance,
310 double rinc,
311 unsigned depth = 0);
312 /** map from sygus types to non-variable constructors */
313 std::map<TypeNode, std::vector<unsigned> > d_rvalue_cindices;
314 /** map from sygus types to non-variable nullary constructors */
315 std::map<TypeNode, std::vector<unsigned> > d_rvalue_null_cindices;
316 /** the random string alphabet */
317 std::vector<unsigned> d_rstring_alphabet;
318 /** map from variables to sygus types that include them */
319 std::map<Node, std::vector<TypeNode> > d_var_sygus_types;
320 /** map from constants to sygus types that include them */
321 std::map<Node, std::vector<TypeNode> > d_const_sygus_types;
322 /** register sygus type, initializes the above two data structures */
323 void registerSygusType(TypeNode tn);
324 };
325
326 } // namespace quantifiers
327 } // namespace theory
328 } // namespace cvc5
329
330 #endif /* CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H */