Update sygus grammar normalization to use node-level datatype. (#3567)
[cvc5.git] / src / theory / quantifiers / sygus / sygus_grammar_norm.h
1 /********************* */
2 /*! \file sygus_grammar_norm.h
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Haniel Barbosa, Andrew Reynolds, Tim King
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief class for simplifying SyGuS grammars after they are encoded into
13 ** datatypes.
14 **/
15 #include "cvc4_private.h"
16
17 #ifndef CVC4__THEORY__QUANTIFIERS__SYGUS_GRAMMAR_NORM_H
18 #define CVC4__THEORY__QUANTIFIERS__SYGUS_GRAMMAR_NORM_H
19
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <vector>
24
25 #include "expr/datatype.h"
26 #include "expr/node.h"
27 #include "expr/sygus_datatype.h"
28 #include "expr/type.h"
29 #include "expr/type_node.h"
30 #include "theory/quantifiers/term_util.h"
31
32 namespace CVC4 {
33 namespace theory {
34 namespace quantifiers {
35
36 class SygusGrammarNorm;
37 class TermDbSygus;
38
39 /** Operator position trie class
40 *
41 * This data structure stores an unresolved type corresponding to the
42 * normalization of a type. This unresolved type is indexed by the positions of
43 * the construtors of the datatype associated with the original type. The list
44 * of positions represent the operators, associated with the respective
45 * considered constructors, that were used for building the unresolved type.
46 *
47 * Example:
48 *
49 * Let A be a type defined by the grammar "A -> x | 0 | 1 | A + A". In its
50 * datatype representation the operator for "x" is in position 0, for "0" in
51 * position "1" and so on. Consider entries (T, [op_1, ..., op_n]) -> T' to
52 * represent that a type T is normalized with operators [op_1, ..., op_n] into
53 * the type T'. For entries
54 *
55 * (A, [x, 0, 1, +]) -> A1
56 * (A, [x, 1, +]) -> A2
57 * (A, [1, +]) -> A3
58 * (A, [0]) -> AZ
59 * (A, [x]) -> AX
60 * (A, [1]) -> AO
61 *
62 * the OpPosTrie T we build for this type is :
63 *
64 * T[A] :
65 * T[A].d_children[0] : AX
66 * T[A].d_children[0].d_children[1] :
67 * T[A].d_children[0].d_children[1].d_children[2] :
68 * T[A].d_children[0].d_children[1].d_children[2].d_children[3] : A1
69 * T[A].d_children[0].d_children[2] :
70 * T[A].d_children[0].d_children[2].d_children[3] : A2
71 * T[A].d_children[1] : AZ
72 * T[A].d_children[2] : AO
73 * T[A].d_children[2].d_children[4] : A3
74 *
75 * Nodes store the types built for the path of positions up to that point, if
76 * any.
77 */
78 class OpPosTrie
79 {
80 public:
81 /** type retrieval/addition
82 *
83 * if type indexed by the given operator positions is already in the trie then
84 * unres_t becomes the indexed type and true is returned. Otherwise a new type
85 * is created, indexed by the given positions, and assigned to unres_t, with
86 * false being returned.
87 */
88 bool getOrMakeType(TypeNode tn,
89 TypeNode& unres_tn,
90 const std::vector<unsigned>& op_pos,
91 unsigned ind = 0);
92 /** clear all data from this trie */
93 void clear() { d_children.clear(); }
94
95 private:
96 /** the data (only set for the final node of an inserted path) */
97 TypeNode d_unres_tn;
98 /* the children of the trie node */
99 std::map<unsigned, OpPosTrie> d_children;
100 }; /* class OpPosTrie */
101
102 /** Utility for normalizing SyGuS grammars to avoid spurious enumerations
103 *
104 * Uses the datatype representation of a SyGuS grammar to identify entries that
105 * can normalized in order to have less possible enumerations. An example is
106 * with integer types, e.g.:
107 *
108 * Int -> x | y | Int + Int | 0 | 1 | ite(Bool, Int, Int)
109 *
110 * becomes
111 *
112 * Int0 -> IntZ | Int1
113 * IntZ -> 0
114 * Int1 -> IntX | IntX + Int1 | Int2
115 * IntX -> x
116 * Int2 -> IntY | IntY + Int2 | Int3
117 * IntY -> y
118 * Int3 -> IntO | IntO + Int3 | Int4
119 * IntO -> 1
120 * Int4 -> IntITE | IntITE + Int4
121 * IntITE -> ite(Bool, Int0, Int0)
122 *
123 * TODO: #1304 normalize more complex grammars
124 *
125 * This class also performs more straightforward normalizations, such as
126 * expanding definitions of functions declared with a "define-fun" command.
127 * These lighweight transformations are always applied, independently of the
128 * normalization option being enabled.
129 */
130 class SygusGrammarNorm
131 {
132 public:
133 SygusGrammarNorm(QuantifiersEngine* qe);
134 ~SygusGrammarNorm() {}
135 /** creates a normalized typenode from a given one.
136 *
137 * In a normalized typenode all typenodes it contains are normalized.
138 * Normalized typenodes can be structurally identicial to their original
139 * counterparts.
140 *
141 * sygus_vars are the input variables for the function to be synthesized,
142 * which are used as input for the built datatypes.
143 *
144 * This is the function that will resolve all types and datatypes built during
145 * normalization. This operation can only be performed after all types
146 * contained in "tn" have been normalized, since the resolution of datatypes
147 * depends on all types involved being defined.
148 */
149 TypeNode normalizeSygusType(TypeNode tn, Node sygus_vars);
150
151 /* Retrives, or, if none, creates, stores and returns, the node for the
152 * identity operator (\lambda x. x) for the given type node */
153 static inline Node getIdOp(TypeNode tn)
154 {
155 auto it = d_tn_to_id.find(tn);
156 if (it == d_tn_to_id.end())
157 {
158 std::vector<Node> vars = {NodeManager::currentNM()->mkBoundVar(tn)};
159 Node n = NodeManager::currentNM()->mkNode(
160 kind::LAMBDA,
161 NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, vars),
162 vars.back());
163 d_tn_to_id[tn] = n;
164 return n;
165 }
166 return it->second;
167 }
168
169 private:
170 /** Keeps the necessary information for bulding a normalized type:
171 *
172 * the original typenode, from which the datatype representation can be
173 * extracted
174 *
175 * the operators, names, print callbacks and list of argument types for each
176 * constructor
177 *
178 * the unresolved type node used as placeholder for references of the yet to
179 * be built normalized type
180 *
181 * A (SyGuS) datatype to represent the structure of the type node for the
182 * normalized type.
183 */
184 class TypeObject
185 {
186 public:
187 /* Stores the original type node and the unresolved placeholder. The
188 * datatype for the latter is created with the respective name. */
189 TypeObject(TypeNode src_tn, TypeNode unres_tn);
190 ~TypeObject() {}
191
192 /** adds information in "cons" (operator, name, print callback, argument
193 * types) as it is into "to"
194 *
195 * A side effect of this procedure is to expand the definitions in the sygus
196 * operator of "cons"
197 *
198 * The types of the arguments of "cons" are recursively normalized
199 */
200 void addConsInfo(SygusGrammarNorm* sygus_norm,
201 const DTypeConstructor& cons,
202 std::shared_ptr<SygusPrintCallback> spc);
203 /**
204 * Returns the total version of Kind k if it is a partial operator, or
205 * otherwise k itself.
206 */
207 static Kind getEliminateKind(Kind k);
208 /**
209 * Returns a version of n where all partial functions such as bvudiv
210 * have been replaced by their total versions like bvudiv_total.
211 */
212 static Node eliminatePartialOperators(Node n);
213
214 /** initializes a datatype with the information in the type object
215 *
216 * "dt" is the datatype of the original typenode. It is necessary for
217 * retrieving ancillary information during the datatype building, such as
218 * its sygus type (e.g. Int)
219 *
220 * The initialized datatype and its unresolved type are saved in the global
221 * accumulators of "sygus_norm"
222 */
223 void initializeDatatype(SygusGrammarNorm* sygus_norm, const DType& dt);
224
225 //---------- information stored from original type node
226
227 /* The original typenode this TypeObject is built from */
228 TypeNode d_tn;
229
230 //---------- information to build normalized type node
231
232 /* Unresolved type node placeholder */
233 TypeNode d_unres_tn;
234 /** A sygus datatype */
235 SygusDatatype d_sdt;
236 }; /* class TypeObject */
237
238 /** Transformation abstract class
239 *
240 * Classes extending this one will define specif transformationst for building
241 * normalized types based on applications of specific operators
242 */
243 class Transf
244 {
245 public:
246 virtual ~Transf() {}
247
248 /** abstract function for building normalized types
249 *
250 * Builds normalized types for the operators specifed by the positions in
251 * op_pos of constructors from dt. The built types are associated with the
252 * given type object and accumulated in the sygus_norm object, whose
253 * utilities for any extra necessary normalization.
254 */
255 virtual void buildType(SygusGrammarNorm* sygus_norm,
256 TypeObject& to,
257 const DType& dt,
258 std::vector<unsigned>& op_pos) = 0;
259 }; /* class Transf */
260
261 /** Drop transformation class
262 *
263 * This class builds a type by dropping a set of redundant constructors,
264 * whose indices are given as input to the constructor of this class.
265 */
266 class TransfDrop : public Transf
267 {
268 public:
269 TransfDrop(const std::vector<unsigned>& indices) : d_drop_indices(indices)
270 {
271 }
272 /** build type */
273 void buildType(SygusGrammarNorm* sygus_norm,
274 TypeObject& to,
275 const DType& dt,
276 std::vector<unsigned>& op_pos) override;
277
278 private:
279 std::vector<unsigned> d_drop_indices;
280 };
281
282 /** Chain transformation class
283 *
284 * Determines how to build normalized types by chaining the application of one
285 * of its operators. The resulting type should admit the same terms as the
286 * previous one modulo commutativity, associativity and identity of the
287 * neutral element.
288 *
289 * TODO: #1304:
290 * - define this transformation for more than just PLUS for Int.
291 * - improve the building such that elements that should not be entitled a
292 * "link in the chain" (such as 5 in opposition to variables and 1) do not get
293 * one
294 * - consider the case when operator is applied to different types, e.g.:
295 * A -> B + B | x; B -> 0 | 1
296 * - consider the case in which in which the operator occurs nested in an
297 * argument type of itself, e.g.:
298 * A -> (B + B) + B | x; B -> 0 | 1
299 */
300 class TransfChain : public Transf
301 {
302 public:
303 TransfChain(unsigned chain_op_pos, const std::vector<unsigned>& elem_pos)
304 : d_chain_op_pos(chain_op_pos), d_elem_pos(elem_pos){};
305
306 /** builds types encoding a chain in which each link contains a repetition
307 * of the application of the chain operator over a non-identity element
308 *
309 * Example: when considering, over the integers, the operator "+" and the
310 * elemenst "1", "x" and "y", the built chain is e.g.
311 *
312 * x + ... + x + y + ... + y + 1 + ...+ 1
313 *
314 * whose encoding in types would be e.g.
315 *
316 * A -> AX | AX + A | B
317 * AX -> x
318 * B -> BY | BY + B | C
319 * BY -> y
320 * C -> C1 | C1 + C
321 * C1 -> 1
322 *
323 * ++++++++
324 *
325 * The types composing links in the chain are built recursively by invoking
326 * sygus_norm, which caches results and handles the global normalization, on
327 * the operators not used in a given link, which will lead to recalling this
328 * transformation and so on until all operators originally given are
329 * considered.
330 */
331 void buildType(SygusGrammarNorm* sygus_norm,
332 TypeObject& to,
333 const DType& dt,
334 std::vector<unsigned>& op_pos) override;
335
336 /** Whether operator is chainable for the type (e.g. PLUS for Int)
337 *
338 * Since the map this function depends on cannot be built statically, this
339 * function first build maps the first time a type is checked. As a
340 * consequence the list of chainabel operators is hardcoded in the map
341 * building.
342 *
343 * TODO: #1304: Cover more types and operators, make this robust to more
344 * complex grammars
345 */
346 static bool isChainable(TypeNode tn, Node op);
347 /* Whether n is the identity for the chain operator of the type (e.g. 1 is
348 * not the identity 0 for PLUS for Int)
349 *
350 * TODO: #1304: Cover more types, make this robust to more complex grammars
351 */
352 static bool isId(TypeNode tn, Node op, Node n);
353
354 private:
355 /* TODO #1304: this should admit more than one, as well as which elements
356 * are associated with which operator */
357 /* Position of chain operator */
358 unsigned d_chain_op_pos;
359 /* Positions (of constructors in the datatype whose type is being
360 * normalized) of elements the chain operator is applied to */
361 std::vector<unsigned> d_elem_pos;
362 /** Specifies for each type node which are its chainable operators
363 *
364 * For example, for Int the map is {OP -> [+]}
365 *
366 * TODO #1304: consider more operators
367 */
368 static std::map<TypeNode, std::vector<Kind>> d_chain_ops;
369 /** Specifies for each type node and chainable operator its identity
370 *
371 * For example, for Int and PLUS the map is {Int -> {+ -> 0}}
372 *
373 * TODO #1304: consider more operators
374 */
375 static std::map<TypeNode, std::map<Kind, Node>> d_chain_op_id;
376
377 }; /* class TransfChain */
378
379 /** reference to quantifier engine */
380 QuantifiersEngine* d_qe;
381 /** sygus term database associated with this utility */
382 TermDbSygus* d_tds;
383 /** List of variable inputs of function-to-synthesize.
384 *
385 * This information is needed in the construction of each datatype
386 * representation of type nodes contained in the type node being normalized
387 */
388 TNode d_sygus_vars;
389 /* Datatypes to be resolved */
390 std::vector<Datatype> d_dt_all;
391 /* Types to be resolved */
392 std::set<Type> d_unres_t_all;
393 /* Associates type nodes with OpPosTries */
394 std::map<TypeNode, OpPosTrie> d_tries;
395 /* Map of type nodes into their identity operators (\lambda x. x) */
396 static std::map<TypeNode, Node> d_tn_to_id;
397
398 /** recursively traverses a typenode normalizing all of its elements
399 *
400 * "tn" is the typenode to be normalized
401 * "dt" is its datatype representation
402 * "op_pos" is the list of positions of construtors of dt that are being
403 * considered for the normalization
404 *
405 * The result of normalizing tn with the respective constructors is cached
406 * with an OpPosTrie. New types and datatypes created during normalization are
407 * accumulated grobally to be later resolved.
408 *
409 * The normalization occurs following some inferred transformation based on
410 * the sygus type (e.g. Int) of tn, and the operators being considered.
411 *
412 * Example: Let A be the type node encoding the grammar
413 *
414 * Int -> x | y | Int + Int | 0 | 1 | ite(Bool, Int, Int)
415 *
416 * and assume all its datatype constructors are being used for
417 * normalization. The inferred normalization transformation will consider the
418 * non-zero elements {x, y, 1, ite(...)} and the operator {+} to build a chain
419 * of monomials, as seen above. The operator for "0" is rebuilt as is (the
420 * default behaviour of operators not selected for transformations).
421 *
422 * recursion depth is limited by the height of the types, which is small
423 */
424 TypeNode normalizeSygusRec(TypeNode tn,
425 const DType& dt,
426 std::vector<unsigned>& op_pos);
427
428 /** wrapper for the above function
429 *
430 * invoked when all operators of "tn" are to be considered for normalization
431 */
432 TypeNode normalizeSygusRec(TypeNode tn);
433
434 /** infers a transformation for normalizing dt when allowed to use the
435 * operators in the positions op_pos.
436 *
437 * TODO: #1304: Infer more complex transformations
438 */
439 std::unique_ptr<Transf> inferTransf(TypeNode tn,
440 const DType& dt,
441 const std::vector<unsigned>& op_pos);
442 }; /* class SygusGrammarNorm */
443
444 } // namespace quantifiers
445 } // namespace theory
446 } // namespace CVC4
447
448 #endif