Minor cleaning of smt2 parser (#3823)
[cvc5.git] / src / parser / smt2 / smt2.h
1 /********************* */
2 /*! \file smt2.h
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Andrew Reynolds, Morgan Deters, Christopher L. Conway
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief Definitions of SMT2 constants.
13 **
14 ** Definitions of SMT2 constants.
15 **/
16
17 #include "cvc4parser_private.h"
18
19 #ifndef CVC4__PARSER__SMT2_H
20 #define CVC4__PARSER__SMT2_H
21
22 #include <sstream>
23 #include <stack>
24 #include <string>
25 #include <unordered_map>
26 #include <utility>
27
28 #include "api/cvc4cpp.h"
29 #include "parser/parse_op.h"
30 #include "parser/parser.h"
31 #include "smt/command.h"
32 #include "theory/logic_info.h"
33 #include "util/abstract_value.h"
34
35 namespace CVC4 {
36
37 class SExpr;
38
39 namespace api {
40 class Solver;
41 }
42
43 namespace parser {
44
45 class Smt2 : public Parser
46 {
47 friend class ParserBuilder;
48
49 public:
50 enum Theory
51 {
52 THEORY_ARRAYS,
53 THEORY_BITVECTORS,
54 THEORY_CORE,
55 THEORY_DATATYPES,
56 THEORY_INTS,
57 THEORY_REALS,
58 THEORY_TRANSCENDENTALS,
59 THEORY_REALS_INTS,
60 THEORY_QUANTIFIERS,
61 THEORY_SETS,
62 THEORY_STRINGS,
63 THEORY_UF,
64 THEORY_FP,
65 THEORY_SEP
66 };
67
68 private:
69 /** Has the logic been set (either by forcing it or a set-logic command)? */
70 bool d_logicSet;
71 /** Have we seen a set-logic command yet? */
72 bool d_seenSetLogic;
73
74 LogicInfo d_logic;
75 std::unordered_map<std::string, Kind> operatorKindMap;
76 /**
77 * Maps indexed symbols to the kind of the operator (e.g. "extract" to
78 * BITVECTOR_EXTRACT).
79 */
80 std::unordered_map<std::string, api::Kind> d_indexedOpKindMap;
81 std::pair<Expr, std::string> d_lastNamedTerm;
82 // for sygus
83 std::vector<Expr> d_sygusVars, d_sygusVarPrimed, d_sygusConstraints,
84 d_sygusFunSymbols;
85
86 protected:
87 Smt2(api::Solver* solver,
88 Input* input,
89 bool strictMode = false,
90 bool parseOnly = false);
91
92 public:
93 /**
94 * Add theory symbols to the parser state.
95 *
96 * @param theory the theory to open (e.g., Core, Ints)
97 */
98 void addTheory(Theory theory);
99
100 void addOperator(Kind k, const std::string& name);
101
102 /**
103 * Registers an indexed function symbol.
104 *
105 * @param tKind The kind of the term that uses the operator kind (e.g.
106 * BITVECTOR_EXTRACT). NOTE: this is an internal kind for now
107 * because that is what we use to create expressions. Eventually
108 * it will be an api::Kind.
109 * @param opKind The kind of the operator term (e.g. BITVECTOR_EXTRACT)
110 * @param name The name of the symbol (e.g. "extract")
111 */
112 void addIndexedOperator(Kind tKind,
113 api::Kind opKind,
114 const std::string& name);
115
116 Kind getOperatorKind(const std::string& name) const;
117
118 bool isOperatorEnabled(const std::string& name) const;
119
120 bool isTheoryEnabled(Theory theory) const;
121
122 bool logicIsSet() override;
123
124 /**
125 * Creates an indexed constant, e.g. (_ +oo 8 24) (positive infinity
126 * as a 32-bit floating-point constant).
127 *
128 * @param name The name of the constant (e.g. "+oo")
129 * @param numerals The parameters for the constant (e.g. [8, 24])
130 * @return The term corresponding to the constant or a parse error if name is
131 * not valid.
132 */
133 api::Term mkIndexedConstant(const std::string& name,
134 const std::vector<uint64_t>& numerals);
135
136 /**
137 * Creates an indexed operator term, e.g. (_ extract 5 0).
138 *
139 * @param name The name of the operator (e.g. "extract")
140 * @param numerals The parameters for the operator (e.g. [5, 0])
141 * @return The operator term corresponding to the indexed operator or a parse
142 * error if the name is not valid.
143 */
144 api::Op mkIndexedOp(const std::string& name,
145 const std::vector<uint64_t>& numerals);
146
147 /**
148 * Returns the expression that name should be interpreted as.
149 */
150 Expr getExpressionForNameAndType(const std::string& name, Type t) override;
151
152 /** Make function defined by a define-fun(s)-rec command.
153 *
154 * fname : the name of the function.
155 * sortedVarNames : the list of variable arguments for the function.
156 * t : the range type of the function we are defining.
157 *
158 * This function will create a bind a new function term to name fname.
159 * The type of this function is
160 * Parser::mkFlatFunctionType(sorts,t,flattenVars),
161 * where sorts are the types in the second components of sortedVarNames.
162 * As descibed in Parser::mkFlatFunctionType, new bound variables may be
163 * added to flattenVars in this function if the function is given a function
164 * range type.
165 */
166 Expr mkDefineFunRec(
167 const std::string& fname,
168 const std::vector<std::pair<std::string, Type> >& sortedVarNames,
169 Type t,
170 std::vector<Expr>& flattenVars);
171
172 /** Push scope for define-fun-rec
173 *
174 * This calls Parser::pushScope(bindingLevel) and sets up
175 * initial information for reading a body of a function definition
176 * in the define-fun-rec and define-funs-rec command.
177 * The input parameters func/flattenVars are the result
178 * of a call to mkDefineRec above.
179 *
180 * func : the function whose body we are defining.
181 * sortedVarNames : the list of variable arguments for the function.
182 * flattenVars : the implicit variables introduced when defining func.
183 *
184 * This function:
185 * (1) Calls Parser::pushScope(bindingLevel).
186 * (2) Computes the bound variable list for the quantified formula
187 * that defined this definition and stores it in bvs.
188 */
189 void pushDefineFunRecScope(
190 const std::vector<std::pair<std::string, Type> >& sortedVarNames,
191 Expr func,
192 const std::vector<Expr>& flattenVars,
193 std::vector<Expr>& bvs,
194 bool bindingLevel = false);
195
196 void reset() override;
197
198 void resetAssertions();
199
200 /**
201 * Class for creating instances of `SynthFunCommand`s. Creating an instance
202 * of this class pushes the scope, destroying it pops the scope.
203 */
204 class SynthFunFactory
205 {
206 public:
207 /**
208 * Creates an instance of `SynthFunFactory`.
209 *
210 * @param smt2 Pointer to the parser state
211 * @param fun Name of the function to synthesize
212 * @param isInv True if the goal is to synthesize an invariant, false
213 * otherwise
214 * @param range The return type of the function-to-synthesize
215 * @param sortedVarNames The parameters of the function-to-synthesize
216 */
217 SynthFunFactory(
218 Smt2* smt2,
219 const std::string& fun,
220 bool isInv,
221 Type range,
222 std::vector<std::pair<std::string, CVC4::Type>>& sortedVarNames);
223 ~SynthFunFactory();
224
225 const std::vector<Expr>& getSygusVars() const { return d_sygusVars; }
226
227 /**
228 * Create an instance of `SynthFunCommand`.
229 *
230 * @param grammar Optional grammar associated with the synth-fun command
231 * @return The instance of `SynthFunCommand`
232 */
233 std::unique_ptr<Command> mkCommand(Type grammar);
234
235 private:
236 Smt2* d_smt2;
237 std::string d_fun;
238 Expr d_synthFun;
239 Type d_sygusType;
240 bool d_isInv;
241 std::vector<Expr> d_sygusVars;
242 };
243
244 /**
245 * Creates a command that adds an invariant constraint.
246 *
247 * @param names Name of four symbols corresponding to the
248 * function-to-synthesize, precondition, postcondition,
249 * transition relation.
250 * @return The command that adds an invariant constraint
251 */
252 std::unique_ptr<Command> invConstraint(const std::vector<std::string>& names);
253
254 /**
255 * Sets the logic for the current benchmark. Declares any logic and
256 * theory symbols.
257 *
258 * @param name the name of the logic (e.g., QF_UF, AUFLIA)
259 * @param fromCommand should be set to true if the request originates from a
260 * set-logic command and false otherwise
261 * @return the command corresponding to setting the logic
262 */
263 Command* setLogic(std::string name, bool fromCommand = true);
264
265 /**
266 * Get the logic.
267 */
268 const LogicInfo& getLogic() const { return d_logic; }
269
270 bool v2_0() const
271 {
272 return getLanguage() == language::input::LANG_SMTLIB_V2_0;
273 }
274 /**
275 * Are we using smtlib 2.5 or above? If exact=true, then this method returns
276 * false if the input language is not exactly SMT-LIB 2.5.
277 */
278 bool v2_5(bool exact = false) const
279 {
280 return language::isInputLang_smt2_5(getLanguage(), exact);
281 }
282 /**
283 * Are we using smtlib 2.6 or above? If exact=true, then this method returns
284 * false if the input language is not exactly SMT-LIB 2.6.
285 */
286 bool v2_6(bool exact = false) const
287 {
288 return language::isInputLang_smt2_6(getLanguage(), exact);
289 }
290 /** Are we using a sygus language? */
291 bool sygus() const;
292 /** Are we using the sygus version 1.0 format? */
293 bool sygus_v1() const;
294
295 /**
296 * Returns true if the language that we are parsing (SMT-LIB version >=2.5
297 * and SyGuS) treats duplicate double quotes ("") as an escape sequence
298 * denoting a single double quote (").
299 */
300 bool escapeDupDblQuote() const { return v2_5() || sygus(); }
301
302 void setInfo(const std::string& flag, const SExpr& sexpr);
303
304 void setOption(const std::string& flag, const SExpr& sexpr);
305
306 void checkThatLogicIsSet();
307
308 void checkUserSymbol(const std::string& name) {
309 if(name.length() > 0 && (name[0] == '.' || name[0] == '@')) {
310 std::stringstream ss;
311 ss << "cannot declare or define symbol `" << name << "'; symbols starting with . and @ are reserved in SMT-LIB";
312 parseError(ss.str());
313 }
314 else if (isOperatorEnabled(name))
315 {
316 std::stringstream ss;
317 ss << "Symbol `" << name << "' is shadowing a theory function symbol";
318 parseError(ss.str());
319 }
320 }
321
322 void includeFile(const std::string& filename);
323
324 void setLastNamedTerm(Expr e, std::string name) {
325 d_lastNamedTerm = std::make_pair(e, name);
326 }
327
328 void clearLastNamedTerm() {
329 d_lastNamedTerm = std::make_pair(Expr(), "");
330 }
331
332 std::pair<Expr, std::string> lastNamedTerm() {
333 return d_lastNamedTerm;
334 }
335
336 /** Does name denote an abstract value? (of the form '@n' for numeral n). */
337 bool isAbstractValue(const std::string& name);
338
339 /** Make abstract value
340 *
341 * Abstract values are used for processing get-value calls. The argument
342 * name should be such that isAbstractValue(name) is true.
343 */
344 Expr mkAbstractValue(const std::string& name);
345
346 void mkSygusConstantsForType( const Type& type, std::vector<CVC4::Expr>& ops );
347
348 void processSygusGTerm(
349 CVC4::SygusGTerm& sgt,
350 int index,
351 std::vector<CVC4::Datatype>& datatypes,
352 std::vector<CVC4::Type>& sorts,
353 std::vector<std::vector<ParseOp>>& ops,
354 std::vector<std::vector<std::string>>& cnames,
355 std::vector<std::vector<std::vector<CVC4::Type>>>& cargs,
356 std::vector<bool>& allow_const,
357 std::vector<std::vector<std::string>>& unresolved_gterm_sym,
358 const std::vector<CVC4::Expr>& sygus_vars,
359 std::map<CVC4::Type, CVC4::Type>& sygus_to_builtin,
360 std::map<CVC4::Type, CVC4::Expr>& sygus_to_builtin_expr,
361 CVC4::Type& ret,
362 bool isNested = false);
363
364 bool pushSygusDatatypeDef(
365 Type t,
366 std::string& dname,
367 std::vector<CVC4::Datatype>& datatypes,
368 std::vector<CVC4::Type>& sorts,
369 std::vector<std::vector<ParseOp>>& ops,
370 std::vector<std::vector<std::string>>& cnames,
371 std::vector<std::vector<std::vector<CVC4::Type>>>& cargs,
372 std::vector<bool>& allow_const,
373 std::vector<std::vector<std::string>>& unresolved_gterm_sym);
374
375 bool popSygusDatatypeDef(
376 std::vector<CVC4::Datatype>& datatypes,
377 std::vector<CVC4::Type>& sorts,
378 std::vector<std::vector<ParseOp>>& ops,
379 std::vector<std::vector<std::string>>& cnames,
380 std::vector<std::vector<std::vector<CVC4::Type>>>& cargs,
381 std::vector<bool>& allow_const,
382 std::vector<std::vector<std::string>>& unresolved_gterm_sym);
383
384 void setSygusStartIndex(const std::string& fun,
385 int startIndex,
386 std::vector<CVC4::Datatype>& datatypes,
387 std::vector<CVC4::Type>& sorts,
388 std::vector<std::vector<ParseOp>>& ops);
389
390 void mkSygusDatatype(CVC4::Datatype& dt,
391 std::vector<ParseOp>& ops,
392 std::vector<std::string>& cnames,
393 std::vector<std::vector<CVC4::Type>>& cargs,
394 std::vector<std::string>& unresolved_gterm_sym,
395 std::map<CVC4::Type, CVC4::Type>& sygus_to_builtin);
396
397 /**
398 * Adds a constructor to sygus datatype dt whose sygus operator is term.
399 *
400 * ntsToUnres contains a mapping from non-terminal symbols to the unresolved
401 * types they correspond to. This map indicates how the argument term should
402 * be interpreted (instances of symbols from the domain of ntsToUnres
403 * correspond to constructor arguments).
404 *
405 * The sygus operator that is actually added to dt corresponds to replacing
406 * each occurrence of non-terminal symbols from the domain of ntsToUnres
407 * with bound variables via purifySygusGTerm, and binding these variables
408 * via a lambda.
409 */
410 void addSygusConstructorTerm(Datatype& dt,
411 Expr term,
412 std::map<Expr, Type>& ntsToUnres) const;
413 /**
414 * This adds constructors to dt for sygus variables in sygusVars whose
415 * type is argument type. This method should be called when the sygus grammar
416 * term (Variable type) is encountered.
417 */
418 void addSygusConstructorVariables(Datatype& dt,
419 const std::vector<Expr>& sygusVars,
420 Type type) const;
421
422 /**
423 * Smt2 parser provides its own checkDeclaration, which does the
424 * same as the base, but with some more helpful errors.
425 */
426 void checkDeclaration(const std::string& name,
427 DeclarationCheck check,
428 SymbolType type = SYM_VARIABLE,
429 std::string notes = "")
430 {
431 // if the symbol is something like "-1", we'll give the user a helpful
432 // syntax hint. (-1 is a valid identifier in SMT-LIB, NOT unary minus.)
433 if (name.length() > 1 && name[0] == '-'
434 && name.find_first_not_of("0123456789", 1) == std::string::npos)
435 {
436 if (sygus_v1())
437 {
438 // "-1" is allowed in SyGuS version 1.0
439 return;
440 }
441 std::stringstream ss;
442 ss << notes << "You may have intended to apply unary minus: `(- "
443 << name.substr(1) << ")'\n";
444 this->Parser::checkDeclaration(name, check, type, ss.str());
445 return;
446 }
447 this->Parser::checkDeclaration(name, check, type, notes);
448 }
449
450 void checkOperator(Kind kind, unsigned numArgs)
451 {
452 Parser::checkOperator(kind, numArgs);
453 // strict SMT-LIB mode enables extra checks for some bitvector operators
454 // that CVC4 permits as N-ary but the standard requires is binary
455 if(strictModeEnabled()) {
456 switch(kind) {
457 case kind::BITVECTOR_AND:
458 case kind::BITVECTOR_MULT:
459 case kind::BITVECTOR_OR:
460 case kind::BITVECTOR_PLUS:
461 case kind::BITVECTOR_XOR:
462 if (numArgs != 2 && !v2_6())
463 {
464 parseError(
465 "Operator requires exactly 2 arguments in strict SMT-LIB "
466 "compliance mode (for versions <2.6): "
467 + kindToString(kind));
468 }
469 break;
470 case kind::BITVECTOR_CONCAT:
471 if(numArgs != 2) {
472 parseError(
473 "Operator requires exactly 2 arguments in strict SMT-LIB "
474 "compliance mode: "
475 + kindToString(kind));
476 }
477 break;
478 default:
479 break; /* no problem */
480 }
481 }
482 }
483 /** Set named attribute
484 *
485 * This is called when expression expr is annotated with a name, i.e.
486 * (! expr :named sexpr). It sets up the necessary information to process
487 * this naming, including marking that expr is the last named term.
488 *
489 * We construct an expression symbol whose name is the name of s-expression
490 * which is used later for tracking assertions in unsat cores. This
491 * symbol is returned by this method.
492 */
493 Expr setNamedAttribute(Expr& expr, const SExpr& sexpr);
494
495 // Throw a ParserException with msg appended with the current logic.
496 inline void parseErrorLogic(const std::string& msg)
497 {
498 const std::string withLogic = msg + getLogic().getLogicString();
499 parseError(withLogic);
500 }
501
502 //------------------------- processing parse operators
503 /**
504 * Given a parse operator p, apply a type ascription to it. This method is run
505 * when we encounter "(as t type)" and information regarding t has been stored
506 * in p.
507 *
508 * This updates p to take into account the ascription. This may include:
509 * - Converting an (pre-ascribed) array constant specification "const" to
510 * an ascribed array constant specification (as const type) where type is
511 * (Array T1 T2) for some T1, T2.
512 * - Converting a (nullary or non-nullary) parametric datatype constructor to
513 * the specialized constructor for the given type.
514 * - Converting an empty set, universe set, or separation nil reference to
515 * the respective term of the given type.
516 * - If p's expression field is set, then we leave p unchanged, check if
517 * that expression has the given type and throw a parse error otherwise.
518 */
519 void applyTypeAscription(ParseOp& p, Type type);
520 /**
521 * This converts a ParseOp to expression, assuming it is a standalone term.
522 *
523 * In particular:
524 * - If p's expression field is set, then that expression is returned.
525 * - If p's name field is set, then we look up that name in the symbol table
526 * of this class.
527 * In other cases, a parse error is thrown.
528 */
529 Expr parseOpToExpr(ParseOp& p);
530 /**
531 * Apply parse operator to list of arguments, and return the resulting
532 * expression.
533 *
534 * This method involves two phases.
535 * (1) Processing the operator represented by p,
536 * (2) Applying that operator to the set of arguments.
537 *
538 * For (1), this involves determining the kind of the overall expression. We
539 * may be in one the following cases:
540 * - If p's expression field is set, we may choose to prepend it to args, or
541 * otherwise determine the appropriate kind of the overall expression based on
542 * this expression.
543 * - If p's name field is set, then we get the appropriate symbol for that
544 * name, which may involve disambiguating that name if it is overloaded based
545 * on the types of args. We then determine the overall kind of the return
546 * expression based on that symbol.
547 * - p's kind field may be already set.
548 *
549 * For (2), we construct the overall expression, which may involve the
550 * following:
551 * - If p is an array constant specification (as const (Array T1 T2)), then
552 * we return the appropriate array constant based on args[0].
553 * - If p represents a tuple selector, then we infer the appropriate tuple
554 * selector expression based on the type of args[0].
555 * - If the overall kind of the expression is chainable, we may convert it
556 * to a left- or right-associative chain.
557 * - If the overall kind is MINUS and args has size 1, then we return an
558 * application of UMINUS.
559 * - If the overall expression is a partial application, then we process this
560 * as a chain of HO_APPLY terms.
561 */
562 Expr applyParseOp(ParseOp& p, std::vector<Expr>& args);
563 //------------------------- end processing parse operators
564 private:
565 std::map< CVC4::Expr, CVC4::Type > d_sygus_bound_var_type;
566
567 Type processSygusNestedGTerm(
568 int sub_dt_index,
569 std::string& sub_dname,
570 std::vector<CVC4::Datatype>& datatypes,
571 std::vector<CVC4::Type>& sorts,
572 std::vector<std::vector<ParseOp>>& ops,
573 std::vector<std::vector<std::string>>& cnames,
574 std::vector<std::vector<std::vector<CVC4::Type>>>& cargs,
575 std::vector<bool>& allow_const,
576 std::vector<std::vector<std::string>>& unresolved_gterm_sym,
577 std::map<CVC4::Type, CVC4::Type>& sygus_to_builtin,
578 std::map<CVC4::Type, CVC4::Expr>& sygus_to_builtin_expr,
579 Type sub_ret);
580
581 /** make sygus bound var list
582 *
583 * This is used for converting non-builtin sygus operators to lambda
584 * expressions. It takes as input a datatype and constructor index (for
585 * naming) and a vector of type ltypes.
586 * It appends a bound variable to lvars for each type in ltypes, and returns
587 * a bound variable list whose children are lvars.
588 */
589 Expr makeSygusBoundVarList(Datatype& dt,
590 unsigned i,
591 const std::vector<Type>& ltypes,
592 std::vector<Expr>& lvars);
593
594 /** Purify sygus grammar term
595 *
596 * This returns a term where all occurrences of non-terminal symbols (those
597 * in the domain of ntsToUnres) are replaced by fresh variables. For each
598 * variable replaced in this way, we add the fresh variable it is replaced
599 * with to args, and the unresolved type corresponding to the non-terminal
600 * symbol to cargs (constructor args). In other words, args contains the
601 * free variables in the term returned by this method (which should be bound
602 * by a lambda), and cargs contains the types of the arguments of the
603 * sygus constructor.
604 */
605 Expr purifySygusGTerm(Expr term,
606 std::map<Expr, Type>& ntsToUnres,
607 std::vector<Expr>& args,
608 std::vector<Type>& cargs) const;
609
610 void addArithmeticOperators();
611
612 void addTranscendentalOperators();
613
614 void addQuantifiersOperators();
615
616 void addBitvectorOperators();
617
618 void addDatatypesOperators();
619
620 void addStringOperators();
621
622 void addFloatingPointOperators();
623
624 void addSepOperators();
625
626 InputLanguage getLanguage() const;
627
628 /**
629 * Utility function to create a conjunction of expressions.
630 *
631 * @param es Expressions in the conjunction
632 * @return True if `es` is empty, `e` if `es` consists of a single element
633 * `e`, the conjunction of expressions otherwise.
634 */
635 Expr mkAnd(const std::vector<Expr>& es);
636 }; /* class Smt2 */
637
638 }/* CVC4::parser namespace */
639 }/* CVC4 namespace */
640
641 #endif /* CVC4__PARSER__SMT2_H */