From: Christopher L. Conway Date: Thu, 29 Apr 2010 16:53:19 +0000 (+0000) Subject: First draft implementation of SMT v2 parser X-Git-Tag: cvc5-1.0.0~9099 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=194c5b6f04c7c9bec8c0f23b88ac8d0f0094186a;p=cvc5.git First draft implementation of SMT v2 parser --- diff --git a/.cproject b/.cproject index 0d7c6b9fc..488d7e4d4 100644 --- a/.cproject +++ b/.cproject @@ -44,7 +44,7 @@ - + diff --git a/src/expr/command.h b/src/expr/command.h index bb295a662..3be957feb 100644 --- a/src/expr/command.h +++ b/src/expr/command.h @@ -93,6 +93,7 @@ public: class CVC4_PUBLIC DeclarationCommand : public EmptyCommand { public: + DeclarationCommand(const std::string& id, const Type& t); DeclarationCommand(const std::vector& ids, const Type& t); void toStream(std::ostream& out) const; protected: @@ -259,6 +260,12 @@ inline void CommandSequence::addCommand(Command* cmd) { /* class DeclarationCommand */ +inline DeclarationCommand::DeclarationCommand(const std::string& id, const Type& t) : + d_type(t) +{ + d_declaredSymbols.push_back(id); +} + inline DeclarationCommand::DeclarationCommand(const std::vector& ids, const Type& t) : d_declaredSymbols(ids), d_type(t) diff --git a/src/main/getopt.cpp b/src/main/getopt.cpp index a272aaafd..2ad34597e 100644 --- a/src/main/getopt.cpp +++ b/src/main/getopt.cpp @@ -165,6 +165,9 @@ throw(OptionException) { } else if(!strcmp(optarg, "smtlib") || !strcmp(optarg, "smt")) { opts->lang = parser::LANG_SMTLIB; break; + } else if(!strcmp(optarg, "smtlib2") || !strcmp(optarg, "smt2")) { + opts->lang = parser::LANG_SMTLIB_V2; + break; } else if(!strcmp(optarg, "auto")) { opts->lang = parser::LANG_AUTO; break; diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 1464eeac0..380f40c6a 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -17,7 +17,7 @@ AM_CPPFLAGS = \ -I@srcdir@/../include -I@srcdir@/.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall $(FLAG_VISIBILITY_HIDDEN) -SUBDIRS = smt cvc +SUBDIRS = smt smt2 cvc nobase_lib_LTLIBRARIES = libcvc4parser.la noinst_LTLIBRARIES = libcvc4parser_noinst.la @@ -30,7 +30,8 @@ libcvc4parser_la_LINK = $(CXXLINK) libcvc4parser_noinst_la_LDFLAGS = $(ANTLR_LDFLAGS) libcvc4parser_noinst_la_LIBADD = \ @builddir@/smt/libparsersmt.la \ - @builddir@/cvc/libparsercvc.la + @builddir@/smt2/libparsersmt2.la \ + @builddir@/cvc/libparsercvc.la libcvc4parser_la_SOURCES = libcvc4parser_noinst_la_SOURCES = \ diff --git a/src/parser/input.cpp b/src/parser/input.cpp index 5df017f16..33bee84a6 100644 --- a/src/parser/input.cpp +++ b/src/parser/input.cpp @@ -27,6 +27,7 @@ #include "expr/type.h" #include "parser/cvc/cvc_input.h" #include "parser/smt/smt_input.h" +#include "parser/smt2/smt2_input.h" #include "util/output.h" #include "util/Assert.h" @@ -182,6 +183,10 @@ Input* Input::newInput(InputLanguage lang, AntlrInputStream *inputStream) { input = new SmtInput(inputStream); break; + case LANG_SMTLIB_V2: + input = new Smt2Input(inputStream); + break; + default: Unhandled(lang); } diff --git a/src/parser/parser_options.h b/src/parser/parser_options.h index d593d6f5a..51d28e51d 100644 --- a/src/parser/parser_options.h +++ b/src/parser/parser_options.h @@ -25,6 +25,8 @@ namespace parser { enum InputLanguage { /** The SMTLIB input language */ LANG_SMTLIB, + /** The SMTLIB v2 input language */ + LANG_SMTLIB_V2, /** The CVC4 input language */ LANG_CVC4, /** Auto-detect the language */ diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g index 6a34df375..ba5e8abf5 100644 --- a/src/parser/smt/Smt.g +++ b/src/parser/smt/Smt.g @@ -88,7 +88,7 @@ using namespace CVC4::parser; * @param parser the CVC4 Parser object * @param name the name of the logic (e.g., QF_UF, AUFLIA) */ -void +static void setLogic(Parser *parser, const std::string& name) { if( name == "QF_UF" ) { parser->mkSort("U"); diff --git a/src/parser/smt2/Makefile b/src/parser/smt2/Makefile new file mode 100644 index 000000000..fc9cc6db2 --- /dev/null +++ b/src/parser/smt2/Makefile @@ -0,0 +1,4 @@ +topdir = ../../.. +srcdir = src/parser/smt2 + +include $(topdir)/Makefile.subdir diff --git a/src/parser/smt2/Makefile.am b/src/parser/smt2/Makefile.am new file mode 100644 index 000000000..6391919d6 --- /dev/null +++ b/src/parser/smt2/Makefile.am @@ -0,0 +1,51 @@ +AM_CPPFLAGS = \ + -D__BUILDING_CVC4PARSERLIB \ + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) +AM_CXXFLAGS = -Wall $(FLAG_VISIBILITY_HIDDEN) -Wno-unused-function -Wno-unused-variable + +# Compile generated C files using C++ compiler +AM_CFLAGS = $(AM_CXXFLAGS) +CC=$(CXX) + +noinst_LTLIBRARIES = libparsersmt2.la + +ANTLR_TOKEN_STUFF = \ + @srcdir@/generated/Smt2.tokens +ANTLR_LEXER_STUFF = \ + @srcdir@/generated/Smt2Lexer.h \ + @srcdir@/generated/Smt2Lexer.c \ + $(ANTLR_TOKEN_STUFF) +ANTLR_PARSER_STUFF = \ + @srcdir@/generated/Smt2Parser.h \ + @srcdir@/generated/Smt2Parser.c +ANTLR_STUFF = \ + $(ANTLR_LEXER_STUFF) \ + $(ANTLR_PARSER_STUFF) + +libparsersmt2_la_SOURCES = \ + Smt2.g \ + smt2_input.h \ + smt2_input.cpp \ + $(ANTLR_STUFF) + +BUILT_SOURCES = $(ANTLR_STUFF) +dist-hook: $(ANTLR_STUFF) +MAINTAINERCLEANFILES = $(ANTLR_STUFF) +maintainer-clean-local: + -$(AM_V_at)rmdir @srcdir@/generated + -$(AM_V_at)rm -f @srcdir@/stamp-generated + +@srcdir@/stamp-generated: + $(AM_V_at)mkdir -p @srcdir@/generated + $(AM_V_at)touch @srcdir@/stamp-generated + +# antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. +@srcdir@/generated/Smt2Lexer.h: Smt2.g @srcdir@/stamp-generated + -$(AM_V_at)rm -f $(ANTLR_STUFF) + $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/Smt2.g" + +# These don't actually depend on SmtLexer.h, but if we're doing parallel +# make and the lexer needs to be rebuilt, we have to keep the rules +# from running in parallel (since the token files will be deleted & +# recreated) +@srcdir@/generated/Smt2Lexer.c @srcdir@/generated/Smt2Parser.h @srcdir@/generated/Smt2Parser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/Smt2Lexer.h diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g new file mode 100644 index 000000000..44dd041c3 --- /dev/null +++ b/src/parser/smt2/Smt2.g @@ -0,0 +1,462 @@ +/* ******************* */ +/* Smt2.g + ** Original author: cconway + ** Major contributors: none + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for SMT-LIB v2 input language. + **/ + +grammar Smt2; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/** This suppresses warnings about the redefinition of token symbols between + * different parsers. The redefinitions should be harmless as long as no + * client: (a) #include's the lexer headers for two grammars AND (b) uses the + * token symbol definitions. + */ +#pragma GCC system_header + +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/parser.h" + +namespace CVC4 { + class Expr; +} +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/parser.h" +#include "util/integer.h" +#include "util/output.h" +#include "util/rational.h" +#include + +using namespace CVC4; +using namespace CVC4::parser; + +/* These need to be macros so they can refer to the PARSER macro, which will be defined + * by ANTLR *after* this section. (If they were functions, PARSER would be undefined.) */ +#undef PARSER_STATE +#define PARSER_STATE ((Parser*)PARSER->super) +#undef EXPR_MANAGER +#define EXPR_MANAGER PARSER_STATE->getExprManager() +#undef MK_EXPR +#define MK_EXPR EXPR_MANAGER->mkExpr +#undef MK_CONST +#define MK_CONST EXPR_MANAGER->mkConst + +/** + * Sets the logic for the current benchmark. Declares any logic symbols. + * + * @param parser the CVC4 Parser object + * @param name the name of the logic (e.g., QF_UF, AUFLIA) + */ +static void +setLogic(Parser *parser, const std::string& name) { + if( name == "QF_UF" ) { + parser->mkSort("U"); + } else if(name == "QF_LRA"){ + parser->defineType("Real", parser->getExprManager()->realType()); + } else{ + // NOTE: Theory types go here + Unhandled(name); + } +} +} + + +/** + * Parses an expression. + * @return the parsed expression, or the Null Expr if we've reached the end of the input + */ +parseExpr returns [CVC4::Expr expr] + : term[expr] + | EOF + ; + +/** + * Parses a command + * @return the parsed command, or NULL if we've reached the end of the input + */ +parseCommand returns [CVC4::Command* cmd] + : LPAREN_TOK c = command RPAREN_TOK { $cmd = c; } + | EOF { $cmd = 0; } + ; + +/** + * Parse the internal portion of the command, ignoring the surrounding parentheses. + */ +command returns [CVC4::Command* cmd] +@declarations { + std::string name; + BenchmarkStatus b_status; + Expr expr; + Type t; + std::vector sorts; +} + : /* set the logic */ + SET_LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE] + { Debug("parser") << "set logic: '" << name << "' " << std::endl; + setLogic(PARSER_STATE,name); + $cmd = new SetBenchmarkLogicCommand(name); } + | SET_INFO_TOK STATUS_TOK status[b_status] + { cmd = new SetBenchmarkStatusCommand(b_status); } + | /* sort declaration */ + DECLARE_SORT_TOK identifier[name,CHECK_UNDECLARED,SYM_SORT] n=NUMERAL_TOK + // FIXME: What does the numeral argument mean? + { Debug("parser") << "declare sort: '" << name << "' " << n << std::endl; + PARSER_STATE->mkSort(name); + $cmd = new DeclarationCommand(name,EXPR_MANAGER->kindType()); } + | /* function declaration */ + DECLARE_FUN_TOK identifier[name,CHECK_UNDECLARED,SYM_VARIABLE] + LPAREN_TOK sortList[sorts] RPAREN_TOK + sortSymbol[t] + { Debug("parser") << "declare fun: '" << name << "' " << std::endl; + if( sorts.size() > 0 ) { + t = EXPR_MANAGER->mkFunctionType(sorts,t); + } + PARSER_STATE->mkVar(name, t); + $cmd = new DeclarationCommand(name,t); } + | /* assertion */ + ASSERT_TOK term[expr] + { cmd = new AssertCommand(expr); } + | /* checksat */ + CHECKSAT_TOK + { cmd = new CheckSatCommand(MK_CONST(true)); } + ; + + +/** + * Matches a term. + * @return the expression representing the formula + */ +term[CVC4::Expr& expr] +@init { + Debug("parser") << "term: " << Input::tokenText(LT(1)) << std::endl; + Kind kind; + std::string name; + std::vector args; +} + : /* a built-in operator application */ + LPAREN_TOK builtinOp[kind] termList[args,expr] RPAREN_TOK + { if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) { + /* Unary AND/OR can be replaced with the argument. + It just so happens expr should already by the only argument. */ + Assert( expr == args[0] ); + } else { + PARSER_STATE->checkArity(kind, args.size()); + expr = MK_EXPR(kind, args); + } + } + + | /* A non-built-in function application */ + LPAREN_TOK + functionSymbol[expr] + { args.push_back(expr); } + termList[args,expr] RPAREN_TOK + // TODO: check arity + { expr = MK_EXPR(CVC4::kind::APPLY_UF,args); } + + | /* An ite expression */ + LPAREN_TOK ITE_TOK + term[expr] + { args.push_back(expr); } + term[expr] + { args.push_back(expr); } + term[expr] + { args.push_back(expr); } + RPAREN_TOK + { expr = MK_EXPR(CVC4::kind::ITE, args); } + + | /* a let binding */ + LPAREN_TOK LET_TOK LPAREN_TOK + { PARSER_STATE->pushScope(); } + ( LPAREN_TOK identifier[name,CHECK_UNDECLARED,SYM_VARIABLE] term[expr] RPAREN_TOK + { PARSER_STATE->defineVar(name,expr); } )+ + RPAREN_TOK + term[expr] + RPAREN_TOK + { PARSER_STATE->popScope(); } + + | /* a variable */ + identifier[name,CHECK_DECLARED,SYM_VARIABLE] + { expr = PARSER_STATE->getVariable(name); } + + /* constants */ + | TRUE_TOK { expr = MK_CONST(true); } + | FALSE_TOK { expr = MK_CONST(false); } + | NUMERAL_TOK + { Integer num( Input::tokenText($NUMERAL_TOK) ); + expr = MK_CONST(num); } + | RATIONAL_TOK + { // FIXME: This doesn't work because an SMT rational is not a valid GMP rational string + Rational rat( Input::tokenText($RATIONAL_TOK) ); + expr = MK_CONST(rat); } + // NOTE: Theory constants go here + ; + +/** + * Matches a sequence of terms and puts them into the formulas + * vector. + * @param formulas the vector to fill with terms + * @param expr an Expr reference for the elements of the sequence + */ +/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every + * time through this rule. */ +termList[std::vector& formulas, CVC4::Expr& expr] + : ( term[expr] { formulas.push_back(expr); } )+ + ; + +/** +* Matches a builtin operator symbol and sets kind to its associated Expr kind. +*/ +builtinOp[CVC4::Kind& kind] +@init { + Debug("parser") << "builtin: " << Input::tokenText(LT(1)) << std::endl; +} + : NOT_TOK { $kind = CVC4::kind::NOT; } + | IMPLIES_TOK { $kind = CVC4::kind::IMPLIES; } + | AND_TOK { $kind = CVC4::kind::AND; } + | OR_TOK { $kind = CVC4::kind::OR; } + | XOR_TOK { $kind = CVC4::kind::XOR; } + | EQUAL_TOK { $kind = CVC4::kind::EQUAL; } + | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; } + | GREATER_THAN_TOK + { $kind = CVC4::kind::GT; } + | GREATER_THAN_TOK EQUAL_TOK + { $kind = CVC4::kind::GEQ; } + | LESS_THAN_TOK EQUAL_TOK + { $kind = CVC4::kind::LEQ; } + | LESS_THAN_TOK + { $kind = CVC4::kind::LT; } + | PLUS_TOK { $kind = CVC4::kind::PLUS; } + | STAR_TOK { $kind = CVC4::kind::MULT; } + | TILDE_TOK { $kind = CVC4::kind::UMINUS; } + | MINUS_TOK { $kind = CVC4::kind::MINUS; } + // NOTE: Theory operators go here + ; + +/** + * Matches a (possibly undeclared) function identifier (returning the string) + * @param check what kind of check to do with the symbol + */ +functionName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_VARIABLE] + ; + +/** + * Matches an previously declared function symbol (returning an Expr) + */ +functionSymbol[CVC4::Expr& fun] +@declarations { + std::string name; +} + : functionName[name,CHECK_DECLARED] + { PARSER_STATE->checkFunction(name); + fun = PARSER_STATE->getVariable(name); } + ; + +/** + * Matches a sequence of sort symbols and fills them into the given vector. + */ +sortList[std::vector& sorts] +@declarations { + Type t; +} + : ( sortSymbol[t] { sorts.push_back(t); })* + ; + +/** + * Matches the sort symbol, which can be an arbitrary identifier. + * @param check the check to perform on the name + */ +sortName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_SORT] + ; + +sortSymbol[CVC4::Type& t] +@declarations { + std::string name; +} + : sortName[name,CHECK_NONE] + { t = PARSER_STATE->getSort(name); } + | BOOL_TOK + { t = EXPR_MANAGER->booleanType(); } + ; + +/** + * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. + */ +status[ CVC4::BenchmarkStatus& status ] + : SAT_TOK { $status = SMT_SATISFIABLE; } + | UNSAT_TOK { $status = SMT_UNSATISFIABLE; } + | UNKNOWN_TOK { $status = SMT_UNKNOWN; } + ; + +/** + * Matches an identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + * @param type the intended namespace for the identifier + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = Input::tokenText($IDENTIFIER); + Debug("parser") << "identifier: " << id + << " check? " << toString(check) + << " type? " << toString(type) << std::endl; + PARSER_STATE->checkDeclaration(id, check, type); } + ; + +// Base SMT-LIB tokens +ASSERT_TOK : 'assert'; +BOOL_TOK : 'Bool'; +CHECKSAT_TOK : 'check-sat'; +DECLARE_FUN_TOK : 'declare-fun'; +DECLARE_SORT_TOK : 'declare-sort'; +FALSE_TOK : 'false'; +ITE_TOK : 'ite'; +LET_TOK : 'let'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; +SAT_TOK : 'sat'; +SET_LOGIC_TOK : 'set-logic'; +SET_INFO_TOK : 'set-info'; +STATUS_TOK : ':status'; +TRUE_TOK : 'true'; +UNKNOWN_TOK : 'unknown'; +UNSAT_TOK : 'unsat'; + +// operators (NOTE: theory symbols go here) +AMPERSAND_TOK : '&'; +AND_TOK : 'and'; +AT_TOK : '@'; +DISTINCT_TOK : 'distinct'; +DIV_TOK : '/'; +EQUAL_TOK : '='; +EXISTS_TOK : 'exists'; +FORALL_TOK : 'forall'; +GREATER_THAN_TOK : '>'; +IMPLIES_TOK : '=>'; +LESS_THAN_TOK : '<'; +MINUS_TOK : '-'; +NOT_TOK : 'not'; +OR_TOK : 'or'; +PERCENT_TOK : '%'; +PIPE_TOK : '|'; +PLUS_TOK : '+'; +POUND_TOK : '#'; +STAR_TOK : '*'; +TILDE_TOK : '~'; +XOR_TOK : 'xor'; + + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER + : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* + ; + +/** + * Matches an identifier starting with a colon. + */ +ATTR_IDENTIFIER + : ':' IDENTIFIER + ; + +/** + * Matches and skips whitespace in the input. + */ +WHITESPACE + : (' ' | '\t' | '\f' | '\r' | '\n')+ { $channel = HIDDEN;; } + ; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL_TOK + : DIGIT+ + ; + +RATIONAL_TOK + : DIGIT+ '.' DIGIT+ + ; + +/** + * Matches a double quoted string literal. Escaping is supported, and escape + * character '\' has to be escaped. + */ +STRING_LITERAL + : '"' (ESCAPE | ~('"'|'\\'))* '"' + ; + +/** + * Matches the comments and ignores them + */ +COMMENT + : ';' (~('\n' | '\r'))* { $channel = HIDDEN;; } + ; + + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment +ALPHA + : 'a'..'z' + | 'A'..'Z' + ; + +/** + * Matches the digits (0-9) + */ +fragment DIGIT : '0'..'9'; +// fragment NON_ZERO_DIGIT : '1'..'9'; +// fragment NUMERAL_SEQ : '0' | NON_ZERO_DIGIT DIGIT*; + +/** + * Matches an allowed escaped character. + */ +fragment ESCAPE : '\\' ('"' | '\\' | 'n' | 't' | 'r'); + diff --git a/src/parser/smt2/smt2_input.cpp b/src/parser/smt2/smt2_input.cpp new file mode 100644 index 000000000..5db4a9bd7 --- /dev/null +++ b/src/parser/smt2/smt2_input.cpp @@ -0,0 +1,67 @@ +/********************* */ +/** smt2_input.cpp + ** Original author: cconway + ** Major contributors: none + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** [[ Add file-specific comments here ]] + **/ + +#include + +#include "smt2_input.h" +#include "expr/expr_manager.h" +#include "parser/parser.h" +#include "parser/parser_exception.h" +#include "parser/smt2/generated/Smt2Lexer.h" +#include "parser/smt2/generated/Smt2Parser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +Smt2Input::Smt2Input(AntlrInputStream *inputStream) : + Input(inputStream, 2) { + pANTLR3_INPUT_STREAM input = inputStream->getAntlr3InputStream(); + AlwaysAssert( input != NULL ); + + d_pSmt2Lexer = Smt2LexerNew(input); + if( d_pSmt2Lexer == NULL ) { + throw ParserException("Failed to create SMT2 lexer."); + } + + setAntlr3Lexer( d_pSmt2Lexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pSmt2Parser = Smt2ParserNew(tokenStream); + if( d_pSmt2Parser == NULL ) { + throw ParserException("Failed to create SMT2 parser."); + } + + setAntlr3Parser(d_pSmt2Parser->pParser); +} + + +Smt2Input::~Smt2Input() { + d_pSmt2Lexer->free(d_pSmt2Lexer); + d_pSmt2Parser->free(d_pSmt2Parser); +} + +Command* Smt2Input::parseCommand() throw (ParserException) { + return d_pSmt2Parser->parseCommand(d_pSmt2Parser); +} + +Expr Smt2Input::parseExpr() throw (ParserException) { + return d_pSmt2Parser->parseExpr(d_pSmt2Parser); +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/smt2/smt2_input.h b/src/parser/smt2/smt2_input.h new file mode 100644 index 000000000..48fcb7956 --- /dev/null +++ b/src/parser/smt2/smt2_input.h @@ -0,0 +1,95 @@ +/********************* */ +/** smt2_input.h + ** Original author: cconway + ** Major contributors: mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** [[ Add file-specific comments here ]] + **/ + +#include "cvc4parser_public.h" + +#ifndef __CVC4__PARSER__SMT2_INPUT_H +#define __CVC4__PARSER__SMT2_INPUT_H + +#include "parser/input.h" +#include "parser/smt2/generated/Smt2Lexer.h" +#include "parser/smt2/generated/Smt2Parser.h" + +// extern void Smt2ParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class Smt2Input : public Input { + + /** The ANTLR3 SMT2 lexer for the input. */ + pSmt2Lexer d_pSmt2Lexer; + + /** The ANTLR3 CVC parser for the input. */ + pSmt2Parser d_pSmt2Parser; + +public: + + /** + * Create an input. + * + * @param inputStream the input stream to use + */ + Smt2Input(AntlrInputStream *inputStream); + + /** + * Create a string input. + * + * @param exprManager the manager to use when building expressions from the input + * @param input the string to read + * @param name the "filename" to use when reporting errors + */ +// Smt2Input(ExprManager* exprManager, const std::string& input, const std::string& name); + + /** Destructor. Frees the lexer and the parser. */ + ~Smt2Input(); + +protected: + + /** + * Parse a command from the input. Returns NULL if + * there is no command there to parse. + * + * @throws ParserException if an error is encountered during parsing. + */ + Command* parseCommand() throw(ParserException); + + /** + * Parse an expression from the input. Returns a null + * Expr if there is no expression there to parse. + * + * @throws ParserException if an error is encountered during parsing. + */ + Expr parseExpr() throw(ParserException); + +private: + + /** + * Initialize the class. Called from the constructors once the input + * stream is initialized. + */ + void init(); + +};/* class Smt2Input */ + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__SMT2_INPUT_H */ diff --git a/test/unit/parser/parser_black.h b/test/unit/parser/parser_black.h index 9a2864781..b7fbf243f 100644 --- a/test/unit/parser/parser_black.h +++ b/test/unit/parser/parser_black.h @@ -144,6 +144,69 @@ const string badSmtExprs[] = { const int numBadSmtExprs = sizeof(badSmtExprs) / sizeof(string); +/************************** SMT v2 test inputs ********************************/ + +const string goodSmt2Inputs[] = { + "", // empty string is OK + "(set-logic QF_UF)", + "(assert true)", + "(check-sat)", + "(assert false) (check-sat)", + "(declare-fun a () Bool) (declare-fun b () Bool)", + "(declare-fun a () Bool) (declare-fun b () Bool) (assert (=> (and (=> a b) a) b))", + "(declare-sort a 1) (declare-fun f (a) a) (declare-fun x () a) (assert (= (f x) x))", + "(declare-sort a 1) (declare-fun x () a) (declare-fun y () a) (assert (= (ite true x y) x))", + ";; nothing but a comment", + "; a comment\n(check-sat ; goodbye\n)" + }; + +const int numGoodSmt2Inputs = sizeof(goodSmt2Inputs) / sizeof(string); + +/* The following expressions are valid after setupContext. */ +const string goodSmt2Exprs[] = { + "(and a b)", + "(or (and a b) c)", + "(=> (and (=> a b) a) b)", + "(and (= a b) (not a))", + "(= (xor a b) (and (or a b) (not (and a b))))", + "(ite a (f x) y)", + "1", + "0"// , +// "1.5" +}; + +const int numGoodSmt2Exprs = sizeof(goodSmt2Exprs) / sizeof(string); + +const string badSmt2Inputs[] = { + "(assert)", // no args + "(check-sat true)", // shouldn't have an arg + "(declare-sort a)", // no arg + "(declare-sort a 1) (declare-sort a 1)", // double decl + "(declare-fun p Bool)" // should be "p () Bool" + }; + +const int numBadSmt2Inputs = sizeof(badSmt2Inputs) / sizeof(string); + +/* The following expressions are invalid even after setupContext. */ +const string badSmt2Exprs[] = { + "(and)", // wrong arity + "(and a b", // no closing paren + "(a and b)", // infix + "(implies a b)", // no implies in v2 + "(iff a b)", // no iff in v2 + "(OR (AND a b) c)", // wrong case + "(a IMPLIES b)", // infix AND wrong case + "(not a b)", // wrong arity + "not a", // needs parens + "(ite a x)", // wrong arity + "(if_then_else a (f x) y)", // no if_then_else in v2 + "(a b)", // using non-function as function + ".5", // rational constants must have integer prefix + "1." // rational constants must have fractional suffix +}; + +const int numBadSmt2Exprs = sizeof(badSmt2Exprs) / sizeof(string); + class ParserBlack : public CxxTest::TestSuite { ExprManager *d_exprManager; @@ -306,4 +369,20 @@ public: void testBadSmtExprs() { tryBadExprs(LANG_SMTLIB,badSmtExprs,numBadSmtExprs); } + + void testGoodSmt2Inputs() { + tryGoodInputs(LANG_SMTLIB_V2,goodSmt2Inputs,numGoodSmt2Inputs); + } + + void testBadSmt2Inputs() { + tryBadInputs(LANG_SMTLIB_V2,badSmt2Inputs,numBadSmt2Inputs); + } + + void testGoodSmt2Exprs() { + tryGoodExprs(LANG_SMTLIB_V2,goodSmt2Exprs,numGoodSmt2Exprs); + } + + void testBadSmt2Exprs() { + tryBadExprs(LANG_SMTLIB_V2,badSmt2Exprs,numBadSmt2Exprs); + } };