From 8730e9320a833a9eb0e65074f9988950b7424c0c Mon Sep 17 00:00:00 2001 From: "Christopher L. Conway" Date: Tue, 30 Mar 2010 20:22:33 +0000 Subject: [PATCH] Merging from branches/antlr3 (r246:354) --- .gitignore | 17 + .project | 2 +- .settings/net.certiv.antlrdt.core.prefs | 4 + config/.gitignore | 11 + config/antlr.m4 | 61 +- contrib/.gitignore | 2 + src/.gitignore | 2 + src/context/.gitignore | 2 + src/expr/.gitignore | 4 + src/expr/command.h | 34 +- src/expr/expr.cpp | 29 +- src/expr/expr.h | 17 + src/include/.gitignore | 2 + src/main/.gitignore | 2 + src/main/Makefile.am | 2 +- src/main/getopt.cpp | 11 +- src/main/main.cpp | 35 +- src/parser/.gitignore | 2 + src/parser/Makefile.am | 21 +- src/parser/antlr_input.cpp | 317 +++++++++ src/parser/antlr_input.h | 93 +++ src/parser/antlr_parser.h | 361 ----------- src/parser/bounded_token_buffer.cpp | 640 +++++++++++++++++++ src/parser/bounded_token_buffer.h | 73 +++ src/parser/bounded_token_factory.cpp | 134 ++++ src/parser/bounded_token_factory.h | 38 ++ src/parser/cvc/.gitignore | 4 + src/parser/cvc/Cvc.g | 496 ++++++++++++++ src/parser/cvc/Makefile.am | 46 +- src/parser/cvc/cvc_input.cpp | 73 +++ src/parser/cvc/cvc_input.h | 48 ++ src/parser/cvc/cvc_lexer.g | 151 ----- src/parser/cvc/cvc_parser.g | 386 ----------- src/parser/{antlr_parser.cpp => input.cpp} | 289 ++++++--- src/parser/input.h | 394 ++++++++++++ src/parser/memory_mapped_input_buffer.cpp | 106 +++ src/parser/memory_mapped_input_buffer.h | 64 +- src/parser/parser.cpp | 147 ----- src/parser/parser.h | 135 ---- src/parser/parser_exception.h | 37 +- src/parser/parser_options.h | 27 + src/parser/smt/.gitignore | 4 + src/parser/smt/Makefile.am | 38 +- src/parser/smt/Smt.g | 552 ++++++++++++++++ src/parser/smt/smt_input.cpp | 73 +++ src/parser/smt/smt_input.h | 47 ++ src/parser/smt/smt_lexer.g | 220 ------- src/parser/smt/smt_parser.g | 351 ---------- src/parser/symbol_table.h | 2 + src/prop/.gitignore | 2 + src/prop/minisat/.gitignore | 2 + src/smt/.gitignore | 2 + src/theory/.gitignore | 3 + src/theory/arith/.gitignore | 2 + src/theory/arrays/.gitignore | 2 + src/theory/booleans/.gitignore | 1 + src/theory/bv/.gitignore | 2 + src/theory/uf/.gitignore | 2 + src/util/.gitignore | 2 + src/util/options.h | 8 +- test/.gitignore | 2 + test/regress/.gitignore | 2 + test/regress/regress0/.gitignore | 2 + test/regress/regress0/Makefile.am | 13 +- test/regress/regress0/precedence/.gitignore | 1 + test/regress/regress0/precedence/Makefile.am | 3 +- test/regress/regress0/uf/.gitignore | 1 + test/regress/regress0/uf/Makefile.am | 11 +- test/regress/regress1/.gitignore | 2 + test/regress/regress2/.gitignore | 2 + test/regress/regress3/.gitignore | 2 + test/system/.gitignore | 1 + test/unit/.gitignore | 2 + test/unit/expr/.gitignore | 4 + test/unit/parser/parser_black.h | 132 ++-- 75 files changed, 3700 insertions(+), 2114 deletions(-) create mode 100644 .gitignore create mode 100644 .settings/net.certiv.antlrdt.core.prefs create mode 100644 config/.gitignore create mode 100644 contrib/.gitignore create mode 100644 src/.gitignore create mode 100644 src/context/.gitignore create mode 100644 src/expr/.gitignore create mode 100644 src/include/.gitignore create mode 100644 src/main/.gitignore create mode 100644 src/parser/.gitignore create mode 100644 src/parser/antlr_input.cpp create mode 100644 src/parser/antlr_input.h delete mode 100644 src/parser/antlr_parser.h create mode 100644 src/parser/bounded_token_buffer.cpp create mode 100644 src/parser/bounded_token_buffer.h create mode 100644 src/parser/bounded_token_factory.cpp create mode 100644 src/parser/bounded_token_factory.h create mode 100644 src/parser/cvc/.gitignore create mode 100644 src/parser/cvc/Cvc.g create mode 100644 src/parser/cvc/cvc_input.cpp create mode 100644 src/parser/cvc/cvc_input.h delete mode 100644 src/parser/cvc/cvc_lexer.g delete mode 100644 src/parser/cvc/cvc_parser.g rename src/parser/{antlr_parser.cpp => input.cpp} (50%) create mode 100644 src/parser/input.h create mode 100644 src/parser/memory_mapped_input_buffer.cpp delete mode 100644 src/parser/parser.cpp delete mode 100644 src/parser/parser.h create mode 100644 src/parser/parser_options.h create mode 100644 src/parser/smt/.gitignore create mode 100644 src/parser/smt/Smt.g create mode 100644 src/parser/smt/smt_input.cpp create mode 100644 src/parser/smt/smt_input.h delete mode 100644 src/parser/smt/smt_lexer.g delete mode 100644 src/parser/smt/smt_parser.g create mode 100644 src/prop/.gitignore create mode 100644 src/prop/minisat/.gitignore create mode 100644 src/smt/.gitignore create mode 100644 src/theory/.gitignore create mode 100644 src/theory/arith/.gitignore create mode 100644 src/theory/arrays/.gitignore create mode 100644 src/theory/booleans/.gitignore create mode 100644 src/theory/bv/.gitignore create mode 100644 src/theory/uf/.gitignore create mode 100644 src/util/.gitignore create mode 100644 test/.gitignore create mode 100644 test/regress/.gitignore create mode 100644 test/regress/regress0/.gitignore create mode 100644 test/regress/regress0/precedence/.gitignore create mode 100644 test/regress/regress0/uf/.gitignore create mode 100644 test/regress/regress1/.gitignore create mode 100644 test/regress/regress2/.gitignore create mode 100644 test/regress/regress3/.gitignore create mode 100644 test/system/.gitignore create mode 100644 test/unit/.gitignore create mode 100644 test/unit/expr/.gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..65b1e44d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +/autom4te.cache +/stamp-h +/config.h.in +/config.log +/config.status +/config.cache +/libtool +/stamp-h1 +/cvc4-*.tar.gz +/cvc4-*.tar.bz2 +/builds +/doc +/Makefile.in +/configure +/aclocal.m4 +/callgrind.out* +/gmon.out diff --git a/.project b/.project index cd7732047..78c44f770 100644 --- a/.project +++ b/.project @@ -1,6 +1,6 @@ - cvc4 + cvc4-antlr3 diff --git a/.settings/net.certiv.antlrdt.core.prefs b/.settings/net.certiv.antlrdt.core.prefs new file mode 100644 index 000000000..fda7c5fb1 --- /dev/null +++ b/.settings/net.certiv.antlrdt.core.prefs @@ -0,0 +1,4 @@ +#Thu Mar 25 16:47:04 EDT 2010 +builderLoc=builderLocRelative +builderRelPath=./generated +eclipse.preferences.version=1 diff --git a/config/.gitignore b/config/.gitignore new file mode 100644 index 000000000..dfeb8c222 --- /dev/null +++ b/config/.gitignore @@ -0,0 +1,11 @@ +/libtool.m4 +/depcomp +/lt~obsolete.m4 +/config.guess +/config.sub +/ltmain.sh +/ltsugar.m4 +/ltversion.m4 +/missing +/ltoptions.m4 +/install-sh diff --git a/config/antlr.m4 b/config/antlr.m4 index 408df0d84..842b9b51d 100644 --- a/config/antlr.m4 +++ b/config/antlr.m4 @@ -3,28 +3,18 @@ # runantlr script ## AC_DEFUN([AC_PROG_ANTLR], [ - AC_ARG_VAR([ANTLR],[location of the runantlr script]) - - # Get the location of the runantlr script - # AC_ARG_WITH( - # [antlr], - # AS_HELP_STRING( - # [--with-antlr=RUNANTLR], - # [location of the ANTLR's `runantlr` script] - # ), - # ANTLR="$withval", - # ) + AC_ARG_VAR([ANTLR],[location of the antlr3 script]) # Check the existance of the runantlr script if test -z "$ANTLR"; then - AC_CHECK_PROGS(ANTLR, [runantlr antlr]) + AC_CHECK_PROGS(ANTLR, [antlr3]) else AC_CHECK_PROG(ANTLR, "$ANTLR", "$ANTLR", []) fi if test no$ANTLR = "no"; then AC_MSG_WARN( - [Couldn't find the runantlr script, make sure that the parser code has + [Couldn't find the antlr3 script, make sure that the parser code has been generated already. To obtain ANTLR see .] ) AC_MSG_RESULT(no) @@ -35,7 +25,7 @@ AC_DEFUN([AC_PROG_ANTLR], [ ]) ## -# Check the existnace of the ANTLR C++ runtime library and headers +# Check the existance of the ANTLR C++ runtime library and headers # Will set ANTLR_CPPFLAGS and ANTLR_LIBS to the location of the ANTLR headers # and library respectively ## @@ -46,16 +36,16 @@ AC_DEFUN([AC_LIB_ANTLR],[ [antlr-dir], AS_HELP_STRING( [--with-antlr-dir=PATH], - [path to ANTLR C++ headers and libraries] + [path to ANTLR C headers and libraries] ), ANTLR_PREFIXES="$withval", ANTLR_PREFIXES="/usr/local /usr /opt/local /opt" ) - AC_MSG_CHECKING(for antlr C++ runtime library) + AC_MSG_CHECKING(for ANTLR C runtime library) - # Use C++ and remember the variables we are changing - AC_LANG_PUSH(C++) + # Use C and remember the variables we are changing + AC_LANG_PUSH(C) OLD_CPPFLAGS="$CPPFLAGS" OLD_LIBS="$LIBS" @@ -63,46 +53,27 @@ AC_DEFUN([AC_LIB_ANTLR],[ for antlr_prefix in $ANTLR_PREFIXES do CPPFLAGS="$OLD_CPPFLAGS -I$antlr_prefix/include" - LIBS="$OLD_LIBS -L$antlr_prefix/lib -lantlr-pic" + LIBS="$OLD_LIBS -L$antlr_prefix/lib -lantlr3c" AC_LINK_IFELSE( [ - #include - class MyAST : public ANTLR_USE_NAMESPACE(antlr)CommonAST { - }; + #include + int main() { - MyAST ast; + pANTLR3_UINT8 fName = (pANTLR3_UINT8)"foo"; + pANTLR3_INPUT_STREAM input = antlr3AsciiFileStreamNew(fName); + return 0; } ], [ AC_MSG_RESULT(found in $antlr_prefix) ANTLR_INCLUDES="-I$antlr_prefix/include" - ANTLR_LDFLAGS="-L$antlr_prefix/lib -lantlr-pic" + ANTLR_LDFLAGS="-L$antlr_prefix/lib -lantlr3c" break ], - [ - CPPFLAGS="$OLD_CPPFLAGS -I$antlr_prefix/include" - LIBS="$OLD_LIBS -L$antlr_prefix/lib -lantlr" - AC_LINK_IFELSE( - [ - #include - class MyAST : public ANTLR_USE_NAMESPACE(antlr)CommonAST { - }; - int main() { - MyAST ast; - } - ], - [ - AC_MSG_RESULT(found in $antlr_prefix) - ANTLR_INCLUDES="-I$antlr_prefix/include" - ANTLR_LDFLAGS="-L$antlr_prefix/lib -lantlr" - break - ], [ AC_MSG_RESULT(no) - AC_MSG_ERROR([ANTLR C++ runtime not found, see ]) + AC_MSG_ERROR([ANTLR C runtime not found, see ]) ] - ) - ] ) done diff --git a/contrib/.gitignore b/contrib/.gitignore new file mode 100644 index 000000000..116a16b54 --- /dev/null +++ b/contrib/.gitignore @@ -0,0 +1,2 @@ +/ +/Makefile.in diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/context/.gitignore b/src/context/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/context/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/expr/.gitignore b/src/expr/.gitignore new file mode 100644 index 000000000..4618e07b0 --- /dev/null +++ b/src/expr/.gitignore @@ -0,0 +1,4 @@ +/.deps +/Makefile.in +/kind.h +/metakind.h diff --git a/src/expr/command.h b/src/expr/command.h index 3c42c153c..e5994b3c7 100644 --- a/src/expr/command.h +++ b/src/expr/command.h @@ -36,6 +36,20 @@ class Command; inline std::ostream& operator<<(std::ostream&, const Command&) CVC4_PUBLIC; std::ostream& operator<<(std::ostream&, const Command*) CVC4_PUBLIC; +/** The status an SMT benchmark can have */ +enum BenchmarkStatus { + /** Benchmark is satisfiable */ + SMT_SATISFIABLE, + /** Benchmark is unsatisfiable */ + SMT_UNSATISFIABLE, + /** The status of the benchmark is unknown */ + SMT_UNKNOWN +}; + +inline std::ostream& operator<<(std::ostream& out, + BenchmarkStatus status) + CVC4_PUBLIC; + class CVC4_PUBLIC Command { public: virtual void invoke(CVC4::SmtEngine* smt_engine) = 0; @@ -107,15 +121,6 @@ protected: class CVC4_PUBLIC SetBenchmarkStatusCommand : public Command { public: - /** The status an SMT benchmark can have */ - enum BenchmarkStatus { - /** Benchmark is satisfiable */ - SMT_SATISFIABLE, - /** Benchmark is unsatisfiable */ - SMT_UNSATISFIABLE, - /** The status of the benchmark is unknown */ - SMT_UNKNOWN - }; SetBenchmarkStatusCommand(BenchmarkStatus status); void invoke(SmtEngine* smt); void toStream(std::ostream& out) const; @@ -123,9 +128,6 @@ protected: BenchmarkStatus d_status; };/* class SetBenchmarkStatusCommand */ -inline std::ostream& operator<<(std::ostream& out, - SetBenchmarkStatusCommand::BenchmarkStatus status) - CVC4_PUBLIC; class CVC4_PUBLIC SetBenchmarkLogicCommand : public Command { public: @@ -288,16 +290,16 @@ inline void SetBenchmarkLogicCommand::toStream(std::ostream& out) const { /* output stream insertion operator for benchmark statuses */ inline std::ostream& operator<<(std::ostream& out, - SetBenchmarkStatusCommand::BenchmarkStatus status) { + BenchmarkStatus status) { switch(status) { - case SetBenchmarkStatusCommand::SMT_SATISFIABLE: + case SMT_SATISFIABLE: return out << "sat"; - case SetBenchmarkStatusCommand::SMT_UNSATISFIABLE: + case SMT_UNSATISFIABLE: return out << "unsat"; - case SetBenchmarkStatusCommand::SMT_UNKNOWN: + case SMT_UNKNOWN: return out << "unknown"; default: diff --git a/src/expr/expr.cpp b/src/expr/expr.cpp index 5acd0736a..2bcd28422 100644 --- a/src/expr/expr.cpp +++ b/src/expr/expr.cpp @@ -40,6 +40,12 @@ Expr::Expr(const Expr& e) : d_node(new Node(*e.d_node)), d_exprManager(e.d_exprManager) { } +Expr::Expr(uintptr_t n) : + d_node(new Node()), + d_exprManager(NULL) { + AlwaysAssert(n==0); +} + ExprManager* Expr::getExprManager() const { return d_exprManager; } @@ -50,13 +56,26 @@ Expr::~Expr() { } Expr& Expr::operator=(const Expr& e) { - if(this != &e) { - ExprManagerScope ems(*this); - delete d_node; - d_node = new Node(*e.d_node); - d_exprManager = e.d_exprManager; + Assert(d_node != NULL, "Unexpected NULL expression pointer!"); + Assert(e.d_node != NULL, "Unexpected NULL expression pointer!"); + ExprManagerScope ems(*this); + *d_node = *e.d_node; + d_exprManager = e.d_exprManager; + return *this; +} + +/* This should only ever be assigning NULL to a null Expr! */ +Expr& Expr::operator=(uintptr_t n) { + AlwaysAssert(n==0); + Assert(d_node != NULL, "Unexpected NULL expression pointer!"); + if( EXPECT_FALSE(!isNull()) ) { + *d_node = Node::null(); } return *this; +/* + Assert(isNull()); + return *this; +*/ } bool Expr::operator==(const Expr& e) const { diff --git a/src/expr/expr.h b/src/expr/expr.h index c5478b1da..48b64fd17 100644 --- a/src/expr/expr.h +++ b/src/expr/expr.h @@ -48,6 +48,14 @@ public: */ Expr(const Expr& e); + /** + * Initialize from an integer. Fails if the integer is not 0. + * NOTE: This is here purely to support the auto-initialization + * behavior of the ANTLR3 C backend. Should be removed if future + * versions of ANTLR fix the problem. + */ + Expr(uintptr_t n); + /** Destructor */ ~Expr(); @@ -60,6 +68,15 @@ public: */ Expr& operator=(const Expr& e); + /** + * Assignment from an integer. Fails if the integer is not 0. + * NOTE: This is here purely to support the auto-initialization + * behavior of the ANTLR3 C backend (i.e., a rule attribute + * Expr e gets initialized with e = NULL;. + * Should be removed if future versions of ANTLR fix the problem. + */ + Expr& operator=(uintptr_t n); + /** * Syntactic comparison operator. Returns true if expressions belong to the * same expression manager and are syntactically identical. diff --git a/src/include/.gitignore b/src/include/.gitignore new file mode 100644 index 000000000..b336cc7ce --- /dev/null +++ b/src/include/.gitignore @@ -0,0 +1,2 @@ +/Makefile +/Makefile.in diff --git a/src/main/.gitignore b/src/main/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/main/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/main/Makefile.am b/src/main/Makefile.am index 79eb8c74e..e73b38f1d 100644 --- a/src/main/Makefile.am +++ b/src/main/Makefile.am @@ -1,5 +1,5 @@ AM_CPPFLAGS = \ - -I@srcdir@/../include -I@srcdir@/.. + -I@srcdir@/../include -I@srcdir@/.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall bin_PROGRAMS = cvc4 diff --git a/src/main/getopt.cpp b/src/main/getopt.cpp index df94ef9ab..ad59e0039 100644 --- a/src/main/getopt.cpp +++ b/src/main/getopt.cpp @@ -30,11 +30,10 @@ #include "util/configuration.h" #include "util/output.h" #include "util/options.h" -#include "parser/parser.h" +#include "parser/parser_options.h" using namespace std; using namespace CVC4; -using namespace CVC4::parser; namespace CVC4 { namespace main { @@ -153,13 +152,13 @@ throw(OptionException) { case 'L': if(!strcmp(optarg, "cvc4") || !strcmp(optarg, "pl")) { - opts->lang = Parser::LANG_CVC4; + opts->lang = parser::LANG_CVC4; break; } else if(!strcmp(optarg, "smtlib") || !strcmp(optarg, "smt")) { - opts->lang = Parser::LANG_SMTLIB; + opts->lang = parser::LANG_SMTLIB; break; } else if(!strcmp(optarg, "auto")) { - opts->lang = Parser::LANG_AUTO; + opts->lang = parser::LANG_AUTO; break; } @@ -187,7 +186,7 @@ throw(OptionException) { // silences CVC4 (except "sat" or "unsat" or "unknown", forces smtlib input) opts->smtcomp_mode = true; opts->verbosity = -1; - opts->lang = Parser::LANG_SMTLIB; + opts->lang = parser::LANG_SMTLIB; break; case PARSE_ONLY: diff --git a/src/main/main.cpp b/src/main/main.cpp index f5e53f34a..b65d4f50a 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -22,7 +22,7 @@ #include "config.h" #include "main.h" #include "usage.h" -#include "parser/parser.h" +#include "parser/input.h" #include "expr/expr_manager.h" #include "smt/smt_engine.h" #include "expr/command.h" @@ -93,6 +93,11 @@ int runCvc4(int argc, char* argv[]) { cout << unitbuf; } + /* NOTE: ANTLR3 doesn't support input from stdin */ + if(firstArgIndex >= argc) { + throw Exception("No input file specified."); + } + // We only accept one input file if(argc > firstArgIndex + 1) { throw Exception("Too many input files specified."); @@ -105,17 +110,17 @@ int runCvc4(int argc, char* argv[]) { SmtEngine smt(&exprMgr, &options); // If no file supplied we read from standard input - bool inputFromStdin = firstArgIndex >= argc || !strcmp("-", argv[firstArgIndex]); + // bool inputFromStdin = firstArgIndex >= argc || !strcmp("-", argv[firstArgIndex]); // Auto-detect input language by filename extension - if(!inputFromStdin && options.lang == Parser::LANG_AUTO) { + if(/*!inputFromStdin && */options.lang == parser::LANG_AUTO) { const char* filename = argv[firstArgIndex]; unsigned len = strlen(filename); if(len >= 4 && !strcmp(".smt", filename + len - 4)) { - options.lang = Parser::LANG_SMTLIB; + options.lang = parser::LANG_SMTLIB; } else if(( len >= 4 && !strcmp(".cvc", filename + len - 4) ) || ( len >= 5 && !strcmp(".cvc4", filename + len - 5) )) { - options.lang = Parser::LANG_CVC4; + options.lang = parser::LANG_CVC4; } } @@ -141,21 +146,15 @@ int runCvc4(int argc, char* argv[]) { } // Create the parser - Parser* parser; + Input* parser; istream* input = NULL; - if(inputFromStdin) { - parser = Parser::getNewParser(&exprMgr, options.lang, cin, ""); - } else if( options.memoryMap ) { - parser = Parser::getMemoryMappedParser(&exprMgr, options.lang, argv[firstArgIndex]); - } else { - string filename = argv[firstArgIndex]; - input = new ifstream(filename.c_str()); - if(!*input) { - throw Exception("file does not exist or is unreadable: " + filename); - } - parser = Parser::getNewParser(&exprMgr, options.lang, *input, filename); - } +// if(inputFromStdin) { + // parser = Parser::getNewParser(&exprMgr, options.lang, cin, ""); +// } else { + parser = Input::newFileParser(&exprMgr, options.lang, argv[firstArgIndex], + options.memoryMap); +// } if(!options.semanticChecks) { parser->disableChecks(); diff --git a/src/parser/.gitignore b/src/parser/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/parser/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index 5ac1c9e35..ee0a23c98 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -34,10 +34,17 @@ libcvc4parser_noinst_la_LIBADD = \ libcvc4parser_la_SOURCES = libcvc4parser_noinst_la_SOURCES = \ - parser.h \ - parser.cpp \ - parser_exception.h \ - symbol_table.h \ - antlr_parser.h \ - antlr_parser.cpp \ - memory_mapped_input_buffer.h + antlr_input.h \ + antlr_input.cpp \ + bounded_token_buffer.h \ + bounded_token_buffer.cpp \ + bounded_token_factory.h \ + bounded_token_factory.cpp \ + input.h \ + input.cpp \ + memory_mapped_input_buffer.h \ + memory_mapped_input_buffer.cpp \ + parser_options.h \ + parser_exception.h \ + symbol_table.h + diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp new file mode 100644 index 000000000..02e07bc8f --- /dev/null +++ b/src/parser/antlr_input.cpp @@ -0,0 +1,317 @@ +/********************* */ +/** antlr_parser.cpp + ** Original author: dejan + ** Major contributors: cconway + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** A super-class for ANTLR-generated input language parsers + **/ + +/* + * antlr_parser.cpp + * + * Created on: Nov 30, 2009 + * Author: dejan + */ + +#include +#include +#include + +#include "util/output.h" +#include "util/Assert.h" +#include "expr/command.h" +#include "expr/type.h" +#include "parser/antlr_input.h" +#include "parser/bounded_token_buffer.h" +#include "parser/bounded_token_factory.h" +#include "parser/memory_mapped_input_buffer.h" +#include "parser/parser_exception.h" + +using namespace std; +using namespace CVC4; +using namespace CVC4::parser; +using namespace CVC4::kind; + +namespace CVC4 { +namespace parser { + +AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& filename, unsigned int lookahead, bool useMmap) : + Input(exprManager, filename), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_tokenStream(NULL) { + + if( useMmap ) { + d_input = MemoryMappedInputBufferNew(filename); + } else { + d_input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) filename.c_str()); + } + if( d_input == NULL ) { + throw ParserException("Couldn't open file: " + filename); + } +} + +/* +AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead) + Parser(exprManager,name), + d_lookahead(lookahead) { + +} +*/ + +AntlrInput::AntlrInput(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) : + Input(exprManager,name), + d_lookahead(lookahead), + d_lexer(NULL), + d_parser(NULL), + d_tokenStream(NULL) { + char* inputStr = strdup(input.c_str()); + char* nameStr = strdup(name.c_str()); + if( inputStr==NULL || nameStr==NULL ) { + throw ParserException("Couldn't initialize string input: '" + input + "'"); + } + d_input = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr); + if( d_input == NULL ) { + throw ParserException("Couldn't create input stream for string: '" + input + "'"); + } +} + +AntlrInput::~AntlrInput() { + d_tokenStream->free(d_tokenStream); + d_input->close(d_input); +} + +pANTLR3_INPUT_STREAM AntlrInput::getInputStream() { + return d_input; +} + +pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() { + return d_tokenStream; +} + +void AntlrInput::parseError(const std::string& message) + throw (ParserException) { + Debug("parser") << "Throwing exception: " << getFilename() << ":" + << d_lexer->getLine(d_lexer) << "." + << d_lexer->getCharPositionInLine(d_lexer) << ": " + << message << endl; + throw ParserException(message, getFilename(), d_lexer->getLine(d_lexer), + d_lexer->getCharPositionInLine(d_lexer)); +} + +void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) { + pANTLR3_EXCEPTION ex = recognizer->state->exception; + pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames; + stringstream ss; +// std::string msg; + + // Signal we are in error recovery now + recognizer->state->errorRecovery = ANTLR3_TRUE; + + // Indicate this recognizer had an error while processing. + recognizer->state->errorCount++; + + // Call the builtin error formatter + // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); + + /* This switch statement is adapted from antlr3baserecognizer.c:displayRecognitionError in libantlr3c. + * TODO: Make error messages more useful, maybe by including more expected tokens and information + * about the current token. */ + switch(ex->type) { + case ANTLR3_UNWANTED_TOKEN_EXCEPTION: + + // Indicates that the recognizer was fed a token which seems to be + // spurious input. We can detect this when the token that follows + // this unwanted token would normally be part of the syntactically + // correct stream. Then we can see that the token we are looking at + // is just something that should not be there and throw this exception. + // + if(tokenNames == NULL) { + ss << "Unexpected token." ; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Expected end of file."; + } else { + ss << "Expected " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_MISSING_TOKEN_EXCEPTION: + + // Indicates that the recognizer detected that the token we just + // hit would be valid syntactically if preceded by a particular + // token. Perhaps a missing ';' at line end or a missing ',' in an + // expression list, and such like. + // + if(tokenNames == NULL) { + ss << "Missing token (" << ex->expecting << ")."; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Missing end of file marker."; + } else { + ss << "Missing " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_RECOGNITION_EXCEPTION: + + // Indicates that the recognizer received a token + // in the input that was not predicted. This is the basic exception type + // from which all others are derived. So we assume it was a syntax error. + // You may get this if there are not more tokens and more are needed + // to complete a parse for instance. + // + ss <<"Syntax error."; + break; + + case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: + + // We were expecting to see one thing and got another. This is the + // most common error if we could not detect a missing or unwanted token. + // Here you can spend your efforts to + // derive more useful error messages based on the expected + // token set and the last token and so on. The error following + // bitmaps do a good job of reducing the set that we were looking + // for down to something small. Knowing what you are parsing may be + // able to allow you to be even more specific about an error. + // + if(tokenNames == NULL) { + ss << "Syntax error."; + } else { + if(ex->expecting == ANTLR3_TOKEN_EOF) { + ss << "Expected end of file."; + } else { + ss << "Expected " << tokenNames[ex->expecting] << "."; + } + } + break; + + case ANTLR3_NO_VIABLE_ALT_EXCEPTION: + + // We could not pick any alt decision from the input given + // so god knows what happened - however when you examine your grammar, + // you should. It means that at the point where the current token occurred + // that the DFA indicates nowhere to go from here. + // + ss << "Cannot match to any predicted input."; + + break; + + case ANTLR3_MISMATCHED_SET_EXCEPTION: + + { + ANTLR3_UINT32 count; + ANTLR3_UINT32 bit; + ANTLR3_UINT32 size; + ANTLR3_UINT32 numbits; + pANTLR3_BITSET errBits; + + // This means we were able to deal with one of a set of + // possible tokens at this point, but we did not see any + // member of that set. + // + ss << "Unexpected input. Expected one of : "; + + // What tokens could we have accepted at this point in the + // parse? + // + count = 0; + errBits = antlr3BitsetLoad(ex->expectingSet); + numbits = errBits->numBits(errBits); + size = errBits->size(errBits); + + if(size > 0) { + // However many tokens we could have dealt with here, it is usually + // not useful to print ALL of the set here. I arbitrarily chose 8 + // here, but you should do whatever makes sense for you of course. + // No token number 0, so look for bit 1 and on. + // + for(bit = 1; bit < numbits && count < 8 && count < size; bit++) { + // TODO: This doesn;t look right - should be asking if the bit is set!! + // + if(tokenNames[bit]) { + if( count++ > 0 ) { + ss << ", "; + } + ss << tokenNames[bit]; + } + } + } else { + Unreachable("Parse error with empty set of expected tokens."); + } + } + break; + + case ANTLR3_EARLY_EXIT_EXCEPTION: + + // We entered a loop requiring a number of token sequences + // but found a token that ended that sequence earlier than + // we should have done. + // + ss << "Missing elements."; + break; + + default: + + // We don't handle any other exceptions here, but you can + // if you wish. If we get an exception that hits this point + // then we are just going to report what we know about the + // token. + // + Unhandled("Unexpected exception in parser."); + break; + } + + // Now get ready to throw an exception + pANTLR3_PARSER parser = (pANTLR3_PARSER)(recognizer->super); + AlwaysAssert(parser!=NULL); + AntlrInput *input = (AntlrInput*)(parser->super); + AlwaysAssert(input!=NULL); + + // Call the error display routine + input->parseError(ss.str()); +} + +void AntlrInput::setLexer(pANTLR3_LEXER pLexer) { + d_lexer = pLexer; + + pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory; + if( pTokenFactory != NULL ) { + pTokenFactory->close(pTokenFactory); + } + + /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */ + pTokenFactory = BoundedTokenFactoryNew(d_input, 2*d_lookahead); + if( pTokenFactory == NULL ) { + throw ParserException("Couldn't create token factory."); + } + d_lexer->rec->state->tokFactory = pTokenFactory; + + pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource); + if( buffer == NULL ) { + throw ParserException("Couldn't create token buffer."); + } + + d_tokenStream = buffer->commonTstream; +} + +void AntlrInput::setParser(pANTLR3_PARSER pParser) { + d_parser = pParser; + // ANTLR isn't using super, AFAICT. + d_parser->super = this; + d_parser->rec->reportError = &reportError; +} + + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h new file mode 100644 index 000000000..01ebe8339 --- /dev/null +++ b/src/parser/antlr_input.h @@ -0,0 +1,93 @@ +/********************* */ +/** antlr_parser.h + ** Original author: dejan + ** Major contributors: cconway + ** Minor contributors (to current version): mdeters + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Base for ANTLR parser classes. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__ANTLR_PARSER_H +#define __CVC4__PARSER__ANTLR_PARSER_H + +#include +#include +#include +#include + +#include "expr/expr.h" +#include "expr/expr_manager.h" +#include "parser/input.h" +#include "parser/symbol_table.h" +#include "util/Assert.h" + +namespace CVC4 { + +class Command; +class Type; +class FunctionType; + +namespace parser { + +/** + * Wrapper for an ANTLR parser that includes convenience methods to set up input and token streams. + */ +class AntlrInput : public Input { + + unsigned int d_lookahead; + pANTLR3_LEXER d_lexer; + pANTLR3_PARSER d_parser; + pANTLR3_COMMON_TOKEN_STREAM d_tokenStream; + pANTLR3_INPUT_STREAM d_input; + + static void reportError(pANTLR3_BASE_RECOGNIZER recognizer); + +public: + AntlrInput(ExprManager* exprManager, const std::string& filename, unsigned int lookahead, bool useMmap); + // AntlrParser(ExprManager* em, std::istream& input, const std::string& name, unsigned int lookahead); + AntlrInput(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead); + ~AntlrInput(); + + inline static std::string tokenText(pANTLR3_COMMON_TOKEN token); + +protected: + + /** + * Throws a semantic exception with the given message. + */ + void parseError(const std::string& msg) throw (ParserException); + + /** Get the lexer */ + pANTLR3_LEXER getLexer(); + /** Retrieve the input stream for this parser. */ + pANTLR3_INPUT_STREAM getInputStream(); + /** Retrieve the token stream for this parser. Must not be called before setLexer(). */ + pANTLR3_COMMON_TOKEN_STREAM getTokenStream(); + /** Set the ANTLR lexer for this parser. */ + void setLexer(pANTLR3_LEXER pLexer); + /** Set the ANTLR parser implementation for this parser. */ + void setParser(pANTLR3_PARSER pParser); +}; + +std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { + ANTLR3_MARKER start = token->getStartIndex(token); + ANTLR3_MARKER end = token->getStopIndex(token); + std::string txt( (const char *)start, end-start+1 ); + Debug("parser-extra") << "tokenText: start=" << start << std::endl + << "end=" << end << std::endl + << "txt='" << txt << "'" << std::endl; + return txt; +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__ANTLR_PARSER_H */ diff --git a/src/parser/antlr_parser.h b/src/parser/antlr_parser.h deleted file mode 100644 index e970ebd52..000000000 --- a/src/parser/antlr_parser.h +++ /dev/null @@ -1,361 +0,0 @@ -/********************* */ -/** antlr_parser.h - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Base for ANTLR parser classes. - **/ - -#include "cvc4parser_private.h" - -#ifndef __CVC4__PARSER__ANTLR_PARSER_H -#define __CVC4__PARSER__ANTLR_PARSER_H - -#include -#include -#include - -#include -#include - -#include "expr/expr.h" -#include "expr/expr_manager.h" -#include "util/Assert.h" -#include "parser/symbol_table.h" - -namespace CVC4 { - -class Command; -class Type; -class FunctionType; - -namespace parser { - -/** - * Wrapper of the ANTLR parser that includes convenience methods that interacts - * with the expression manager. The grammars should have as little C++ code as - * possible and all the state and actual functionality (besides parsing) should - * go into this class. - */ -class AntlrParser : public antlr::LLkParser { - -public: - - /** Types of check for the symols */ - enum DeclarationCheck { - /** Enforce that the symbol has been declared */ - CHECK_DECLARED, - /** Enforce that the symbol has not been declared */ - CHECK_UNDECLARED, - /** Don't check anything */ - CHECK_NONE - }; - - /** - * Sets the expression manager to use when creating/managing expression. - * @param expr_manager the expression manager - */ - void setExpressionManager(ExprManager* expr_manager); - - /** - * Sets the logic for the current benchmark. Declares any logic symbols. - * - * @param name the name of the logic (e.g., QF_UF, AUFLIA) - */ - void setLogic(const std::string& name); - - /** - * Parse a command. - * @return a command - */ - virtual Command* parseCommand() = 0; - - /** - * Parse an expression. - * @return the expression - */ - virtual Expr parseExpr() = 0; - - /** Enable semantic checks during parsing. */ - void enableChecks(); - - /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ - void disableChecks(); - -protected: - - /** - * Create a parser with the given input state and token lookahead. - * - * @param state the shared input state - * @param k lookahead - */ - AntlrParser(const antlr::ParserSharedInputState& state, int k); - - /** - * Create a parser with the given token buffer and lookahead. - * - * @param tokenBuf the token buffer to use in parsing - * @param k lookahead - */ - AntlrParser(antlr::TokenBuffer& tokenBuf, int k); - - /** - * Create a parser with the given token stream and lookahead. - * - * @param lexer the lexer to use in parsing - * @param k lookahead - */ - AntlrParser(antlr::TokenStream& lexer, int k); - - /** - * Throws an ANTLR semantic exception with the given message. - */ - void parseError(const std::string& msg) throw (antlr::SemanticException); - - /** - * Returns a variable, given a name and a type. - * @param var_name the name of the variable - * @return the variable expression - */ - Expr getVariable(const std::string& var_name); - - /** - * Returns a function, given a name and a type. - * @param name the name of the function - * @return the function expression - */ - Expr getFunction(const std::string& name); - - /** - * Returns a sort, given a name - */ - Type* getSort(const std::string& sort_name); - - /** - * Types of symbols. Used to define namespaces. - */ - enum SymbolType { - /** Variables */ - SYM_VARIABLE, - /** Functions */ - SYM_FUNCTION, - /** Sorts */ - SYM_SORT - }; - - /** - * Checks if a symbol has been declared. - * @param name the symbol name - * @param type the symbol type - * @return true iff the symbol has been declared with the given type - */ - bool isDeclared(const std::string& name, SymbolType type = SYM_VARIABLE); - - /** - * Checks if the declaration policy we want to enforce holds - * for the given symbol. - * @param name the symbol to check - * @param check the kind of check to perform - * @param type the type of the symbol - * @throws SemanticException if checks are enabled and the check fails - */ - void checkDeclaration(const std::string& name, - DeclarationCheck check, - SymbolType type = SYM_VARIABLE) - throw (antlr::SemanticException); - - /** - * Checks whether the given name is bound to a function. - * @param name the name to check - * @throws SemanticException if checks are enabled and name is not bound to a function - */ - void checkFunction(const std::string& name) throw (antlr::SemanticException); - - /** - * Check that kind can accept numArgs arguments. - * @param kind the built-in operator to check - * @param numArgs the number of actual arguments - * @throws SemanticException if checks are enabled and the operator kind cannot be - * applied to numArgs arguments. - */ - void checkArity(Kind kind, unsigned int numArgs) throw (antlr::SemanticException); - - /** - * Returns the type for the variable with the given name. - * @param name the symbol to lookup - * @param type the (namespace) type of the symbol - */ - Type* getType(const std::string& var_name, - SymbolType type = SYM_VARIABLE); - - /** - * Returns the true expression. - * @return the true expression - */ - Expr getTrueExpr() const; - - /** - * Returns the false expression. - * @return the false expression - */ - Expr getFalseExpr() const; - - /** - * Creates a new unary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child the child - * @return the new unary expression - */ - Expr mkExpr(Kind kind, const Expr& child); - - /** - * Creates a new binary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child1 the first child - * @param child2 the second child - * @return the new binary expression - */ - Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2); - - /** - * Creates a new ternary CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param child_1 the first child - * @param child_2 the second child - * @param child_3 - * @return the new ternary expression - */ - Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, - const Expr& child_3); - - /** - * Creates a new CVC4 expression using the expression manager. - * @param kind the kind of the expression - * @param children the children of the expression - */ - Expr mkExpr(Kind kind, const std::vector& children); - - /** Create a new CVC4 variable expression of the given type. */ - Expr mkVar(const std::string& name, Type* type); - - /** Create a set of new CVC4 variable expressions of the given - type. */ - const std::vector - mkVars(const std::vector names, - Type* type); - - /** Create a new variable definition (e.g., from a let binding). */ - void defineVar(const std::string& name, const Expr& val); - /** Remove a variable definition (e.g., from a let binding). */ - void undefineVar(const std::string& name); - - /** Returns a function type over the given domain and range types. */ - Type* functionType(Type* domain, Type* range); - - /** Returns a function type over the given types. argTypes must be - non-empty. */ - Type* functionType(const std::vector& argTypes, - Type* rangeType); - - /** - * Returns a function type over the given types. If types has only - * one element, then the type is just types[0]. - * - * @param types a non-empty list of input and output types. - */ - Type* functionType(const std::vector& types); - - /** - * Returns a predicate type over the given sorts. If sorts is empty, - * then the type is just BOOLEAN. - - * @param sorts a list of input types - */ - Type* predicateType(const std::vector& sorts); - - /** - * Creates a new sort with the given name. - */ - Type* newSort(const std::string& name); - - /** - * Creates a new sorts with the given names. - */ - const std::vector - newSorts(const std::vector& names); - - /** Is the symbol bound to a boolean variable? */ - bool isBoolean(const std::string& name); - - /** Is the symbol bound to a function? */ - bool isFunction(const std::string& name); - - /** Is the symbol bound to a predicate? */ - bool isPredicate(const std::string& name); - - /** Returns the boolean type. */ - BooleanType* booleanType(); - - /** Returns the kind type (i.e., the type of types). */ - KindType* kindType(); - - /** Returns the minimum arity of the given kind. */ - static unsigned int minArity(Kind kind); - - /** Returns the maximum arity of the given kind. */ - static unsigned int maxArity(Kind kind); - - /** Returns a string representation of the given object (for - debugging). */ - inline std::string toString(DeclarationCheck check) { - switch(check) { - case CHECK_NONE: return "CHECK_NONE"; - case CHECK_DECLARED: return "CHECK_DECLARED"; - case CHECK_UNDECLARED: return "CHECK_UNDECLARED"; - default: Unreachable(); - } - } - - /** Returns a string representation of the given object (for - debugging). */ - inline std::string toString(SymbolType type) { - switch(type) { - case SYM_VARIABLE: return "SYM_VARIABLE"; - case SYM_FUNCTION: return "SYM_FUNCTION"; - case SYM_SORT: return "SYM_SORT"; - default: Unreachable(); - } - } - -// bool checksEnabled(); - -private: - - /** The expression manager */ - ExprManager* d_exprManager; - - /** The symbol table lookup */ - SymbolTable d_varSymbolTable; - - /** The sort table */ - SymbolTable d_sortTable; - - /** Are semantic checks enabled during parsing? */ - bool d_checksEnabled; - - /** Lookup a symbol in the given namespace. */ - Expr getSymbol(const std::string& var_name, SymbolType type); -}; - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ - -#endif /* __CVC4__PARSER__ANTLR_PARSER_H */ diff --git a/src/parser/bounded_token_buffer.cpp b/src/parser/bounded_token_buffer.cpp new file mode 100644 index 000000000..f53b6d548 --- /dev/null +++ b/src/parser/bounded_token_buffer.cpp @@ -0,0 +1,640 @@ +/// \file +/// Default implementation of CommonTokenStream +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include + +#include "bounded_token_buffer.h" +#include "cvc4_config.h" +#include "util/Assert.h" + +namespace CVC4 { +namespace parser { + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +// COMMON_TOKEN_STREAM API +// +static void setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel); +static void discardTokenType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 ttype); +static void discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_BOOLEAN discard); +static pANTLR3_VECTOR getTokens (pANTLR3_COMMON_TOKEN_STREAM cts); +static pANTLR3_LIST getTokenRange (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_LIST getTokensSet (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types); +static pANTLR3_LIST getTokensList (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list); +static pANTLR3_LIST getTokensType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type); + +// TOKEN_STREAM API +// +static pANTLR3_COMMON_TOKEN tokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i); +static pANTLR3_TOKEN_SOURCE getTokenSource (pANTLR3_TOKEN_STREAM ts); +static void setTokenSource (pANTLR3_TOKEN_STREAM ts, pANTLR3_TOKEN_SOURCE tokenSource); +static pANTLR3_STRING toString (pANTLR3_TOKEN_STREAM ts); +static pANTLR3_STRING toStringSS (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_STRING toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop); +static void setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger); + +// INT STREAM API +// +static void consume (pANTLR3_INT_STREAM is); +static void dbgConsume (pANTLR3_INT_STREAM is); +static ANTLR3_UINT32 _LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_UINT32 dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_MARKER mark (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER dbgMark (pANTLR3_INT_STREAM is); +static void release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); +static ANTLR3_UINT32 size (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER tindex (pANTLR3_INT_STREAM is); +static void rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void rewindLast (pANTLR3_INT_STREAM is); +static void dbgRewindLast (pANTLR3_INT_STREAM is); +static void seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static void dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static pANTLR3_STRING getSourceName (pANTLR3_INT_STREAM is); +static pANTLR3_COMMON_TOKEN LB (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i); + +static pANTLR3_COMMON_TOKEN nextToken(pBOUNDED_TOKEN_BUFFER buffer); +static pANTLR3_COMMON_TOKEN simpleEmit (pANTLR3_LEXER lexer); + +void +BoundedTokenBufferFree(pBOUNDED_TOKEN_BUFFER buffer) { + buffer->commonTstream->free(buffer->commonTstream); + ANTLR3_FREE(buffer->tokenBuffer); + ANTLR3_FREE(buffer); +} + +/*ANTLR3_API pANTLR3_COMMON_TOKEN_STREAM +antlr3CommonTokenDebugStreamSourceNew(ANTLR3_UINT32 hint, pANTLR3_TOKEN_SOURCE source, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + pANTLR3_COMMON_TOKEN_STREAM stream; + + // Create a standard token stream + // + stream = antlr3CommonTokenStreamSourceNew(hint, source); + + // Install the debugger object + // + stream->tstream->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + stream->tstream->initialStreamState = ANTLR3_FALSE; + + stream->tstream->_LT = dbgTokLT; + + stream->tstream->istream->consume = dbgConsume; + stream->tstream->istream->_LA = dbgLA; + stream->tstream->istream->mark = dbgMark; + stream->tstream->istream->rewind = dbgRewindStream; + stream->tstream->istream->rewindLast = dbgRewindLast; + stream->tstream->istream->seek = dbgSeek; + + return stream; +}*/ + +pBOUNDED_TOKEN_BUFFER +BoundedTokenBufferSourceNew(ANTLR3_UINT32 k, pANTLR3_TOKEN_SOURCE source) +{ + pBOUNDED_TOKEN_BUFFER buffer; + pANTLR3_COMMON_TOKEN_STREAM stream; + + + AlwaysAssert( k > 0 ); + + /* Memory for the interface structure + */ + buffer = (pBOUNDED_TOKEN_BUFFER) ANTLR3_MALLOC(sizeof(BOUNDED_TOKEN_BUFFER_struct)); + + if (buffer == NULL) + { + return NULL; + } + + buffer->tokenBuffer = (pANTLR3_COMMON_TOKEN*) ANTLR3_MALLOC(2*k*sizeof(pANTLR3_COMMON_TOKEN)); + buffer->currentIndex = 0; + buffer->maxIndex = 0; + buffer->k = k; + buffer->bufferSize = 2*k; + buffer->empty = ANTLR3_TRUE; + buffer->done = ANTLR3_FALSE; + + stream = antlr3CommonTokenStreamSourceNew(k,source); + if (stream == NULL) + { + return NULL; + } + + stream->super = buffer; + buffer->commonTstream = stream; + + /* Defaults + */ + stream->p = -1; + + /* Install the token stream API + */ + stream->tstream->_LT = tokLT; + stream->tstream->get = get; + stream->tstream->getTokenSource = getTokenSource; + stream->tstream->setTokenSource = setTokenSource; + stream->tstream->toString = toString; + stream->tstream->toStringSS = toStringSS; + stream->tstream->toStringTT = toStringTT; + stream->tstream->setDebugListener = setDebugListener; + + /* Install INT_STREAM interface + */ + stream->tstream->istream->_LA = _LA; + stream->tstream->istream->mark = mark; + stream->tstream->istream->release = release; + stream->tstream->istream->size = size; + stream->tstream->istream->index = tindex; + stream->tstream->istream->rewind = rewindStream; + stream->tstream->istream->rewindLast= rewindLast; + stream->tstream->istream->seek = seek; + stream->tstream->istream->consume = consume; + stream->tstream->istream->getSourceName = getSourceName; + + return buffer; +} + +// Install a debug listener adn switch to debug mode methods +// +static void +setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + // Install the debugger object + // + ts->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + ts->initialStreamState = ANTLR3_FALSE; + + ts->_LT = dbgTokLT; + + ts->istream->consume = dbgConsume; + ts->istream->_LA = dbgLA; + ts->istream->mark = dbgMark; + ts->istream->rewind = dbgRewindStream; + ts->istream->rewindLast = dbgRewindLast; + ts->istream->seek = dbgSeek; +} + +/** Get the ith token from the current position 1..n where k=1 is the +* first symbol of lookahead. +*/ +static pANTLR3_COMMON_TOKEN tokLT(pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) { + pANTLR3_COMMON_TOKEN_STREAM cts; + pBOUNDED_TOKEN_BUFFER buffer; + + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + /* k must be in the range [-buffer->k..buffer->k] */ + AlwaysAssert( k <= (ANTLR3_INT32)buffer->k + && -k <= (ANTLR3_INT32)buffer->k ); + + if(k == 0) { + return NULL; + } + + /* Initialize the buffer on our first call. */ + if( EXPECT_FALSE(buffer->empty == ANTLR3_TRUE) ) { + AlwaysAssert( buffer->tokenBuffer != NULL ); + buffer->tokenBuffer[ 0 ] = nextToken(buffer); + buffer->maxIndex = 0; + buffer->currentIndex = 0; + buffer->empty = ANTLR3_FALSE; + } + + ANTLR3_UINT32 kIndex; + if( k > 0 ) { + /* look-ahead token k is at offset k-1 */ + kIndex = buffer->currentIndex + k - 1; + } else { + /* Can't look behind more tokens than we've consumed. */ + AlwaysAssert( -k <= (ANTLR3_INT32)buffer->currentIndex ); + /* look-behind token k is at offset -k */ + kIndex = buffer->currentIndex + k; + } + + while( kIndex > buffer->maxIndex ) { + buffer->maxIndex++; + buffer->tokenBuffer[ buffer->maxIndex % buffer->bufferSize ] = nextToken(buffer); + } + + return buffer->tokenBuffer[ kIndex % buffer->bufferSize ]; +} + + +/// As per the normal tokLT but sends information to the debugger +/// +static pANTLR3_COMMON_TOKEN +dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) +{ + return tokLT(ts, k); +} + +#ifdef ANTLR3_WINDOWS + /* When fully optimized VC7 complains about non reachable code. + * Not yet sure if this is an optimizer bug, or a bug in the flow analysis + */ +#pragma warning( disable : 4702 ) +#endif + +static pANTLR3_COMMON_TOKEN +LB(pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 k) +{ + AlwaysAssert(false); +} + +static pANTLR3_COMMON_TOKEN +get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i) +{ + AlwaysAssert(false); +} + +static pANTLR3_TOKEN_SOURCE +getTokenSource (pANTLR3_TOKEN_STREAM ts) +{ + return ts->tokenSource; +} + +static void +setTokenSource ( pANTLR3_TOKEN_STREAM ts, + pANTLR3_TOKEN_SOURCE tokenSource) +{ + ts->tokenSource = tokenSource; +} + +static pANTLR3_STRING +toString (pANTLR3_TOKEN_STREAM ts) +{ + AlwaysAssert(false); +} + +static pANTLR3_STRING +toStringSS(pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + AlwaysAssert(false); +} + +static pANTLR3_STRING +toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop) +{ + AlwaysAssert(false); +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. + */ +static void +consume (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + pBOUNDED_TOKEN_BUFFER buffer; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + buffer->currentIndex++; +} + + +/// As per ordinary consume but notifies the debugger about hidden +/// tokens and so on. +/// +static void +dbgConsume (pANTLR3_INT_STREAM is) +{ + consume(is); +} + +/** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot execute actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored, + * channel. + */ +static void +setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel) +{ + if (tokenStream->channelOverrides == NULL) + { + tokenStream->channelOverrides = antlr3ListNew(10); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. + */ + tokenStream->channelOverrides->put(tokenStream->channelOverrides, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)channel + 1), NULL); +} + +static void +discardTokenType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 ttype) +{ + if (tokenStream->discardSet == NULL) + { + tokenStream->discardSet = antlr3ListNew(31); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. We could use bitsets for this I suppose too. + */ + tokenStream->discardSet->put(tokenStream->discardSet, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)ttype + 1), NULL); +} + +static void +discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_BOOLEAN discard) +{ + tokenStream->discardOffChannel = discard; +} + +static pANTLR3_VECTOR +getTokens (pANTLR3_COMMON_TOKEN_STREAM tokenStream) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokenRange (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + AlwaysAssert(false); +} +/** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. + */ +static pANTLR3_LIST +getTokensSet (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokensList (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list) +{ + AlwaysAssert(false); +} + +static pANTLR3_LIST +getTokensType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type) +{ + AlwaysAssert(false); +} + +static ANTLR3_UINT32 +_LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + pANTLR3_TOKEN_STREAM ts; + pANTLR3_COMMON_TOKEN tok; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + tok = ts->_LT(ts, i); + + if (tok != NULL) + { + return tok->getType(tok); + } + else + { + return ANTLR3_TOKEN_INVALID; + } +} + +/// As per _LA() but for debug mode. +/// +static ANTLR3_UINT32 +dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + AlwaysAssert(false); +} + +static ANTLR3_MARKER +mark (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +/// As per mark() but with a call to tell the debugger we are doing this +/// +static ANTLR3_MARKER +dbgMark (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +static void +release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) +{ + return; +} + +static ANTLR3_UINT32 +size (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} + +static ANTLR3_MARKER +tindex (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + pBOUNDED_TOKEN_BUFFER buffer; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + buffer = (pBOUNDED_TOKEN_BUFFER) cts->super; + + return buffer->currentIndex; +} + +static void +dbgRewindLast (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} +static void +rewindLast (pANTLR3_INT_STREAM is) +{ + AlwaysAssert(false); +} +static void +rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + AlwaysAssert(false); +} +static void +dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + AlwaysAssert(false); +} + +static void +seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + AlwaysAssert(false); +} +static void +dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + AlwaysAssert(false); +} + +static pANTLR3_COMMON_TOKEN nextToken(pBOUNDED_TOKEN_BUFFER buffer) { + pANTLR3_COMMON_TOKEN_STREAM tokenStream; + pANTLR3_COMMON_TOKEN tok; + ANTLR3_BOOLEAN discard; + void * channelI; + + tokenStream = buffer->commonTstream; + + if( buffer->done == ANTLR3_TRUE ) { + return &(tokenStream->tstream->tokenSource->eofToken); + } + + /* Pick out the next token from the token source + * Remember we just get a pointer (reference if you like) here + * and so if we store it anywhere, we don't set any pointers to auto free it. + */ + do { + tok + = tokenStream->tstream->tokenSource->nextToken( + tokenStream->tstream->tokenSource); + if(tok == NULL || tok->type == ANTLR3_TOKEN_EOF) { + buffer->done = ANTLR3_TRUE; + break; + } + + discard = ANTLR3_FALSE; /* Assume we are not discarding */ + + /* I employ a bit of a trick, or perhaps hack here. Rather than + * store a pointer to a structure in the override map and discard set + * we store the value + 1 cast to a void *. Hence on systems where NULL = (void *)0 + * we can distinguish "not being there" from "being channel or type 0" + */ + + if(tokenStream->discardSet != NULL + && tokenStream->discardSet->get(tokenStream->discardSet, + tok->getType(tok)) != NULL) { + discard = ANTLR3_TRUE; + } else if(tok->getChannel(tok) != tokenStream->channel) { + discard = ANTLR3_TRUE; + } + + } while(discard == ANTLR3_TRUE); + + return tok; +} + + +/// Return a string that represents the name assoicated with the input source +/// +/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. +/// +/// /returns +/// /implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +static pANTLR3_STRING +getSourceName (pANTLR3_INT_STREAM is) +{ + // Slightly convoluted as we must trace back to the lexer's input source + // via the token source. The streamName that is here is not initialized + // because this is a token stream, not a file or string stream, which are the + // only things that have a context for a source name. + // + return ((pANTLR3_TOKEN_STREAM)(is->super))->tokenSource->fileName; +} + +static pANTLR3_COMMON_TOKEN +simpleEmit (pANTLR3_LEXER lexer) +{ + pANTLR3_COMMON_TOKEN token; + + /* We could check pointers to token factories and so on, but + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. + */ + token = antlr3CommonTokenNew( lexer->rec->state->type ); + // lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + // token->type = lexer->rec->state->type; + token->channel = lexer->rec->state->channel; + token->start = lexer->rec->state->tokenStartCharIndex; + token->stop = lexer->getCharIndex(lexer) - 1; + token->line = lexer->rec->state->tokenStartLine; + token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; + + if (lexer->rec->state->text != NULL) + { + token->textState = ANTLR3_TEXT_STRING; + token->tokText.text = lexer->rec->state->text; + } + else + { + token->textState = ANTLR3_TEXT_NONE; + } + token->lineStart = lexer->input->currentLine; + token->user1 = lexer->rec->state->user1; + token->user2 = lexer->rec->state->user2; + token->user3 = lexer->rec->state->user3; + token->custom = lexer->rec->state->custom; + + lexer->rec->state->token = token; + + return token; +} + + +} +} diff --git a/src/parser/bounded_token_buffer.h b/src/parser/bounded_token_buffer.h new file mode 100644 index 000000000..9c18ec3de --- /dev/null +++ b/src/parser/bounded_token_buffer.h @@ -0,0 +1,73 @@ +/** \file + * Defines the interface for an ANTLR3 common token stream. Custom token streams should create + * one of these and then override any functions by installing their own pointers + * to implement the various functions. + */ +#ifndef __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H +#define __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +namespace CVC4 { +namespace parser { + +#ifdef __cplusplus +extern "C" { +#endif + + +/** A "super" structure for COMMON_TOKEN_STREAM. */ +typedef struct BOUNDED_TOKEN_BUFFER_struct +{ + pANTLR3_COMMON_TOKEN_STREAM commonTstream; + pANTLR3_COMMON_TOKEN* tokenBuffer; + // tokenNeg1, token1, token2; + ANTLR3_UINT32 currentIndex, maxIndex, k, bufferSize; + ANTLR3_BOOLEAN empty, done; +} BOUNDED_TOKEN_BUFFER, *pBOUNDED_TOKEN_BUFFER; + +pBOUNDED_TOKEN_BUFFER +BoundedTokenBufferSourceNew(ANTLR3_UINT32 k, pANTLR3_TOKEN_SOURCE source); + +void +BoundedTokenBufferFree(pBOUNDED_TOKEN_BUFFER buffer); + +#ifdef __cplusplus +} +#endif + +} +} + + +#endif /* __CVC4__PARSER__BOUNDED_TOKEN_BUFFER_H */ + diff --git a/src/parser/bounded_token_factory.cpp b/src/parser/bounded_token_factory.cpp new file mode 100644 index 000000000..dae2f46e5 --- /dev/null +++ b/src/parser/bounded_token_factory.cpp @@ -0,0 +1,134 @@ +/* + * bounded_token_factory.cpp + * + * Created on: Mar 2, 2010 + * Author: dejan + */ + +#include +#include +#include +#include "bounded_token_factory.h" + +namespace CVC4 { +namespace parser { + +static pANTLR3_COMMON_TOKEN +newPoolToken(pANTLR3_TOKEN_FACTORY factory); + +static void +setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input); + +static void +factoryClose (pANTLR3_TOKEN_FACTORY factory); + +pANTLR3_TOKEN_FACTORY +BoundedTokenFactoryNew(pANTLR3_INPUT_STREAM input,ANTLR3_UINT32 size) +{ + pANTLR3_TOKEN_FACTORY factory; + pANTLR3_COMMON_TOKEN tok; + int i; + + /* allocate memory + */ + factory = (pANTLR3_TOKEN_FACTORY) ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_FACTORY)); + + if (factory == NULL) + { + return NULL; + } + + /* Install factory API + */ + factory->newToken = newPoolToken; + factory->close = factoryClose; + factory->setInputStream = setInputStream; + + /* Allocate the initial pool + */ + factory->thisPool = size; + factory->nextToken = 0; + factory->pools = (pANTLR3_COMMON_TOKEN*) ANTLR3_MALLOC(sizeof(pANTLR3_COMMON_TOKEN)); + factory->pools[0] = + (pANTLR3_COMMON_TOKEN) + ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_COMMON_TOKEN) * size)); + + /* Set up the tokens once and for all */ + for( i=0; i < size; i++ ) { + tok = factory->pools[0] + i; + antlr3SetTokenAPI(tok); + + /* It is factory made, and we need to copy the string factory pointer + */ + tok->factoryMade = ANTLR3_TRUE; + tok->strFactory = input == NULL ? NULL : input->strFactory; + tok->input = input; + } + + /* Factory space is good, we now want to initialize our cheating token + * which one it is initialized is the model for all tokens we manufacture + */ + antlr3SetTokenAPI(&factory->unTruc); + + /* Set some initial variables for future copying + */ + factory->unTruc.factoryMade = ANTLR3_TRUE; + + // Input stream + // + setInputStream(factory, input); + + return factory; + +} + +static pANTLR3_COMMON_TOKEN +newPoolToken(pANTLR3_TOKEN_FACTORY factory) +{ + ANTLR3_UINT32 size = factory->thisPool; + pANTLR3_COMMON_TOKEN tok = factory->pools[0] + (factory->nextToken % size); + if(factory->nextToken >= size && tok->custom != NULL && tok->freeCustom != NULL) { + tok->freeCustom(tok->custom); + tok->custom = NULL; + } + factory->nextToken++; + + return tok; +} + +static void +factoryClose (pANTLR3_TOKEN_FACTORY factory) +{ + ANTLR3_UINT32 i; + ANTLR3_UINT32 size = factory->thisPool; + pANTLR3_COMMON_TOKEN tok; + + for(i = 0; i < size && i < factory->nextToken; i++) { + tok = factory->pools[0] + i; + if(tok->custom != NULL && tok->freeCustom != NULL) { + tok->freeCustom(tok->custom); + tok->custom = NULL; + } + } + ANTLR3_FREE(factory->pools[0]); + ANTLR3_FREE(factory->pools); + ANTLR3_FREE(factory); + +} + +static void +setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input) +{ + factory->input = input; + factory->unTruc.input = input; + if (input != NULL) + { + factory->unTruc.strFactory = input->strFactory; + } + else + { + factory->unTruc.strFactory = NULL; + } +} +} +} diff --git a/src/parser/bounded_token_factory.h b/src/parser/bounded_token_factory.h new file mode 100644 index 000000000..6f8729c19 --- /dev/null +++ b/src/parser/bounded_token_factory.h @@ -0,0 +1,38 @@ +/* + * bounded_token_factory.h + * + * Created on: Mar 2, 2010 + * Author: dejan + */ + +#ifndef BOUNDED_TOKEN_FACTORY_H_ +#define BOUNDED_TOKEN_FACTORY_H_ + +namespace CVC4 { +namespace parser { + +#ifdef __cplusplus +extern "C" { +#endif + +/** Create a token factory with a pool of exactly size tokens, + * attached to the input stream input. size must be + * greater than the maximum lookahead in the parser, or tokens will be prematurely re-used. + * + * We abuse pANTLR3_TOKEN_FACTORY fields for our own purposes: + * pANTLR3_COMMON_TOKEN *pools: a pointer to a single-element array, a single pool of tokens + * ANTLR3_INT32 thisPool: the size of the pool + * ANTLR3_UINT32 nextToken: the index of the next token to be returned + * */ +pANTLR3_TOKEN_FACTORY +BoundedTokenFactoryNew(pANTLR3_INPUT_STREAM input,ANTLR3_UINT32 size); + +#ifdef __cplusplus +} +#endif + +} +} + + +#endif /* BOUNDED_TOKEN_FACTORY_H_ */ diff --git a/src/parser/cvc/.gitignore b/src/parser/cvc/.gitignore new file mode 100644 index 000000000..7fd0cf319 --- /dev/null +++ b/src/parser/cvc/.gitignore @@ -0,0 +1,4 @@ +/.deps +/stamp-generated +/generated +/Makefile.in diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g new file mode 100644 index 000000000..a9dff77bf --- /dev/null +++ b/src/parser/cvc/Cvc.g @@ -0,0 +1,496 @@ +/* ******************* */ +/* Cvc.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for CVC-LIB input language. + **/ + +grammar Cvc; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class CvcInput; +} +} + +extern +void SetCvcInput(CVC4::parser::CvcInput* input); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/cvc/cvc_input.h" +#include "util/output.h" +#include + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::CvcInput *input; + +extern +void SetCvcInput(CVC4::parser::CvcInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : formula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : c = command { $cmd = c; } + ; + +/** + * Matches a command of the input. If a declaration, it will return an empty + * command. + */ +command returns [CVC4::Command* cmd = 0] +@init { + Expr f; + Debug("parser-extra") << "command: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ASSERT_TOK formula[f] SEMICOLON { cmd = new AssertCommand(f); } + | QUERY_TOK formula[f] SEMICOLON { cmd = new QueryCommand(f); } + | CHECKSAT_TOK formula[f] SEMICOLON { cmd = new CheckSatCommand(f); } + | CHECKSAT_TOK SEMICOLON { cmd = new CheckSatCommand(input->getTrueExpr()); } + | PUSH_TOK SEMICOLON { cmd = new PushCommand(); } + | POP_TOK SEMICOLON { cmd = new PopCommand(); } + | declaration[cmd] + | EOF + ; + +/** + * Match a declaration + */ + +declaration[CVC4::Command*& cmd] +@init { + std::vector ids; + Type* t; + Debug("parser-extra") << "declaration: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : // FIXME: These could be type or function declarations, if that matters. + identifierList[ids, CHECK_UNDECLARED, SYM_VARIABLE] COLON declType[t,ids] SEMICOLON + { cmd = new DeclarationCommand(ids,t); } + ; + +/** Match the right-hand side of a declaration. Returns the type. */ +declType[CVC4::Type*& t, std::vector& idList] +@init { + Debug("parser-extra") << "declType: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* A sort declaration (e.g., "T : TYPE") */ + TYPE_TOK { input->newSorts(idList); t = input->kindType(); } + | /* A variable declaration */ + type[t] { input->mkVars(idList,t); } + ; + +/** + * Match the type in a declaration and do the declaration binding. + * TODO: Actually parse sorts into Type objects. + */ +type[CVC4::Type*& t] +@init { + std::vector typeList; + Debug("parser-extra") << "type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* Simple type */ + baseType[t] + | /* Single-parameter function type */ + baseType[t] { typeList.push_back(t); } + ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + | /* Multi-parameter function type */ + LPAREN baseType[t] + { typeList.push_back(t); } + (COMMA baseType[t] { typeList.push_back(t); } )+ + RPAREN ARROW_TOK baseType[t] + { t = input->functionType(typeList,t); } + ; + +/** + * Matches a list of identifiers separated by a comma and puts them in the + * given list. + * @param idList the list to fill with identifiers. + * @param check what kinds of check to perform on the symbols + */ +identifierList[std::vector& idList, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] +@init { + std::string id; +} + : identifier[id,check,type] { idList.push_back(id); } + (COMMA identifier[id,check,type] { idList.push_back(id); })* + ; + + +/** + * Matches an identifier and returns a string. + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches a type. + * TODO: parse more types + */ +baseType[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "base type: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : BOOLEAN_TOK { t = input->booleanType(); } + | typeSymbol[t] + ; + +/** + * Matches a type identifier + */ +typeSymbol[CVC4::Type*& t] +@init { + std::string id; + Debug("parser-extra") << "type symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : identifier[id,CHECK_DECLARED,SYM_SORT] + { t = input->getSort(id); } + ; + +/** + * Matches a CVC4 formula. + * @return the expression representing the formula + */ +formula[CVC4::Expr& formula] +@init { + Debug("parser-extra") << "formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : iffFormula[formula] + ; + +/** + * Matches a comma-separated list of formulas + */ +formulaList[std::vector& formulas] +@init { + Expr f; +} + : formula[f] { formulas.push_back(f); } + ( COMMA formula[f] + { formulas.push_back(f); } + )* + ; + +/** + * Matches a Boolean IFF formula (right-associative) + */ +iffFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "<=> formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : impliesFormula[f] + ( IFF_TOK + iffFormula[e] + { f = input->mkExpr(CVC4::kind::IFF, f, e); } + )? + ; + +/** + * Matches a Boolean IMPLIES formula (right-associative) + */ +impliesFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "=> Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : orFormula[f] + ( IMPLIES_TOK impliesFormula[e] + { f = input->mkExpr(CVC4::kind::IMPLIES, f, e); } + )? + ; + +/** + * Matches a Boolean OR formula (left-associative) + */ +orFormula[CVC4::Expr& f] +@init { + std::vector exprs; + Debug("parser-extra") << "OR Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : xorFormula[f] + ( OR_TOK { exprs.push_back(f); } + xorFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::OR, exprs); + } + } + ; + +/** + * Matches a Boolean XOR formula (left-associative) + */ +xorFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "XOR formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : andFormula[f] + ( XOR_TOK andFormula[e] + { f = input->mkExpr(CVC4::kind::XOR,f, e); } + )* + ; + +/** + * Matches a Boolean AND formula (left-associative) + */ +andFormula[CVC4::Expr& f] +@init { + std::vector exprs; + Debug("parser-extra") << "AND Formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : notFormula[f] + ( AND_TOK { exprs.push_back(f); } + notFormula[f] { exprs.push_back(f); } )* + { + if( exprs.size() > 0 ) { + f = input->mkExpr(CVC4::kind::AND, exprs); + } + } + ; + +/** + * Parses a NOT formula. + * @return the expression representing the formula + */ +notFormula[CVC4::Expr& f] +@init { + Debug("parser-extra") << "NOT formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* negation */ + NOT_TOK notFormula[f] + { f = input->mkExpr(CVC4::kind::NOT, f); } + | /* a boolean atom */ + predFormula[f] + ; + +predFormula[CVC4::Expr& f] +@init { + Expr e; + Debug("parser-extra") << "predicate formula: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : term[f] + (EQUAL_TOK term[e] + { f = input->mkExpr(CVC4::kind::EQUAL, f, e); } + )? + ; // TODO: lt, gt, etc. + +/** + * Parses a term. + */ +term[CVC4::Expr& f] +@init { + std::string name; + std::vector args; + Debug("parser-extra") << "term: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : /* function application */ + // { isFunction(AntlrInput::tokenText(LT(1))) }? + functionSymbol[f] + { args.push_back( f ); } + LPAREN formulaList[args] RPAREN + // TODO: check arity + { f = input->mkExpr(CVC4::kind::APPLY_UF, args); } + + | /* if-then-else */ + iteTerm[f] + + | /* parenthesized sub-formula */ + LPAREN formula[f] RPAREN + + /* constants */ + | TRUE_TOK { f = input->getTrueExpr(); } + | FALSE_TOK { f = input->getFalseExpr(); } + + | /* variable */ + identifier[name,CHECK_DECLARED,SYM_VARIABLE] + { f = input->getVariable(name); } + ; + +/** + * Parses an ITE term. + */ +iteTerm[CVC4::Expr& f] +@init { + std::vector args; + Debug("parser-extra") << "ite: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : IF_TOK formula[f] { args.push_back(f); } + THEN_TOK formula[f] { args.push_back(f); } + iteElseTerm[f] { args.push_back(f); } + ENDIF_TOK + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses the else part of the ITE, i.e. ELSE f, or ELSIF b THEN f1 ... + */ +iteElseTerm[CVC4::Expr& f] +@init { + std::vector args; + Debug("parser-extra") << "else: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : ELSE_TOK formula[f] + | ELSEIF_TOK iteCondition = formula[f] { args.push_back(f); } + THEN_TOK iteThen = formula[f] { args.push_back(f); } + iteElse = iteElseTerm[f] { args.push_back(f); } + { f = input->mkExpr(CVC4::kind::ITE, args); } + ; + +/** + * Parses a function symbol (an identifier). + * @param what kind of check to perform on the id + * @return the predicate symol + */ +functionSymbol[CVC4::Expr& f] +@init { + Debug("parser-extra") << "function symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; + std::string name; +} + : identifier[name,CHECK_DECLARED,SYM_FUNCTION] + { input->checkFunction(name); + f = input->getFunction(name); } + ; + + +// Keywords + +AND_TOK : 'AND'; +ASSERT_TOK : 'ASSERT'; +BOOLEAN_TOK : 'BOOLEAN'; +CHECKSAT_TOK : 'CHECKSAT'; +ECHO_TOK : 'ECHO'; +ELSEIF_TOK : 'ELSIF'; +ELSE_TOK : 'ELSE'; +ENDIF_TOK : 'ENDIF'; +FALSE_TOK : 'FALSE'; +IF_TOK : 'IF'; +NOT_TOK : 'NOT'; +OR_TOK : 'OR'; +POPTO_TOK : 'POPTO'; +POP_TOK : 'POP'; +PRINT_TOK : 'PRINT'; +PUSH_TOK : 'PUSH'; +QUERY_TOK : 'QUERY'; +THEN_TOK : 'THEN'; +TRUE_TOK : 'TRUE'; +TYPE_TOK : 'TYPE'; +XOR_TOK : 'XOR'; + +// Symbols + +COLON : ':'; +COMMA : ','; +LPAREN : '('; +RPAREN : ')'; +SEMICOLON : ';'; + +// Operators + +IMPLIES_TOK : '=>'; +IFF_TOK : '<=>'; +ARROW_TOK : '->'; +EQUAL_TOK : '='; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL: (DIGIT)+; + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment ALPHA : 'a'..'z' | 'A'..'Z'; + +/** + * Matches the digits (0-9) + */ +fragment DIGIT : '0'..'9'; + +/** + * Matches and skips whitespace in the input and ignores it. + */ +WHITESPACE : (' ' | '\t' | '\f' | '\r' | '\n') { $channel = HIDDEN;; }; + +/** + * Matches the comments and ignores them + */ +COMMENT : '%' (~('\n' | '\r'))* { $channel = HIDDEN;; }; + diff --git a/src/parser/cvc/Makefile.am b/src/parser/cvc/Makefile.am index c1b5f752e..f02c9345c 100644 --- a/src/parser/cvc/Makefile.am +++ b/src/parser/cvc/Makefile.am @@ -1,30 +1,30 @@ AM_CPPFLAGS = \ -D__BUILDING_CVC4PARSERLIB \ - -I@srcdir@/../../include -I@srcdir@/../.. + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -fvisibility=hidden +# Compile generated C files using C++ compiler +CC=$(CXX) noinst_LTLIBRARIES = libparsercvc.la ANTLR_TOKEN_STUFF = \ - @srcdir@/generated/CvcVocabularyTokenTypes.hpp \ - @srcdir@/generated/CvcVocabularyTokenTypes.txt \ - @srcdir@/generated/AntlrCvcParserTokenTypes.hpp \ - @srcdir@/generated/AntlrCvcParserTokenTypes.txt + @srcdir@/generated/Cvc.tokens ANTLR_LEXER_STUFF = \ - @srcdir@/generated/AntlrCvcLexer.hpp \ - @srcdir@/generated/AntlrCvcLexer.cpp \ - $(ANTLR_TOKEN_STUFF) + @srcdir@/generated/CvcLexer.h \ + @srcdir@/generated/CvcLexer.c \ + $(ANTLR_TOKEN_STUFF) ANTLR_PARSER_STUFF = \ - @srcdir@/generated/AntlrCvcParser.hpp \ - @srcdir@/generated/AntlrCvcParser.cpp + @srcdir@/generated/CvcParser.h \ + @srcdir@/generated/CvcParser.c ANTLR_STUFF = \ - $(ANTLR_LEXER_STUFF) \ - $(ANTLR_PARSER_STUFF) + $(ANTLR_LEXER_STUFF) \ + $(ANTLR_PARSER_STUFF) libparsercvc_la_SOURCES = \ - cvc_lexer.g \ - cvc_parser.g \ - $(ANTLR_STUFF) + Cvc.g \ + cvc_input.h \ + cvc_input.cpp \ + $(ANTLR_STUFF) BUILT_SOURCES = $(ANTLR_STUFF) dist-hook: $(ANTLR_STUFF) @@ -36,16 +36,14 @@ maintainer-clean-local: @srcdir@/stamp-generated: mkdir -p @srcdir@/generated touch @srcdir@/stamp-generated + # antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. -@srcdir@/generated/AntlrCvcLexer.hpp: cvc_lexer.g @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/cvc_lexer.g" -@srcdir@/generated/AntlrCvcLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrCvcLexer.hpp -# doesn't actually depend on the lexer, but if we're doing parallel +@srcdir@/generated/CvcLexer.h: Cvc.g @srcdir@/stamp-generated + -rm -f $(ANTLR_STUFF) + $(ANTLR) -o "@srcdir@/generated" "@srcdir@/Cvc.g" + +# These don't actually depend on CvcLexer.h, but if we're doing parallel # make and the lexer needs to be rebuilt, we have to keep the rules # from running in parallel (since the token files will be deleted & # recreated) -@srcdir@/generated/AntlrCvcParser.hpp: cvc_parser.g cvc_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/cvc_parser.g" -@srcdir@/generated/AntlrCvcParser.cpp: @srcdir@/generated/AntlrCvcParser.hpp +@srcdir@/generated/CvcLexer.c @srcdir@/generated/CvcParser.h @srcdir@/generated/CvcParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/CvcLexer.h diff --git a/src/parser/cvc/cvc_input.cpp b/src/parser/cvc/cvc_input.cpp new file mode 100644 index 000000000..9de608aae --- /dev/null +++ b/src/parser/cvc/cvc_input.cpp @@ -0,0 +1,73 @@ +/* + * cvc_parser.cpp + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#include + +#include "expr/expr_manager.h" +#include "parser/parser_exception.h" +#include "parser/cvc/cvc_input.h" +#include "parser/cvc/generated/CvcLexer.h" +#include "parser/cvc/generated/CvcParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +CvcInput::CvcInput(ExprManager* exprManager, const std::string& filename, bool useMmap) : + AntlrInput(exprManager,filename,2,useMmap) { + init(); +} + +CvcInput::CvcInput(ExprManager* exprManager, const std::string& input, const std::string& name) : + AntlrInput(exprManager,input,name,2) { + init(); +} + +void CvcInput::init() { + pANTLR3_INPUT_STREAM input = getInputStream(); + AlwaysAssert( input != NULL ); + + d_pCvcLexer = CvcLexerNew(input); + if( d_pCvcLexer == NULL ) { + throw ParserException("Failed to create CVC lexer."); + } + + setLexer( d_pCvcLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pCvcParser = CvcParserNew(tokenStream); + if( d_pCvcParser == NULL ) { + throw ParserException("Failed to create CVC parser."); + } + + setParser(d_pCvcParser->pParser); + SetCvcInput(this); +} + + +CvcInput::~CvcInput() { + d_pCvcLexer->free(d_pCvcLexer); + d_pCvcParser->free(d_pCvcParser); +} + +Command* CvcInput::doParseCommand() throw (ParserException) { + return d_pCvcParser->parseCommand(d_pCvcParser); +} + +Expr CvcInput::doParseExpr() throw (ParserException) { + return d_pCvcParser->parseExpr(d_pCvcParser); +} + +pANTLR3_LEXER CvcInput::getLexer() { + return d_pCvcLexer->pLexer; +} + +} // namespace parser + +} // namespace CVC4 diff --git a/src/parser/cvc/cvc_input.h b/src/parser/cvc/cvc_input.h new file mode 100644 index 000000000..659123401 --- /dev/null +++ b/src/parser/cvc/cvc_input.h @@ -0,0 +1,48 @@ +/* + * cvc_parser.h + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#ifndef CVC_PARSER_H_ +#define CVC_PARSER_H_ + +#include "parser/antlr_input.h" +#include "parser/cvc/generated/CvcLexer.h" +#include "parser/cvc/generated/CvcParser.h" + +// extern void CvcParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class CvcInput : public AntlrInput { +public: + CvcInput(ExprManager* exprManager, const std::string& filename, bool useMmap); + CvcInput(ExprManager* exprManager, const std::string& input, const std::string& name); + ~CvcInput(); + +protected: + Command* doParseCommand() throw(ParserException); + Expr doParseExpr() throw(ParserException); + pANTLR3_LEXER getLexer(); + pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input); + pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream); + +private: + void init(); + pCvcLexer d_pCvcLexer; + pCvcParser d_pCvcParser; +}; // class CvcInput + +} // namespace parser + +} // namespace CVC4 + +#endif /* CVC_PARSER_H_ */ diff --git a/src/parser/cvc/cvc_lexer.g b/src/parser/cvc/cvc_lexer.g deleted file mode 100644 index b5bf90015..000000000 --- a/src/parser/cvc/cvc_lexer.g +++ /dev/null @@ -1,151 +0,0 @@ -/* ******************* */ -/* cvc_lexer.g - ** Original author: dejan - ** Major contributors: mdeters - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Lexer for CVC presentation language. - **/ - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrCvcLexer class is a stream tokenizer (lexer) for the CVC language. - */ -class AntlrCvcLexer extends Lexer; - -options { - exportVocab = CvcVocabulary; // Name of the shared token vocabulary - testLiterals = false; // Do not check for literals by default - defaultErrorHandler = false; // Skip the defaul error handling, just break with exceptions - k = 2; -} - -/* - * The tokens that might match with the identifiers go here. Otherwise define - * them below. - */ -tokens { - // Types - BOOLEAN = "BOOLEAN"; - TYPE = "TYPE"; - // Boolean oparators - AND = "AND"; - IF = "IF"; - THEN = "THEN"; - ELSE = "ELSE"; - ELSEIF = "ELSIF"; - ENDIF = "ENDIF"; - NOT = "NOT"; - OR = "OR"; - TRUE = "TRUE"; - FALSE = "FALSE"; - XOR = "XOR"; - // Commands - ASSERT = "ASSERT"; - QUERY = "QUERY"; - CHECKSAT = "CHECKSAT"; - PRINT = "PRINT"; - ECHO = "ECHO"; - - PUSH = "PUSH"; - POP = "POP"; - POPTO = "POPTO"; -} - -// Operators -COMMA : ','; -IMPLIES : "=>"; -IFF : "<=>"; -RIGHT_ARROW : "->"; -EQUAL : "="; - -/** - * Matches any letter ('a'-'z' and 'A'-'Z'). - */ -protected -ALPHA options { paraphrase = "a letter"; } - : 'a'..'z' - | 'A'..'Z' - ; - -/** - * Matches the digits (0-9) - */ -protected -DIGIT options { paraphrase = "a digit"; } - : '0'..'9' - ; - -/** - * Matches the ':' - */ -COLON options { paraphrase = "a colon"; } - : ':' - ; - -/** - * Matches the 'l' - */ -SEMICOLON options { paraphrase = "a semicolon"; } - : ';' - ; - -/** - * Matches an identifier from the input. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a letter. - */ -IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; } - : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -/** - * Matches the left bracket ('('). - */ -LPAREN options { paraphrase = "a left parenthesis '('"; } - : '('; - -/** - * Matches the right bracket ('('). - */ -RPAREN options { paraphrase = "a right parenthesis ')'"; } - : ')'; - -/** - * Matches and skips whitespace in the input and ignores it. - */ -WHITESPACE options { paraphrase = "whitespace"; } - : (' ' | '\t' | '\f') { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches and skips the newline symbols in the input. - */ -NEWLINE options { paraphrase = "a newline"; } - : ('\r' '\n' | '\r' | '\n') { $setType(antlr::Token::SKIP); newline(); } - ; - -/** - * Matches the comments and ignores them - */ -COMMENT options { paraphrase = "comment"; } - : '%' (~('\n' | '\r'))* { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches a numeral from the input (non-empty sequence of digits). - */ -NUMERAL options { paraphrase = "a numeral"; } - : (DIGIT)+ - ; diff --git a/src/parser/cvc/cvc_parser.g b/src/parser/cvc/cvc_parser.g deleted file mode 100644 index acf0394d0..000000000 --- a/src/parser/cvc/cvc_parser.g +++ /dev/null @@ -1,386 +0,0 @@ -/* ******************* */ -/* cvc_parser.g - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser for CVC presentation language. - **/ - -header "post_include_hpp" { -#include "parser/antlr_parser.h" -#include "expr/command.h" -#include "expr/type.h" -#include "util/output.h" -} - -header "post_include_cpp" { -#include - -using namespace std; -using namespace CVC4; -using namespace CVC4::parser; -} - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrCvcParser class is the parser for the files in CVC format. - */ -class AntlrCvcParser extends Parser("AntlrParser"); -options { - genHashLines = true; // Include line number information - importVocab = CvcVocabulary; // Import the common vocabulary - defaultErrorHandler = false; // Skip the defaul error handling, just break with exceptions - k = 2; -} - -/** - * Parses the next command. - * @return command or 0 if EOF - */ -parseCommand returns [CVC4::Command* cmd] - : cmd = command - ; - -/** - * Parses the next expression. - * @return the parsed expression (null expression if EOF) - */ -parseExpr returns [CVC4::Expr expr] - : expr = formula - | EOF - ; - -/** - * Matches a command of the input. If a declaration, it will return an empty - * command. - */ -command returns [CVC4::Command* cmd = 0] -{ - Expr f; - Debug("parser") << "command: " << LT(1)->getText() << endl; -} - : ASSERT f = formula SEMICOLON { cmd = new AssertCommand(f); } - | QUERY f = formula SEMICOLON { cmd = new QueryCommand(f); } - | CHECKSAT f = formula SEMICOLON { cmd = new CheckSatCommand(f); } - | CHECKSAT SEMICOLON { cmd = new CheckSatCommand(getTrueExpr()); } - | PUSH SEMICOLON { cmd = new PushCommand(); } - | POP SEMICOLON { cmd = new PopCommand(); } - | cmd = declaration - | EOF - ; - -/** - * Match a declaration - */ - -declaration returns [CVC4::DeclarationCommand* cmd] -{ - vector ids; - Type* t; - Debug("parser") << "declaration: " << LT(1)->getText() << endl; -} - : identifierList[ids, CHECK_UNDECLARED] COLON t = declType[ids] SEMICOLON - { cmd = new DeclarationCommand(ids,t); } - ; - -/** Match the right-hand side of a declaration. Returns the type. */ -declType[std::vector& idList] returns [CVC4::Type* t] -{ - Debug("parser") << "declType: " << LT(1)->getText() << endl; -} - : /* A sort declaration (e.g., "T : TYPE") */ - TYPE { newSorts(idList); t = kindType(); } - | /* A variable declaration */ - t = type { mkVars(idList,t); } - ; - -/** - * Match the type in a declaration and do the declaration binding. - * TODO: Actually parse sorts into Type objects. - */ -type returns [CVC4::Type* t] -{ - Type *t1, *t2; - Debug("parser") << "type: " << LT(1)->getText() << endl; -} - : /* Simple type */ - t = baseType - | /* Single-parameter function type */ - t1 = baseType RIGHT_ARROW t2 = baseType - { t = functionType(t1,t2); } - | /* Multi-parameter function type */ - LPAREN t1 = baseType - { std::vector argTypes; - argTypes.push_back(t1); } - (COMMA t1 = baseType { argTypes.push_back(t1); } )+ - RPAREN RIGHT_ARROW t2 = baseType - { t = functionType(argTypes,t2); } - ; - -/** - * Matches a list of identifiers separated by a comma and puts them in the - * given list. - * @param idList the list to fill with identifiers. - * @param check what kinds of check to perform on the symbols - */ -identifierList[std::vector& idList, - DeclarationCheck check = CHECK_NONE] -{ - string id; -} - : id = identifier[check] { idList.push_back(id); } - (COMMA id = identifier[check] { idList.push_back(id); })* - ; - - -/** - * Matches an identifier and returns a string. - */ -identifier[DeclarationCheck check = CHECK_NONE, - SymbolType type = SYM_VARIABLE] -returns [std::string id] - : x:IDENTIFIER - { id = x->getText(); - checkDeclaration(id, check, type); } - ; - -/** - * Matches a type. - * TODO: parse more types - */ -baseType returns [CVC4::Type* t] -{ - std::string id; - Debug("parser") << "base type: " << LT(1)->getText() << endl; -} - : BOOLEAN { t = booleanType(); } - | t = typeSymbol - ; - -/** - * Matches a type identifier - */ -typeSymbol returns [CVC4::Type* t] -{ - std::string id; - Debug("parser") << "type symbol: " << LT(1)->getText() << endl; -} - : id = identifier[CHECK_DECLARED,SYM_SORT] - { t = getSort(id); } - ; - -/** - * Matches a CVC4 formula. - * @return the expression representing the formula - */ -formula returns [CVC4::Expr formula] -{ - Debug("parser") << "formula: " << LT(1)->getText() << endl; -} - : formula = iffFormula - ; - -/** - * Matches a comma-separated list of formulas - */ -formulaList[std::vector& formulas] -{ - Expr f; -} - : f = formula { formulas.push_back(f); } - ( COMMA f = formula - { formulas.push_back(f); } - )* - ; - -/** - * Matches a Boolean IFF formula (right-associative) - */ -iffFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "<=> formula: " << LT(1)->getText() << endl; -} - : f = impliesFormula - ( IFF e = iffFormula - { f = mkExpr(CVC4::kind::IFF, f, e); } - )? - ; - -/** - * Matches a Boolean IMPLIES formula (right-associative) - */ -impliesFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "=> Formula: " << LT(1)->getText() << endl; -} - : f = orFormula - ( IMPLIES e = impliesFormula - { f = mkExpr(CVC4::kind::IMPLIES, f, e); } - )? - ; - -/** - * Matches a Boolean OR formula (left-associative) - */ -orFormula returns [CVC4::Expr f] -{ - Expr e; - vector exprs; - Debug("parser") << "OR Formula: " << LT(1)->getText() << endl; -} - : e = xorFormula { exprs.push_back(e); } - ( OR e = xorFormula { exprs.push_back(e); } )* - { - f = (exprs.size() > 1 ? mkExpr(CVC4::kind::OR, exprs) : exprs[0]); - } - ; - -/** - * Matches a Boolean XOR formula (left-associative) - */ -xorFormula returns [CVC4::Expr f] -{ - Expr e; - Debug("parser") << "XOR formula: " << LT(1)->getText() << endl; -} - : f = andFormula - ( XOR e = andFormula - { f = mkExpr(CVC4::kind::XOR,f, e); } - )* - ; - -/** - * Matches a Boolean AND formula (left-associative) - */ -andFormula returns [CVC4::Expr f] -{ - Expr e; - vector exprs; - Debug("parser") << "AND Formula: " << LT(1)->getText() << endl; -} - : e = notFormula { exprs.push_back(e); } - ( AND e = notFormula { exprs.push_back(e); } )* - { - f = (exprs.size() > 1 ? mkExpr(CVC4::kind::AND, exprs) : exprs[0]); - } - ; - -/** - * Parses a NOT formula. - * @return the expression representing the formula - */ -notFormula returns [CVC4::Expr f] -{ - Debug("parser") << "NOT formula: " << LT(1)->getText() << endl; -} - : /* negation */ - NOT f = notFormula - { f = mkExpr(CVC4::kind::NOT, f); } - | /* a boolean atom */ - f = predFormula - ; - -predFormula returns [CVC4::Expr f] -{ - Debug("parser") << "predicate formula: " << LT(1)->getText() << endl; -} - : { Expr e; } - f = term - (EQUAL e = term - { f = mkExpr(CVC4::kind::EQUAL, f, e); } - )? - ; // TODO: lt, gt, etc. - -/** - * Parses a term. - */ -term returns [CVC4::Expr t] -{ - std::string name; - Debug("parser") << "term: " << LT(1)->getText() << endl; -} - : /* function application */ - // { isFunction(LT(1)->getText()) }? - { Expr f; - std::vector args; } - f = functionSymbol[CHECK_DECLARED] - { args.push_back( f ); } - - LPAREN formulaList[args] RPAREN - // TODO: check arity - { t = mkExpr(CVC4::kind::APPLY_UF, args); } - - | /* if-then-else */ - t = iteTerm - - | /* parenthesized sub-formula */ - LPAREN t = formula RPAREN - - /* constants */ - | TRUE { t = getTrueExpr(); } - | FALSE { t = getFalseExpr(); } - - | /* variable */ - name = identifier[CHECK_DECLARED] - { t = getVariable(name); } - ; - -/** - * Parses an ITE term. - */ -iteTerm returns [CVC4::Expr t] -{ - Expr iteCondition, iteThen, iteElse; - Debug("parser") << "ite: " << LT(1)->getText() << endl; -} - : IF iteCondition = formula - THEN iteThen = formula - iteElse = iteElseTerm - ENDIF - { t = mkExpr(CVC4::kind::ITE, iteCondition, iteThen, iteElse); } - ; - -/** - * Parses the else part of the ITE, i.e. ELSE f, or ELSIF b THEN f1 ... - */ -iteElseTerm returns [CVC4::Expr t] -{ - Expr iteCondition, iteThen, iteElse; - Debug("parser") << "else: " << LT(1)->getText() << endl; -} - : ELSE t = formula - | ELSEIF iteCondition = formula - THEN iteThen = formula - iteElse = iteElseTerm - { t = mkExpr(CVC4::kind::ITE, iteCondition, iteThen, iteElse); } - ; - -/** - * Parses a function symbol (an identifier). - * @param what kind of check to perform on the id - * @return the predicate symol - */ -functionSymbol[DeclarationCheck check = CHECK_NONE] returns [CVC4::Expr f] -{ - Debug("parser") << "function symbol: " << LT(1)->getText() << endl; - std::string name; -} - : name = identifier[check,SYM_FUNCTION] - { checkFunction(name); - f = getFunction(name); } - ; diff --git a/src/parser/antlr_parser.cpp b/src/parser/input.cpp similarity index 50% rename from src/parser/antlr_parser.cpp rename to src/parser/input.cpp index 98fde0556..24ad93d05 100644 --- a/src/parser/antlr_parser.cpp +++ b/src/parser/input.cpp @@ -1,8 +1,8 @@ /********************* */ -/** antlr_parser.cpp - ** Original author: dejan - ** Major contributors: cconway - ** Minor contributors (to current version): mdeters +/** parser.cpp + ** Original author: mdeters + ** Major contributors: dejan + ** Minor contributors (to current version): cconway ** This file is part of the CVC4 prototype. ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences @@ -10,46 +10,153 @@ ** See the file COPYING in the top-level source directory for licensing ** information. ** - ** A super-class for ANTLR-generated input language parsers + ** Parser implementation. **/ -/* - * antlr_parser.cpp - * - * Created on: Nov 30, 2009 - * Author: dejan - */ - #include -#include +#include +#include -#include "antlr_parser.h" -#include "util/output.h" -#include "util/Assert.h" +#include "input.h" #include "expr/command.h" +#include "expr/expr.h" +#include "expr/kind.h" #include "expr/type.h" +#include "util/output.h" +#include "util/Assert.h" +#include "parser/parser_exception.h" +#include "parser/cvc/cvc_input.h" +#include "parser/smt/smt_input.h" using namespace std; -using namespace CVC4; -using namespace CVC4::parser; using namespace CVC4::kind; namespace CVC4 { namespace parser { -AntlrParser::AntlrParser(const antlr::ParserSharedInputState& state, int k) : - antlr::LLkParser(state, k), d_checksEnabled(true) { +void Input::setDone(bool done) { + d_done = done; } -AntlrParser::AntlrParser(antlr::TokenBuffer& tokenBuf, int k) : - antlr::LLkParser(tokenBuf, k), d_checksEnabled(true) { +bool Input::done() const { + return d_done; } -AntlrParser::AntlrParser(antlr::TokenStream& lexer, int k) : - antlr::LLkParser(lexer, k), d_checksEnabled(true) { +Command* Input::parseNextCommand() throw (ParserException) { + Debug("parser") << "parseNextCommand()" << std::endl; + Command* cmd = NULL; + if(!done()) { + try { + cmd = doParseCommand(); + if(cmd == NULL) { + setDone(); + } + } catch(ParserException& e) { + setDone(); + throw ParserException(e.toString()); + } + } + Debug("parser") << "parseNextCommand() => " << cmd << std::endl; + return cmd; +} + +Expr Input::parseNextExpression() throw (ParserException) { + Debug("parser") << "parseNextExpression()" << std::endl; + Expr result; + if(!done()) { + try { + result = doParseExpr(); + if(result.isNull()) + setDone(); + } catch(ParserException& e) { + setDone(); + throw ParserException(e.toString()); + } + } + Debug("parser") << "parseNextExpression() => " << result << std::endl; + return result; +} + +Input::~Input() { } -Expr AntlrParser::getSymbol(const std::string& name, SymbolType type) { +Input::Input(ExprManager* exprManager, const std::string& filename) : + d_exprManager(exprManager), + d_filename(filename), + d_done(false), + d_checksEnabled(true) { +} + +Input* Input::newFileParser(ExprManager* em, InputLanguage lang, + const std::string& filename, bool useMmap) { + + Input* parser = 0; + + switch(lang) { + case LANG_CVC4: + parser = new CvcInput(em,filename,useMmap); + break; + case LANG_SMTLIB: + parser = new SmtInput(em,filename,useMmap); + break; + + default: + Unhandled(lang); + } + + return parser; +} + +/* +Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, + istream& input, string filename) { + antlr::CharBuffer* inputBuffer = new CharBuffer(input); + return getNewParser(em, lang, inputBuffer, filename); +} +*/ + +/* +Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, + std::istream& input, const std::string& name) { + Parser* parser = 0; + + switch(lang) { + case LANG_CVC4: { + antlrLexer = new AntlrCvcLexer(*inputBuffer); + antlrParser = new AntlrCvcParser(*antlrLexer); + break; + } + case LANG_SMTLIB: + parser = new Smt(em,input,name); + break; + + default: + Unhandled("Unknown Input language!"); + } + return parser; +} +*/ + +Input* Input::newStringParser(ExprManager* em, InputLanguage lang, + const std::string& input, const std::string& name) { + Input* parser = 0; + + switch(lang) { + case LANG_CVC4: { + parser = new CvcInput(em,input,name); + break; + } + case LANG_SMTLIB: + parser = new SmtInput(em,input,name); + break; + + default: + Unhandled(lang); + } + return parser; +} + +Expr Input::getSymbol(const std::string& name, SymbolType type) { Assert( isDeclared(name, type) ); @@ -64,87 +171,92 @@ Expr AntlrParser::getSymbol(const std::string& name, SymbolType type) { } } -Expr AntlrParser::getVariable(const std::string& name) { + +Expr Input::getVariable(const std::string& name) { return getSymbol(name, SYM_VARIABLE); } -Expr AntlrParser::getFunction(const std::string& name) { +Expr Input::getFunction(const std::string& name) { return getSymbol(name, SYM_FUNCTION); } -Type* AntlrParser::getType(const std::string& var_name, - SymbolType type) { +Type* +Input::getType(const std::string& var_name, + SymbolType type) { Assert( isDeclared(var_name, type) ); - Type* t = getSymbol(var_name, type).getType(); + Type* t = getSymbol(var_name,type).getType(); return t; } -Type* AntlrParser::getSort(const std::string& name) { +Type* Input::getSort(const std::string& name) { Assert( isDeclared(name, SYM_SORT) ); Type* t = d_sortTable.getObject(name); return t; } /* Returns true if name is bound to a boolean variable. */ -bool AntlrParser::isBoolean(const std::string& name) { +bool Input::isBoolean(const std::string& name) { return isDeclared(name, SYM_VARIABLE) && getType(name)->isBoolean(); } /* Returns true if name is bound to a function. */ -bool AntlrParser::isFunction(const std::string& name) { +bool Input::isFunction(const std::string& name) { return isDeclared(name, SYM_FUNCTION) && getType(name)->isFunction(); } /* Returns true if name is bound to a function returning boolean. */ -bool AntlrParser::isPredicate(const std::string& name) { +bool Input::isPredicate(const std::string& name) { return isDeclared(name, SYM_FUNCTION) && getType(name)->isPredicate(); } -Expr AntlrParser::getTrueExpr() const { +Expr Input::getTrueExpr() const { return d_exprManager->mkExpr(TRUE); } -Expr AntlrParser::getFalseExpr() const { +Expr Input::getFalseExpr() const { return d_exprManager->mkExpr(FALSE); } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child) { +Expr Input::mkExpr(Kind kind, const Expr& child) { Expr result = d_exprManager->mkExpr(kind, child); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2) { +Expr Input::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2) { Expr result = d_exprManager->mkExpr(kind, child_1, child_2); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, +Expr Input::mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, const Expr& child_3) { Expr result = d_exprManager->mkExpr(kind, child_1, child_2, child_3); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Expr AntlrParser::mkExpr(Kind kind, const std::vector& children) { +Expr Input::mkExpr(Kind kind, const std::vector& children) { Expr result = d_exprManager->mkExpr(kind, children); Debug("parser") << "mkExpr() => " << result << std::endl; return result; } -Type* AntlrParser::functionType(Type* domainType, - Type* rangeType) { - return d_exprManager->mkFunctionType(domainType, rangeType); +Type* +Input::functionType(Type* domainType, + Type* rangeType) { + return d_exprManager->mkFunctionType(domainType,rangeType); } -Type* AntlrParser::functionType(const std::vector& argTypes, - Type* rangeType) { +Type* +Input::functionType(const std::vector& argTypes, + Type* rangeType) { Assert( argTypes.size() > 0 ); - return d_exprManager->mkFunctionType(argTypes, rangeType); + return d_exprManager->mkFunctionType(argTypes,rangeType); } -Type* AntlrParser::functionType(const std::vector& sorts) { +Type* +Input::functionType(const std::vector& sorts) { Assert( sorts.size() > 0 ); if( sorts.size() == 1 ) { return sorts[0]; @@ -152,11 +264,11 @@ Type* AntlrParser::functionType(const std::vector& sorts) { std::vector argTypes(sorts); Type* rangeType = argTypes.back(); argTypes.pop_back(); - return functionType(argTypes, rangeType); + return functionType(argTypes,rangeType); } } -Type* AntlrParser::predicateType(const std::vector& sorts) { +Type* Input::predicateType(const std::vector& sorts) { if(sorts.size() == 0) { return d_exprManager->booleanType(); } else { @@ -164,15 +276,16 @@ Type* AntlrParser::predicateType(const std::vector& sorts) { } } -Expr AntlrParser::mkVar(const std::string& name, Type* type) { +Expr +Input::mkVar(const std::string& name, Type* type) { Debug("parser") << "mkVar(" << name << "," << *type << ")" << std::endl; Expr expr = d_exprManager->mkVar(type, name); - defineVar(name, expr); + defineVar(name,expr); return expr; } const std::vector -AntlrParser::mkVars(const std::vector names, +Input::mkVars(const std::vector names, Type* type) { std::vector vars; for(unsigned i = 0; i < names.size(); ++i) { @@ -181,30 +294,41 @@ AntlrParser::mkVars(const std::vector names, return vars; } -void AntlrParser::defineVar(const std::string& name, const Expr& val) { +void +Input::defineVar(const std::string& name, const Expr& val) { Assert(!isDeclared(name)); - d_varSymbolTable.bindName(name, val); + d_varSymbolTable.bindName(name,val); Assert(isDeclared(name)); } -void AntlrParser::undefineVar(const std::string& name) { +void +Input::undefineVar(const std::string& name) { Assert(isDeclared(name)); d_varSymbolTable.unbindName(name); Assert(!isDeclared(name)); } +void +Input::setLogic(const std::string& name) { + if( name == "QF_UF" ) { + newSort("U"); + } else { + Unhandled(name); + } +} -Type* AntlrParser::newSort(const std::string& name) { +Type* +Input::newSort(const std::string& name) { Debug("parser") << "newSort(" << name << ")" << std::endl; - Assert( !isDeclared(name, SYM_SORT) ); + Assert( !isDeclared(name, SYM_SORT) ) ; Type* type = d_exprManager->mkSort(name); d_sortTable.bindName(name, type); - Assert( isDeclared(name, SYM_SORT) ); + Assert( isDeclared(name, SYM_SORT) ) ; return type; } const std::vector -AntlrParser::newSorts(const std::vector& names) { +Input::newSorts(const std::vector& names) { std::vector types; for(unsigned i = 0; i < names.size(); ++i) { types.push_back(newSort(names[i])); @@ -212,23 +336,15 @@ AntlrParser::newSorts(const std::vector& names) { return types; } -void AntlrParser::setLogic(const std::string& name) { - if( name == "QF_UF" ) { - newSort("U"); - } else { - Unhandled(name); - } -} - -BooleanType* AntlrParser::booleanType() { +BooleanType* Input::booleanType() { return d_exprManager->booleanType(); } -KindType* AntlrParser::kindType() { +KindType* Input::kindType() { return d_exprManager->kindType(); } -unsigned int AntlrParser::minArity(Kind kind) { +unsigned int Input::minArity(Kind kind) { switch(kind) { case FALSE: case SKOLEM: @@ -242,6 +358,7 @@ unsigned int AntlrParser::minArity(Kind kind) { return 1; case APPLY_UF: + case DISTINCT: case EQUAL: case IFF: case IMPLIES: @@ -257,7 +374,7 @@ unsigned int AntlrParser::minArity(Kind kind) { } } -unsigned int AntlrParser::maxArity(Kind kind) { +unsigned int Input::maxArity(Kind kind) { switch(kind) { case FALSE: case SKOLEM: @@ -279,6 +396,7 @@ unsigned int AntlrParser::maxArity(Kind kind) { case AND: case APPLY_UF: + case DISTINCT: case PLUS: case OR: return UINT_MAX; @@ -288,11 +406,7 @@ unsigned int AntlrParser::maxArity(Kind kind) { } } -void AntlrParser::setExpressionManager(ExprManager* em) { - d_exprManager = em; -} - -bool AntlrParser::isDeclared(const std::string& name, SymbolType type) { +bool Input::isDeclared(const std::string& name, SymbolType type) { switch(type) { case SYM_VARIABLE: // Functions share var namespace case SYM_FUNCTION: @@ -304,17 +418,10 @@ bool AntlrParser::isDeclared(const std::string& name, SymbolType type) { } } -void AntlrParser::parseError(const std::string& message) - throw (antlr::SemanticException) { - throw antlr::SemanticException(message, getFilename(), - LT(1).get()->getLine(), - LT(1).get()->getColumn()); -} - -void AntlrParser::checkDeclaration(const std::string& varName, +void Input::checkDeclaration(const std::string& varName, DeclarationCheck check, SymbolType type) - throw (antlr::SemanticException) { + throw (ParserException) { if(!d_checksEnabled) { return; } @@ -340,15 +447,15 @@ void AntlrParser::checkDeclaration(const std::string& varName, } } -void AntlrParser::checkFunction(const std::string& name) - throw (antlr::SemanticException) { +void Input::checkFunction(const std::string& name) + throw (ParserException) { if( d_checksEnabled && !isFunction(name) ) { parseError("Expecting function symbol, found '" + name + "'"); } } -void AntlrParser::checkArity(Kind kind, unsigned int numArgs) - throw (antlr::SemanticException) { +void Input::checkArity(Kind kind, unsigned int numArgs) + throw (ParserException) { if(!d_checksEnabled) { return; } @@ -370,11 +477,11 @@ void AntlrParser::checkArity(Kind kind, unsigned int numArgs) } } -void AntlrParser::enableChecks() { +void Input::enableChecks() { d_checksEnabled = true; } -void AntlrParser::disableChecks() { +void Input::disableChecks() { d_checksEnabled = false; } diff --git a/src/parser/input.h b/src/parser/input.h new file mode 100644 index 000000000..68db0f5dd --- /dev/null +++ b/src/parser/input.h @@ -0,0 +1,394 @@ +/********************* */ +/** parser.h + ** Original author: mdeters + ** Major contributors: dejan + ** Minor contributors (to current version): cconway + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser abstraction. + **/ + +#ifndef __CVC4__PARSER__PARSER_H +#define __CVC4__PARSER__PARSER_H + +#include +#include + +#include "cvc4_config.h" +#include "expr/expr.h" +#include "expr/kind.h" +#include "parser/parser_exception.h" +#include "parser/parser_options.h" +#include "parser/symbol_table.h" +#include "util/Assert.h" + +namespace CVC4 { + +// Forward declarations +class BooleanType; +class ExprManager; +class Command; +class FunctionType; +class KindType; +class Type; + +namespace parser { + +/** Types of check for the symols */ +enum DeclarationCheck { + /** Enforce that the symbol has been declared */ + CHECK_DECLARED, + /** Enforce that the symbol has not been declared */ + CHECK_UNDECLARED, + /** Don't check anything */ + CHECK_NONE +}; + +/** Returns a string representation of the given object (for + debugging). */ +inline std::string toString(DeclarationCheck check) { + switch(check) { + case CHECK_NONE: return "CHECK_NONE"; + case CHECK_DECLARED: return "CHECK_DECLARED"; + case CHECK_UNDECLARED: return "CHECK_UNDECLARED"; + default: Unreachable(); + } +} + +/** + * Types of symbols. Used to define namespaces. + */ +enum SymbolType { + /** Variables */ + SYM_VARIABLE, + /** Functions */ + SYM_FUNCTION, + /** Sorts */ + SYM_SORT +}; + +/** Returns a string representation of the given object (for + debugging). */ +inline std::string toString(SymbolType type) { + switch(type) { + case SYM_VARIABLE: return "SYM_VARIABLE"; + case SYM_FUNCTION: return "SYM_FUNCTION"; + case SYM_SORT: return "SYM_SORT"; + default: Unreachable(); + } +} + +/** + * The parser. The parser should be obtained by calling the static methods + * getNewParser, and should be deleted when done. + * + * This class includes convenience methods for interacting with the ExprManager + * from within a grammar. + */ +class CVC4_PUBLIC Input { + +public: + /** Create a parser for the given file. + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param filename the input filename + */ + static Input* newFileParser(ExprManager* exprManager, InputLanguage lang, const std::string& filename, bool useMmap=false); + + /** Create a parser for the given input stream. + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param input the input stream + * @param name the name of the stream, for use in error messages + */ + //static Parser* getNewParser(ExprManager* exprManager, InputLanguage lang, std::istream& input, const std::string& name); + + /** Create a parser for the given input string + * + * @param exprManager the ExprManager for creating expressions from the input + * @param lang the input language + * @param input the input string + * @param name the name of the stream, for use in error messages + */ + static Input* newStringParser(ExprManager* exprManager, InputLanguage lang, const std::string& input, const std::string& name); + + /** + * Destructor. + */ + ~Input(); + + /** + * Parse the next command of the input. If EOF is encountered a EmptyCommand + * is returned and done flag is set. + */ + Command* parseNextCommand() throw(ParserException); + + /** + * Parse the next expression of the stream. If EOF is encountered a null + * expression is returned and done flag is set. + * @return the parsed expression + */ + Expr parseNextExpression() throw(ParserException); + + /** + * Check if we are done -- either the end of input has been reached, or some + * error has been encountered. + * @return true if parser is done + */ + bool done() const; + + /** Enable semantic checks during parsing. */ + void enableChecks(); + + /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ + void disableChecks(); + + /** Get the name of the input file. */ + inline const std::string getFilename() { + return d_filename; + } + + /** + * Sets the logic for the current benchmark. Declares any logic symbols. + * + * @param name the name of the logic (e.g., QF_UF, AUFLIA) + */ + void setLogic(const std::string& name); + + /** + * Returns a variable, given a name and a type. + * @param var_name the name of the variable + * @return the variable expression + */ + Expr getVariable(const std::string& var_name); + + /** + * Returns a function, given a name and a type. + * @param name the name of the function + * @return the function expression + */ + Expr getFunction(const std::string& name); + + /** + * Returns a sort, given a name + */ + Type* getSort(const std::string& sort_name); + + /** + * Checks if a symbol has been declared. + * @param name the symbol name + * @param type the symbol type + * @return true iff the symbol has been declared with the given type + */ + bool isDeclared(const std::string& name, SymbolType type = SYM_VARIABLE); + + /** + * Checks if the declaration policy we want to enforce holds + * for the given symbol. + * @param name the symbol to check + * @param check the kind of check to perform + * @param type the type of the symbol + * @throws ParserException if checks are enabled and the check fails + */ + void checkDeclaration(const std::string& name, + DeclarationCheck check, + SymbolType type = SYM_VARIABLE) + throw (ParserException); + + /** + * Checks whether the given name is bound to a function. + * @param name the name to check + * @throws ParserException if checks are enabled and name is not bound to a function + */ + void checkFunction(const std::string& name) throw (ParserException); + + /** + * Check that kind can accept numArgs arguments. + * @param kind the built-in operator to check + * @param numArgs the number of actual arguments + * @throws ParserException if checks are enabled and the operator kind cannot be + * applied to numArgs arguments. + */ + void checkArity(Kind kind, unsigned int numArgs) throw (ParserException); + + /** + * Returns the type for the variable with the given name. + * @param name the symbol to lookup + * @param type the (namespace) type of the symbol + */ + Type* getType(const std::string& var_name, + SymbolType type = SYM_VARIABLE); + + /** + * Returns the true expression. + * @return the true expression + */ + Expr getTrueExpr() const; + + /** + * Returns the false expression. + * @return the false expression + */ + Expr getFalseExpr() const; + + /** + * Creates a new unary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child the child + * @return the new unary expression + */ + Expr mkExpr(Kind kind, const Expr& child); + + /** + * Creates a new binary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child1 the first child + * @param child2 the second child + * @return the new binary expression + */ + Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2); + + /** + * Creates a new ternary CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param child_1 the first child + * @param child_2 the second child + * @param child_3 + * @return the new ternary expression + */ + Expr mkExpr(Kind kind, const Expr& child_1, const Expr& child_2, + const Expr& child_3); + + /** + * Creates a new CVC4 expression using the expression manager. + * @param kind the kind of the expression + * @param children the children of the expression + */ + Expr mkExpr(Kind kind, const std::vector& children); + + /** Create a new CVC4 variable expression of the given type. */ + Expr mkVar(const std::string& name, Type* type); + + /** Create a set of new CVC4 variable expressions of the given + type. */ + const std::vector + mkVars(const std::vector names, + Type* type); + + + /** Create a new variable definition (e.g., from a let binding). */ + void defineVar(const std::string& name, const Expr& val); + /** Remove a variable definition (e.g., from a let binding). */ + void undefineVar(const std::string& name); + + /** Returns a function type over the given domain and range types. */ + Type* functionType(Type* domain, Type* range); + + /** Returns a function type over the given types. argTypes must be + non-empty. */ + Type* functionType(const std::vector& argTypes, + Type* rangeType); + + /** + * Returns a function type over the given types. If types has only + * one element, then the type is just types[0]. + * + * @param types a non-empty list of input and output types. + */ + Type* functionType(const std::vector& types); + + /** + * Returns a predicate type over the given sorts. If sorts is empty, + * then the type is just BOOLEAN. + + * @param sorts a list of input types + */ + Type* predicateType(const std::vector& sorts); + + /** + * Creates a new sort with the given name. + */ + Type* newSort(const std::string& name); + + /** + * Creates a new sorts with the given names. + */ + const std::vector + newSorts(const std::vector& names); + + /** Is the symbol bound to a boolean variable? */ + bool isBoolean(const std::string& name); + + /** Is the symbol bound to a function? */ + bool isFunction(const std::string& name); + + /** Is the symbol bound to a predicate? */ + bool isPredicate(const std::string& name); + + /** Returns the boolean type. */ + BooleanType* booleanType(); + + /** Returns the kind type (i.e., the type of types). */ + KindType* kindType(); + + /** Returns the minimum arity of the given kind. */ + static unsigned int minArity(Kind kind); + + /** Returns the maximum arity of the given kind. */ + static unsigned int maxArity(Kind kind); + + virtual void parseError(const std::string& msg) throw (ParserException) = 0; + +protected: + virtual Command* doParseCommand() throw(ParserException) = 0; + virtual Expr doParseExpr() throw(ParserException) = 0; + + /** + * Create a new parser for the given file. + * @param exprManager the ExprManager to use + * @param filename the path of the file to parse + */ + Input(ExprManager* exprManager, const std::string& filename); + +private: + + /** Sets the done flag */ + void setDone(bool done = true); + + /** Lookup a symbol in the given namespace. */ + Expr getSymbol(const std::string& var_name, SymbolType type); + + /** Whether to de-allocate the input */ +// bool d_deleteInput; + + /** The expression manager */ + ExprManager* d_exprManager; + + /** The symbol table lookup */ + SymbolTable d_varSymbolTable; + + /** The sort table */ + SymbolTable d_sortTable; + + /** The name of the input file. */ + std::string d_filename; + + /** Are we done */ + bool d_done; + + /** Are semantic checks enabled during parsing? */ + bool d_checksEnabled; + +}; // end of class Parser + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__PARSER_H */ diff --git a/src/parser/memory_mapped_input_buffer.cpp b/src/parser/memory_mapped_input_buffer.cpp new file mode 100644 index 000000000..6618ecebc --- /dev/null +++ b/src/parser/memory_mapped_input_buffer.cpp @@ -0,0 +1,106 @@ +/* + * memory_mapped_input_buffer.cpp + * + * Created on: Mar 3, 2010 + * Author: chris + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "memory_mapped_input_buffer.h" +#include "util/Assert.h" +#include "util/exception.h" + +namespace CVC4 { +namespace parser { + +extern "C" { + +static ANTLR3_UINT32 +MemoryMapFile(pANTLR3_INPUT_STREAM input, const std::string& filename); + +void +UnmapFile(pANTLR3_INPUT_STREAM input); + +pANTLR3_INPUT_STREAM MemoryMappedInputBufferNew(const std::string& filename) { + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 status; + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if(input == NULL) { + return NULL; + } + + // Structure was allocated correctly, now we can read the file. + // + status = MemoryMapFile(input, filename); + + // Call the common 8 bit ASCII input stream handler + // Initializer type thingy doobry function. + // + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); + + // Now we can set up the file name + // + input->istream->streamName + = input->strFactory->newStr(input->strFactory, + (uint8_t*) filename.c_str()); + input->fileName = input->istream->streamName; + input->free = UnmapFile; + + if(status != ANTLR3_SUCCESS) { + input->close(input); + return NULL; + } + + return input; +} + +static ANTLR3_UINT32 MemoryMapFile(pANTLR3_INPUT_STREAM input, + const std::string& filename) { + errno = 0; + struct stat st; + if(stat(filename.c_str(), &st) == -1) { + return ANTLR3_ERR_NOFILE; + } + + input->sizeBuf = st.st_size; + + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + return ANTLR3_ERR_NOFILE; + } + + input->data = mmap(0, input->sizeBuf, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, + 0); + errno = 0; + if(intptr_t(input->data) == -1) { + return ANTLR3_ERR_NOMEM; + } + + return ANTLR3_SUCCESS; +} + +/* This is a bit shady. antlr3AsciiSetupStream has free and close as aliases. + * We need to unmap the file somewhere, so we install this function as free and + * call the default version of close to de-allocate everything else. */ +void UnmapFile(pANTLR3_INPUT_STREAM input) { + munmap((void*) input->data, input->sizeBuf); + input->close(input); +} + +} +} +} diff --git a/src/parser/memory_mapped_input_buffer.h b/src/parser/memory_mapped_input_buffer.h index e1639a072..88ba2183a 100644 --- a/src/parser/memory_mapped_input_buffer.h +++ b/src/parser/memory_mapped_input_buffer.h @@ -14,69 +14,21 @@ #ifndef __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H #define __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include "util/Assert.h" -#include "util/exception.h" +#include +#include namespace CVC4 { namespace parser { -class MemoryMappedInputBuffer : public antlr::InputBuffer { - -public: - MemoryMappedInputBuffer(const std::string& filename) { - struct stat st; - if( stat(filename.c_str(), &st) == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to stat() file `" + filename + "': " + errMsg); - } - d_size = st.st_size; - - int fd = open(filename.c_str(), O_RDONLY); - if( fd == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to fopen() file `" + filename + "': " + errMsg); - } - - d_start = static_cast< const char * >( - mmap( 0, d_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0 ) ); - if( intptr_t( d_start ) == -1 ) { - char buf[80]; - const char* errMsg = strerror_r(errno, buf, sizeof(buf)); - throw Exception("unable to mmap() file `" + filename + "': " + errMsg); - } - d_cur = d_start; - d_end = d_start + d_size; - } +extern "C" { - ~MemoryMappedInputBuffer() { - munmap((void*) d_start, d_size); - } +pANTLR3_INPUT_STREAM +MemoryMappedInputBufferNew(const std::string& filename); - inline int getChar() { - Assert( d_cur >= d_start && d_cur <= d_end ); - return d_cur == d_end ? EOF : *d_cur++; - } +} -private: - unsigned long int d_size; - const char *d_start, *d_end, *d_cur; -}; +} +} -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ #endif /* __CVC4__PARSER__MEMORY_MAPPED_INPUT_BUFFER_H */ diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp deleted file mode 100644 index a129d97ee..000000000 --- a/src/parser/parser.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/********************* */ -/** parser.cpp - ** Original author: mdeters - ** Major contributors: dejan - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser implementation. - **/ - -#include -#include -#include -#include - -#include "parser/parser.h" -#include "parser/memory_mapped_input_buffer.h" -#include "expr/command.h" -#include "util/output.h" -#include "util/Assert.h" -#include "parser/parser_exception.h" -#include "parser/antlr_parser.h" -#include "parser/smt/generated/AntlrSmtParser.hpp" -#include "parser/smt/generated/AntlrSmtLexer.hpp" -#include "parser/cvc/generated/AntlrCvcParser.hpp" -#include "parser/cvc/generated/AntlrCvcLexer.hpp" - -using namespace std; -using namespace antlr; - -namespace CVC4 { -namespace parser { - -void Parser::setDone(bool done) { - d_done = done; -} - -bool Parser::done() const { - return d_done; -} - -Command* Parser::parseNextCommand() throw (ParserException, AssertionException) { - Debug("parser") << "parseNextCommand()" << std::endl; - Command* cmd = NULL; - if(!done()) { - try { - cmd = d_antlrParser->parseCommand(); - if(cmd == NULL) { - setDone(); - } - } catch(antlr::ANTLRException& e) { - setDone(); - throw ParserException(e.toString()); - } - } - Debug("parser") << "parseNextCommand() => " << cmd << std::endl; - return cmd; -} - -Expr Parser::parseNextExpression() throw (ParserException, AssertionException) { - Debug("parser") << "parseNextExpression()" << std::endl; - Expr result; - if(!done()) { - try { - result = d_antlrParser->parseExpr(); - if(result.isNull()) - setDone(); - } catch(antlr::ANTLRException& e) { - setDone(); - throw ParserException(e.toString()); - } - } - Debug("parser") << "parseNextExpression() => " << result << std::endl; - return result; -} - -Parser::~Parser() { - delete d_antlrParser; - delete d_antlrLexer; - delete d_inputBuffer; -} - -Parser::Parser(InputBuffer* inputBuffer, AntlrParser* antlrParser, - CharScanner* antlrLexer) : - d_done(false), - d_antlrParser(antlrParser), - d_antlrLexer(antlrLexer), - d_inputBuffer(inputBuffer) { -} - -Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, - InputBuffer* inputBuffer, string filename) { - - AntlrParser* antlrParser = 0; - antlr::CharScanner* antlrLexer = 0; - - switch(lang) { - case LANG_CVC4: { - antlrLexer = new AntlrCvcLexer(*inputBuffer); - antlrParser = new AntlrCvcParser(*antlrLexer); - break; - } - case LANG_SMTLIB: { -// MemoryMappedInputBuffer inputBuffer(filename); -// antlrLexer = new AntlrSmtLexer(inputBuffer); - antlrLexer = new AntlrSmtLexer(*inputBuffer); - antlrParser = new AntlrSmtParser(*antlrLexer); - break; - } - default: - Unhandled("Unknown Input language!"); - } - - antlrLexer->setFilename(filename); - antlrParser->setFilename(filename); - antlrParser->setExpressionManager(em); - - return new Parser(inputBuffer, antlrParser, antlrLexer); -} - -Parser* Parser::getMemoryMappedParser(ExprManager* em, InputLanguage lang, string filename) { - MemoryMappedInputBuffer* inputBuffer = new MemoryMappedInputBuffer(filename); - return getNewParser(em, lang, inputBuffer, filename); -} - -Parser* Parser::getNewParser(ExprManager* em, InputLanguage lang, - istream& input, string filename) { - antlr::CharBuffer* inputBuffer = new CharBuffer(input); - return getNewParser(em, lang, inputBuffer, filename); -} - -void Parser::disableChecks() { - d_antlrParser->disableChecks(); -} - -void Parser::enableChecks() { - d_antlrParser->enableChecks(); -} - - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ diff --git a/src/parser/parser.h b/src/parser/parser.h deleted file mode 100644 index d482b7907..000000000 --- a/src/parser/parser.h +++ /dev/null @@ -1,135 +0,0 @@ -/********************* */ -/** parser.h - ** Original author: mdeters - ** Major contributors: dejan - ** Minor contributors (to current version): cconway - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser abstraction. - **/ - -#ifndef __CVC4__PARSER__PARSER_H -#define __CVC4__PARSER__PARSER_H - -#include -#include - -#include "cvc4_config.h" -#include "parser/parser_exception.h" -#include "util/Assert.h" - -namespace antlr { - class CharScanner; - class InputBuffer; -} - -namespace CVC4 { - -// Forward declarations -class Expr; -class Command; -class ExprManager; - -namespace parser { - -class AntlrParser; - -/** - * The parser. The parser should be obtained by calling the static methods - * getNewParser, and should be deleted when done. - */ -class CVC4_PUBLIC Parser { - -public: - - /** The input language option */ - enum InputLanguage { - /** The SMTLIB input language */ - LANG_SMTLIB, - /** The CVC4 input language */ - LANG_CVC4, - /** Auto-detect the language */ - LANG_AUTO - }; - - static Parser* getMemoryMappedParser(ExprManager* em, InputLanguage lang, std::string filename); - static Parser* getNewParser(ExprManager* em, InputLanguage lang, std::istream& input, std::string filename); - - /** - * Destructor. - */ - ~Parser(); - - /** - * Parse the next command of the input. If EOF is encountered a EmptyCommand - * is returned and done flag is set. - */ - Command* parseNextCommand() throw(ParserException, AssertionException); - - /** - * Parse the next expression of the stream. If EOF is encountered a null - * expression is returned and done flag is set. - * @return the parsed expression - */ - Expr parseNextExpression() throw(ParserException, AssertionException); - - /** - * Check if we are done -- either the end of input has been reached, or some - * error has been encountered. - * @return true if parser is done - */ - bool done() const; - - /** Enable semantic checks during parsing. */ - void enableChecks(); - - /** Disable semantic checks during parsing. Disabling checks may lead to crashes on bad inputs. */ - void disableChecks(); - -private: - - /** - * Create a new parser. - * @param em the expression manager to usee - * @param lang the language to parse - * @param inputBuffer the input buffer to parse - * @param filename the filename to attach to the stream - * @param deleteInput wheather to delete the input - * @return the parser - */ - static Parser* getNewParser(ExprManager* em, InputLanguage lang, antlr::InputBuffer* inputBuffer, std::string filename); - - /** - * Create a new parser given the actual antlr parser. - * @param antlrParser the antlr parser to user - */ - Parser(antlr::InputBuffer* inputBuffer, AntlrParser* antlrParser, antlr::CharScanner* antlrLexer); - - /** Sets the done flag */ - void setDone(bool done = true); - - /** Are we done */ - bool d_done; - - /** The antlr parser */ - AntlrParser* d_antlrParser; - - /** The entlr lexer */ - antlr::CharScanner* d_antlrLexer; - - /** The input stream we are using */ - antlr::InputBuffer* d_inputBuffer; - - /** Wherther to de-allocate the input */ - bool d_deleteInput; -}; // end of class Parser - -}/* CVC4::parser namespace */ -}/* CVC4 namespace */ - -#endif /* __CVC4__PARSER__PARSER_H */ diff --git a/src/parser/parser_exception.h b/src/parser/parser_exception.h index f0ddc6a7f..ee02289ee 100644 --- a/src/parser/parser_exception.h +++ b/src/parser/parser_exception.h @@ -18,8 +18,9 @@ #include "util/exception.h" #include "cvc4_config.h" -#include #include +#include +#include namespace CVC4 { namespace parser { @@ -30,11 +31,43 @@ public: ParserException() { } ParserException(const std::string& msg): Exception(msg) { } ParserException(const char* msg): Exception(msg) { } + ParserException(const std::string& msg, const std::string& filename, + unsigned long line, unsigned long column) : + Exception(msg), + d_filename(filename), + d_line(line), + d_column(column) { + } + // Destructor virtual ~ParserException() throw() {} virtual std::string toString() const { - return "Parse Error: " + d_msg; + if( d_line > 0 ) { + std::stringstream ss; + ss << "Parser Error: " << d_filename << ":" << d_line << "." + << d_column << ": " << d_msg; + return ss.str(); + } else { + return "Parse Error: " + d_msg; + } + } + + std::string getFilename() const { + return d_filename; } + + int getLine() const { + return d_line; + } + + int getColumn() const { + return d_column; + } + +protected: + std::string d_filename; + unsigned long d_line; + unsigned long d_column; }; // end of class ParserException }/* CVC4::parser namespace */ diff --git a/src/parser/parser_options.h b/src/parser/parser_options.h new file mode 100644 index 000000000..ddba219a4 --- /dev/null +++ b/src/parser/parser_options.h @@ -0,0 +1,27 @@ +/* + * parser_options.h + * + * Created on: Mar 3, 2010 + * Author: chris + */ + +#ifndef CVC4__PARSER__PARSER_OPTIONS_H_ +#define CVC4__PARSER__PARSER_OPTIONS_H_ + +namespace CVC4 { +namespace parser { + + /** The input language option */ + enum InputLanguage { + /** The SMTLIB input language */ + LANG_SMTLIB, + /** The CVC4 input language */ + LANG_CVC4, + /** Auto-detect the language */ + LANG_AUTO + }; + +} +} + +#endif /* CVC4__PARSER__PARSER_OPTIONS_H_ */ diff --git a/src/parser/smt/.gitignore b/src/parser/smt/.gitignore new file mode 100644 index 000000000..7fd0cf319 --- /dev/null +++ b/src/parser/smt/.gitignore @@ -0,0 +1,4 @@ +/.deps +/stamp-generated +/generated +/Makefile.in diff --git a/src/parser/smt/Makefile.am b/src/parser/smt/Makefile.am index 7fe235002..3ea6dc940 100644 --- a/src/parser/smt/Makefile.am +++ b/src/parser/smt/Makefile.am @@ -1,29 +1,29 @@ AM_CPPFLAGS = \ -D__BUILDING_CVC4PARSERLIB \ - -I@srcdir@/../../include -I@srcdir@/../.. + -I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -fvisibility=hidden +# Compile generated C files using C++ compiler +CC=$(CXX) noinst_LTLIBRARIES = libparsersmt.la ANTLR_TOKEN_STUFF = \ - @srcdir@/generated/SmtVocabularyTokenTypes.hpp \ - @srcdir@/generated/SmtVocabularyTokenTypes.txt \ - @srcdir@/generated/AntlrSmtParserTokenTypes.hpp \ - @srcdir@/generated/AntlrSmtParserTokenTypes.txt + @srcdir@/generated/Smt.tokens ANTLR_LEXER_STUFF = \ - @srcdir@/generated/AntlrSmtLexer.hpp \ - @srcdir@/generated/AntlrSmtLexer.cpp \ + @srcdir@/generated/SmtLexer.h \ + @srcdir@/generated/SmtLexer.c \ $(ANTLR_TOKEN_STUFF) ANTLR_PARSER_STUFF = \ - @srcdir@/generated/AntlrSmtParser.hpp \ - @srcdir@/generated/AntlrSmtParser.cpp + @srcdir@/generated/SmtParser.h \ + @srcdir@/generated/SmtParser.c ANTLR_STUFF = \ $(ANTLR_LEXER_STUFF) \ $(ANTLR_PARSER_STUFF) libparsersmt_la_SOURCES = \ - smt_lexer.g \ - smt_parser.g \ + Smt.g \ + smt_input.h \ + smt_input.cpp \ $(ANTLR_STUFF) BUILT_SOURCES = $(ANTLR_STUFF) @@ -36,16 +36,14 @@ maintainer-clean-local: @srcdir@/stamp-generated: mkdir -p @srcdir@/generated touch @srcdir@/stamp-generated + # antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. -@srcdir@/generated/AntlrSmtLexer.hpp: smt_lexer.g @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_lexer.g" -@srcdir@/generated/AntlrSmtLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrSmtLexer.hpp -# doesn't actually depend on the lexer, but if we're doing parallel +@srcdir@/generated/SmtLexer.h: Smt.g @srcdir@/stamp-generated + -rm -f $(ANTLR_STUFF) + $(ANTLR) -o "@srcdir@/generated" "@srcdir@/Smt.g" + +# These don't actually depend on SmtLexer.h, but if we're doing parallel # make and the lexer needs to be rebuilt, we have to keep the rules # from running in parallel (since the token files will be deleted & # recreated) -@srcdir@/generated/AntlrSmtParser.hpp: smt_parser.g smt_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated - $(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF) - $(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_parser.g" -@srcdir@/generated/AntlrSmtParser.cpp: @srcdir@/generated/AntlrSmtParser.hpp +@srcdir@/generated/SmtLexer.c @srcdir@/generated/SmtParser.h @srcdir@/generated/SmtParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/SmtLexer.h diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g new file mode 100644 index 000000000..7095c7269 --- /dev/null +++ b/src/parser/smt/Smt.g @@ -0,0 +1,552 @@ +/* ******************* */ +/* Smt.g + ** Original author: cconway + ** Major contributors: dejan, mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + ** + ** Parser for SMT-LIB input language. + **/ + +grammar Smt; + +options { + language = 'C'; // C output for antlr +// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + k = 2; +} + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +} + +@lexer::includes { +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII +} + +@parser::includes { +#include "expr/command.h" +#include "parser/input.h" + +namespace CVC4 { + class Expr; +namespace parser { + class SmtInput; +} +} + +extern +void SetSmtInput(CVC4::parser::SmtInput* smt); + +} + +@parser::postinclude { +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/input.h" +#include "parser/smt/smt_input.h" +#include "util/output.h" +#include + +using namespace CVC4; +using namespace CVC4::parser; +} + +@members { +static CVC4::parser::SmtInput *input; + +extern +void SetSmtInput(CVC4::parser::SmtInput* _input) { + input = _input; +} +} + +/** + * Parses an expression. + * @return the parsed expression + */ +parseExpr returns [CVC4::Expr expr] + : annotatedFormula[expr] + | EOF + ; + +/** + * Parses a command (the whole benchmark) + * @return the command of the benchmark + */ +parseCommand returns [CVC4::Command* cmd] + : b = benchmark { $cmd = b; } + ; + +/** + * Matches the whole SMT-LIB benchmark. + * @return the sequence command containing the whole problem + */ +benchmark returns [CVC4::Command* cmd] + : LPAREN_TOK BENCHMARK_TOK IDENTIFIER c = benchAttributes RPAREN_TOK + { $cmd = c; } + | EOF { $cmd = 0; } + ; + +/** + * Matches a sequence of benchmark attributes and returns a pointer to a + * command sequence. + * @return the command sequence + */ +benchAttributes returns [CVC4::CommandSequence* cmd_seq] +@init { + cmd_seq = new CommandSequence(); +} + : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ + ; + +/** + * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns + * a corresponding command + * @return a command corresponding to the attribute + */ +benchAttribute returns [CVC4::Command* smt_command] +@declarations { + std::string name; + BenchmarkStatus b_status; + Expr expr; +} + : LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE] + { input->setLogic(name); + smt_command = new SetBenchmarkLogicCommand(name); } + | ASSUMPTION_TOK annotatedFormula[expr] + { smt_command = new AssertCommand(expr); } + | FORMULA_TOK annotatedFormula[expr] + { smt_command = new CheckSatCommand(expr); } + | STATUS_TOK status[b_status] + { smt_command = new SetBenchmarkStatusCommand(b_status); } + | EXTRAFUNS_TOK LPAREN_TOK (functionDeclaration)+ RPAREN_TOK + | EXTRAPREDS_TOK LPAREN_TOK (predicateDeclaration)+ RPAREN_TOK + | EXTRASORTS_TOK LPAREN_TOK sortDeclaration+ RPAREN_TOK + | NOTES_TOK STRING_LITERAL + | annotation + ; + +/** + * Matches an annotated formula. + * @return the expression representing the formula + */ +annotatedFormula[CVC4::Expr& expr] +@init { + Debug("parser") << "annotated formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + Kind kind; + std::string name; + std::vector args; /* = getExprVector(); */ +} + : /* a built-in operator application */ + LPAREN_TOK builtinOp[kind] annotatedFormulas[args,expr] RPAREN_TOK + { input->checkArity(kind, args.size()); + if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) { + /* Unary AND/OR can be replaced with the argument. + It just so happens expr should already by the only argument. */ + Assert( expr == args[0] ); + } else { + expr = input->mkExpr(kind, args); + } + } + + | /* A non-built-in function application */ + + // Semantic predicate not necessary if parenthesized subexpressions + // are disallowed + // { isFunction(LT(2)->getText()) }? + + LPAREN_TOK + functionSymbol[expr] + { args.push_back(expr); } + annotatedFormulas[args,expr] RPAREN_TOK + // TODO: check arity + { expr = input->mkExpr(CVC4::kind::APPLY_UF,args); } + + | /* An ite expression */ + LPAREN_TOK (ITE_TOK | IF_THEN_ELSE_TOK) + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + annotatedFormula[expr] + { args.push_back(expr); } + RPAREN_TOK + { expr = input->mkExpr(CVC4::kind::ITE, args); } + + | /* a let/flet binding */ + LPAREN_TOK + (LET_TOK LPAREN_TOK var_identifier[name,CHECK_UNDECLARED] + | FLET_TOK LPAREN_TOK fun_identifier[name,CHECK_UNDECLARED] ) + annotatedFormula[expr] RPAREN_TOK + { input->defineVar(name,expr); } + annotatedFormula[expr] + RPAREN_TOK + { input->undefineVar(name); } + + | /* a variable */ + ( identifier[name,CHECK_DECLARED,SYM_VARIABLE] + | var_identifier[name,CHECK_DECLARED] + | fun_identifier[name,CHECK_DECLARED] ) + { expr = input->getVariable(name); } + + /* constants */ + | TRUE_TOK { expr = input->getTrueExpr(); } + | FALSE_TOK { expr = input->getFalseExpr(); } + /* TODO: let, flet, quantifiers, arithmetic constants */ + ; + +/** + * Matches a sequence of annotated formulas and puts them into the formulas + * vector. + * @param formulas the vector to fill with formulas + * @param expr an Expr reference for the elements of the sequence + */ +/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every + * time through this rule. */ +annotatedFormulas[std::vector& formulas, CVC4::Expr& expr] + : ( annotatedFormula[expr] { formulas.push_back(expr); } )+ + ; + +/** +* Matches on of the unary Boolean connectives. +* @return the kind of the Bollean connective +*/ +builtinOp[CVC4::Kind& kind] +@init { + Debug("parser") << "builtin: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + : NOT_TOK { $kind = CVC4::kind::NOT; } + | IMPLIES_TOK { $kind = CVC4::kind::IMPLIES; } + | AND_TOK { $kind = CVC4::kind::AND; } + | OR_TOK { $kind = CVC4::kind::OR; } + | XOR_TOK { $kind = CVC4::kind::XOR; } + | IFF_TOK { $kind = CVC4::kind::IFF; } + | EQUAL_TOK { $kind = CVC4::kind::EQUAL; } + | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; } + /* TODO: lt, gt, plus, minus, etc. */ + ; + +/** + * Matches a (possibly undeclared) predicate identifier (returning the string). + * @param check what kind of check to do with the symbol + */ +predicateName[std::string& name, CVC4::parser::DeclarationCheck check] + : functionName[name,check] + ; + +/** + * Matches a (possibly undeclared) function identifier (returning the string) + * @param check what kind of check to do with the symbol + */ +functionName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_FUNCTION] + ; + +/** + * Matches an previously declared function symbol (returning an Expr) + */ +functionSymbol[CVC4::Expr& fun] +@declarations { + std::string name; +} + : functionName[name,CHECK_DECLARED] + { input->checkFunction(name); + fun = input->getFunction(name); } + ; + +/** + * Matches an attribute name from the input (:attribute_name). + */ +attribute + : ATTR_IDENTIFIER + ; + + + +functionDeclaration +@declarations { + std::string name; + std::vector sorts; +} + : LPAREN_TOK functionName[name,CHECK_UNDECLARED] + t = sortSymbol // require at least one sort + { sorts.push_back(t); } + sortList[sorts] RPAREN_TOK + { t = input->functionType(sorts); + input->mkVar(name, t); } + ; + +/** + * Matches the declaration of a predicate and declares it + */ +predicateDeclaration +@declarations { + std::string name; + std::vector p_sorts; +} + : LPAREN_TOK predicateName[name,CHECK_UNDECLARED] sortList[p_sorts] RPAREN_TOK + { Type *t = input->predicateType(p_sorts); + input->mkVar(name, t); } + ; + +sortDeclaration +@declarations { + std::string name; +} + : sortName[name,CHECK_UNDECLARED] + { Debug("parser") << "sort decl: '" << name << "'" << std::endl; + input->newSort(name); } + ; + +/** + * Matches a sequence of sort symbols and fills them into the given vector. + */ +sortList[std::vector& sorts] + : ( t = sortSymbol { sorts.push_back(t); })* + ; + +/** + * Matches the sort symbol, which can be an arbitrary identifier. + * @param check the check to perform on the name + */ +sortName[std::string& name, CVC4::parser::DeclarationCheck check] + : identifier[name,check,SYM_SORT] + ; + +sortSymbol returns [CVC4::Type* t] +@declarations { + std::string name; +} + : sortName[name,CHECK_NONE] + { $t = input->getSort(name); } + ; + +/** + * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. + */ +status[ CVC4::BenchmarkStatus& status ] + : SAT_TOK { $status = SMT_SATISFIABLE; } + | UNSAT_TOK { $status = SMT_UNSATISFIABLE; } + | UNKNOWN_TOK { $status = SMT_UNKNOWN; } + ; + +/** + * Matches an annotation, which is an attribute name, with an optional user + */ +annotation + : attribute (USER_VALUE)? + ; + +/** + * Matches an identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + * @param type the intended namespace for the identifier + */ +identifier[std::string& id, + CVC4::parser::DeclarationCheck check, + CVC4::parser::SymbolType type] + : IDENTIFIER + { id = AntlrInput::tokenText($IDENTIFIER); + Debug("parser") << "identifier: " << id + << " check? " << toString(check) + << " type? " << toString(type) << std::endl; + input->checkDeclaration(id, check, type); } + ; + +/** + * Matches an variable identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +var_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : VAR_IDENTIFIER + { id = AntlrInput::tokenText($VAR_IDENTIFIER); + Debug("parser") << "var_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_VARIABLE); } + ; + +/** + * Matches an function identifier and sets the string reference parameter id. + * @param id string to hold the identifier + * @param check what kinds of check to do on the symbol + */ +fun_identifier[std::string& id, + CVC4::parser::DeclarationCheck check] + : FUN_IDENTIFIER + { id = AntlrInput::tokenText($FUN_IDENTIFIER); + Debug("parser") << "fun_identifier: " << id + << " check? " << toString(check) << std::endl; + input->checkDeclaration(id, check, SYM_FUNCTION); } + ; + + +// Base SMT-LIB tokens +DISTINCT_TOK : 'distinct'; +ITE_TOK : 'ite'; +IF_THEN_ELSE_TOK : 'if_then_else'; +TRUE_TOK : 'true'; +FALSE_TOK : 'false'; +NOT_TOK : 'not'; +IMPLIES_TOK : 'implies'; +AND_TOK : 'and'; +OR_TOK : 'or'; +XOR_TOK : 'xor'; +IFF_TOK : 'iff'; +EXISTS_TOK : 'exists'; +FORALL_TOK : 'forall'; +LET_TOK : 'let'; +FLET_TOK : 'flet'; +THEORY_TOK : 'theory'; +SAT_TOK : 'sat'; +UNSAT_TOK : 'unsat'; +UNKNOWN_TOK : 'unknown'; +BENCHMARK_TOK : 'benchmark'; + +// The SMT attribute tokens +LOGIC_TOK : ':logic'; +ASSUMPTION_TOK : ':assumption'; +FORMULA_TOK : ':formula'; +STATUS_TOK : ':status'; +EXTRASORTS_TOK : ':extrasorts'; +EXTRAFUNS_TOK : ':extrafuns'; +EXTRAPREDS_TOK : ':extrapreds'; +NOTES_TOK : ':notes'; + +// arithmetic symbols +EQUAL_TOK : '='; +LESS_THAN_TOK : '<'; +GREATER_THAN_TOK : '>'; +AMPERSAND_TOK : '&'; +AT_TOK : '@'; +POUND_TOK : '#'; +PLUS_TOK : '+'; +MINUS_TOK : '-'; +STAR_TOK : '*'; +DIV_TOK : '/'; +PERCENT_TOK : '%'; +PIPE_TOK : '|'; +TILDE_TOK : '~'; + +// Language meta-symbols +//QUESTION_TOK : '?'; +//DOLLAR_TOK : '$'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; + +/** + * Matches an identifier from the input. An identifier is a sequence of letters, + * digits and "_", "'", "." symbols, starting with a letter. + */ +IDENTIFIER /*options { paraphrase = 'an identifier'; testLiterals = true; }*/ + : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* + ; + +/** + * Matches an identifier starting with a colon. + */ +ATTR_IDENTIFIER /*options { paraphrase = 'an identifier starting with a colon'; testLiterals = true; }*/ + : ':' IDENTIFIER + ; + +/** + * Matches an identifier starting with a question mark. + */ +VAR_IDENTIFIER + : '?' IDENTIFIER + ; + +/** + * Matches an identifier starting with a dollar sign. + */ +FUN_IDENTIFIER + : '$' IDENTIFIER + ; + +/** + * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with + * an open brace and end with closed brace. + */ +USER_VALUE + : '{' + ( ~('{' | '}') )* + '}' + ; + + +/** + * Matches and skips whitespace in the input. + */ +WHITESPACE /*options { paraphrase = 'whitespace'; }*/ + : (' ' | '\t' | '\f' | '\r' | '\n')+ { $channel = HIDDEN;; } + ; + +/** + * Matches a numeral from the input (non-empty sequence of digits). + */ +NUMERAL_TOK /*options { paraphrase = 'a numeral'; }*/ + : (DIGIT)+ + ; + +/** + * Matches a double quoted string literal. Escaping is supported, and escape + * character '\' has to be escaped. + */ +STRING_LITERAL /*options { paraphrase = 'a string literal'; }*/ + : '"' (ESCAPE | ~('"'|'\\'))* '"' + ; + +/** + * Matches the comments and ignores them + */ +COMMENT /*options { paraphrase = 'comment'; }*/ + : ';' (~('\n' | '\r'))* { $channel = HIDDEN;; } + ; + + +/** + * Matches any letter ('a'-'z' and 'A'-'Z'). + */ +fragment +ALPHA /*options { paraphrase = 'a letter'; }*/ + : 'a'..'z' + | 'A'..'Z' + ; + +/** + * Matches the digits (0-9) + */ +fragment +DIGIT /*options { paraphrase = 'a digit'; }*/ + : '0'..'9' + ; + + +/** + * Matches an allowed escaped character. + */ +fragment ESCAPE + : '\\' ('"' | '\\' | 'n' | 't' | 'r') + ; + diff --git a/src/parser/smt/smt_input.cpp b/src/parser/smt/smt_input.cpp new file mode 100644 index 000000000..7a28c30f1 --- /dev/null +++ b/src/parser/smt/smt_input.cpp @@ -0,0 +1,73 @@ +/* + * smt_parser.cpp + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#include + +#include "expr/expr_manager.h" +#include "parser/parser_exception.h" +#include "parser/smt/smt_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=2 */ +SmtInput::SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap) : + AntlrInput(exprManager,filename,2,useMmap) { + init(); +} + +SmtInput::SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name) : + AntlrInput(exprManager,input,name,2) { + init(); +} + +void SmtInput::init() { + pANTLR3_INPUT_STREAM input = getInputStream(); + AlwaysAssert( input != NULL ); + + d_pSmtLexer = SmtLexerNew(input); + if( d_pSmtLexer == NULL ) { + throw ParserException("Failed to create SMT lexer."); + } + + setLexer( d_pSmtLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pSmtParser = SmtParserNew(tokenStream); + if( d_pSmtParser == NULL ) { + throw ParserException("Failed to create SMT parser."); + } + + setParser(d_pSmtParser->pParser); + SetSmtInput(this); +} + + +SmtInput::~SmtInput() { + d_pSmtLexer->free(d_pSmtLexer); + d_pSmtParser->free(d_pSmtParser); +} + +Command* SmtInput::doParseCommand() throw (ParserException) { + return d_pSmtParser->parseCommand(d_pSmtParser); +} + +Expr SmtInput::doParseExpr() throw (ParserException) { + return d_pSmtParser->parseExpr(d_pSmtParser); +} + +pANTLR3_LEXER SmtInput::getLexer() { + return d_pSmtLexer->pLexer; +} + +} // namespace parser + +} // namespace CVC4 diff --git a/src/parser/smt/smt_input.h b/src/parser/smt/smt_input.h new file mode 100644 index 000000000..b3613d67b --- /dev/null +++ b/src/parser/smt/smt_input.h @@ -0,0 +1,47 @@ +/* + * smt_parser.h + * + * Created on: Mar 5, 2010 + * Author: chris + */ + +#ifndef SMT_PARSER_H_ +#define SMT_PARSER_H_ + +#include "parser/antlr_input.h" +#include "parser/smt/generated/SmtLexer.h" +#include "parser/smt/generated/SmtParser.h" + +// extern void SmtParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class SmtInput : public AntlrInput { +public: + SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap); + SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name); + ~SmtInput(); + +protected: + Command* doParseCommand() throw(ParserException); + Expr doParseExpr() throw(ParserException); + pANTLR3_LEXER getLexer(); + pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input); + pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream); + +private: + void init(); + pSmtLexer d_pSmtLexer; + pSmtParser d_pSmtParser; +}; // class SmtInput +} // namespace parser + +} // namespace CVC4 + +#endif /* SMT_PARSER_H_ */ diff --git a/src/parser/smt/smt_lexer.g b/src/parser/smt/smt_lexer.g deleted file mode 100644 index d694cd93f..000000000 --- a/src/parser/smt/smt_lexer.g +++ /dev/null @@ -1,220 +0,0 @@ -/* ******************* */ -/* smt_lexer.g - ** Original author: dejan - ** Major contributors: cconway, mdeters - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Lexer for SMT-LIB input language. - **/ - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtLexer class is a stream tokenizer (lexer) for the SMT-LIB benchmark - * language. - */ -class AntlrSmtLexer extends Lexer; - -options { - exportVocab = SmtVocabulary; // Name of the shared token vocabulary - testLiterals = false; // Do not check for literals by default - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -tokens { - // Base SMT-LIB tokens - DISTINCT = "distinct"; - ITE = "ite"; - IF_THEN_ELSE = "if_then_else"; - TRUE = "true"; - FALSE = "false"; - NOT = "not"; - IMPLIES = "implies"; - AND = "and"; - OR = "or"; - XOR = "xor"; - IFF = "iff"; - EXISTS = "exists"; - FORALL = "forall"; - LET = "let"; - FLET = "flet"; - THEORY = "theory"; - LOGIC = "logic"; - SAT = "sat"; - UNSAT = "unsat"; - UNKNOWN = "unknown"; - BENCHMARK = "benchmark"; - // The SMT attribute tokens - LOGIC_ATTR = ":logic"; - ASSUMPTION_ATTR = ":assumption"; - FORMULA_ATTR = ":formula"; - STATUS_ATTR = ":status"; - EXTRASORTS_ATTR = ":extrasorts"; - EXTRAFUNS_ATTR = ":extrafuns"; - EXTRAPREDS_ATTR = ":extrapreds"; - C_NOTES = ":notes"; - // arithmetic symbols - EQUAL = "="; - LESS_THAN = "<"; - GREATER_THAN = ">"; - AMPERSAND = "&"; - AT = "@"; - POUND = "#"; - PLUS = "+"; - MINUS = "-"; - STAR = "*"; - DIV = "/"; - PERCENT = "%"; - PIPE = "|"; - TILDE = "~"; -} - -/** - * Matches any letter ('a'-'z' and 'A'-'Z'). - */ -protected -ALPHA options { paraphrase = "a letter"; } - : 'a'..'z' - | 'A'..'Z' - ; - -/** - * Matches the digits (0-9) - */ -protected -DIGIT options { paraphrase = "a digit"; } - : '0'..'9' - ; - -/** - * Matches an identifier from the input. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a letter. - */ -IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; } - : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -/** - * Matches an identifier starting with a colon. An identifier is a sequence of letters, - * digits and "_", "'", "." symbols, starting with a colon. - */ -C_IDENTIFIER options { paraphrase = "an attribute (e.g., ':x')"; testLiterals = true; } - : ':' ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')* - ; - -VAR_IDENTIFIER options { paraphrase = "a variable (e.g., '?x')"; testLiterals = false; } - : '?' IDENTIFIER - ; - -FUN_IDENTIFIER options { paraphrase = "a function variable (e.g, '$x')"; testLiterals = false; } - : '$' IDENTIFIER - ; - - -/** - * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with - * an open brace and end with closed brace. - */ -USER_VALUE - : '{' - ( '\n' { newline(); } - | ~('{' | '}' | '\n') - )* - '}' - ; - -/** - * Matches the question mark symbol ('?'). - */ -QUESTION_MARK options { paraphrase = "a question mark '?'"; } - : '?' - ; - -/** - * Matches the dollar sign ('$'). - */ -DOLLAR_SIGN options { paraphrase = "a dollar sign '$'"; } - : '$' - ; - -/** - * Matches the left bracket ('('). - */ -LPAREN options { paraphrase = "a left parenthesis '('"; } - : '('; - -/** - * Matches the right bracket ('('). - */ -RPAREN options { paraphrase = "a right parenthesis ')'"; } - : ')'; - -/** - * Matches and skips whitespace in the input. - */ -WHITESPACE options { paraphrase = "whitespace"; } - : (' ' | '\t' | '\f') { $setType(antlr::Token::SKIP); } - ; - -/** - * Matches and skips the newline symbols in the input. - */ -NEWLINE options { paraphrase = "a newline"; } - : ('\r' '\n' | '\r' | '\n') { $setType(antlr::Token::SKIP); newline(); } - ; - -/** - * Matches a numeral from the input (non-empty sequence of digits). - */ -NUMERAL options { paraphrase = "a numeral"; } - : (DIGIT)+ - ; - -/** - * Matches an allowed escaped character. - */ -protected ESCAPE - : '\\' ('"' | '\\' | 'n' | 't' | 'r') - ; - -/** - * Matches a double quoted string literal. Escaping is supported, and escape - * character '\' has to be escaped. - */ -STRING_LITERAL options { paraphrase = "a string literal"; } - : '"' (ESCAPE | ~('"'|'\\'))* '"' - ; - -/** - * Matches the comments and ignores them - */ -COMMENT options { paraphrase = "comment"; } - : ';' (~('\n' | '\r'))* { $setType(antlr::Token::SKIP); } - ; - -/* Arithmetic symbol tokens */ -EQUAL : "="; -LESS_THAN : "<"; -GREATER_THAN : ">"; -AMPERSAND : "&"; -AT : "@"; -POUND : "#"; -PLUS : "+"; -MINUS : "-"; -STAR : "*"; -DIV : "/"; -PERCENT : "%"; -PIPE : "|"; -TILDE : "~"; diff --git a/src/parser/smt/smt_parser.g b/src/parser/smt/smt_parser.g deleted file mode 100644 index b876e1649..000000000 --- a/src/parser/smt/smt_parser.g +++ /dev/null @@ -1,351 +0,0 @@ -/* ******************* */ -/* smt_parser.g - ** Original author: dejan - ** Major contributors: mdeters, cconway - ** Minor contributors (to current version): none - ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) - ** Courant Institute of Mathematical Sciences - ** New York University - ** See the file COPYING in the top-level source directory for licensing - ** information. - ** - ** Parser for SMT-LIB input language. - **/ - -header "post_include_hpp" { -#include "parser/antlr_parser.h" -#include "expr/command.h" -} - -header "post_include_cpp" { -#include "expr/type.h" -#include "util/output.h" -#include - -using namespace std; -using namespace CVC4; -using namespace CVC4::parser; -} - -options { - language = "Cpp"; // C++ output for antlr - namespaceStd = "std"; // Cosmetic option to get rid of long defines in generated code - namespaceAntlr = "antlr"; // Cosmetic option to get rid of long defines in generated code - namespace = "CVC4::parser"; // Wrap everything in the smtparser namespace -} - -/** - * AntlrSmtParser class is the parser for the SMT-LIB files. - */ -class AntlrSmtParser extends Parser("AntlrParser"); -options { - genHashLines = true; // Include line number information - importVocab = SmtVocabulary; // Import the common vocabulary - defaultErrorHandler = false; // Skip the default error handling, just break with exceptions - k = 2; -} - -/** - * Parses an expression. - * @return the parsed expression - */ -parseExpr returns [CVC4::Expr expr] - : expr = annotatedFormula - | EOF - ; - -/** - * Parses a command (the whole benchmark) - * @return the command of the benchmark - */ -parseCommand returns [CVC4::Command* cmd] - : cmd = benchmark - ; - -/** - * Matches the whole SMT-LIB benchmark. - * @return the sequence command containing the whole problem - */ -benchmark returns [Command* cmd] - : LPAREN BENCHMARK IDENTIFIER cmd = benchAttributes RPAREN - | EOF { cmd = 0; } - ; - -/** - * Matches a sequence of benchmark attributes and returns a pointer to a - * command sequence. - * @return the command sequence - */ -benchAttributes returns [CVC4::CommandSequence* cmd_seq = new CommandSequence()] -{ - Command* cmd; -} - : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+ - ; - -/** - * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns - * a corresponding command - * @retrurn a command corresponding to the attribute - */ -benchAttribute returns [Command* smt_command = 0] -{ - Expr formula; - string logic; - SetBenchmarkStatusCommand::BenchmarkStatus b_status = SetBenchmarkStatusCommand::SMT_UNKNOWN; -} - : LOGIC_ATTR logic = identifier - { setLogic(logic); - smt_command = new SetBenchmarkLogicCommand(logic); } - | ASSUMPTION_ATTR formula = annotatedFormula - { smt_command = new AssertCommand(formula); } - | FORMULA_ATTR formula = annotatedFormula - { smt_command = new CheckSatCommand(formula); } - | STATUS_ATTR b_status = status - { smt_command = new SetBenchmarkStatusCommand(b_status); } - | EXTRAFUNS_ATTR LPAREN (functionDeclaration)+ RPAREN - | EXTRAPREDS_ATTR LPAREN (predicateDeclaration)+ RPAREN - | EXTRASORTS_ATTR LPAREN (sortDeclaration)+ RPAREN - | NOTES_ATTR STRING_LITERAL - | annotation - ; - -/** - * Matches an annotated formula. - * @return the expression representing the formula - */ -annotatedFormula returns [CVC4::Expr formula] -{ - Debug("parser") << "annotated formula: " << LT(1)->getText() << endl; - Kind kind = CVC4::kind::UNDEFINED_KIND; - vector args; - std::string name; - Expr expr1, expr2, expr3; -} - : /* a built-in operator application */ - LPAREN kind = builtinOp annotatedFormulas[args] RPAREN - { checkArity(kind, args.size()); - if((kind == kind::AND || kind == kind::OR) && args.size() == 1) { - formula = args[0]; - } else { - formula = mkExpr(kind, args); - } - } - - | /* a "distinct" expr */ - /* TODO: Should there be a DISTINCT kind in the Expr package? */ - LPAREN DISTINCT annotatedFormulas[args] RPAREN - { formula = mkExpr(CVC4::kind::DISTINCT, args); } - - | /* An ite expression */ - LPAREN (ITE | IF_THEN_ELSE) - { Expr test, trueExpr, falseExpr; } - test = annotatedFormula - trueExpr = annotatedFormula - falseExpr = annotatedFormula - RPAREN - { formula = mkExpr(CVC4::kind::ITE, test, trueExpr, falseExpr); } - - | /* A let/flet binding */ - LPAREN (LET LPAREN name=variable[CHECK_UNDECLARED] - | FLET LPAREN name=function_var[CHECK_UNDECLARED] ) - expr1=annotatedFormula RPAREN - { defineVar(name,expr1); } - formula=annotatedFormula - { undefineVar(name); } - RPAREN - - | /* A non-built-in function application */ - { Expr f; } - LPAREN f = functionSymbol - { args.push_back(f); } - annotatedFormulas[args] RPAREN - // TODO: check arity - { formula = mkExpr(CVC4::kind::APPLY_UF, args); } - - | /* a variable */ - { std::string name; } - ( name = identifier[CHECK_DECLARED] - | name = variable[CHECK_DECLARED] - | name = function_var[CHECK_DECLARED] ) - { formula = getVariable(name); } - - /* constants */ - | TRUE { formula = getTrueExpr(); } - | FALSE { formula = getFalseExpr(); } - /* TODO: quantifiers, arithmetic constants */ - ; - -/** - * Matches a sequence of annotaed formulas and puts them into the formulas - * vector. - * @param formulas the vector to fill with formulas - */ -annotatedFormulas[std::vector& formulas] -{ - Expr f; -} - : ( f = annotatedFormula { formulas.push_back(f); } )+ - ; - -/** -* Matches on of the unary Boolean connectives. -* @return the kind of the Bollean connective -*/ -builtinOp returns [CVC4::Kind kind] -{ - Debug("parser") << "builtin: " << LT(1)->getText() << endl; -} - : NOT { kind = CVC4::kind::NOT; } - | IMPLIES { kind = CVC4::kind::IMPLIES; } - | AND { kind = CVC4::kind::AND; } - | OR { kind = CVC4::kind::OR; } - | XOR { kind = CVC4::kind::XOR; } - | IFF { kind = CVC4::kind::IFF; } - | EQUAL { kind = CVC4::kind::EQUAL; } - /* TODO: lt, gt, plus, minus, etc. */ - ; - -/** - * Matches a (possibly undeclared) predicate identifier (returning the string). - * @param check what kind of check to do with the symbol - */ -predicateName[DeclarationCheck check = CHECK_NONE] returns [std::string p] - : p = identifier[check] - ; - -/** - * Matches a (possibly undeclared) function identifier (returning the string) - * @param check what kind of check to do with the symbol - */ -functionName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_FUNCTION] - ; - -/** - * Matches an previously declared function symbol (returning an Expr) - */ -functionSymbol returns [CVC4::Expr fun] -{ - string name; -} - : name = functionName[CHECK_DECLARED] - { checkFunction(name); - fun = getFunction(name); } - ; - -/** - * Matches an attribute name from the input (:attribute_name). - */ -attribute - : C_IDENTIFIER - ; - -variable[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:VAR_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_VARIABLE); } - ; - -function_var[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : x:FUN_IDENTIFIER - { name = x->getText(); - checkDeclaration(name, check, SYM_FUNCTION); } - ; - -/** - * Matches the sort symbol, which can be an arbitrary identifier. - * @param check the check to perform on the name - */ -sortName[DeclarationCheck check = CHECK_NONE] returns [std::string name] - : name = identifier[check,SYM_SORT] - ; - -sortSymbol returns [CVC4::Type* t] -{ - std::string name; -} - : name = sortName { t = getSort(name); } - ; - -functionDeclaration -{ - string name; - Type* t; - std::vector sorts; -} - : LPAREN name = functionName[CHECK_UNDECLARED] - t = sortSymbol // require at least one sort - { sorts.push_back(t); } - sortList[sorts] RPAREN - { t = functionType(sorts); - mkVar(name, t); } - ; - -/** - * Matches the declaration of a predicate and declares it - */ -predicateDeclaration -{ - string p_name; - std::vector p_sorts; - Type *t; -} - : LPAREN p_name = predicateName[CHECK_UNDECLARED] sortList[p_sorts] RPAREN - { t = predicateType(p_sorts); - mkVar(p_name, t); } - ; - -sortDeclaration -{ - string name; -} - : name = sortName[CHECK_UNDECLARED] - { newSort(name); } - ; - -/** - * Matches a sequence of sort symbols and fills them into the given vector. - */ -sortList[std::vector& sorts] -{ - Type* t; -} - : ( t = sortSymbol { sorts.push_back(t); })* - ; - -/** - * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'. - */ -status returns [ SetBenchmarkStatusCommand::BenchmarkStatus status ] - : SAT { status = SetBenchmarkStatusCommand::SMT_SATISFIABLE; } - | UNSAT { status = SetBenchmarkStatusCommand::SMT_UNSATISFIABLE; } - | UNKNOWN { status = SetBenchmarkStatusCommand::SMT_UNKNOWN; } - ; - -/** - * Matches an annotation, which is an attribute name, with an optional user - */ -annotation - : attribute (USER_VALUE)? - ; - -/** - * Matches an identifier and returns a string. - * @param check what kinds of check to do on the symbol - * @return the id string - */ -identifier[DeclarationCheck check = CHECK_NONE, - SymbolType type = SYM_VARIABLE] -returns [std::string id] -{ - Debug("parser") << "identifier: " << LT(1)->getText() - << " check? " << toString(check) - << " type? " << toString(type) << endl; -} - : x:IDENTIFIER - { id = x->getText(); - checkDeclaration(id, check, type); } - ; - diff --git a/src/parser/symbol_table.h b/src/parser/symbol_table.h index 3bcc080bd..d6467f4e9 100644 --- a/src/parser/symbol_table.h +++ b/src/parser/symbol_table.h @@ -23,6 +23,8 @@ #include +#include "util/Assert.h" + namespace CVC4 { namespace parser { diff --git a/src/prop/.gitignore b/src/prop/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/prop/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/prop/minisat/.gitignore b/src/prop/minisat/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/prop/minisat/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/smt/.gitignore b/src/smt/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/smt/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/theory/.gitignore b/src/theory/.gitignore new file mode 100644 index 000000000..daddcd51e --- /dev/null +++ b/src/theory/.gitignore @@ -0,0 +1,3 @@ +/.deps +/Makefile.in +/theoryof_table.h diff --git a/src/theory/arith/.gitignore b/src/theory/arith/.gitignore new file mode 100644 index 000000000..15fb9b26d --- /dev/null +++ b/src/theory/arith/.gitignore @@ -0,0 +1,2 @@ +/Makefile.in +/.deps diff --git a/src/theory/arrays/.gitignore b/src/theory/arrays/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/theory/arrays/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/theory/booleans/.gitignore b/src/theory/booleans/.gitignore new file mode 100644 index 000000000..10a7e8d6c --- /dev/null +++ b/src/theory/booleans/.gitignore @@ -0,0 +1 @@ +/Makefile.in diff --git a/src/theory/bv/.gitignore b/src/theory/bv/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/theory/bv/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/theory/uf/.gitignore b/src/theory/uf/.gitignore new file mode 100644 index 000000000..15fb9b26d --- /dev/null +++ b/src/theory/uf/.gitignore @@ -0,0 +1,2 @@ +/Makefile.in +/.deps diff --git a/src/util/.gitignore b/src/util/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/src/util/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/src/util/options.h b/src/util/options.h index b71acd982..8e2d46e99 100644 --- a/src/util/options.h +++ b/src/util/options.h @@ -17,7 +17,9 @@ #define __CVC4__OPTIONS_H #include -#include "parser/parser.h" +#include +#include "cvc4_config.h" +#include "parser/parser_options.h" namespace CVC4 { @@ -39,7 +41,7 @@ struct CVC4_PUBLIC Options { int verbosity; /** The input language */ - parser::Parser::InputLanguage lang; + parser::InputLanguage lang; /** Should we exit after parsing? */ bool parseOnly; @@ -56,7 +58,7 @@ struct CVC4_PUBLIC Options { out(0), err(0), verbosity(0), - lang(parser::Parser::LANG_AUTO), + lang(parser::LANG_AUTO), parseOnly(false), semanticChecks(true), memoryMap(false) diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/regress/.gitignore b/test/regress/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/regress/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/regress/regress0/.gitignore b/test/regress/regress0/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/regress/regress0/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/regress/regress0/Makefile.am b/test/regress/regress0/Makefile.am index 29141d633..eb07b22fb 100644 --- a/test/regress/regress0/Makefile.am +++ b/test/regress/regress0/Makefile.am @@ -1,22 +1,23 @@ SUBDIRS = precedence uf TESTS_ENVIRONMENT = @srcdir@/../run_regression @top_builddir@/../../bin/cvc4 -TESTS = bug32.cvc \ +TESTS = \ distinct.smt \ flet.smt \ flet2.smt \ - hole6.cvc \ let.smt \ let2.smt \ + simple2.smt \ + simple.smt \ + simple-uf.smt \ + bug32.cvc \ + hole6.cvc \ logops.01.cvc \ logops.02.cvc \ logops.03.cvc \ logops.04.cvc \ logops.05.cvc \ - simple2.smt \ simple.cvc \ - simple.smt \ - simple-uf.smt \ smallcnf.cvc \ test11.cvc \ test9.cvc \ @@ -42,7 +43,7 @@ TESTS = bug32.cvc \ wiki.19.cvc \ wiki.20.cvc \ wiki.21.cvc - + # synonyms for "check" .PHONY: regress regress0 test regress regress0 test: check diff --git a/test/regress/regress0/precedence/.gitignore b/test/regress/regress0/precedence/.gitignore new file mode 100644 index 000000000..10a7e8d6c --- /dev/null +++ b/test/regress/regress0/precedence/.gitignore @@ -0,0 +1 @@ +/Makefile.in diff --git a/test/regress/regress0/precedence/Makefile.am b/test/regress/regress0/precedence/Makefile.am index 36722b81b..0b4fcd4a6 100644 --- a/test/regress/regress0/precedence/Makefile.am +++ b/test/regress/regress0/precedence/Makefile.am @@ -1,5 +1,6 @@ TESTS_ENVIRONMENT = @srcdir@/../../run_regression @top_builddir@/../../bin/cvc4 -TESTS = iff-implies.cvc \ +TESTS = \ + iff-implies.cvc \ implies-iff.cvc \ implies-or.cvc \ or-implies.cvc \ diff --git a/test/regress/regress0/uf/.gitignore b/test/regress/regress0/uf/.gitignore new file mode 100644 index 000000000..10a7e8d6c --- /dev/null +++ b/test/regress/regress0/uf/.gitignore @@ -0,0 +1 @@ +/Makefile.in diff --git a/test/regress/regress0/uf/Makefile.am b/test/regress/regress0/uf/Makefile.am index b456f2809..ec99fd45c 100644 --- a/test/regress/regress0/uf/Makefile.am +++ b/test/regress/regress0/uf/Makefile.am @@ -1,8 +1,5 @@ TESTS_ENVIRONMENT = @srcdir@/../../run_regression @top_builddir@/../../bin/cvc4 -TESTS = simple.01.cvc \ - simple.02.cvc \ - simple.03.cvc \ - simple.04.cvc \ +TESTS = \ euf_simp01.smt \ euf_simp02.smt \ euf_simp03.smt \ @@ -17,7 +14,11 @@ TESTS = simple.01.cvc \ euf_simp13.smt \ dead_dnd002.smt \ iso_brn001.smt \ - SEQ032_size2.smt + SEQ032_size2.smt \ + simple.01.cvc \ + simple.02.cvc \ + simple.03.cvc \ + simple.04.cvc # synonyms for "check" .PHONY: regress regress0 test diff --git a/test/regress/regress1/.gitignore b/test/regress/regress1/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/regress/regress1/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/regress/regress2/.gitignore b/test/regress/regress2/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/regress/regress2/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/regress/regress3/.gitignore b/test/regress/regress3/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/regress/regress3/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/system/.gitignore b/test/system/.gitignore new file mode 100644 index 000000000..10a7e8d6c --- /dev/null +++ b/test/system/.gitignore @@ -0,0 +1 @@ +/Makefile.in diff --git a/test/unit/.gitignore b/test/unit/.gitignore new file mode 100644 index 000000000..f39e98071 --- /dev/null +++ b/test/unit/.gitignore @@ -0,0 +1,2 @@ +/.deps +/Makefile.in diff --git a/test/unit/expr/.gitignore b/test/unit/expr/.gitignore new file mode 100644 index 000000000..71ef9896d --- /dev/null +++ b/test/unit/expr/.gitignore @@ -0,0 +1,4 @@ +/expr_black +/expr_black.cpp +/expr_white +/expr_white.cpp diff --git a/test/unit/parser/parser_black.h b/test/unit/parser/parser_black.h index f7d4eff24..b7c58ba99 100644 --- a/test/unit/parser/parser_black.h +++ b/test/unit/parser/parser_black.h @@ -19,7 +19,7 @@ #include "expr/expr.h" #include "expr/expr_manager.h" -#include "parser/parser.h" +#include "parser/input.h" #include "expr/command.h" #include "util/output.h" @@ -27,6 +27,28 @@ using namespace CVC4; using namespace CVC4::parser; using namespace std; +/* Set up declaration context for expr inputs */ + +void setupContext(Input* input) { + /* a, b, c: BOOLEAN */ + input->mkVar("a",(Type*)input->booleanType()); + input->mkVar("b",(Type*)input->booleanType()); + input->mkVar("c",(Type*)input->booleanType()); + /* t, u, v: TYPE */ + Type *t = input->newSort("t"); + Type *u = input->newSort("u"); + Type *v = input->newSort("v"); + /* f : t->u; g: u->v; h: v->t; */ + input->mkVar("f", input->functionType(t,u)); + input->mkVar("g", input->functionType(u,v)); + input->mkVar("h", input->functionType(v,t)); + /* x:t; y:u; z:v; */ + input->mkVar("x",t); + input->mkVar("y",u); + input->mkVar("z",v); +} + + /************************** CVC test inputs ********************************/ const string goodCvc4Inputs[] = { @@ -44,9 +66,8 @@ const string goodCvc4Inputs[] = { const int numGoodCvc4Inputs = sizeof(goodCvc4Inputs) / sizeof(string); -const string cvc4ExprContext = "a,b,c:BOOLEAN;"; -/* The following expressions are good in a context where a, b, and c have been declared as BOOLEAN. */ +/* The following expressions are valid after setupContext. */ const string goodCvc4Exprs[] = { "a AND b", "a AND b OR c", @@ -71,7 +92,7 @@ const string badCvc4Inputs[] = { const int numBadCvc4Inputs = sizeof(badCvc4Inputs) / sizeof(string); -/* The following expressions are bad even in a context where a, b, and c have been declared as BOOLEAN. */ +/* The following expressions are invalid even after setupContext. */ const string badCvc4Exprs[] = { "a AND", // wrong arity "AND(a,b)", // not infix @@ -100,18 +121,7 @@ const string goodSmtInputs[] = { const int numGoodSmtInputs = sizeof(goodSmtInputs) / sizeof(string); -/* The parser is just going to read this benchmark and leave its decls - in the context. The SMT exprs below will then be able to refer to them, - even though they're "out of scope." */ -const string smtExprContext = - "(benchmark foo\n" - " :extrasorts (t u v)\n" - " :extrapreds ((a) (b) (c))\n" - " :extrafuns ((f t u) (g u v) (h v t) (x t) (y u) (z v)))\n"; - -/* The following expressions are good in a context where a, b, and c - have been declared as BOOLEAN, t, u, v, are sorts, f, g, h are - functions, and x, y, z are variables. */ +/* The following expressions are valid after setupContext. */ const string goodSmtExprs[] = { "(and a b)", "(or (and a b) c)", @@ -134,7 +144,7 @@ const string badSmtInputs[] = { const int numBadSmtInputs = sizeof(badSmtInputs) / sizeof(string); -/* The following expressions are bad even in a context where a, b, and c have been declared as BOOLEAN. */ +/* The following expressions are invalid even after setupContext. */ const string badSmtExprs[] = { "(and)", // wrong arity "(and a b", // no closing paren @@ -152,61 +162,66 @@ const int numBadSmtExprs = sizeof(badSmtExprs) / sizeof(string); class ParserBlack : public CxxTest::TestSuite { ExprManager *d_exprManager; - void tryGoodInputs(Parser::InputLanguage d_lang, const string goodInputs[], int numInputs) { + void tryGoodInputs(InputLanguage d_lang, const string goodInputs[], int numInputs) { for(int i = 0; i < numInputs; ++i) { try { - // cout << "Testing good input: '" << goodInputs[i] << "'\n"; - // Debug.on("parser"); - istringstream stream(goodInputs[i]); - Parser* smtParser = Parser::getNewParser(d_exprManager, d_lang, stream, "test"); - TS_ASSERT( !smtParser->done() ); +// Debug.on("parser"); +// Debug.on("parser-extra"); + Debug("test") << "Testing good input: '" << goodInputs[i] << "'\n"; +// istringstream stream(goodInputs[i]); + Input* parser = Input::newStringParser(d_exprManager, d_lang, goodInputs[i], "test"); + TS_ASSERT( !parser->done() ); Command* cmd; - while((cmd = smtParser->parseNextCommand())) { + while((cmd = parser->parseNextCommand())) { // cout << "Parsed command: " << (*cmd) << endl; } - TS_ASSERT( smtParser->parseNextCommand() == NULL ); - TS_ASSERT( smtParser->done() ); - delete smtParser; + TS_ASSERT( parser->parseNextCommand() == NULL ); + TS_ASSERT( parser->done() ); + delete parser; +// Debug.off("parser"); +// Debug.off("parser-extra"); } catch (Exception& e) { cout << "\nGood input failed:\n" << goodInputs[i] << endl << e << endl; +// Debug.off("parser-extra"); throw; } - } } - void tryBadInputs(Parser::InputLanguage d_lang, const string badInputs[], int numInputs) { + void tryBadInputs(InputLanguage d_lang, const string badInputs[], int numInputs) { for(int i = 0; i < numInputs; ++i) { - // cout << "Testing bad input: '" << badInputs[i] << "'\n"; - istringstream stream(badInputs[i]); - Parser* smtParser = Parser::getNewParser(d_exprManager, d_lang, stream, "test"); +// cout << "Testing bad input: '" << badInputs[i] << "'\n"; +// Debug.on("parser"); + Input* parser = Input::newStringParser(d_exprManager, d_lang, badInputs[i], "test"); TS_ASSERT_THROWS - ( while(smtParser->parseNextCommand()); + ( while(parser->parseNextCommand()); cout << "\nBad input succeeded:\n" << badInputs[i] << endl;, ParserException ); - delete smtParser; +// Debug.off("parser"); + delete parser; } } - void tryGoodExprs(Parser::InputLanguage d_lang,const string& context, const string goodBooleanExprs[], int numExprs) { + void tryGoodExprs(InputLanguage d_lang, const string goodBooleanExprs[], int numExprs) { // cout << "Using context: " << context << endl; +// Debug.on("parser"); +// Debug.on("parser-extra"); for(int i = 0; i < numExprs; ++i) { try { // cout << "Testing good expr: '" << goodBooleanExprs[i] << "'\n"; // Debug.on("parser"); - istringstream stream(context + goodBooleanExprs[i]); - Parser* parser = Parser::getNewParser(d_exprManager, d_lang, stream, "test"); +// istringstream stream(context + goodBooleanExprs[i]); + Input* parser = Input::newStringParser(d_exprManager, d_lang, + goodBooleanExprs[i], "test"); TS_ASSERT( !parser->done() ); - Command* cmd = parser->parseNextCommand(); + setupContext(parser); TS_ASSERT( !parser->done() ); - Expr e; - while(e = parser->parseNextExpression()) { - // cout << "Parsed expr: " << e << endl; - } + Expr e = parser->parseNextExpression(); + TS_ASSERT( !e.isNull() ); + e = parser->parseNextExpression(); TS_ASSERT( parser->done() ); TS_ASSERT( e.isNull() ); - delete cmd; delete parser; } catch (Exception& e) { cout << "\nGood expr failed:\n" << goodBooleanExprs[i] << endl; @@ -216,17 +231,22 @@ class ParserBlack : public CxxTest::TestSuite { } } - void tryBadExprs(Parser::InputLanguage d_lang,const string& context, const string badBooleanExprs[], int numExprs) { + void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs) { //Debug.on("parser"); for(int i = 0; i < numExprs; ++i) { // cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n"; - istringstream stream(context + badBooleanExprs[i]); - Parser* smtParser = Parser::getNewParser(d_exprManager, d_lang, stream, "test"); +// istringstream stream(context + badBooleanExprs[i]); + Input* parser = Input::newStringParser(d_exprManager, d_lang, + badBooleanExprs[i], "test"); + + TS_ASSERT( !parser->done() ); + setupContext(parser); + TS_ASSERT( !parser->done() ); TS_ASSERT_THROWS - ( smtParser->parseNextExpression(); + ( parser->parseNextExpression(); cout << "\nBad expr succeeded: " << badBooleanExprs[i] << endl;, ParserException ); - delete smtParser; + delete parser; } //Debug.off("parser"); } @@ -241,34 +261,34 @@ public: } void testGoodCvc4Inputs() { - tryGoodInputs(Parser::LANG_CVC4,goodCvc4Inputs,numGoodCvc4Inputs); + tryGoodInputs(LANG_CVC4,goodCvc4Inputs,numGoodCvc4Inputs); } void testBadCvc4Inputs() { - tryBadInputs(Parser::LANG_CVC4,badCvc4Inputs,numBadCvc4Inputs); + tryBadInputs(LANG_CVC4,badCvc4Inputs,numBadCvc4Inputs); } void testGoodCvc4Exprs() { - tryGoodExprs(Parser::LANG_CVC4,cvc4ExprContext,goodCvc4Exprs,numGoodCvc4Exprs); + tryGoodExprs(LANG_CVC4,goodCvc4Exprs,numGoodCvc4Exprs); } void testBadCvc4Exprs() { - tryBadExprs(Parser::LANG_CVC4,cvc4ExprContext,badCvc4Exprs,numBadCvc4Exprs); + tryBadExprs(LANG_CVC4,badCvc4Exprs,numBadCvc4Exprs); } void testGoodSmtInputs() { - tryGoodInputs(Parser::LANG_SMTLIB,goodSmtInputs,numGoodSmtInputs); + tryGoodInputs(LANG_SMTLIB,goodSmtInputs,numGoodSmtInputs); } void testBadSmtInputs() { - tryBadInputs(Parser::LANG_SMTLIB,badSmtInputs,numBadSmtInputs); + tryBadInputs(LANG_SMTLIB,badSmtInputs,numBadSmtInputs); } void testGoodSmtExprs() { - tryGoodExprs(Parser::LANG_SMTLIB,smtExprContext,goodSmtExprs,numGoodSmtExprs); + tryGoodExprs(LANG_SMTLIB,goodSmtExprs,numGoodSmtExprs); } void testBadSmtExprs() { - tryBadExprs(Parser::LANG_SMTLIB,smtExprContext,badSmtExprs,numBadSmtExprs); + tryBadExprs(LANG_SMTLIB,badSmtExprs,numBadSmtExprs); } }; -- 2.30.2