From 84c4269f3b9edb8de4134fe464dfc70679da2bb1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Bobot?= Date: Fri, 22 Jun 2012 15:11:37 +0000 Subject: [PATCH] TPTP: add parser for cnf and fof - include directive works - no keyword : 'fof', 'cnf', ... can be used for symbols name - real -> unsorted -> real (for the one that appear, so no bijection bitween real and unsorted) - same thing for string But: - string not distinct by projection to real, not sure if the current state of string theory make them distinct - filtering in include is not done - the result is not printed in the TPTP way (currently SMT2 way) --- src/compat/cvc3_compat.h | 1 + src/main/Makefile.am | 5 +- src/main/driver.cpp | 3 + src/main/driver_portfolio.cpp | 3 + src/main/interactive_shell.cpp | 8 + src/parser/Makefile.am | 4 +- src/parser/antlr_input.cpp | 5 + src/parser/parser_builder.cpp | 4 + src/parser/tptp/Makefile | 4 + src/parser/tptp/Makefile.am | 61 +++ src/parser/tptp/Tptp.g | 637 ++++++++++++++++++++++++++ src/parser/tptp/tptp.cpp | 162 +++++++ src/parser/tptp/tptp.h | 260 +++++++++++ src/parser/tptp/tptp_input.cpp | 74 +++ src/parser/tptp/tptp_input.h | 97 ++++ src/printer/printer.cpp | 3 + src/util/language.cpp | 2 + src/util/language.h | 10 + src/util/language.i | 2 + src/util/options.cpp | 8 + test/regress/regress0/Makefile.am | 15 +- test/regress/regress0/tptp_parser.p | 17 + test/regress/regress0/tptp_parser10.p | 10 + test/regress/regress0/tptp_parser2.p | 14 + test/regress/regress0/tptp_parser3.p | 14 + test/regress/regress0/tptp_parser4.p | 14 + test/regress/regress0/tptp_parser5.p | 16 + test/regress/regress0/tptp_parser6.p | 16 + test/regress/regress0/tptp_parser7.p | 16 + test/regress/regress0/tptp_parser8.p | 11 + test/regress/regress0/tptp_parser9.p | 14 + test/regress/run_regression | 20 +- 32 files changed, 1525 insertions(+), 5 deletions(-) create mode 100644 src/parser/tptp/Makefile create mode 100644 src/parser/tptp/Makefile.am create mode 100644 src/parser/tptp/Tptp.g create mode 100644 src/parser/tptp/tptp.cpp create mode 100644 src/parser/tptp/tptp.h create mode 100644 src/parser/tptp/tptp_input.cpp create mode 100644 src/parser/tptp/tptp_input.h create mode 100644 test/regress/regress0/tptp_parser.p create mode 100644 test/regress/regress0/tptp_parser10.p create mode 100644 test/regress/regress0/tptp_parser2.p create mode 100644 test/regress/regress0/tptp_parser3.p create mode 100644 test/regress/regress0/tptp_parser4.p create mode 100644 test/regress/regress0/tptp_parser5.p create mode 100644 test/regress/regress0/tptp_parser6.p create mode 100644 test/regress/regress0/tptp_parser7.p create mode 100644 test/regress/regress0/tptp_parser8.p create mode 100644 test/regress/regress0/tptp_parser9.p diff --git a/src/compat/cvc3_compat.h b/src/compat/cvc3_compat.h index eae8412cf..3ef40636a 100644 --- a/src/compat/cvc3_compat.h +++ b/src/compat/cvc3_compat.h @@ -474,6 +474,7 @@ typedef CVC4::StatisticsRegistry Statistics; #define PRESENTATION_LANG ::CVC4::language::input::LANG_CVC4 #define SMTLIB_LANG ::CVC4::language::input::LANG_SMTLIB #define SMTLIB_V2_LANG ::CVC4::language::input::LANG_SMTLIB_V2 +#define TPTP_LANG ::CVC4::language::input::LANG_TPTP #define AST_LANG ::CVC4::language::input::LANG_AST /*****************************************************************************/ diff --git a/src/main/Makefile.am b/src/main/Makefile.am index e6aa6b423..594751358 100644 --- a/src/main/Makefile.am +++ b/src/main/Makefile.am @@ -53,7 +53,8 @@ BUILT_SOURCES = \ TOKENS_FILES = \ cvc_tokens.h \ smt_tokens.h \ - smt2_tokens.h + smt2_tokens.h \ + tptp_tokens.h cvc_tokens.h: @srcdir@/../parser/cvc/Cvc.g $(AM_V_GEN)grep "'[a-zA-Z][a-zA-Z0-9][a-zA-Z0-9]*'" $^ | sed 's/.*'\''\([a-zA-Z0-9]*\)'\''.*/"\1",/' | sort -u >$@ @@ -61,6 +62,8 @@ smt_tokens.h: @srcdir@/../parser/smt/Smt.g $(AM_V_GEN)grep "'[a-zA-Z][a-zA-Z0-9][a-zA-Z0-9]*'" $^ | sed 's/.*'\''\([a-zA-Z0-9]*\)'\''.*/"\1",/' | sort -u >$@ smt2_tokens.h: @srcdir@/../parser/smt2/Smt2.g $(AM_V_GEN)grep "'[a-zA-Z][a-zA-Z0-9][a-zA-Z0-9]*'" $^ | sed 's/.*'\''\([a-zA-Z0-9]*\)'\''.*/"\1",/' | sort -u >$@ +tptp_tokens.h: @srcdir@/../parser/tptp/Tptp.g + $(AM_V_GEN)grep "'[a-zA-Z][a-zA-Z0-9][a-zA-Z0-9]*'" $^ | sed 's/.*'\''\([a-zA-Z0-9]*\)'\''.*/"\1",/' | sort -u >$@ clean-local: rm -f $(BUILT_SOURCES) diff --git a/src/main/driver.cpp b/src/main/driver.cpp index 742cba8d2..44457841d 100644 --- a/src/main/driver.cpp +++ b/src/main/driver.cpp @@ -182,6 +182,9 @@ int runCvc4(int argc, char* argv[], Options& options) { options.inputLanguage = language::input::LANG_SMTLIB_V2; } else if(len >= 4 && !strcmp(".smt", filename + len - 4)) { options.inputLanguage = language::input::LANG_SMTLIB; + } else if((len >= 2 && !strcmp(".p", filename + len - 2)) + || (len >= 5 && !strcmp(".tptp", filename + len - 5))) { + options.inputLanguage = language::input::LANG_TPTP; } else if(( len >= 4 && !strcmp(".cvc", filename + len - 4) ) || ( len >= 5 && !strcmp(".cvc4", filename + len - 5) )) { options.inputLanguage = language::input::LANG_CVC4; diff --git a/src/main/driver_portfolio.cpp b/src/main/driver_portfolio.cpp index 7972cd94d..a8da88173 100644 --- a/src/main/driver_portfolio.cpp +++ b/src/main/driver_portfolio.cpp @@ -261,6 +261,9 @@ int runCvc4(int argc, char *argv[], Options& options) { options.inputLanguage = language::input::LANG_SMTLIB_V2; } else if(len >= 4 && !strcmp(".smt", filename + len - 4)) { options.inputLanguage = language::input::LANG_SMTLIB; + } else if((len >= 2 && !strcmp(".p", filename + len - 2)) + || (len >= 5 && !strcmp(".tptp", filename + len - 5))) { + options.inputLanguage = language::input::LANG_TPTP; } else if(( len >= 4 && !strcmp(".cvc", filename + len - 4) ) || ( len >= 5 && !strcmp(".cvc4", filename + len - 5) )) { options.inputLanguage = language::input::LANG_CVC4; diff --git a/src/main/interactive_shell.cpp b/src/main/interactive_shell.cpp index d8e7df2f2..03a3a0ae3 100644 --- a/src/main/interactive_shell.cpp +++ b/src/main/interactive_shell.cpp @@ -72,6 +72,10 @@ static const char* const smt2_commands[] = { #include "main/smt2_tokens.h" };/* smt2_commands */ +static const char* const tptp_commands[] = { +#include "main/tptp_tokens.h" +};/* tptp_commands */ + static const char* const* commandsBegin; static const char* const* commandsEnd; @@ -110,6 +114,10 @@ InteractiveShell::InteractiveShell(ExprManager& exprManager, d_historyFilename = string(getenv("HOME")) + "/.cvc4_history_smtlib2"; commandsBegin = smt2_commands; commandsEnd = smt2_commands + sizeof(smt2_commands) / sizeof(*smt2_commands); + case output::LANG_TPTP: + d_historyFilename = string(getenv("HOME")) + "/.cvc4_history_tptp"; + commandsBegin = tptp_commands; + commandsEnd = tptp_commands + sizeof(tptp_commands) / sizeof(*tptp_commands); break; default: Unhandled(lang); } diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index f14941d01..01b4e359f 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -17,7 +17,7 @@ AM_CPPFLAGS = \ -I@srcdir@/../include -I@srcdir@/.. -I@builddir@/.. $(ANTLR_INCLUDES) AM_CXXFLAGS = -Wall -Wno-unknown-pragmas $(FLAG_VISIBILITY_HIDDEN) -SUBDIRS = smt smt2 cvc +SUBDIRS = smt smt2 cvc tptp lib_LTLIBRARIES = libcvc4parser.la if HAVE_CXXTESTGEN @@ -31,12 +31,14 @@ libcvc4parser_noinst_la_LDFLAGS = $(ANTLR_LDFLAGS) libcvc4parser_la_LIBADD = \ @builddir@/smt/libparsersmt.la \ @builddir@/smt2/libparsersmt2.la \ + @builddir@/tptp/libparsertptp.la \ @builddir@/cvc/libparsercvc.la \ @builddir@/../lib/libreplacements.la \ -L@builddir@/.. -lcvc4 libcvc4parser_noinst_la_LIBADD = \ @builddir@/smt/libparsersmt.la \ @builddir@/smt2/libparsersmt2.la \ + @builddir@/tptp/libparsertptp.la \ @builddir@/cvc/libparsercvc.la \ @builddir@/../lib/libreplacements.la \ @builddir@/../libcvc4_noinst.la diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index 7336dd084..5389f270f 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -34,6 +34,7 @@ #include "parser/cvc/cvc_input.h" #include "parser/smt/smt_input.h" #include "parser/smt2/smt2_input.h" +#include "parser/tptp/tptp_input.h" #include "util/output.h" #include "util/Assert.h" @@ -201,6 +202,10 @@ AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStre input = new Smt2Input(inputStream); break; + case LANG_TPTP: + input = new TptpInput(inputStream); + break; + default: Unhandled(lang); } diff --git a/src/parser/parser_builder.cpp b/src/parser/parser_builder.cpp index dff5b93ac..d9b7cf341 100644 --- a/src/parser/parser_builder.cpp +++ b/src/parser/parser_builder.cpp @@ -23,6 +23,7 @@ #include "parser/parser.h" #include "smt/smt.h" #include "smt2/smt2.h" +#include "tptp/tptp.h" #include "expr/expr_manager.h" #include "util/options.h" @@ -91,6 +92,9 @@ Parser* ParserBuilder::build() case language::input::LANG_SMTLIB_V2: parser = new Smt2(d_exprManager, input, d_strictMode, d_parseOnly); break; + case language::input::LANG_TPTP: + parser = new Tptp(d_exprManager, input, d_strictMode, d_parseOnly); + break; default: parser = new Parser(d_exprManager, input, d_strictMode, d_parseOnly); break; diff --git a/src/parser/tptp/Makefile b/src/parser/tptp/Makefile new file mode 100644 index 000000000..45cde5821 --- /dev/null +++ b/src/parser/tptp/Makefile @@ -0,0 +1,4 @@ +topdir = ../../.. +srcdir = src/parser/tptp + +include $(topdir)/Makefile.subdir diff --git a/src/parser/tptp/Makefile.am b/src/parser/tptp/Makefile.am new file mode 100644 index 000000000..4ec1717e9 --- /dev/null +++ b/src/parser/tptp/Makefile.am @@ -0,0 +1,61 @@ +AM_CPPFLAGS = \ + -D__BUILDING_CVC4PARSERLIB \ + -I@srcdir@/../../include -I@srcdir@/../.. -I@builddir@/../.. $(ANTLR_INCLUDES) +AM_CXXFLAGS = -Wall -Wno-unknown-pragmas $(FLAG_VISIBILITY_HIDDEN) -Wno-unused-function -Wno-unused-variable + +# Compile generated C files using C++ compiler +AM_CFLAGS = $(AM_CXXFLAGS) +CC=$(CXX) + +ANTLR_OPTS = + +# hide this included makefile from automake +@mk_include@ @srcdir@/../Makefile.antlr_tracing + +noinst_LTLIBRARIES = libparsertptp.la + +ANTLR_TOKEN_STUFF = \ + @srcdir@/generated/Tptp.tokens +ANTLR_LEXER_STUFF = \ + @srcdir@/generated/TptpLexer.h \ + @srcdir@/generated/TptpLexer.c \ + $(ANTLR_TOKEN_STUFF) +ANTLR_PARSER_STUFF = \ + @srcdir@/generated/TptpParser.h \ + @srcdir@/generated/TptpParser.c +ANTLR_STUFF = \ + $(ANTLR_LEXER_STUFF) \ + $(ANTLR_PARSER_STUFF) + +libparsertptp_la_SOURCES = \ + Tptp.g \ + tptp.h \ + tptp.cpp \ + tptp_input.h \ + tptp_input.cpp \ + $(ANTLR_STUFF) + +BUILT_SOURCES = $(ANTLR_STUFF) @srcdir@/stamp-generated + +EXTRA_DIST = @srcdir@/stamp-generated + +MAINTAINERCLEANFILES = $(ANTLR_STUFF) +maintainer-clean-local: + -$(AM_V_at)rmdir @srcdir@/generated + -$(AM_V_at)rm -f @srcdir@/stamp-generated + +@srcdir@/stamp-generated: + $(AM_V_at)mkdir -p @srcdir@/generated + $(AM_V_at)touch @srcdir@/stamp-generated + +# antlr doesn't overwrite output files, it just leaves them. So we have to delete them first. +@srcdir@/generated/TptpLexer.h: Tptp.g @srcdir@/stamp-generated + -$(AM_V_at)rm -f $(ANTLR_STUFF) + @if test -z "$(ANTLR)"; then echo "ERROR: antlr parser generator cannot be found, cannot generate the parser" >&2; exit 1; fi + $(AM_V_GEN)$(ANTLR) $(ANTLR_OPTS) -fo "@srcdir@/generated" "@srcdir@/Tptp.g" + +# These don't actually depend on TptpLexer.h, but if we're doing parallel +# make and the lexer needs to be rebuilt, we have to keep the rules +# from running in parallel (since the token files will be deleted & +# recreated) +@srcdir@/generated/TptpLexer.c @srcdir@/generated/TptpParser.h @srcdir@/generated/TptpParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/TptpLexer.h diff --git a/src/parser/tptp/Tptp.g b/src/parser/tptp/Tptp.g new file mode 100644 index 000000000..63774a087 --- /dev/null +++ b/src/parser/tptp/Tptp.g @@ -0,0 +1,637 @@ +/* ******************* */ +/*! \file Tptp.g + ** \verbatim + ** Original author: bobot + ** Major contributors: cconway mdeters + ** Minor contributors (to current version): taking + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Parser for TPTP input language. + ** + ** Parser for TPTP input language. + ** cf. http://www.cs.miami.edu/~tptp/cgi-bin/SeeTPTP?Category=Documents&File=SyntaxBNF + **/ + +grammar Tptp; + +options { + // C output for antlr + language = 'C'; + + // Skip the default error handling, just break with exceptions + // defaultErrorHandler = false; + + // Only lookahead of <= k requested (disable for LL* parsing) + // Note that CVC4's BoundedTokenBuffer requires a fixed k ! + // If you change this k, change it also in tptp_input.cpp ! + k = 1; +}/* options */ + +@header { +/** + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information. + **/ +}/* @header */ + +@lexer::includes { + +/** This suppresses warnings about the redefinition of token symbols between + * different parsers. The redefinitions should be harmless as long as no + * client: (a) #include's the lexer headers for two grammars AND (b) uses the + * token symbol definitions. + */ +#pragma GCC system_header + +/* This improves performance by ~10 percent on big inputs. + * This option is only valid if we know the input is ASCII (or some 8-bit encoding). + * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16. + * Otherwise, we have to let the lexer detect the encoding at runtime. + */ +#define ANTLR3_INLINE_INPUT_ASCII + +#include "parser/antlr_tracing.h" + +}/* @lexer::includes */ + +@lexer::postinclude { +#include + +#include "parser/tptp/tptp.h" +#include "parser/antlr_input.h" + +using namespace CVC4; +using namespace CVC4::parser; + +/* These need to be macros so they can refer to the PARSER macro, which will be defined + * by ANTLR *after* this section. (If they were functions, PARSER would be undefined.) */ +#undef PARSER_STATE +#define PARSER_STATE ((Tptp*)LEXER->super) +#undef EXPR_MANAGER +#define EXPR_MANAGER PARSER_STATE->getExprManager() +#undef MK_EXPR +#define MK_EXPR EXPR_MANAGER->mkExpr +#undef MK_CONST +#define MK_CONST EXPR_MANAGER->mkConst +#define UNSUPPORTED PARSER_STATE->unimplementedFeature + +}/* @lexer::postinclude */ + +@parser::includes { +#include "expr/command.h" +#include "parser/parser.h" +#include "parser/tptp/tptp.h" +#include "parser/antlr_tracing.h" + +}/* @parser::includes */ + +@parser::postinclude { + +#include "expr/expr.h" +#include "expr/kind.h" +#include "expr/type.h" +#include "parser/antlr_input.h" +#include "parser/parser.h" +#include "parser/tptp/tptp.h" +#include "util/integer.h" +#include "util/output.h" +#include "util/rational.h" +#include + +using namespace CVC4; +using namespace CVC4::parser; + +/* These need to be macros so they can refer to the PARSER macro, which will be defined + * by ANTLR *after* this section. (If they were functions, PARSER would be undefined.) */ +#undef PARSER_STATE +#define PARSER_STATE ((Tptp*)PARSER->super) +#undef EXPR_MANAGER +#define EXPR_MANAGER PARSER_STATE->getExprManager() +#undef MK_EXPR +#define MK_EXPR EXPR_MANAGER->mkExpr +#undef MK_CONST +#define MK_CONST EXPR_MANAGER->mkConst +#define UNSUPPORTED PARSER_STATE->unimplementedFeature + + + +}/* parser::postinclude */ + +/** + * Parses an expression. + * @return the parsed expression, or the Null Expr if we've reached the end of the input + */ +parseExpr returns [CVC4::parser::tptp::myExpr expr] + : cnfFormula[expr] + | EOF + ; + +/** + * Parses a command + * @return the parsed command, or NULL if we've reached the end of the input + */ +parseCommand returns [CVC4::Command* cmd = NULL] +@declarations { + Expr expr; + Tptp::FormulaRole fr; + std::string name; + std::string incl_args; +} +// : LPAREN_TOK c = command RPAREN_TOK { $cmd = c; } + : CNF_TOK LPAREN_TOK nameN[name] COMMA_TOK formulaRole[fr] COMMA_TOK + { PARSER_STATE->cnf=true; PARSER_STATE->pushScope(); } + cnfFormula[expr] + { PARSER_STATE->popScope(); + std::vector bvl = PARSER_STATE->getFreeVar(); + if(!bvl.empty()){ + expr = MK_EXPR(kind::FORALL,MK_EXPR(kind::BOUND_VAR_LIST,bvl),expr); + }; + } + (COMMA_TOK anything*)? RPAREN_TOK DOT_TOK + { + cmd = PARSER_STATE->makeCommand(fr,expr); + } + | FOF_TOK LPAREN_TOK nameN[name] COMMA_TOK formulaRole[fr] COMMA_TOK + { PARSER_STATE->cnf=false; } + fofFormula[expr] (COMMA_TOK anything*)? RPAREN_TOK DOT_TOK + { + cmd = PARSER_STATE->makeCommand(fr,expr); + } + | INCLUDE_TOK LPAREN_TOK unquotedFileName[name] + ( COMMA_TOK LBRACK_TOK nameN[incl_args] ( COMMA_TOK nameN[incl_args] )* RBRACK_TOK )? + RPAREN_TOK DOT_TOK + { + PARSER_STATE->includeFile(name); + // The command of the included file will be produce at the new parseCommand call + cmd = new EmptyCommand("include::" + name); + } + | EOF + { + PARSER_STATE->preemptCommand(new CheckSatCommand(MK_CONST(bool(true)))); + cmd = NULL; + } + ; + +/* Parse a formula Role */ +formulaRole[CVC4::parser::Tptp::FormulaRole & role] + : LOWER_WORD + { + std::string r = AntlrInput::tokenText($LOWER_WORD); + if (r == "axiom") role = Tptp::FR_AXIOM; + else if (r == "hypothesis") role = Tptp::FR_HYPOTHESIS; + else if (r == "definition") role = Tptp::FR_DEFINITION; + else if (r == "assumption") role = Tptp::FR_ASSUMPTION; + else if (r == "lemma") role = Tptp::FR_LEMMA; + else if (r == "theorem") role = Tptp::FR_THEOREM; + else if (r == "negated_conjecture") role = Tptp::FR_NEGATED_CONJECTURE; + else if (r == "conjecture") role = Tptp::FR_CONJECTURE; + else if (r == "unknown") role = Tptp::FR_UNKNOWN; + else if (r == "plain") role = Tptp::FR_PLAIN; + else if (r == "fi_domain") role = Tptp::FR_FI_DOMAIN; + else if (r == "fi_functor") role = Tptp::FR_FI_FUNCTORS; + else if (r == "fi_predicate") role = Tptp::FR_FI_PREDICATES; + else if (r == "type") role = Tptp::FR_TYPE; + else PARSER_STATE->parseError("Invalid formula role: " + r); + } + ; + +/*******/ +/* CNF */ + +/* It can parse a little more than the cnf grammar: false and true can appear. + Normally only false can appear and only at top level +*/ + +cnfFormula[CVC4::Expr& expr] + : + LPAREN_TOK disjunction[expr] RPAREN_TOK +| disjunction[expr] +//| FALSE_TOK { expr = MK_CONST(bool(false)); } +; + +disjunction[CVC4::Expr& expr] +@declarations { + std::vector args; +} + : + literal[expr] {args.push_back(expr); } ( OR_TOK literal[expr] {args.push_back(expr); } )* + { + if(args.size() > 1){ + expr = MK_EXPR(kind::OR,args); + } // else its already in the expr + } +; + +literal[CVC4::Expr& expr] + : atomicFormula[expr] + | NOT_TOK atomicFormula[expr] { expr = MK_EXPR(kind::NOT,expr); } +// | folInfixUnary[expr] + ; + +atomicFormula[CVC4::Expr& expr] +@declarations { + Expr expr2; + std::string name; + std::vector args; + bool equal; +} + : atomicWord[name] (LPAREN_TOK arguments[args] RPAREN_TOK)? + ( ( equalOp[equal] //equality/disequality between terms + { + PARSER_STATE->makeApplication(expr,name,args,true); + } + term[expr2] + { + expr = MK_EXPR(kind::EQUAL, expr, expr2); + if(!equal) expr = MK_EXPR(kind::NOT,expr); + } + ) + | //predicate + { + PARSER_STATE->makeApplication(expr,name,args,false); + } + ) + | simpleTerm[expr] equalOp[equal] term[expr2] + { + expr = MK_EXPR(kind::EQUAL, expr, expr2); + if(!equal) expr = MK_EXPR(kind::NOT,expr); + } + | definedProp[expr] +; +//%----Using removes a reduce/reduce ambiguity in lex/yacc. +//%----Note: "defined" means a word starting with one $ and "system" means $$. + +definedProp[CVC4::Expr& expr] + : TRUE_TOK { expr = MK_CONST(bool(true)); } + | FALSE_TOK { expr = MK_CONST(bool(false)); } + ; +//%----Pure CNF should not use $true or $false in problems, and use $false only +//%----at the roots of a refutation. + +equalOp[bool & equal] + : EQUAL_TOK {equal = true;} + | DISEQUAL_TOK {equal = false;} + ; + +term[CVC4::Expr& expr] + : functionTerm[expr] + | simpleTerm[expr] +// | conditionalTerm +// | let_term + ; + +/* Not an application */ +simpleTerm[CVC4::Expr& expr] + : variable[expr] + | NUMBER { expr = PARSER_STATE->d_tmp_expr; } + | DISTINCT_OBJECT { expr = PARSER_STATE->convertStrToUnsorted( + MK_CONST(AntlrInput::tokenText($DISTINCT_OBJECT))); } +; + +functionTerm[CVC4::Expr& expr] + : plainTerm[expr] // | | + ; + +plainTerm[CVC4::Expr& expr] +@declarations{ + std::string name; + std::vector args; +} + : atomicWord[name] (LPAREN_TOK arguments[args] RPAREN_TOK)? + { + PARSER_STATE->makeApplication(expr,name,args,true); + } + ; + +arguments[std::vector & args] +@declarations{ + Expr expr; +} + : + term[expr] { args.push_back(expr); } ( COMMA_TOK term[expr] { args.push_back(expr); } )* + ; + +variable[CVC4::Expr & expr] + : UPPER_WORD + { + std::string name = AntlrInput::tokenText($UPPER_WORD); + if(!PARSER_STATE->cnf || PARSER_STATE->isDeclared(name)){ + expr = PARSER_STATE->getVariable(name); + } else { + expr = PARSER_STATE->mkVar(name, PARSER_STATE->d_unsorted); + if(PARSER_STATE->cnf) PARSER_STATE->addFreeVar(expr); + } + } + ; + +/*******/ +/* FOF */ +fofFormula[CVC4::Expr & expr] : fofLogicFormula[expr] ; + +fofLogicFormula[CVC4::Expr & expr] +@declarations{ + tptp::NonAssoc na; + std::vector< Expr > args; + Expr expr2; +} + : fofUnitaryFormula[expr] + ( //Non Assoc <=> <~> ~& ~| + ( fofBinaryNonAssoc[na] fofUnitaryFormula[expr2] + { switch(na){ + case tptp::NA_IFF: + expr = MK_EXPR(kind::IFF,expr,expr2); + break; + case tptp::NA_REVIFF: + expr = MK_EXPR(kind::XOR,expr,expr2); + break; + case tptp::NA_IMPLIES: + expr = MK_EXPR(kind::IMPLIES,expr,expr2); + break; + case tptp::NA_REVIMPLIES: + expr = MK_EXPR(kind::IMPLIES,expr2,expr); + break; + case tptp::NA_REVOR: + expr = MK_EXPR(kind::NOT,MK_EXPR(kind::OR,expr,expr2)); + break; + case tptp::NA_REVAND: + expr = MK_EXPR(kind::NOT,MK_EXPR(kind::AND,expr,expr2)); + break; + } + } + ) + | //And & + ( { args.push_back(expr); } + ( AND_TOK fofUnitaryFormula[expr] { args.push_back(expr); } )+ + { expr = MK_EXPR(kind::AND,args); } + ) + | //Or | + ( { args.push_back(expr); } + ( OR_TOK fofUnitaryFormula[expr] { args.push_back(expr); } )+ + { expr = MK_EXPR(kind::OR,args); } + ) + )? + ; + +fofUnitaryFormula[CVC4::Expr & expr] +@declarations{ + Kind kind; + std::vector< Expr > bv; +} + : atomicFormula[expr] + | LPAREN_TOK fofLogicFormula[expr] RPAREN_TOK + | NOT_TOK fofUnitaryFormula[expr] { expr = MK_EXPR(kind::NOT,expr); } + | // Quantified + folQuantifier[kind] LBRACK_TOK {PARSER_STATE->pushScope();} + ( bindvariable[expr] { bv.push_back(expr); } + ( COMMA_TOK bindvariable[expr] { bv.push_back(expr); } )* ) RBRACK_TOK + COLON_TOK fofUnitaryFormula[expr] + { PARSER_STATE->popScope(); + expr = MK_EXPR(kind, MK_EXPR(kind::BOUND_VAR_LIST, bv), expr); + } ; + +bindvariable[CVC4::Expr & expr] + : UPPER_WORD + { + std::string name = AntlrInput::tokenText($UPPER_WORD); + expr = PARSER_STATE->mkVar(name, PARSER_STATE->d_unsorted); + } + ; + +fofBinaryNonAssoc[CVC4::parser::tptp::NonAssoc & na] + : IFF_TOK { na = tptp::NA_IFF; } + | REVIFF_TOK { na = tptp::NA_REVIFF; } + | REVOR_TOK { na = tptp::NA_REVOR; } + | REVAND_TOK { na = tptp::NA_REVAND; } + | IMPLIES_TOK { na = tptp::NA_IMPLIES; } + | REVIMPLIES_TOK { na = tptp::NA_REVIMPLIES; } + ; + +folQuantifier[CVC4::Kind & kind] + : BANG_TOK { kind = kind::FORALL; } + | MARK_TOK { kind = kind::EXISTS; } + ; + +/***********************************************/ +/* Anything well parenthesis */ + +anything + : LPAREN_TOK anything* RPAREN_TOK + | LBRACK_TOK anything* RBRACK_TOK + | COMMA_TOK + | DOT_TOK + | COLON_TOK + | OR_TOK + | NOT_TOK + | BANG_TOK + | MARK_TOK + | AND_TOK + | IFF_TOK + | IMPLIES_TOK + | REVIMPLIES_TOK + | REVIFF_TOK + | REVOR_TOK + | REVAND_TOK + | TIMES_TOK + | PLUS_TOK + | MINUS_TOK + | TRUE_TOK + | FALSE_TOK + | EQUAL_TOK + | DISEQUAL_TOK + | CNF_TOK + | FOF_TOK + | THF_TOK + | TFF_TOK + | INCLUDE_TOK + | DISTINCT_OBJECT + | UPPER_WORD + | LOWER_WORD + | LOWER_WORD_SINGLE_QUOTED + | SINGLE_QUOTED + | NUMBER + | DEFINED_SYMBOL + ; +/*********/ + +//punctuation +COMMA_TOK : ','; +DOT_TOK : '.'; +LPAREN_TOK : '('; +RPAREN_TOK : ')'; +LBRACK_TOK : '['; +RBRACK_TOK : ']'; +COLON_TOK : ':'; + +//operator +OR_TOK : '|'; +NOT_TOK : '~'; +BANG_TOK : '!'; +MARK_TOK : '?'; +AND_TOK : '&'; +IFF_TOK : '<=>'; +IMPLIES_TOK : '=>'; +REVIMPLIES_TOK : '<='; +REVIFF_TOK : '<~>'; +REVOR_TOK : '~|'; +REVAND_TOK : '~&'; +TIMES_TOK : '*'; +PLUS_TOK : '+'; +MINUS_TOK : '-'; + +//predicate +TRUE_TOK : '$true'; +FALSE_TOK : '$false'; +EQUAL_TOK : '='; +DISEQUAL_TOK : '!='; + +//KEYWORD +CNF_TOK : 'cnf'; +FOF_TOK : 'fof'; +THF_TOK : 'thf'; +TFF_TOK : 'tff'; +INCLUDE_TOK : 'include'; + +//Other defined symbols, must be defined after all the other +DEFINED_SYMBOL : '$' LOWER_WORD; + +/*********/ +/* Token */ + +/* + * Matches and skips whitespace in the input. + */ + +WHITESPACE + : (' ' | '\t' | '\f' | '\r' | '\n')+ { SKIP(); } + ; + + +/** + * Matches a double or single quoted string literal. Escaping is supported, and + * escape character '\' has to be escaped. + */ +DISTINCT_OBJECT : '"' (DO_CHAR)* '"' ; +fragment DO_CHAR : ' '..'!'| '#'..'[' | ']'..'~' | '\\"' | '\\\\'; + +//The order of this two rules is important +LOWER_WORD_SINGLE_QUOTED : '\'' LOWER_WORD '\'' ; +SINGLE_QUOTED : '\'' (SQ_CHAR)* '\'' ; + +fragment SQ_CHAR : ' '..'&' | '('..'[' | ']'..'~' | '\\\'' | '\\\\'; + +/* Define upper (variable) and lower (everything else) word */ +fragment NUMERIC : '0'..'9'; +fragment LOWER_ALPHA : 'a'..'z'; +fragment UPPER_ALPHA : 'A'..'Z'; +fragment ALPHA_NUMERIC : LOWER_ALPHA | UPPER_ALPHA | NUMERIC | '_'; +UPPER_WORD : UPPER_ALPHA ALPHA_NUMERIC*; +LOWER_WORD : LOWER_ALPHA ALPHA_NUMERIC*; + +/* filename */ +unquotedFileName[std::string & name] /* Beware fileName identifier is used by the backend ... */ + : (s=LOWER_WORD_SINGLE_QUOTED | s=SINGLE_QUOTED) + { name = AntlrInput::tokenText($s); + name = name.substr(1, name.size() - 2 ); + }; + +/* axiom name */ +nameN[std::string & name] + : atomicWord[name] + | NUMBER { name = AntlrInput::tokenText($NUMBER); } + ; + +/* atomic word everything (fof, cnf, thf, tff, include must not be keyword at this position ) */ +atomicWord[std::string & name] + : FOF_TOK { name = "fof"; } + | CNF_TOK { name = "cnf"; } + | THF_TOK { name = "thf"; } + | TFF_TOK { name = "tff"; } + | INCLUDE_TOK { name = "include"; } + | LOWER_WORD { name = AntlrInput::tokenText($LOWER_WORD); } + | LOWER_WORD_SINGLE_QUOTED //the lower word single quoted are considered without quote + { + /* strip off the quotes */ + name = AntlrInput::tokenTextSubstr($LOWER_WORD_SINGLE_QUOTED,1, + ($LOWER_WORD_SINGLE_QUOTED->stop - $LOWER_WORD_SINGLE_QUOTED->start) - 1); + } + | SINGLE_QUOTED {name = AntlrInput::tokenText($SINGLE_QUOTED); }; //for the others the quote remains + +/** I don't understand how is made the difference between rational and real in SyntaxBNF. So I put all in rational */ +/* Rational */ + +fragment DOT : '.'; +fragment EXPONENT : 'E'|'e'; +fragment SLASH : '/'; + +fragment DECIMAL : NUMERIC+; + +/* Currently we put all in the rationnal type */ +fragment SIGN[bool & pos] : PLUS_TOK | MINUS_TOK { pos = false; } ; + + +NUMBER +@declarations{ + bool pos = true; + bool posE = true; +} + : + ( SIGN[pos]? num=DECIMAL + { + Integer i (AntlrInput::tokenText($num)); + Rational r = ( pos ? i : -i ); + PARSER_STATE->d_tmp_expr = MK_CONST(r); + } + | SIGN[pos]? num=DECIMAL DOT den=DECIMAL (EXPONENT SIGN[posE]? e=DECIMAL)? + { + std::string snum = AntlrInput::tokenText($num); + std::string sden = AntlrInput::tokenText($den); + /* compute the numerator */ + Integer inum( snum + sden ); + // The sign + inum = pos ? inum : - inum; + // The exponent + size_t exp = ($e == NULL ? 0 : AntlrInput::tokenToUnsigned($e)); + // Decimal part + size_t dec = sden.size(); + /* multiply it by 10 raised to the exponent reduced by the + * number of decimal place in den (dec) */ + Rational r; + if( !posE ) r = Rational(inum, Integer(10).pow(exp + dec)); + else if( exp == dec ) r = Rational(inum); + else if( exp > dec ) r = Rational(inum * Integer(10).pow(exp - dec)); + else r = Rational(inum, Integer(10).pow(dec - exp)); + PARSER_STATE->d_tmp_expr = MK_CONST( r ); + } + | SIGN[pos]? num=DECIMAL SLASH den=DECIMAL + { + Integer inum(AntlrInput::tokenText($num)); + Integer iden(AntlrInput::tokenText($den)); + PARSER_STATE->d_tmp_expr = MK_CONST( + Rational(pos ? inum : -inum, iden)); + } + ) { + //Put a convertion around it + PARSER_STATE->d_tmp_expr = PARSER_STATE->convertRatToUnsorted( PARSER_STATE->d_tmp_expr ); + } + ; + + +/** + * Matches the comments and ignores them + */ +COMMENT + : '%' (~('\n' | '\r'))* { SKIP(); } //comment line + | '/*' ( options {greedy=false;} : . )* '*/' { SKIP(); } //comment block + ; + + + diff --git a/src/parser/tptp/tptp.cpp b/src/parser/tptp/tptp.cpp new file mode 100644 index 000000000..065c62824 --- /dev/null +++ b/src/parser/tptp/tptp.cpp @@ -0,0 +1,162 @@ +/********************* */ +/*! \file tptp.cpp + ** \verbatim + ** Original author: cconway + ** Major contributors: mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Definitions of TPTP constants. + ** + ** Definitions of TPTP constants. + **/ + +#include "expr/type.h" +#include "parser/parser.h" +#include "parser/smt/smt.h" +#include "parser/tptp/tptp.h" +#include "parser/antlr_input.h" + +namespace CVC4 { +namespace parser { + +Tptp::Tptp(ExprManager* exprManager, Input* input, bool strictMode, bool parseOnly) : + Parser(exprManager,input,strictMode,parseOnly) { + addTheory(Tptp::THEORY_CORE); + + /* Try to find TPTP dir */ + // From tptp4x FileUtilities + //----Try the TPTP directory, and TPTP variations + char* home = getenv("TPTP"); + if (home == NULL) { + home = getenv("TPTP_HOME"); +// //----If no TPTP_HOME, try the tptp user (aaargh) +// if (home == NULL && (PasswdEntry = getpwnam("tptp")) != NULL) { +// home = PasswdEntry->pw_dir; +// } +//----Now look in the TPTP directory from there + if (home != NULL) { + d_tptpDir = home; + d_tptpDir.append("/TPTP/"); + } + } else { + d_tptpDir = home; + //add trailing "/" + if( d_tptpDir[d_tptpDir.size() - 1] != '/'){ + d_tptpDir.append("/"); + } + } +} + +void Tptp::addTheory(Theory theory) { + ExprManager * em = getExprManager(); + switch(theory) { + case THEORY_CORE: + //TPTP (CNF and FOF) is unsorted so we define this common type + { + std::string d_unsorted_name = "$$unsorted"; + d_unsorted = em->mkSort(d_unsorted_name); + preemptCommand( new DeclareTypeCommand(d_unsorted_name, 0, d_unsorted) ); + } + // propositionnal + defineType("Bool", em->booleanType()); + defineVar("$true", em->mkConst(true)); + defineVar("$false", em->mkConst(false)); + addOperator(kind::AND); + addOperator(kind::EQUAL); + addOperator(kind::IFF); + addOperator(kind::IMPLIES); + //addOperator(kind::ITE); //only for tff thf + addOperator(kind::NOT); + addOperator(kind::OR); + addOperator(kind::XOR); + addOperator(kind::APPLY_UF); + //Add quantifiers? + break; + + default: + Unhandled(theory); + } +} + + +/* The include are managed in the lexer but called in the parser */ +// Inspired by http://www.antlr.org/api/C/interop.html + +bool newInputStream(std::string fileName, pANTLR3_LEXER lexer){ + Debug("parser") << "Including " << fileName << std::endl; + // Create a new input stream and take advantage of built in stream stacking + // in C target runtime. + // + pANTLR3_INPUT_STREAM in; +#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM + in = antlr3AsciiFileStreamNew((pANTLR3_UINT8) fileName.c_str()); +#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ + in = antlr3FileStreamNew((pANTLR3_UINT8) fileName.c_str(), ANTLR3_ENC_8BIT); +#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ + if( in == NULL ) { + Debug("parser") << "Can't open " << fileName << std::endl; + return false; + } + // Samething than the predefined PUSHSTREAM(in); + lexer->pushCharStream(lexer,in); + // restart it + //lexer->rec->state->tokenStartCharIndex = -10; + //lexer->emit(lexer); + + // Note that the input stream is not closed when it EOFs, I don't bother + // to do it here, but it is up to you to track streams created like this + // and destroy them when the whole parse session is complete. Remember that you + // don't want to do this until all tokens have been manipulated all the way through + // your tree parsers etc as the token does not store the text it just refers + // back to the input stream and trying to get the text for it will abort if you + // close the input stream too early. + // + + //TODO what said before + return true; +} + + +void Tptp::includeFile(std::string fileName){ + // Get the lexer + AntlrInput * ai = static_cast(getInput()); + pANTLR3_LEXER lexer = ai->getAntlr3Lexer(); + // get the name of the current stream "Does it work inside an include?" + const std::string inputName = ai->getInputStreamName(); + + // Test in the directory of the actual parsed file + std::string currentDirFileName; + if( inputName != ""){ + // TODO: Use dirname ot Boost::filesystem? + size_t pos = inputName.rfind('/'); + if( pos != std::string::npos){ + currentDirFileName = std::string(inputName, 0, pos + 1); + } + currentDirFileName.append(fileName); + if( newInputStream(currentDirFileName,lexer) ){ + return; + } + } else { + currentDirFileName = ""; + } + + if( d_tptpDir.empty() ){ + parseError("Couldn't open included file: " + fileName + + " at " + currentDirFileName + " and the TPTP directory is not specified (environnement variable TPTP)"); + }; + + std::string tptpDirFileName = d_tptpDir + fileName; + if( !newInputStream(tptpDirFileName,lexer) ){ + parseError("Couldn't open included file: " + fileName + + " at " + currentDirFileName + " or " + tptpDirFileName); + } +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/tptp/tptp.h b/src/parser/tptp/tptp.h new file mode 100644 index 000000000..e6231920d --- /dev/null +++ b/src/parser/tptp/tptp.h @@ -0,0 +1,260 @@ +/********************* */ +/*! \file tptp.h + ** \verbatim + ** Original author: cconway + ** Major contributors: mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Definitions of TPTP constants. + ** + ** Definitions of TPTP constants. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__TPTP_H +#define __CVC4__PARSER__TPTP_H + +#include "parser/parser.h" +#include "parser/smt/smt.h" +#include "expr/command.h" +#include + +namespace CVC4 { + +class SExpr; + +namespace parser { + +class Tptp : public Parser { + friend class ParserBuilder; + + // In CNF variable are implicitly binded + // d_freevar collect them + std::vector< Expr > d_freeVar; + Expr d_rtu_op; + Expr d_stu_op; + Expr d_utr_op; + Expr d_uts_op; + // The set of expression that already have a bridge + std::hash_set d_r_converted; + std::hash_set d_s_converted; + + //TPTP directory where to find includes + // empty if none could be determined + std::string d_tptpDir; + +public: + bool cnf; //in a cnf formula + + void addFreeVar(Expr var){Assert(cnf); d_freeVar.push_back(var); }; + std::vector< Expr > getFreeVar(){ + Assert(cnf); + std::vector< Expr > r; + r.swap(d_freeVar); + return r; + } + + inline Expr convertRatToUnsorted(Expr expr){ + ExprManager * em = getExprManager(); + + // Create the conversion function If they doesn't exists + if(d_rtu_op.isNull()){ + Type t; + //Conversion from rational to unsorted + t = em->mkFunctionType(em->realType(), d_unsorted); + d_rtu_op = em->mkVar("$$rtu",t); + preemptCommand(new DeclareFunctionCommand("$$rtu", t)); + //Conversion from unsorted to rational + t = em->mkFunctionType(d_unsorted, em->realType()); + d_utr_op = em->mkVar("$$utr",t); + preemptCommand(new DeclareFunctionCommand("$$utur", t)); + } + // Add the inverse in order to show that over the elements that + // appear in the problem there is a bijection between unsorted and + // rational + Expr ret = em->mkExpr(kind::APPLY_UF,d_rtu_op,expr); + if ( d_r_converted.find(expr) == d_r_converted.end() ){ + d_r_converted.insert(expr); + Expr eq = em->mkExpr(kind::EQUAL,expr, + em->mkExpr(kind::APPLY_UF,d_utr_op,ret)); + preemptCommand(new AssertCommand(eq)); + }; + return ret; + } + + inline Expr convertStrToUnsorted(Expr expr){ + ExprManager * em = getExprManager(); + + // Create the conversion function If they doesn't exists + if(d_stu_op.isNull()){ + Type t; + //Conversion from string to unsorted + t = em->mkFunctionType(em->stringType(), d_unsorted); + d_stu_op = em->mkVar("$$stu",t); + preemptCommand(new DeclareFunctionCommand("$$stu", t)); + //Conversion from unsorted to string + t = em->mkFunctionType(d_unsorted, em->stringType()); + d_uts_op = em->mkVar("$$uts",t); + preemptCommand(new DeclareFunctionCommand("$$uts", t)); + } + // Add the inverse in order to show that over the elements that + // appear in the problem there is a bijection between unsorted and + // rational + Expr ret = em->mkExpr(kind::APPLY_UF,d_stu_op,expr); + if ( d_s_converted.find(expr) == d_s_converted.end() ){ + d_s_converted.insert(expr); + Expr eq = em->mkExpr(kind::EQUAL,expr, + em->mkExpr(kind::APPLY_UF,d_uts_op,ret)); + preemptCommand(new AssertCommand(eq)); + }; + return ret; + } + +public: + + //TPTP (CNF and FOF) is unsorted so we define this common type + Type d_unsorted; + + enum Theory { + THEORY_CORE, + }; + + enum FormulaRole { + FR_AXIOM, + FR_HYPOTHESIS, + FR_DEFINITION, + FR_ASSUMPTION, + FR_LEMMA, + FR_THEOREM, + FR_CONJECTURE, + FR_NEGATED_CONJECTURE, + FR_UNKNOWN, + FR_PLAIN, + FR_FI_DOMAIN, + FR_FI_FUNCTORS, + FR_FI_PREDICATES, + FR_TYPE, + }; + + +protected: + Tptp(ExprManager* exprManager, Input* input, bool strictMode = false, bool parseOnly = false); + +public: + /** + * Add theory symbols to the parser state. + * + * @param theory the theory to open (e.g., Core, Ints) + */ + void addTheory(Theory theory); + + inline void makeApplication(Expr & expr, std::string & name, + std::vector & args, bool term); + + inline Command* makeCommand(FormulaRole fr, Expr & expr); + + /** Ugly hack because I don't know how to return an expression from a + token */ + Expr d_tmp_expr; + + /** Push a new stream in the lexer. When EOF is reached the previous stream + is reused */ + void includeFile(std::string fileName); + +private: + + void addArithmeticOperators(); +};/* class Tptp */ + +inline void Tptp::makeApplication(Expr & expr, std::string & name, + std::vector & args, bool term){ + // We distinguish the symbols according to their arities + std::stringstream ss; + ss << name << "_" << args.size(); + name = ss.str(); + if(args.empty()){ // Its a constant + if(isDeclared(name)){ //already appeared + expr = getVariable(name); + } else { + Type t = term ? d_unsorted : getExprManager()->booleanType(); + expr = mkVar(name,t,true); //levelZero + preemptCommand(new DeclareFunctionCommand(name, t)); + } + } else { // Its an application + if(isDeclared(name)){ //already appeared + expr = getVariable(name); + } else { + std::vector sorts(args.size(), d_unsorted); + Type t = term ? d_unsorted : getExprManager()->booleanType(); + t = getExprManager()->mkFunctionType(sorts, t); + expr = mkVar(name,t,true); //levelZero + preemptCommand(new DeclareFunctionCommand(name, t)); + } + expr = getExprManager()->mkExpr(kind::APPLY_UF, expr, args); + } +}; + +inline Command* Tptp::makeCommand(FormulaRole fr, Expr & expr){ + switch(fr){ + case FR_AXIOM: + case FR_HYPOTHESIS: + case FR_DEFINITION: + case FR_ASSUMPTION: + case FR_LEMMA: + case FR_THEOREM: + case FR_NEGATED_CONJECTURE: + case FR_PLAIN: + // it's a usual assert + return new AssertCommand(expr); + break; + case FR_CONJECTURE: + // something to prove + return new AssertCommand(getExprManager()->mkExpr(kind::NOT,expr)); + break; + case FR_UNKNOWN: + case FR_FI_DOMAIN: + case FR_FI_FUNCTORS: + case FR_FI_PREDICATES: + case FR_TYPE: + return new EmptyCommand("Untreated role"); + break; + default: + Unreachable("fr",fr); + }; +}; + +namespace tptp { +/** + * Just exists to provide the uintptr_t constructor that ANTLR + * requires. + */ +struct myExpr : public CVC4::Expr { + myExpr() : CVC4::Expr() {} + myExpr(void*) : CVC4::Expr() {} + myExpr(const Expr& e) : CVC4::Expr(e) {} + myExpr(const myExpr& e) : CVC4::Expr(e) {} +};/* struct myExpr */ + +enum NonAssoc { + NA_IFF, + NA_IMPLIES, + NA_REVIMPLIES, + NA_REVIFF, + NA_REVOR, + NA_REVAND, +}; + +}/* CVC4::parser::tptp namespace */ + + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__TPTP_INPUT_H */ diff --git a/src/parser/tptp/tptp_input.cpp b/src/parser/tptp/tptp_input.cpp new file mode 100644 index 000000000..689f13a8b --- /dev/null +++ b/src/parser/tptp/tptp_input.cpp @@ -0,0 +1,74 @@ +/********************* */ +/*! \file tptp_input.cpp + ** \verbatim + ** Original author: cconway + ** Major contributors: mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief [[ Add file-specific comments here ]]. + ** + ** [[ Add file-specific comments here ]] + **/ + +#include + +#include "parser/tptp/tptp_input.h" +#include "expr/expr_manager.h" +#include "parser/input.h" +#include "parser/parser.h" +#include "parser/parser_exception.h" +#include "parser/tptp/tptp.h" +#include "parser/tptp/generated/TptpLexer.h" +#include "parser/tptp/generated/TptpParser.h" + +namespace CVC4 { +namespace parser { + +/* Use lookahead=1 */ +TptpInput::TptpInput(AntlrInputStream& inputStream) : + AntlrInput(inputStream, 1) { + pANTLR3_INPUT_STREAM input = inputStream.getAntlr3InputStream(); + AlwaysAssert( input != NULL ); + + d_pTptpLexer = TptpLexerNew(input); + if( d_pTptpLexer == NULL ) { + throw ParserException("Failed to create TPTP lexer."); + } + + setAntlr3Lexer( d_pTptpLexer->pLexer ); + + pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream(); + AlwaysAssert( tokenStream != NULL ); + + d_pTptpParser = TptpParserNew(tokenStream); + if( d_pTptpParser == NULL ) { + throw ParserException("Failed to create TPTP parser."); + } + + setAntlr3Parser(d_pTptpParser->pParser); +} + + +TptpInput::~TptpInput() { + d_pTptpLexer->free(d_pTptpLexer); + d_pTptpParser->free(d_pTptpParser); +} + +Command* TptpInput::parseCommand() + throw (ParserException, TypeCheckingException, AssertionException) { + return d_pTptpParser->parseCommand(d_pTptpParser); +} + +Expr TptpInput::parseExpr() + throw (ParserException, TypeCheckingException, AssertionException) { + return d_pTptpParser->parseExpr(d_pTptpParser); +} + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ diff --git a/src/parser/tptp/tptp_input.h b/src/parser/tptp/tptp_input.h new file mode 100644 index 000000000..7977117d0 --- /dev/null +++ b/src/parser/tptp/tptp_input.h @@ -0,0 +1,97 @@ +/********************* */ +/*! \file tptp_input.h + ** \verbatim + ** Original author: cconway + ** Major contributors: mdeters + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief [[ Add file-specific comments here ]]. + ** + ** [[ Add file-specific comments here ]] + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__TPTP_INPUT_H +#define __CVC4__PARSER__TPTP_INPUT_H + +#include "parser/antlr_input.h" +#include "parser/tptp/generated/TptpLexer.h" +#include "parser/tptp/generated/TptpParser.h" + +// extern void TptpParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser); + +namespace CVC4 { + +class Command; +class Expr; +class ExprManager; + +namespace parser { + +class Tptp; + +class TptpInput : public AntlrInput { + typedef AntlrInput super; + + /** The ANTLR3 TPTP lexer for the input. */ + pTptpLexer d_pTptpLexer; + + /** The ANTLR3 CVC parser for the input. */ + pTptpParser d_pTptpParser; + + /** + * Initialize the class. Called from the constructors once the input + * stream is initialized. + */ + void init(); + +public: + + /** + * Create an input. + * + * @param inputStream the input stream to use + */ + TptpInput(AntlrInputStream& inputStream); + + /** Destructor. Frees the lexer and the parser. */ + virtual ~TptpInput(); + + /** Get the language that this Input is reading. */ + InputLanguage getLanguage() const throw() { + return language::input::LANG_SMTLIB_V2; + } + +protected: + + /** + * Parse a command from the input. Returns NULL if + * there is no command there to parse. + * + * @throws ParserException if an error is encountered during parsing. + */ + Command* parseCommand() + throw(ParserException, TypeCheckingException, AssertionException); + + /** + * Parse an expression from the input. Returns a null + * Expr if there is no expression there to parse. + * + * @throws ParserException if an error is encountered during parsing. + */ + Expr parseExpr() + throw(ParserException, TypeCheckingException, AssertionException); + +};/* class TptpInput */ + +}/* CVC4::parser namespace */ +}/* CVC4 namespace */ + +#endif /* __CVC4__PARSER__TPTP_INPUT_H */ diff --git a/src/printer/printer.cpp b/src/printer/printer.cpp index cde063584..5229400b5 100644 --- a/src/printer/printer.cpp +++ b/src/printer/printer.cpp @@ -39,6 +39,9 @@ Printer* Printer::makePrinter(OutputLanguage lang) throw() { case LANG_SMTLIB_V2: return new printer::smt2::Smt2Printer(); + case LANG_TPTP: //TODO the printer + return new printer::smt2::Smt2Printer(); + case LANG_CVC4: return new printer::cvc::CvcPrinter(); diff --git a/src/util/language.cpp b/src/util/language.cpp index da54a4783..1792797cf 100644 --- a/src/util/language.cpp +++ b/src/util/language.cpp @@ -25,6 +25,7 @@ InputLanguage toInputLanguage(OutputLanguage language) { switch(language) { case output::LANG_SMTLIB: case output::LANG_SMTLIB_V2: + case output::LANG_TPTP: case output::LANG_CVC4: // these entries directly correspond (by design) return InputLanguage(int(language)); @@ -43,6 +44,7 @@ OutputLanguage toOutputLanguage(InputLanguage language) { case input::LANG_SMTLIB: case input::LANG_SMTLIB_V2: case input::LANG_CVC4: + case input::LANG_TPTP: // these entries directly correspond (by design) return OutputLanguage(int(language)); diff --git a/src/util/language.h b/src/util/language.h index d3405e35b..b84d45d89 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -48,6 +48,8 @@ enum CVC4_PUBLIC Language { LANG_SMTLIB = 0, /** The SMTLIB v2 input language */ LANG_SMTLIB_V2, + /** The TPTP input language */ + LANG_TPTP, /** The CVC4 input language */ LANG_CVC4, @@ -70,6 +72,9 @@ inline std::ostream& operator<<(std::ostream& out, Language lang) { case LANG_SMTLIB_V2: out << "LANG_SMTLIB_V2"; break; + case LANG_TPTP: + out << "LANG_TPTP"; + break; case LANG_CVC4: out << "LANG_CVC4"; break; @@ -100,6 +105,8 @@ enum CVC4_PUBLIC Language { LANG_SMTLIB = input::LANG_SMTLIB, /** The SMTLIB v2 output language */ LANG_SMTLIB_V2 = input::LANG_SMTLIB_V2, + /** The TPTP output language */ + LANG_TPTP = input::LANG_TPTP, /** The CVC4 output language */ LANG_CVC4 = input::LANG_CVC4, @@ -122,6 +129,9 @@ inline std::ostream& operator<<(std::ostream& out, Language lang) { case LANG_SMTLIB_V2: out << "LANG_SMTLIB_V2"; break; + case LANG_TPTP: + out << "LANG_TPTP"; + break; case LANG_CVC4: out << "LANG_CVC4"; break; diff --git a/src/util/language.i b/src/util/language.i index 28e0eb5ac..ca8e5012a 100644 --- a/src/util/language.i +++ b/src/util/language.i @@ -21,12 +21,14 @@ namespace CVC4 { %rename(INPUT_LANG_AUTO) CVC4::language::input::LANG_AUTO; %rename(INPUT_LANG_SMTLIB) CVC4::language::input::LANG_SMTLIB; %rename(INPUT_LANG_SMTLIB_V2) CVC4::language::input::LANG_SMTLIB_V2; +%rename(INPUT_LANG_TPTP) CVC4::language::input::LANG_TPTP; %rename(INPUT_LANG_CVC4) CVC4::language::input::LANG_CVC4; %rename(INPUT_LANG_MAX) CVC4::language::input::LANG_MAX; %rename(OUTPUT_LANG_AUTO) CVC4::language::output::LANG_AUTO; %rename(OUTPUT_LANG_SMTLIB) CVC4::language::output::LANG_SMTLIB; %rename(OUTPUT_LANG_SMTLIB_V2) CVC4::language::output::LANG_SMTLIB_V2; +%rename(OUTPUT_LANG_TPTP) CVC4::language::output::LANG_TPTP; %rename(OUTPUT_LANG_CVC4) CVC4::language::output::LANG_CVC4; %rename(OUTPUT_LANG_AST) CVC4::language::output::LANG_AST; %rename(OUTPUT_LANG_MAX) CVC4::language::output::LANG_MAX; diff --git a/src/util/options.cpp b/src/util/options.cpp index b9291f59c..c02482e7e 100644 --- a/src/util/options.cpp +++ b/src/util/options.cpp @@ -312,12 +312,14 @@ Languages currently supported as arguments to the -L / --lang option:\n\ pl | cvc4 CVC4 presentation language\n\ smt | smtlib SMT-LIB format 1.2\n\ smt2 | smtlib2 SMT-LIB format 2.0\n\ + tptp TPTP format (cnf and fof)\n\ \n\ Languages currently supported as arguments to the --output-lang option:\n\ auto match the output language to the input language\n\ pl | cvc4 CVC4 presentation language\n\ smt | smtlib SMT-LIB format 1.2\n\ smt2 | smtlib2 SMT-LIB format 2.0\n\ + tptp TPTP format\n\ ast internal format (simple syntax-tree language)\n\ "; @@ -1633,6 +1635,9 @@ void Options::setOutputLanguage(const char* str) throw(OptionException) { } else if(!strcmp(str, "smtlib2") || !strcmp(str, "smt2")) { outputLanguage = language::output::LANG_SMTLIB_V2; return; + } else if(!strcmp(str, "tptp")) { + outputLanguage = language::output::LANG_TPTP; + return; } else if(!strcmp(str, "ast")) { outputLanguage = language::output::LANG_AST; return; @@ -1659,6 +1664,9 @@ void Options::setInputLanguage(const char* str) throw(OptionException) { } else if(!strcmp(str, "smtlib2") || !strcmp(str, "smt2")) { inputLanguage = language::input::LANG_SMTLIB_V2; return; + } else if(!strcmp(str, "tptp")) { + inputLanguage = language::input::LANG_TPTP; + return; } else if(!strcmp(str, "auto")) { inputLanguage = language::input::LANG_AUTO; return; diff --git a/test/regress/regress0/Makefile.am b/test/regress/regress0/Makefile.am index a1d960079..5eda230c3 100644 --- a/test/regress/regress0/Makefile.am +++ b/test/regress/regress0/Makefile.am @@ -85,6 +85,19 @@ CVC_TESTS = \ wiki.21.cvc \ simplification_bug3.cvc +# Regression tests for TPTP inputs +TPTP_TESTS = \ + tptp_parser.p \ + tptp_parser2.p \ + tptp_parser3.p \ + tptp_parser4.p \ + tptp_parser5.p \ + tptp_parser6.p \ + tptp_parser7.p \ + tptp_parser8.p \ + tptp_parser9.p \ + tptp_parser10.p + # Regression tests derived from bug reports BUG_TESTS = \ bug32.cvc \ @@ -104,7 +117,7 @@ BUG_TESTS = \ bug310.cvc \ bug339.smt2 -TESTS = $(SMT_TESTS) $(SMT2_TESTS) $(CVC_TESTS) $(BUG_TESTS) +TESTS = $(SMT_TESTS) $(SMT2_TESTS) $(CVC_TESTS) $(TPTP_TESTS) $(BUG_TESTS) EXTRA_DIST = $(TESTS) \ simplification_bug4.smt2.expect diff --git a/test/regress/regress0/tptp_parser.p b/test/regress/regress0/tptp_parser.p new file mode 100644 index 000000000..0be0adbbf --- /dev/null +++ b/test/regress/regress0/tptp_parser.p @@ -0,0 +1,17 @@ +% EXPECT: unsat +% EXIT: 20 + +%-------------------------------------------------------------------------- + +/* +cnf(query_1,axiom, + $false | $false /* | $false). +*/ + +cnf(query_1,axiom, + $false | $false | $false). + +cnf(query_1,negated_conjecture, ~ + $false | $false | $false). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser10.p b/test/regress/regress0/tptp_parser10.p new file mode 100644 index 000000000..90db619e7 --- /dev/null +++ b/test/regress/regress0/tptp_parser10.p @@ -0,0 +1,10 @@ +% EXPECT: unsat +% EXIT: 20 + +%-------------------------------------------------------------------------- + +fof(query_1,axiom, ![A,B]: (A != B => e(A) != e(B)) ). + +fof(query_1,conjecture, e(1.6) != e(1) ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser2.p b/test/regress/regress0/tptp_parser2.p new file mode 100644 index 000000000..83a5f7b83 --- /dev/null +++ b/test/regress/regress0/tptp_parser2.p @@ -0,0 +1,14 @@ +% EXPECT: unsat +% EXIT: 20 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, a | b ). + +cnf(query_1,axiom, b | c ). + +cnf(query_1,axiom, ~a | ~ 'c' ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser3.p b/test/regress/regress0/tptp_parser3.p new file mode 100644 index 000000000..3677e6ffe --- /dev/null +++ b/test/regress/regress0/tptp_parser3.p @@ -0,0 +1,14 @@ +% EXPECT: unsat +% EXIT: 20 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, p( a, d ) | b ). + +cnf(query_1,axiom, b | c ). + +cnf(query_1,axiom, ~p(a, d) | ~ 'c' ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser4.p b/test/regress/regress0/tptp_parser4.p new file mode 100644 index 000000000..6c5bd29da --- /dev/null +++ b/test/regress/regress0/tptp_parser4.p @@ -0,0 +1,14 @@ +% EXPECT: unsat +% EXIT: 20 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, p( A, d ) | b ). + +cnf(query_1,axiom, b | c ). + +cnf(query_1,axiom, ~p(A, d) | ~ 'c' ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser5.p b/test/regress/regress0/tptp_parser5.p new file mode 100644 index 000000000..faacfdccc --- /dev/null +++ b/test/regress/regress0/tptp_parser5.p @@ -0,0 +1,16 @@ +% EXPECT: unknown (INCOMPLETE) +% EXIT: 0 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, p( A, d ) | b ). + +cnf(query_1,axiom, b | c ). + +cnf(query_1,axiom, ~p(A, e) | ~ 'c' ). + +cnf(query_1,axiom, e != d ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser6.p b/test/regress/regress0/tptp_parser6.p new file mode 100644 index 000000000..0684a6a3d --- /dev/null +++ b/test/regress/regress0/tptp_parser6.p @@ -0,0 +1,16 @@ +% EXPECT: unknown (INCOMPLETE) +% EXIT: 0 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, p( A, d ) | cnf ). + +cnf(query_1,axiom, cnf | c ). + +cnf(query_1,axiom, ~p(A, axiom) | ~ 'c' ). + +cnf(query_1,axiom, axiom != d ). + +cnf(query_1,negated_conjecture, ~ cnf ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser7.p b/test/regress/regress0/tptp_parser7.p new file mode 100644 index 000000000..ec90157e7 --- /dev/null +++ b/test/regress/regress0/tptp_parser7.p @@ -0,0 +1,16 @@ +% EXPECT: unknown (INCOMPLETE) +% EXIT: 0 + +%-------------------------------------------------------------------------- + +cnf(query_1,axiom, p( A, d ) | b ). + +cnf(query_1,axiom, b | c ). + +cnf(query_1,axiom, ~p(A, axiom) ). + +cnf(query_1,axiom, axiom != d ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser8.p b/test/regress/regress0/tptp_parser8.p new file mode 100644 index 000000000..3bfd8efb6 --- /dev/null +++ b/test/regress/regress0/tptp_parser8.p @@ -0,0 +1,11 @@ +% EXPECT: unknown (INCOMPLETE) +% EXIT: 0 + +%-------------------------------------------------------------------------- +include('tptp_parser7.p'). + +cnf(query_1,axiom, include('A') | b ). + +cnf(query_1,negated_conjecture, ~ b ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/regress0/tptp_parser9.p b/test/regress/regress0/tptp_parser9.p new file mode 100644 index 000000000..dab8065b5 --- /dev/null +++ b/test/regress/regress0/tptp_parser9.p @@ -0,0 +1,14 @@ +% EXPECT: unknown (INCOMPLETE) +% EXIT: 0 + +%-------------------------------------------------------------------------- + +fof(query_1,axiom, include(1) ). + +fof(query_1,axiom, ![E]: e(E,1.6) ). + +fof(query_1,axiom, ![A,E]: ~e(A,3.0E3) ). + +fof(query_1,conjecture, ![E]: e(E,2.6) ). + +%-------------------------------------------------------------------------- diff --git a/test/regress/run_regression b/test/regress/run_regression index 587d3cdda..5ed43d1a0 100755 --- a/test/regress/run_regression +++ b/test/regress/run_regression @@ -13,7 +13,7 @@ prog=`basename "$0"` if [ $# -lt 2 -o $# -gt 3 ]; then - echo "usage: $prog [ --proof | --dump ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 ]" >&2 + echo "usage: $prog [ --proof | --dump ] cvc4-binary [ benchmark.cvc | benchmark.smt | benchmark.smt2 | benchmark.p ]" >&2 exit 1 fi @@ -149,8 +149,24 @@ elif expr "$benchmark" : '.*\.cvc$' &>/dev/null; then error "cannot determine expected exit status of \`$benchmark': please use \`% EXIT:' gesture" fi command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` +elif expr "$benchmark" : '.*\.p$' &>/dev/null; then + proof_command=PROOFS-NOT-SUPPORTED-IN-SMTLIB-V1; + lang=tptp + expected_proof=`grep -q '^% PROOF' "$benchmark" && echo yes` + expected_output=$(grep '^% EXPECT: ' "$benchmark") + expected_error=`grep '^% EXPECT-ERROR: ' "$benchmark" | sed 's,^% EXPECT-ERROR: ,,'` + if [ -z "$expected_output" -a -z "$expected_error" ]; then + error "cannot determine expected output of \`$benchmark': " \ + "please use \`% EXPECT:' and/or \`% EXPECT-ERROR:' gestures" + fi + expected_output=$(echo "$expected_output" | perl -pe 's,^% EXPECT: ,,;s,\r,,') + expected_exit_status=`grep -m 1 '^% EXIT: ' "$benchmark" | perl -pe 's,^% EXIT: ,,;s,\r,,'` + if [ -z "$expected_exit_status" ]; then + error "cannot determine expected exit status of \`$benchmark': please use \`% EXIT:' gesture" + fi + command_line=`grep '^% COMMAND-LINE: ' "$benchmark" | sed 's,^% COMMAND-LINE: ,,'` else - error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2" + error "benchmark \`$benchmark' must be *.cvc or *.smt or *.smt2 or *.p" fi command_line="${command_line:+$command_line }--lang=$lang" -- 2.30.2