From 46e4487c37628217ec64a2b325b287acfb0ae8c5 Mon Sep 17 00:00:00 2001 From: "Christopher L. Conway" Date: Fri, 7 May 2010 19:44:05 +0000 Subject: [PATCH] Tightening lexer rules for numerals in SMT v2 --- src/parser/parser.cpp | 3 +- src/parser/smt2/Smt2.g | 63 ++++++++++++++++++++++++--------- test/unit/parser/parser_black.h | 36 ++++++++++++++----- 3 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp index a393bc85f..b4bafc953 100644 --- a/src/parser/parser.cpp +++ b/src/parser/parser.cpp @@ -39,7 +39,8 @@ Parser::Parser(ExprManager* exprManager, Input* input) : d_exprManager(exprManager), d_input(input), d_done(false), - d_checksEnabled(true) { + d_checksEnabled(true), + d_strictMode(false) { d_input->setParser(this); } diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index bcab39183..268903bc7 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -33,6 +33,7 @@ options { } @lexer::includes { + /** This suppresses warnings about the redefinition of token symbols between * different parsers. The redefinitions should be harmless as long as no * client: (a) #include's the lexer headers for two grammars AND (b) uses the @@ -48,6 +49,19 @@ options { #define ANTLR3_INLINE_INPUT_ASCII } +@lexer::postinclude { +#include + +#include "parser/parser.h" +#include "parser/antlr_input.h" + +using namespace CVC4; +using namespace CVC4::parser; + +#undef PARSER_STATE +#define PARSER_STATE ((Parser*)LEXER->super) +} + @parser::includes { #include "expr/command.h" #include "parser/parser.h" @@ -147,7 +161,7 @@ command returns [CVC4::Command* cmd] setInfo(PARSER_STATE,name,sexpr); cmd = new SetInfoCommand(name,sexpr); } | /* sort declaration */ - DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=NUMERAL + DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=INTEGER_LITERAL { Debug("parser") << "declare sort: '" << name << "' arity=" << n << std::endl; if( AntlrInput::tokenToInteger(n) > 0 ) { Unimplemented("Parameterized user sorts."); @@ -179,10 +193,10 @@ symbolicExpr[CVC4::SExpr& sexpr] @declarations { std::vector children; } - : NUMERAL - { sexpr = SExpr(AntlrInput::tokenText($NUMERAL)); } - | RATIONAL - { sexpr = SExpr(AntlrInput::tokenText($RATIONAL)); } + : INTEGER_LITERAL + { sexpr = SExpr(AntlrInput::tokenText($INTEGER_LITERAL)); } + | RATIONAL_LITERAL + { sexpr = SExpr(AntlrInput::tokenText($RATIONAL_LITERAL)); } | STRING_LITERAL { sexpr = SExpr(AntlrInput::tokenText($STRING_LITERAL)); } | SYMBOL @@ -256,11 +270,11 @@ term[CVC4::Expr& expr] /* constants */ | TRUE_TOK { expr = MK_CONST(true); } | FALSE_TOK { expr = MK_CONST(false); } - | NUMERAL - { expr = MK_CONST( AntlrInput::tokenToInteger($NUMERAL) ); } - | RATIONAL + | INTEGER_LITERAL + { expr = MK_CONST( AntlrInput::tokenToInteger($INTEGER_LITERAL) ); } + | RATIONAL_LITERAL { // FIXME: This doesn't work because an SMT rational is not a valid GMP rational string - expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL) ); } + expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL_LITERAL) ); } | HEX_LITERAL { Assert( AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0 ); std::string hexString = AntlrInput::tokenTextSubstr($HEX_LITERAL,2); @@ -460,20 +474,37 @@ WHITESPACE ; /** - * Matches an integer constant from the input (non-empty sequence of digits). - * This is a bit looser than what the standard allows, because it accepts - * leading zeroes. + * Matches an integer constant from the input (non-empty sequence of digits, with + * no leading zeroes). */ -NUMERAL - : DIGIT+ +INTEGER_LITERAL + : NUMERAL ; +/** Match an integer constant. In non-strict mode, this is any sequence of + * digits. In strict mode, non-zero integers can't have leading zeroes. */ +fragment NUMERAL +@init { + char *start = (char*) GETCHARINDEX(); +} + : DIGIT+ + { Debug("parser-extra") << "NUMERAL: " + << (uintptr_t)start << ".." << GETCHARINDEX() + << " strict? " << (bool)(PARSER_STATE->strictModeEnabled()) + << " ^0? " << (bool)(*start == '0') + << " len>1? " << (bool)(start < (char*)(GETCHARINDEX() - 1)) + << endl; } + { !PARSER_STATE->strictModeEnabled() || + *start != '0' || + start == (char*)(GETCHARINDEX() - 1) }? + ; + /** * Matches a rational constant from the input. This is a bit looser * than what the standard allows, because it accepts leading zeroes. */ -RATIONAL - : DIGIT+ '.' DIGIT+ +RATIONAL_LITERAL + : NUMERAL '.' DIGIT+ ; /** diff --git a/test/unit/parser/parser_black.h b/test/unit/parser/parser_black.h index b4044b96f..37fac552e 100644 --- a/test/unit/parser/parser_black.h +++ b/test/unit/parser/parser_black.h @@ -176,7 +176,8 @@ const string goodSmt2Exprs[] = { "0", "1.5", "#xfab09c7", - "#b0001011" + "#b0001011", + "(* 5 01)" // '01' is OK in non-strict mode }; const int numGoodSmt2Exprs = sizeof(goodSmt2Exprs) / sizeof(string); @@ -192,7 +193,8 @@ const string badSmt2Inputs[] = { const int numBadSmt2Inputs = sizeof(badSmt2Inputs) / sizeof(string); -/* The following expressions are invalid even after setupContext. */ +/* The following expressions are invalid even after setupContext + * in non-strict mode. */ const string badSmt2Exprs[] = { "(and)", // wrong arity "(and a b", // no closing paren @@ -216,6 +218,15 @@ const string badSmt2Exprs[] = { const int numBadSmt2Exprs = sizeof(badSmt2Exprs) / sizeof(string); +/* The following expressions are invalid in strict mode, even after setupContext. */ +const string badStrictSmt2Exprs[] = { + "(and a)", // no unary and's + "(or a)", // no unary or's + "(* 5 01)" // '01' is not a valid integer constant +}; + +const int numBadStrictSmt2Exprs = sizeof(badStrictSmt2Exprs) / sizeof(string); + class ParserBlack : public CxxTest::TestSuite { ExprManager *d_exprManager; @@ -270,7 +281,7 @@ class ParserBlack : public CxxTest::TestSuite { void tryBadInputs(InputLanguage d_lang, const string badInputs[], int numInputs) { for(int i = 0; i < numInputs; ++i) { -// cout << "Testing bad input: '" << badInputs[i] << "'\n"; +// cerr << "Testing bad input: '" << badInputs[i] << "'\n"; // Debug.on("parser"); Input* input = Input::newStringInput(d_lang, badInputs[i], "test"); Parser parser(d_exprManager, input); @@ -320,15 +331,18 @@ class ParserBlack : public CxxTest::TestSuite { * input. It's more trouble than it's worth to check that the whole input was * consumed here, so just be careful to avoid valid prefixes in tests. */ - void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs) { - //Debug.on("parser"); + void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs, + bool strictMode = false) { +// Debug.on("parser"); +// Debug.on("parser-extra"); for(int i = 0; i < numExprs; ++i) { - // cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n"; -// istringstream stream(context + badBooleanExprs[i]); +// cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n"; Input* input = Input::newStringInput(d_lang, badBooleanExprs[i], "test"); Parser parser(d_exprManager, input); - + if( strictMode ) { + parser.enableStrictMode(); + } setupContext(parser); TS_ASSERT( !parser.done() ); TS_ASSERT_THROWS @@ -340,7 +354,7 @@ class ParserBlack : public CxxTest::TestSuite { const ParserException& ); delete input; } - //Debug.off("parser"); +// Debug.off("parser"); } public: @@ -405,4 +419,8 @@ public: void testBadSmt2Exprs() { tryBadExprs(LANG_SMTLIB_V2,badSmt2Exprs,numBadSmt2Exprs); } + + void testBadSmt2StrictExprs() { + tryBadExprs(LANG_SMTLIB_V2,badStrictSmt2Exprs,numBadStrictSmt2Exprs,true); + } }; -- 2.30.2