Tightening lexer rules for numerals in SMT v2

author Christopher L. Conway <christopherleeconway@gmail.com>

Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)

committer Christopher L. Conway <christopherleeconway@gmail.com>

Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)
author Christopher L. Conway <christopherleeconway@gmail.com>
Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)
committer Christopher L. Conway <christopherleeconway@gmail.com>
Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp

index a393bc85fe49ceb795c3ab98515acf51033adf2f..b4bafc953f4b771e9e2cf871a8d6ba9e155bd744 100644 (file)
--- a/src/parser/parser.cpp
+++ b/src/parser/parser.cpp
@@ -39,7 +39,8 @@ Parser::Parser(ExprManager* exprManager, Input* input) :
    d_exprManager(exprManager),
    d_input(input),
    d_done(false),
-  d_checksEnabled(true) {
+  d_checksEnabled(true),
+  d_strictMode(false) {
    d_input->setParser(this);
  }
  
diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g

index bcab391835299b32d7108f24c7c6bad5d7b80577..268903bc771e837588d03ac73605d6c2c82b089c 100644 (file)
--- a/src/parser/smt2/Smt2.g
+++ b/src/parser/smt2/Smt2.g
@@ -33,6 +33,7 @@ options {
  }
  
  @lexer::includes {
+
  /** This suppresses warnings about the redefinition of token symbols between
    * different parsers. The redefinitions should be harmless as long as no
    * client: (a) #include's the lexer headers for two grammars AND (b) uses the
@@ -48,6 +49,19 @@ options {
  #define ANTLR3_INLINE_INPUT_ASCII
  }
  
+@lexer::postinclude {
+#include <stdint.h>
+
+#include "parser/parser.h"
+#include "parser/antlr_input.h"
+
+using namespace CVC4;
+using namespace CVC4::parser;
+
+#undef PARSER_STATE 
+#define PARSER_STATE ((Parser*)LEXER->super)
+}
+
  @parser::includes {
  #include "expr/command.h"
  #include "parser/parser.h"
@@ -147,7 +161,7 @@ command returns [CVC4::Command* cmd]
        setInfo(PARSER_STATE,name,sexpr);    
        cmd = new SetInfoCommand(name,sexpr); }
    | /* sort declaration */
-    DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=NUMERAL
+    DECLARE_SORT_TOK symbol[name,CHECK_UNDECLARED,SYM_SORT] n=INTEGER_LITERAL
      { Debug("parser") << "declare sort: '" << name << "' arity=" << n << std::endl;
        if( AntlrInput::tokenToInteger(n) > 0 ) {
          Unimplemented("Parameterized user sorts.");
@@ -179,10 +193,10 @@ symbolicExpr[CVC4::SExpr& sexpr]
  @declarations {
    std::vector<SExpr> children;
  }
-  : NUMERAL
-    { sexpr = SExpr(AntlrInput::tokenText($NUMERAL)); }
-  | RATIONAL
-    { sexpr = SExpr(AntlrInput::tokenText($RATIONAL)); }
+  : INTEGER_LITERAL
+    { sexpr = SExpr(AntlrInput::tokenText($INTEGER_LITERAL)); }
+  | RATIONAL_LITERAL
+    { sexpr = SExpr(AntlrInput::tokenText($RATIONAL_LITERAL)); }
    | STRING_LITERAL
      { sexpr = SExpr(AntlrInput::tokenText($STRING_LITERAL)); }
    | SYMBOL
@@ -256,11 +270,11 @@ term[CVC4::Expr& expr]
      /* constants */
    | TRUE_TOK          { expr = MK_CONST(true); }
    | FALSE_TOK         { expr = MK_CONST(false); }
-  | NUMERAL
-    { expr = MK_CONST( AntlrInput::tokenToInteger($NUMERAL) ); }
-  | RATIONAL
+  | INTEGER_LITERAL
+    { expr = MK_CONST( AntlrInput::tokenToInteger($INTEGER_LITERAL) ); }
+  | RATIONAL_LITERAL
      { // FIXME: This doesn't work because an SMT rational is not a valid GMP rational string
-      expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL) ); }
+      expr = MK_CONST( AntlrInput::tokenToRational($RATIONAL_LITERAL) ); }
    | HEX_LITERAL
      { Assert( AntlrInput::tokenText($HEX_LITERAL).find("#x") == 0 );
        std::string hexString = AntlrInput::tokenTextSubstr($HEX_LITERAL,2);
@@ -460,20 +474,37 @@ WHITESPACE
    ;
  
  /**
- * Matches an integer constant from the input (non-empty sequence of digits).
- * This is a bit looser than what the standard allows, because it accepts 
- * leading zeroes. 
+ * Matches an integer constant from the input (non-empty sequence of digits, with
+ * no leading zeroes).
   */
-NUMERAL
-  : DIGIT+
+INTEGER_LITERAL
+  : NUMERAL
    ;
  
+/** Match an integer constant. In non-strict mode, this is any sequence of
+ * digits. In strict mode, non-zero integers can't have leading zeroes. */
+fragment NUMERAL
+@init {
+  char *start = (char*) GETCHARINDEX();
+}
+  : DIGIT+
+    { Debug("parser-extra") << "NUMERAL: " 
+       << (uintptr_t)start << ".." << GETCHARINDEX() 
+       << " strict? " << (bool)(PARSER_STATE->strictModeEnabled())
+       << " ^0? " << (bool)(*start == '0')
+       << " len>1? " << (bool)(start < (char*)(GETCHARINDEX() - 1))
+       << endl; }
+    { !PARSER_STATE->strictModeEnabled() || 
+      *start != '0' ||
+      start == (char*)(GETCHARINDEX() - 1) }?
+  ;
+  
  /**
    * Matches a rational constant from the input. This is a bit looser
    * than what the standard allows, because it accepts leading zeroes. 
    */
-RATIONAL
-  : DIGIT+ '.' DIGIT+
+RATIONAL_LITERAL
+  : NUMERAL '.' DIGIT+
    ;
  
  /**
diff --git a/test/unit/parser/parser_black.h b/test/unit/parser/parser_black.h

index b4044b96ffb8f4c8bf5cb924aa20c3780f2262a6..37fac552e1b4647511a1eed4eb14f0a4f7628cdc 100644 (file)
--- a/test/unit/parser/parser_black.h
+++ b/test/unit/parser/parser_black.h
@@ -176,7 +176,8 @@ const string goodSmt2Exprs[] = {
      "0",
      "1.5",
      "#xfab09c7",
-    "#b0001011"
+    "#b0001011",
+    "(* 5 01)" // '01' is OK in non-strict mode
  };
  
  const int numGoodSmt2Exprs = sizeof(goodSmt2Exprs) / sizeof(string);
@@ -192,7 +193,8 @@ const string badSmt2Inputs[] = {
  
  const int numBadSmt2Inputs = sizeof(badSmt2Inputs) / sizeof(string);
  
-/* The following expressions are invalid even after setupContext. */
+/* The following expressions are invalid even after setupContext
+ * in non-strict mode. */
  const string badSmt2Exprs[] = {
      "(and)", // wrong arity
      "(and a b", // no closing paren
@@ -216,6 +218,15 @@ const string badSmt2Exprs[] = {
  
  const int numBadSmt2Exprs = sizeof(badSmt2Exprs) / sizeof(string);
  
+/* The following expressions are invalid in strict mode, even after setupContext. */
+const string badStrictSmt2Exprs[] = {
+    "(and a)", // no unary and's
+    "(or a)",  // no unary or's
+    "(* 5 01)" // '01' is not a valid integer constant
+};
+
+const int numBadStrictSmt2Exprs = sizeof(badStrictSmt2Exprs) / sizeof(string);
+
  class ParserBlack : public CxxTest::TestSuite {
    ExprManager *d_exprManager;
  
@@ -270,7 +281,7 @@ class ParserBlack : public CxxTest::TestSuite {
  
    void tryBadInputs(InputLanguage d_lang, const string badInputs[], int numInputs) {
      for(int i = 0; i < numInputs; ++i) {
-//      cout << "Testing bad input: '" << badInputs[i] << "'\n";
+//      cerr << "Testing bad input: '" << badInputs[i] << "'\n";
  //      Debug.on("parser");
        Input* input = Input::newStringInput(d_lang, badInputs[i], "test");
        Parser parser(d_exprManager, input);
@@ -320,15 +331,18 @@ class ParserBlack : public CxxTest::TestSuite {
     * input. It's more trouble than it's worth to check that the whole input was
     * consumed here, so just be careful to avoid valid prefixes in tests.
     */
-  void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs) {
-    //Debug.on("parser");
+  void tryBadExprs(InputLanguage d_lang, const string badBooleanExprs[], int numExprs,
+                   bool strictMode = false) {
+//    Debug.on("parser");
+//    Debug.on("parser-extra");
      for(int i = 0; i < numExprs; ++i) {
-      // cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n";
-//      istringstream stream(context + badBooleanExprs[i]);
+//      cout << "Testing bad expr: '" << badBooleanExprs[i] << "'\n";
        Input* input = Input::newStringInput(d_lang,
                                             badBooleanExprs[i], "test");
        Parser parser(d_exprManager, input);
-
+      if( strictMode ) {
+        parser.enableStrictMode();
+      }
        setupContext(parser);
        TS_ASSERT( !parser.done() );
        TS_ASSERT_THROWS
@@ -340,7 +354,7 @@ class ParserBlack : public CxxTest::TestSuite {
            const ParserException& );
        delete input;
      }
-    //Debug.off("parser");
+//    Debug.off("parser");
    }
  
  public:
@@ -405,4 +419,8 @@ public:
    void testBadSmt2Exprs() {
      tryBadExprs(LANG_SMTLIB_V2,badSmt2Exprs,numBadSmt2Exprs);
    }
+
+  void testBadSmt2StrictExprs() {
+    tryBadExprs(LANG_SMTLIB_V2,badStrictSmt2Exprs,numBadStrictSmt2Exprs,true);
+  }
  };
author	Christopher L. Conway <christopherleeconway@gmail.com>
	Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)
committer	Christopher L. Conway <christopherleeconway@gmail.com>
	Fri, 7 May 2010 19:44:05 +0000 (19:44 +0000)
src/parser/parser.cpp		patch \| blob \| history
src/parser/smt2/Smt2.g		patch \| blob \| history
test/unit/parser/parser_black.h		patch \| blob \| history