From 09b5745fa1a2fcdc6b8d0a0d31c1ba34c96d67f0 Mon Sep 17 00:00:00 2001 From: "Christopher L. Conway" Date: Mon, 3 May 2010 20:52:41 +0000 Subject: [PATCH] Importing ANTLR3 lexer nextToken function to avoid escaped exceptions. --- src/parser/antlr_input.cpp | 2 + src/parser/antlr_input.h | 4 ++ src/parser/antlr_input_imports.cpp | 107 +++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index 675a04510..b6f10ab25 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -197,6 +197,8 @@ void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) { // Override default lexer error reporting d_lexer->rec->reportError = &lexerError; + // Override default nextToken function, just to prevent exceptions escaping. + d_lexer->rec->state->tokSource->nextToken = &nextToken; } void AntlrInput::setParser(Parser *parser) { diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h index 441d2a0b3..244735e9b 100644 --- a/src/parser/antlr_input.h +++ b/src/parser/antlr_input.h @@ -112,6 +112,10 @@ class AntlrInput : public Input{ /** Builds a message for a lexer error and calls parseError. */ static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer); + /** Returns the next available lexer token from the current input stream. */ + static pANTLR3_COMMON_TOKEN + nextTokenStr (pANTLR3_TOKEN_SOURCE toksource); + /* Since we own d_tokenStream and it needs to be freed, we need to prevent * copy construction and assignment. */ diff --git a/src/parser/antlr_input_imports.cpp b/src/parser/antlr_input_imports.cpp index 75944b21b..9e16d795a 100644 --- a/src/parser/antlr_input_imports.cpp +++ b/src/parser/antlr_input_imports.cpp @@ -241,5 +241,112 @@ void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) { input->parseError(ss.str()); } +/// +/// \brief +/// Returns the next available token from the current input stream. +/// +/// \param toksource +/// Points to the implementation of a token source. The lexer is +/// addressed by the super structure pointer. +/// +/// \returns +/// The next token in the current input stream or the EOF token +/// if there are no more tokens. +/// +/// \remarks +/// Write remarks for nextToken here. +/// +/// \see nextToken +/// +/* *** CVC4 NOTE *** + * This is copied, largely unmodified, from antlr3lexer.c + * + */ +inline pANTLR3_COMMON_TOKEN +AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource) +{ + pANTLR3_LEXER lexer; + + lexer = (pANTLR3_LEXER)(toksource->super); + + /// Loop until we get a non skipped token or EOF + /// + for (;;) + { + // Get rid of any previous token (token factory takes care of + // any de-allocation when this token is finally used up. + // + lexer->rec->state->token = NULL; + lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception + lexer->rec->state->failed = ANTLR3_FALSE; + + // Now call the matching rules and see if we can generate a new token + // + for (;;) + { + // Record the start of the token in our input stream. + // + lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; + lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream); + lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input); + lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input); + lexer->rec->state->text = NULL; + + if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF) + { + // Reached the end of the current stream, nothing more to do if this is + // the last in the stack. + // + pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); + + teof->setStartIndex (teof, lexer->getCharIndex(lexer)); + teof->setStopIndex (teof, lexer->getCharIndex(lexer)); + teof->setLine (teof, lexer->getLine(lexer)); + teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it + return teof; + } + + lexer->rec->state->token = NULL; + lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception + lexer->rec->state->failed = ANTLR3_FALSE; + + // Call the generated lexer, see if it can get a new token together. + // + lexer->mTokens(lexer->ctx); + + if (lexer->rec->state->error == ANTLR3_TRUE) + { + // Recognition exception, report it and try to recover. + // + lexer->rec->state->failed = ANTLR3_TRUE; + // *** CVC4 EDIT: Just call the AntlrInput error routine + lexerError(lexer->rec); + lexer->recover(lexer); + } + else + { + if (lexer->rec->state->token == NULL) + { + // Emit the real token, which adds it in to the token stream basically + // + // *** CVC4 Edit: call emit on the lexer object + lexer->emit(lexer); + } + else if (lexer->rec->state->token == &(toksource->skipToken)) + { + // A real token could have been generated, but "Computer say's naaaaah" and it + // it is just something we need to skip altogether. + // + continue; + } + + // Good token, not skipped, not EOF token + // + return lexer->rec->state->token; + } + } + } +} + } // namespace parser } // namespace CVC4 -- 2.30.2