1 /********************* */
3 ** Original author: cconway
4 ** Major contributors: none
5 ** Minor contributors (to current version): none
6 ** This file is part of the CVC4 prototype.
7 ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys)
8 ** Courant Institute of Mathematical Sciences
10 ** See the file COPYING in the top-level source directory for licensing
13 ** A super-class for ANTLR-generated input language parsers
20 #include "antlr_input.h"
21 #include "bounded_token_buffer.h"
22 #include "bounded_token_factory.h"
23 #include "memory_mapped_input_buffer.h"
24 #include "parser_exception.h"
25 #include "parser_state.h"
27 #include "util/output.h"
28 #include "util/Assert.h"
29 #include "expr/command.h"
30 #include "expr/type.h"
34 using namespace CVC4::parser
;
35 using namespace CVC4::kind
;
// Construct an input that reads from the file named by `filename`.
//   exprManager - forwarded, together with filename, to the Input base class
//   filename    - path of the file to open
//   lookahead   - stored in d_lookahead
//   useMmap     - presumably selects between the two stream factories used
//                 below (memory-mapped buffer vs. ANTLR3 ASCII file stream);
//                 the selecting condition is not visible here - TODO confirm
// Throws ParserException("Couldn't open file: ...") when d_input is NULL.
40 AntlrInput::AntlrInput(ExprManager
* exprManager
, const std::string
& filename
, unsigned int lookahead
, bool useMmap
) :
41 Input(exprManager
, filename
),
42 d_lookahead(lookahead
),
// Stream backed by MemoryMappedInputBufferNew.
48 d_input
= MemoryMappedInputBufferNew(filename
);
// Stream backed by libantlr3c's ASCII file reader.
50 d_input
= antlr3AsciiFileStreamNew((pANTLR3_UINT8
) filename
.c_str());
// A NULL stream is reported to the caller as a ParserException.
52 if( d_input
== NULL
) {
53 throw ParserException("Couldn't open file: " + filename
);
// NOTE(review): this fragment names a different class (AntlrParser / Parser)
// than the rest of the file (AntlrInput / Input) and has no ':' before its
// initializer list. It looks like a stale or disabled std::istream-based
// constructor - confirm whether it is meant to be compiled at all.
58 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
59 Parser(exprManager,name),
60 d_lookahead(lookahead) {
// Construct an input that reads from an in-memory string.
//   exprManager - forwarded, together with name, to the Input base class
//   input       - the text to parse
//   name        - name given to the stream
//   lookahead   - stored in d_lookahead
// Throws ParserException if either string copy or the stream creation fails.
65 AntlrInput::AntlrInput(ExprManager
* exprManager
, const std::string
& input
, const std::string
& name
, unsigned int lookahead
) :
66 Input(exprManager
,name
),
67 d_lookahead(lookahead
),
// Private heap copies of the text and the name are handed to the "in place"
// stream constructor below.
71 char* inputStr
= strdup(input
.c_str());
72 char* nameStr
= strdup(name
.c_str());
// NOTE(review): if only one strdup succeeds, no free() of the other copy is
// visible before the throw - possible leak, confirm.
73 if( inputStr
==NULL
|| nameStr
==NULL
) {
74 throw ParserException("Couldn't initialize string input: '" + input
+ "'");
// The stream is created over inputStr with length input.size().
// NOTE(review): presumably the stream refers to the buffers rather than
// copying them, so they must outlive d_input - confirm who frees them.
76 d_input
= antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8
)inputStr
,input
.size(),(pANTLR3_UINT8
)nameStr
);
// NOTE(review): no release of inputStr/nameStr is visible on this failure
// path either.
77 if( d_input
== NULL
) {
78 throw ParserException("Couldn't create input stream for string: '" + input
+ "'");
82 AntlrInput::~AntlrInput() {
83 d_tokenStream
->free(d_tokenStream
);
84 d_input
->close(d_input
);
// Accessor returning a pANTLR3_INPUT_STREAM.
// NOTE(review): presumably this is d_input - confirm against the body.
87 pANTLR3_INPUT_STREAM
AntlrInput::getInputStream() {
// Accessor returning a pANTLR3_COMMON_TOKEN_STREAM.
// NOTE(review): presumably this is d_tokenStream - confirm against the body.
91 pANTLR3_COMMON_TOKEN_STREAM
AntlrInput::getTokenStream() {
// Replacement for the base recognizer's match callback.
//   recognizer - the ANTLR3 base recognizer doing the matching
//   ttype      - the token type expected at the current position
//   follow     - follow set for error recovery
// Returns the symbol that was current on entry when it matches or when
// backtracking; otherwise returns whatever recoverFromMismatchedToken yields.
96 /// Match current input symbol against ttype. Upon error, do one token
97 /// insertion or deletion if possible.
98 /// To turn off single token insertion or deletion error
99 /// recovery, override mismatchRecover() and have it call
100 /// plain mismatch(), which does not recover. Then any error
101 /// in a rule will cause an exception and immediate exit from
102 /// rule. Rule would recover by resynchronizing to the set of
103 /// symbols that can follow rule ref.
105 // [chris 4/5/2010] Copy and paste from antlr3baserecognizer.c
107 AntlrInput::match(pANTLR3_BASE_RECOGNIZER recognizer
, ANTLR3_UINT32 ttype
,
108 pANTLR3_BITSET_LIST follow
) {
109 pANTLR3_PARSER parser
;
110 pANTLR3_TREE_PARSER tparser
;
111 pANTLR3_INT_STREAM is
;
112 void * matchedSymbol
;
// Locate the integer stream; where it lives depends on the recognizer type.
114 switch(recognizer
->type
) {
115 case ANTLR3_TYPE_PARSER
:
// Token parser: the stream hangs off the parser's token stream.
117 parser
= (pANTLR3_PARSER
)(recognizer
->super
);
119 is
= parser
->tstream
->istream
;
123 case ANTLR3_TYPE_TREE_PARSER
:
// Tree parser: the stream hangs off the common tree node stream.
125 tparser
= (pANTLR3_TREE_PARSER
)(recognizer
->super
);
127 is
= tparser
->ctnstream
->tnstream
->istream
;
// Diagnostic text for a recognizer type that is neither of the above.
135 "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
141 // Pick up the current input token/node for assignment to labels
143 matchedSymbol
= recognizer
->getCurrentInputSymbol(recognizer
, is
);
// Success path: lookahead token 1 has the expected type.
145 if(is
->_LA(is
, 1) == ttype
) {
146 // The token was the one we were told to expect
148 is
->consume(is
); // Consume that token from the stream
149 recognizer
->state
->errorRecovery
= ANTLR3_FALSE
; // Not in error recovery now (if we were)
150 recognizer
->state
->failed
= ANTLR3_FALSE
; // The match was a success
151 return matchedSymbol
; // We are done
154 // We did not find the expected token type, if we are backtracking then
155 // we just set the failed flag and return.
157 if(recognizer
->state
->backtracking
> 0) {
158 // Backtracking is going on
160 recognizer
->state
->failed
= ANTLR3_TRUE
;
161 return matchedSymbol
;
164 // We did not find the expected token and there is no backtracking
165 // going on, so we mismatch, which creates an exception in the recognizer exception
// The recovery routine is reached through the recognizer's own callback slot.
168 matchedSymbol
= recognizer
->recoverFromMismatchedToken(recognizer
, ttype
,
170 return matchedSymbol
;
// Report a parse error at the lexer's current position.
//   message - the error description
// Writes a "Throwing exception: <file>:<line>.<column>: " trace to the
// "parser" debug channel, then throws a ParserException built from the
// message, the filename from getParserState(), and the line and
// character position reported by d_lexer.
173 void AntlrInput::parseError(const std::string
& message
)
174 throw (ParserException
) {
175 Debug("parser") << "Throwing exception: "
176 << getParserState()->getFilename() << ":"
177 << d_lexer
->getLine(d_lexer
) << "."
178 << d_lexer
->getCharPositionInLine(d_lexer
) << ": "
// The exception carries the same location that was just logged.
180 throw ParserException(message
, getParserState()->getFilename(),
181 d_lexer
->getLine(d_lexer
),
182 d_lexer
->getCharPositionInLine(d_lexer
));
// Replacement for the base recognizer's recoverFromMismatchedToken callback.
// It does not attempt to recover: it fills in the recognizer's exception
// record (unwanted token or missing token) and calls reportError, which is
// expected to throw.
//   recognizer - the ANTLR3 base recognizer that hit the mismatch
//   follow     - follow set used for the missing-token check
186 AntlrInput::recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer
,
188 pANTLR3_BITSET_LIST follow
) {
// NOTE(review): recognizer->super is treated as a token parser without
// checking the recognizer type - confirm tree parsers never reach this.
190 pANTLR3_PARSER parser
= (pANTLR3_PARSER
) (recognizer
->super
);
191 pANTLR3_INT_STREAM is
= parser
->tstream
->istream
;
195 // Create an exception if we need one
197 if(recognizer
->state
->exception
== NULL
) {
198 antlr3RecognitionExceptionNew(recognizer
);
// Case 1: the current token is spurious (an unwanted token).
201 if(recognizer
->mismatchIsUnwantedToken(recognizer
, is
, ttype
) == ANTLR3_TRUE
) {
202 recognizer
->state
->exception
->type
= ANTLR3_UNWANTED_TOKEN_EXCEPTION
;
203 recognizer
->state
->exception
->message
204 = (void*)ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME
;
// Case 2: a token is missing before the current one; record the missing
// symbol and the expected type in the exception.
207 if(recognizer
->mismatchIsMissingToken(recognizer
, is
, follow
)) {
208 matchedSymbol
= recognizer
->getMissingSymbol(recognizer
, is
,
209 recognizer
->state
->exception
,
211 recognizer
->state
->exception
->type
= ANTLR3_MISSING_TOKEN_EXCEPTION
;
212 recognizer
->state
->exception
->message
= (void*)ANTLR3_MISSING_TOKEN_EXCEPTION_NAME
;
213 recognizer
->state
->exception
->token
= matchedSymbol
;
214 recognizer
->state
->exception
->expecting
= ttype
;
// reportError is expected to throw, so control must not come back here.
217 reportError(recognizer
);
218 Unreachable("reportError should have thrown exception in AntlrInput::recoverFromMismatchedToken");
// Replacement for the base recognizer's reportError callback.
// Marks the recognizer as being in error recovery, increments its error
// count, builds a message in `ss` according to the exception type, and passes
// that message to ParserState::parseError on the ParserState found in the
// parser's `super` field.
//   recognizer - the ANTLR3 base recognizer whose state holds the exception
221 void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer
) {
222 pANTLR3_EXCEPTION ex
= recognizer
->state
->exception
;
223 pANTLR3_UINT8
* tokenNames
= recognizer
->state
->tokenNames
;
227 // Signal we are in error recovery now
228 recognizer
->state
->errorRecovery
= ANTLR3_TRUE
;
230 // Indicate this recognizer had an error while processing.
231 recognizer
->state
->errorCount
++;
233 // Call the builtin error formatter
234 // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
236 /* This switch statement is adapted from antlr3baserecognizer.c:displayRecognitionError in libantlr3c.
237 * TODO: Make error messages more useful, maybe by including more expected tokens and information
238 * about the current token. */
240 case ANTLR3_UNWANTED_TOKEN_EXCEPTION
:
242 // Indicates that the recognizer was fed a token which seems to be
243 // spurious input. We can detect this when the token that follows
244 // this unwanted token would normally be part of the syntactically
245 // correct stream. Then we can see that the token we are looking at
246 // is just something that should not be there and throw this exception.
// Without a token-name table only a generic message can be produced.
248 if(tokenNames
== NULL
) {
249 ss
<< "Unexpected token." ;
251 if(ex
->expecting
== ANTLR3_TOKEN_EOF
) {
252 ss
<< "Expected end of file.";
254 ss
<< "Expected " << tokenNames
[ex
->expecting
]
255 << ", found '" << tokenText((pANTLR3_COMMON_TOKEN
)ex
->token
) << "'.";
260 case ANTLR3_MISSING_TOKEN_EXCEPTION
:
262 // Indicates that the recognizer detected that the token we just
263 // hit would be valid syntactically if preceded by a particular
264 // token. Perhaps a missing ';' at line end or a missing ',' in an
265 // expression list, and such like.
// Without a token-name table the numeric token type is printed instead.
267 if(tokenNames
== NULL
) {
268 ss
<< "Missing token (" << ex
->expecting
<< ").";
270 if(ex
->expecting
== ANTLR3_TOKEN_EOF
) {
271 ss
<< "Missing end of file marker.";
273 ss
<< "Missing " << tokenNames
[ex
->expecting
] << ".";
278 case ANTLR3_RECOGNITION_EXCEPTION
:
280 // Indicates that the recognizer received a token
281 // in the input that was not predicted. This is the basic exception type
282 // from which all others are derived. So we assume it was a syntax error.
283 // You may get this if there are not more tokens and more are needed
284 // to complete a parse for instance.
286 ss
<<"Syntax error.";
289 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION
:
291 // We were expecting to see one thing and got another. This is the
292 // most common error if we could not detect a missing or unwanted token.
293 // Here you can spend your efforts to
294 // derive more useful error messages based on the expected
295 // token set and the last token and so on. The error following
296 // bitmaps do a good job of reducing the set that we were looking
297 // for down to something small. Knowing what you are parsing may be
298 // able to allow you to be even more specific about an error.
300 if(tokenNames
== NULL
) {
301 ss
<< "Syntax error.";
303 if(ex
->expecting
== ANTLR3_TOKEN_EOF
) {
304 ss
<< "Expected end of file.";
306 ss
<< "Expected " << tokenNames
[ex
->expecting
] << ".";
311 case ANTLR3_NO_VIABLE_ALT_EXCEPTION
:
312 // We could not pick any alt decision from the input given
313 // so god knows what happened - however when you examine your grammar,
314 // you should. It means that at the point where the current token occurred
315 // that the DFA indicates nowhere to go from here.
317 ss
<< "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN
)ex
->token
) << "'.";
320 case ANTLR3_MISMATCHED_SET_EXCEPTION
:
326 ANTLR3_UINT32 numbits
;
327 pANTLR3_BITSET errBits
;
329 // This means we were able to deal with one of a set of
330 // possible tokens at this point, but we did not see any
331 // member of that set.
333 ss
<< "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN
)ex
->token
)
334 << "'. Expected one of: ";
336 // What tokens could we have accepted at this point in the
// NOTE(review): no release of errBits is visible in this function - confirm
// whether the bitset returned by antlr3BitsetLoad has to be freed here.
340 errBits
= antlr3BitsetLoad(ex
->expectingSet
);
341 numbits
= errBits
->numBits(errBits
);
342 size
= errBits
->size(errBits
);
345 // However many tokens we could have dealt with here, it is usually
346 // not useful to print ALL of the set here. I arbitrarily chose 8
347 // here, but you should do whatever makes sense for you of course.
348 // No token number 0, so look for bit 1 and on.
350 for(bit
= 1; bit
< numbits
&& count
< 8 && count
< size
; bit
++) {
351 // TODO: This doesn't look right - should be asking if the bit is set!!
// NOTE(review): the test below only checks that a name exists for this token
// number, not that `bit` is a member of errBits, so tokens outside the
// expected set can be listed.
353 if(tokenNames
[bit
]) {
357 ss
<< tokenNames
[bit
];
361 Unreachable("Parse error with empty set of expected tokens.");
366 case ANTLR3_EARLY_EXIT_EXCEPTION
:
368 // We entered a loop requiring a number of token sequences
369 // but found a token that ended that sequence earlier than
370 // we should have done.
372 ss
<< "Sequence terminated early by token: '"
373 << tokenText((pANTLR3_COMMON_TOKEN
)ex
->token
) << "'.";
378 // We don't handle any other exceptions here, but you can
379 // if you wish. If we get an exception that hits this point
380 // then we are just going to report what we know about the
383 Unhandled("Unexpected exception in parser.");
387 // Now get ready to throw an exception
// The ParserState is read back from the parser's `super` field; both pointers
// are asserted non-NULL before use.
388 pANTLR3_PARSER parser
= (pANTLR3_PARSER
)(recognizer
->super
);
389 AlwaysAssert(parser
!=NULL
);
390 ParserState
*parserState
= (ParserState
*)(parser
->super
);
391 AlwaysAssert(parserState
!=NULL
);
393 // Call the error display routine
394 parserState
->parseError(ss
.str());
// Install bounded token machinery on the lexer and create the token stream.
// Any existing token factory on d_lexer is closed and replaced by a bounded
// factory sized 2*d_lookahead; a bounded token buffer of size d_lookahead is
// then created over the lexer's token source, and its common token stream is
// stored in d_tokenStream.
//   pLexer - the generated lexer
// Throws ParserException if the factory or the buffer cannot be created.
// NOTE(review): the statements below only use d_lexer; the point where
// pLexer is stored is not visible here - confirm.
397 void AntlrInput::setLexer(pANTLR3_LEXER pLexer
) {
400 pANTLR3_TOKEN_FACTORY pTokenFactory
= d_lexer
->rec
->state
->tokFactory
;
// Close whatever factory is already installed before replacing it.
401 if( pTokenFactory
!= NULL
) {
402 pTokenFactory
->close(pTokenFactory
);
405 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
406 pTokenFactory
= BoundedTokenFactoryNew(d_input
, 2*d_lookahead
);
407 if( pTokenFactory
== NULL
) {
408 throw ParserException("Couldn't create token factory.");
410 d_lexer
->rec
->state
->tokFactory
= pTokenFactory
;
// The buffer pulls tokens from the lexer's token source.
412 pBOUNDED_TOKEN_BUFFER buffer
= BoundedTokenBufferSourceNew(d_lookahead
, d_lexer
->rec
->state
->tokSource
);
413 if( buffer
== NULL
) {
414 throw ParserException("Couldn't create token buffer.");
// d_tokenStream is only assigned here, after both throw sites above.
417 d_tokenStream
= buffer
->commonTstream
;
// Hook this class into the generated parser: store the ParserState in the
// parser's `super` field and replace three recognizer callbacks (match,
// reportError, recoverFromMismatchedToken) with the AntlrInput versions.
//   pParser - the generated parser
// NOTE(review): the statements below only use d_parser; the point where
// pParser is stored is not visible here - confirm.
420 void AntlrInput::setParser(pANTLR3_PARSER pParser
) {
422 // ANTLR isn't using super, AFAICT.
423 // We could also use @parser::context to add a field to the generated parser, but then
424 // it would have to be declared separately in every input's grammar and we'd have to
425 // pass it in as an address anyway.
// reportError reads this field back to find the ParserState.
426 d_parser
->super
= getParserState();
427 d_parser
->rec
->match
= &match
;
428 d_parser
->rec
->reportError
= &reportError
;
429 d_parser
->rec
->recoverFromMismatchedToken
= &recoverFromMismatchedToken
;
433 }/* CVC4::parser namespace */
434 }/* CVC4 namespace */