Adding listeners to Options.
[cvc5.git] / src / parser / antlr_input_imports.cpp
1 /********************* */
2 /*! \file antlr_input_imports.cpp
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
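14 ** \brief CVC4-adapted versions of libantlr3c error-reporting and lexer routines
15 **
16 ** Defines AntlrInput::reportError, AntlrInput::nextTokenStr and AntlrInput::nextToken, which are based on their libantlr3c counterparts.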
17 ** \todo document this file
18 **/
19
20 /*
21 * The functions in this file are based on implementations in libantlr3c,
22 * with only minor CVC4-specific changes.
23 */
24
25 // [The "BSD licence"]
26 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
27 // http://www.temporal-wave.com
28 // http://www.linkedin.com/in/jimidle
29 //
30 // All rights reserved.
31 //
32 // Redistribution and use in source and binary forms, with or without
33 // modification, are permitted provided that the following conditions
34 // are met:
35 // 1. Redistributions of source code must retain the above copyright
36 // notice, this list of conditions and the following disclaimer.
37 // 2. Redistributions in binary form must reproduce the above copyright
38 // notice, this list of conditions and the following disclaimer in the
39 // documentation and/or other materials provided with the distribution.
40 // 3. The name of the author may not be used to endorse or promote products
41 // derived from this software without specific prior written permission.
42 //
43 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
44 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
46 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
47 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
48 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
52 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53
54
55
56 // These headers must be the first two included.
57 // See the documentation in "parser/antlr_undefines.h" for more details.
58 #include <antlr3.h>
59 #include "parser/antlr_undefines.h"
60
61 #include <sstream>
62
63 #include "parser/antlr_input.h"
64 #include "parser/parser.h"
65 #include "parser/parser_exception.h"
66
67 using namespace std;
68
69 namespace CVC4 {
70 namespace parser {
71
72 /// Report a recognition problem.
73 ///
74 /// This method sets errorRecovery to indicate the parser is recovering
75 /// not parsing. Once in recovery mode, no errors are generated.
76 /// To get out of recovery mode, the parser must successfully match
77 /// a token (after a resync). So it will go:
78 ///
79 /// 1. error occurs
80 /// 2. enter recovery mode, report error
81 /// 3. consume until token found in resynch set
82 /// 4. try to resume parsing
83 /// 5. next match() will reset errorRecovery mode
84 ///
85 /// If you override, make sure to update errorCount if you care about that.
86 ///
87 /* *** CVC4 NOTE ***
88 * This function is has been modified in not-completely-trivial ways from its
89 * libantlr3c implementation to support more informative error messages and to
90 * invoke the error reporting mechanism of the Input class instead of the
91 * default error printer.
92 */
93 void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
94 pANTLR3_EXCEPTION ex = recognizer->state->exception;
95 pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
96 stringstream ss;
97
98 // Dig the CVC4 objects out of the ANTLR3 mess
99 pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super);
100 assert(antlr3Parser!=NULL);
101 Parser *parser = (Parser*)(antlr3Parser->super);
102 assert(parser!=NULL);
103 AntlrInput *input = (AntlrInput*) parser->getInput() ;
104 assert(input!=NULL);
105
106 // Signal we are in error recovery now
107 recognizer->state->errorRecovery = ANTLR3_TRUE;
108
109 // Indicate this recognizer had an error while processing.
110 recognizer->state->errorCount++;
111
112 // Call the builtin error formatter
113 // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
114
115 /* TODO: Make error messages more useful, maybe by including more expected tokens and information
116 * about the current token. */
117 switch(ex->type) {
118 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
119
120 // Indicates that the recognizer was fed a token which seems to be
121 // spurious input. We can detect this when the token that follows
122 // this unwanted token would normally be part of the syntactically
123 // correct stream. Then we can see that the token we are looking at
124 // is just something that should not be there and throw this exception.
125 //
126 if(tokenNames == NULL) {
127 ss << "Unexpected token." ;
128 } else {
129 if(ex->expecting == ANTLR3_TOKEN_EOF) {
130 ss << "Expected end of file.";
131 } else {
132 ss << "Expected " << tokenNames[ex->expecting]
133 << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
134 }
135 }
136 break;
137
138 case ANTLR3_MISSING_TOKEN_EXCEPTION:
139
140 // Indicates that the recognizer detected that the token we just
141 // hit would be valid syntactically if preceded by a particular
142 // token. Perhaps a missing ';' at line end or a missing ',' in an
143 // expression list, and such like.
144 //
145 if(tokenNames == NULL) {
146 ss << "Missing token (" << ex->expecting << ").";
147 } else {
148 if(ex->expecting == ANTLR3_TOKEN_EOF) {
149 ss << "Missing end of file marker.";
150 } else if( ex->expecting == 0 ) {
151 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
152 if( std::string(tokenText((pANTLR3_COMMON_TOKEN)ex->token)) == std::string("IN") ) {
153 ss << " Did you mean: `IS_IN'?";
154 }
155 } else {
156 ss << "Missing " << tokenNames[ex->expecting] << ".";
157 }
158 }
159 break;
160
161 case ANTLR3_RECOGNITION_EXCEPTION:
162
163 // Indicates that the recognizer received a token
164 // in the input that was not predicted. This is the basic exception type
165 // from which all others are derived. So we assume it was a syntax error.
166 // You may get this if there are not more tokens and more are needed
167 // to complete a parse for instance.
168 //
169 ss <<"Syntax error.";
170 break;
171
172 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
173
174 // We were expecting to see one thing and got another. This is the
175 // most common error if we could not detect a missing or unwanted token.
176 // Here you can spend your efforts to
177 // derive more useful error messages based on the expected
178 // token set and the last token and so on. The error following
179 // bitmaps do a good job of reducing the set that we were looking
180 // for down to something small. Knowing what you are parsing may be
181 // able to allow you to be even more specific about an error.
182 //
183 if(tokenNames == NULL) {
184 ss << "Syntax error.";
185 } else {
186 if(ex->expecting == ANTLR3_TOKEN_EOF) {
187 ss << "Expected end of file.";
188 } else if( ex->expecting == 0 ) {
189 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
190 } else {
191 ss << "Expected " << tokenNames[ex->expecting] << ".";
192 }
193 }
194 break;
195
196 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
197 // We could not pick any alt decision from the input given
198 // so god knows what happened - however when you examine your grammar,
199 // you should. It means that at the point where the current token occurred
200 // that the DFA indicates nowhere to go from here.
201 //
202 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
203 break;
204
205 case ANTLR3_MISMATCHED_SET_EXCEPTION:
206
207 {
208 ANTLR3_UINT32 count;
209 ANTLR3_UINT32 bit;
210 ANTLR3_UINT32 size;
211 ANTLR3_UINT32 numbits;
212 pANTLR3_BITSET errBits;
213
214 // This means we were able to deal with one of a set of
215 // possible tokens at this point, but we did not see any
216 // member of that set.
217 //
218 ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token)
219 << "'. Expected one of: ";
220
221 // What tokens could we have accepted at this point in the
222 // parse?
223 //
224 count = 0;
225 errBits = antlr3BitsetLoad(ex->expectingSet);
226 numbits = errBits->numBits(errBits);
227 size = errBits->size(errBits);
228
229 if(size > 0) {
230 // However many tokens we could have dealt with here, it is usually
231 // not useful to print ALL of the set here. I arbitrarily chose 8
232 // here, but you should do whatever makes sense for you of course.
233 // No token number 0, so look for bit 1 and on.
234 //
235 for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
236 // TODO: This doesn;t look right - should be asking if the bit is set!!
237 //
238 if(tokenNames[bit]) {
239 if( count++ > 0 ) {
240 ss << ", ";
241 }
242 ss << tokenNames[bit];
243 }
244 }
245 } else {
246 assert(false);//("Parse error with empty set of expected tokens.");
247 }
248 }
249 break;
250
251 case ANTLR3_EARLY_EXIT_EXCEPTION:
252
253 // We entered a loop requiring a number of token sequences
254 // but found a token that ended that sequence earlier than
255 // we should have done.
256 //
257 ss << "Sequence terminated early by token: '"
258 << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
259 break;
260
261 default:
262
263 // We don't handle any other exceptions here, but you can
264 // if you wish. If we get an exception that hits this point
265 // then we are just going to report what we know about the
266 // token.
267 //
268 assert(false);//("Unexpected exception in parser.");
269 break;
270 }
271
272 // Call the error display routine
273 input->parseError(ss.str(), ((pANTLR3_COMMON_TOKEN)ex->token)->type == ANTLR3_TOKEN_EOF);
274 }
275
276 ///
277 /// \brief
278 /// Returns the next available token from the current input stream.
279 ///
280 /// \param toksource
281 /// Points to the implementation of a token source. The lexer is
282 /// addressed by the super structure pointer.
283 ///
284 /// \returns
285 /// The next token in the current input stream or the EOF token
286 /// if there are no more tokens.
287 ///
288 /// \remarks
289 /// Write remarks for nextToken here.
290 ///
291 /// \see nextToken
292 ///
293 /* *** CVC4 NOTE ***
294 * This is copied, largely unmodified, from antlr3lexer.c
295 *
296 */
297 pANTLR3_COMMON_TOKEN
298 AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
299 {
300 pANTLR3_LEXER lexer;
301
302 lexer = (pANTLR3_LEXER)(toksource->super);
303
304 /// Loop until we get a non skipped token or EOF
305 ///
306 for (;;)
307 {
308 // Get rid of any previous token (token factory takes care of
309 // any de-allocation when this token is finally used up.
310 //
311 lexer->rec->state->token = NULL;
312 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
313 lexer->rec->state->failed = ANTLR3_FALSE;
314
315 // Now call the matching rules and see if we can generate a new token
316 //
317 for (;;)
318 {
319 // Record the start of the token in our input stream.
320 //
321 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
322 lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream);
323 lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input);
324 lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
325 lexer->rec->state->text = NULL;
326
327 if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
328 {
329 // Reached the end of the current stream, nothing more to do if this is
330 // the last in the stack.
331 //
332 pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken);
333
334 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
335 teof->setStopIndex (teof, lexer->getCharIndex(lexer));
336 teof->setLine (teof, lexer->getLine(lexer));
337 teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it
338 return teof;
339 }
340
341 lexer->rec->state->token = NULL;
342 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
343 lexer->rec->state->failed = ANTLR3_FALSE;
344
345 // Call the generated lexer, see if it can get a new token together.
346 //
347 lexer->mTokens(lexer->ctx);
348
349 if (lexer->rec->state->error == ANTLR3_TRUE)
350 {
351 // Recognition exception, report it and try to recover.
352 //
353 lexer->rec->state->failed = ANTLR3_TRUE;
354 // *** CVC4 EDIT: Just call the AntlrInput error routine
355 lexerError(lexer->rec);
356 lexer->recover(lexer);
357 }
358 else
359 {
360 if (lexer->rec->state->token == NULL)
361 {
362 // Emit the real token, which adds it in to the token stream basically
363 //
364 // *** CVC4 Edit: call emit on the lexer object
365 lexer->emit(lexer);
366 }
367 else if (lexer->rec->state->token == &(toksource->skipToken))
368 {
369 // A real token could have been generated, but "Computer say's naaaaah" and it
370 // it is just something we need to skip altogether.
371 //
372 continue;
373 }
374
375 // Good token, not skipped, not EOF token
376 //
377 return lexer->rec->state->token;
378 }
379 }
380 }
381 }
382
383 /* *** CVC4 NOTE ***
384 * This is copied, totaly unmodified, from antlr3lexer.c
385 * in order to use nextTokenStr previously defined.
386 *
387 */
388 pANTLR3_COMMON_TOKEN
389 AntlrInput::nextToken (pANTLR3_TOKEN_SOURCE toksource)
390 {
391 pANTLR3_COMMON_TOKEN tok;
392
393 // Find the next token in the current stream
394 //
395 tok = nextTokenStr(toksource);
396
397 // If we got to the EOF token then switch to the previous
398 // input stream if there were any and just return the
399 // EOF if there are none. We must check the next token
400 // in any outstanding input stream we pop into the active
401 // role to see if it was sitting at EOF after PUSHing the
402 // stream we just consumed, otherwise we will return EOF
403 // on the reinstalled input stream, when in actual fact
404 // there might be more input streams to POP before the
405 // real EOF of the whole logical inptu stream. Hence we
406 // use a while loop here until we find somethign in the stream
407 // that isn't EOF or we reach the actual end of the last input
408 // stream on the stack.
409 //
410 while (tok->type == ANTLR3_TOKEN_EOF)
411 {
412 pANTLR3_LEXER lexer;
413
414 lexer = (pANTLR3_LEXER)(toksource->super);
415
416 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
417 {
418 // We have another input stream in the stack so we
419 // need to revert to it, then resume the loop to check
420 // it wasn't sitting at EOF itself.
421 //
422 lexer->popCharStream(lexer);
423 tok = nextTokenStr(toksource);
424 }
425 else
426 {
427 // There were no more streams on the input stack
428 // so this EOF is the 'real' logical EOF for
429 // the input stream. So we just exit the loop and
430 // return the EOF we have found.
431 //
432 break;
433 }
434
435 }
436
437 // return whatever token we have, which may be EOF
438 //
439 return tok;
440 }
441
442
443
444 } // namespace parser
445 } // namespace CVC4