Improvements to header installation on user machines. Internally, we can
[cvc5.git] / src / parser / antlr_input_imports.cpp
1 /********************* */
2 /*! \file antlr_input_imports.cpp
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): none
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
14 ** \brief ANTLR3 runtime routines adapted for the CVC4 parser
15 **
16 ** Defines AntlrInput::reportError and AntlrInput::nextTokenStr, both based on libantlr3c.
17 ** \todo document this file
18 **/
19
20 /*
21 * The functions in this file are based on implementations in libantlr3c,
22 * with only minor CVC4-specific changes.
23 */
24
25 // [The "BSD licence"]
26 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
27 // http://www.temporal-wave.com
28 // http://www.linkedin.com/in/jimidle
29 //
30 // All rights reserved.
31 //
32 // Redistribution and use in source and binary forms, with or without
33 // modification, are permitted provided that the following conditions
34 // are met:
35 // 1. Redistributions of source code must retain the above copyright
36 // notice, this list of conditions and the following disclaimer.
37 // 2. Redistributions in binary form must reproduce the above copyright
38 // notice, this list of conditions and the following disclaimer in the
39 // documentation and/or other materials provided with the distribution.
40 // 3. The name of the author may not be used to endorse or promote products
41 // derived from this software without specific prior written permission.
42 //
43 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
44 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
46 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
47 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
48 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
52 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53
54 #include <antlr3.h>
55 #include <sstream>
56
57 #include "parser/antlr_input.h"
58 #include "parser/parser.h"
59 #include "parser/parser_exception.h"
60 #include "util/Assert.h"
61
62 using namespace std;
63
64 namespace CVC4 {
65 namespace parser {
66
67 /// Report a recognition problem.
68 ///
69 /// This method sets errorRecovery to indicate the parser is recovering
70 /// not parsing. Once in recovery mode, no errors are generated.
71 /// To get out of recovery mode, the parser must successfully match
72 /// a token (after a resync). So it will go:
73 ///
74 /// 1. error occurs
75 /// 2. enter recovery mode, report error
76 /// 3. consume until token found in resynch set
77 /// 4. try to resume parsing
78 /// 5. next match() will reset errorRecovery mode
79 ///
80 /// If you override, make sure to update errorCount if you care about that.
81 ///
82 /* *** CVC4 NOTE ***
83 * This function has been modified in not-completely-trivial ways from its
84 * libantlr3c implementation to support more informative error messages and to
85 * invoke the error reporting mechanism of the Input class instead of the
86 * default error printer.
87 */
88 void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
89 pANTLR3_EXCEPTION ex = recognizer->state->exception;
90 pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
91 stringstream ss;
92
93 // Dig the CVC4 objects out of the ANTLR3 mess
94 pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super);
95 AlwaysAssert(antlr3Parser!=NULL);
96 Parser *parser = (Parser*)(antlr3Parser->super);
97 AlwaysAssert(parser!=NULL);
98 AntlrInput *input = (AntlrInput*) parser->getInput() ;
99 AlwaysAssert(input!=NULL);
100
101 // Signal we are in error recovery now
102 recognizer->state->errorRecovery = ANTLR3_TRUE;
103
104 // Indicate this recognizer had an error while processing.
105 recognizer->state->errorCount++;
106
107 // Call the builtin error formatter
108 // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
109
110 /* TODO: Make error messages more useful, maybe by including more expected tokens and information
111 * about the current token. */
112 switch(ex->type) {
113 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
114
115 // Indicates that the recognizer was fed a token which seems to be
116 // spurious input. We can detect this when the token that follows
117 // this unwanted token would normally be part of the syntactically
118 // correct stream. Then we can see that the token we are looking at
119 // is just something that should not be there and throw this exception.
120 //
121 if(tokenNames == NULL) {
122 ss << "Unexpected token." ;
123 } else {
124 if(ex->expecting == ANTLR3_TOKEN_EOF) {
125 ss << "Expected end of file.";
126 } else {
127 ss << "Expected " << tokenNames[ex->expecting]
128 << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
129 }
130 }
131 break;
132
133 case ANTLR3_MISSING_TOKEN_EXCEPTION:
134
135 // Indicates that the recognizer detected that the token we just
136 // hit would be valid syntactically if preceded by a particular
137 // token. Perhaps a missing ';' at line end or a missing ',' in an
138 // expression list, and such like.
139 //
140 if(tokenNames == NULL) {
141 ss << "Missing token (" << ex->expecting << ").";
142 } else {
143 if(ex->expecting == ANTLR3_TOKEN_EOF) {
144 ss << "Missing end of file marker.";
145 } else if( ex->expecting == 0 ) {
146 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
147 } else {
148 ss << "Missing " << tokenNames[ex->expecting] << ".";
149 }
150 }
151 break;
152
153 case ANTLR3_RECOGNITION_EXCEPTION:
154
155 // Indicates that the recognizer received a token
156 // in the input that was not predicted. This is the basic exception type
157 // from which all others are derived. So we assume it was a syntax error.
158 // You may get this if there are not more tokens and more are needed
159 // to complete a parse for instance.
160 //
161 ss <<"Syntax error.";
162 break;
163
164 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
165
166 // We were expecting to see one thing and got another. This is the
167 // most common error if we could not detect a missing or unwanted token.
168 // Here you can spend your efforts to
169 // derive more useful error messages based on the expected
170 // token set and the last token and so on. The error following
171 // bitmaps do a good job of reducing the set that we were looking
172 // for down to something small. Knowing what you are parsing may be
173 // able to allow you to be even more specific about an error.
174 //
175 if(tokenNames == NULL) {
176 ss << "Syntax error.";
177 } else {
178 if(ex->expecting == ANTLR3_TOKEN_EOF) {
179 ss << "Expected end of file.";
180 } else if( ex->expecting == 0 ) {
181 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
182 } else {
183 ss << "Expected " << tokenNames[ex->expecting] << ".";
184 }
185 }
186 break;
187
188 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
189 // We could not pick any alt decision from the input given
190 // so god knows what happened - however when you examine your grammar,
191 // you should. It means that at the point where the current token occurred
192 // that the DFA indicates nowhere to go from here.
193 //
194 ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
195 break;
196
197 case ANTLR3_MISMATCHED_SET_EXCEPTION:
198
199 {
200 ANTLR3_UINT32 count;
201 ANTLR3_UINT32 bit;
202 ANTLR3_UINT32 size;
203 ANTLR3_UINT32 numbits;
204 pANTLR3_BITSET errBits;
205
206 // This means we were able to deal with one of a set of
207 // possible tokens at this point, but we did not see any
208 // member of that set.
209 //
210 ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token)
211 << "'. Expected one of: ";
212
213 // What tokens could we have accepted at this point in the
214 // parse?
215 //
216 count = 0;
217 errBits = antlr3BitsetLoad(ex->expectingSet);
218 numbits = errBits->numBits(errBits);
219 size = errBits->size(errBits);
220
221 if(size > 0) {
222 // However many tokens we could have dealt with here, it is usually
223 // not useful to print ALL of the set here. I arbitrarily chose 8
224 // here, but you should do whatever makes sense for you of course.
225 // No token number 0, so look for bit 1 and on.
226 //
227 for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
228 // TODO: This doesn;t look right - should be asking if the bit is set!!
229 //
230 if(tokenNames[bit]) {
231 if( count++ > 0 ) {
232 ss << ", ";
233 }
234 ss << tokenNames[bit];
235 }
236 }
237 } else {
238 Unreachable("Parse error with empty set of expected tokens.");
239 }
240 }
241 break;
242
243 case ANTLR3_EARLY_EXIT_EXCEPTION:
244
245 // We entered a loop requiring a number of token sequences
246 // but found a token that ended that sequence earlier than
247 // we should have done.
248 //
249 ss << "Sequence terminated early by token: '"
250 << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
251 break;
252
253 default:
254
255 // We don't handle any other exceptions here, but you can
256 // if you wish. If we get an exception that hits this point
257 // then we are just going to report what we know about the
258 // token.
259 //
260 Unhandled("Unexpected exception in parser.");
261 break;
262 }
263
264 // Call the error display routine
265 input->parseError(ss.str());
266 }
267
268 ///
269 /// \brief
270 /// Returns the next available token from the current input stream.
271 ///
272 /// \param toksource
273 /// Points to the implementation of a token source. The lexer is
274 /// addressed by the super structure pointer.
275 ///
276 /// \returns
277 /// The next token in the current input stream or the EOF token
278 /// if there are no more tokens.
279 ///
280 /// \remarks
281 /// Skipped tokens are dropped, and lexer errors are passed to lexerError() before recovery.
282 ///
283 /// \see nextToken
284 ///
285 /* *** CVC4 NOTE ***
286 * This is copied, largely unmodified, from antlr3lexer.c
287 *
288 */
289 pANTLR3_COMMON_TOKEN
290 AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
291 {
292 pANTLR3_LEXER lexer;
293
294 lexer = (pANTLR3_LEXER)(toksource->super);
295
296 /// Loop until we get a non skipped token or EOF
297 ///
298 for (;;)
299 {
300 // Get rid of any previous token (token factory takes care of
301 // any de-allocation when this token is finally used up.
302 //
303 lexer->rec->state->token = NULL;
304 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
305 lexer->rec->state->failed = ANTLR3_FALSE;
306
307 // Now call the matching rules and see if we can generate a new token
308 //
309 for (;;)
310 {
311 // Record the start of the token in our input stream.
312 //
313 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
314 lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream);
315 lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input);
316 lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
317 lexer->rec->state->text = NULL;
318
319 if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
320 {
321 // Reached the end of the current stream, nothing more to do if this is
322 // the last in the stack.
323 //
324 pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken);
325
326 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
327 teof->setStopIndex (teof, lexer->getCharIndex(lexer));
328 teof->setLine (teof, lexer->getLine(lexer));
329 teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it
330 return teof;
331 }
332
333 lexer->rec->state->token = NULL;
334 lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
335 lexer->rec->state->failed = ANTLR3_FALSE;
336
337 // Call the generated lexer, see if it can get a new token together.
338 //
339 lexer->mTokens(lexer->ctx);
340
341 if (lexer->rec->state->error == ANTLR3_TRUE)
342 {
343 // Recognition exception, report it and try to recover.
344 //
345 lexer->rec->state->failed = ANTLR3_TRUE;
346 // *** CVC4 EDIT: Just call the AntlrInput error routine
347 lexerError(lexer->rec);
348 lexer->recover(lexer);
349 }
350 else
351 {
352 if (lexer->rec->state->token == NULL)
353 {
354 // Emit the real token, which adds it in to the token stream basically
355 //
356 // *** CVC4 Edit: call emit on the lexer object
357 lexer->emit(lexer);
358 }
359 else if (lexer->rec->state->token == &(toksource->skipToken))
360 {
361 // A real token could have been generated, but "Computer say's naaaaah" and it
362 // it is just something we need to skip altogether.
363 //
364 continue;
365 }
366
367 // Good token, not skipped, not EOF token
368 //
369 return lexer->rec->state->token;
370 }
371 }
372 }
373 }
374
375 } // namespace parser
376 } // namespace CVC4