Refactoring Options Handler & Library Cycle Breaking
[cvc5.git] / src / parser / antlr_input.cpp
1 /********************* */
2 /*! \file antlr_input.cpp
3 ** \verbatim
4 ** Original author: Christopher L. Conway
5 ** Major contributors: Morgan Deters, Kshitij Bansal
6 ** Minor contributors (to current version): Francois Bobot
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief A super-class for ANTLR-generated input language parsers.
13 **
14 ** A super-class for ANTLR-generated input language parsers
15 **/
16
17 #include <limits.h>
18 #include <antlr3.h>
19 #include <stdint.h>
20
21 #include "base/output.h"
22 #include "expr/type.h"
23 #include "parser/antlr_input.h"
24 #include "parser/antlr_line_buffered_input.h"
25 #include "parser/bounded_token_buffer.h"
26 #include "parser/bounded_token_factory.h"
27 #include "parser/cvc/cvc_input.h"
28 #include "parser/input.h"
29 #include "parser/memory_mapped_input_buffer.h"
30 #include "parser/parser.h"
31 #include "parser/parser_exception.h"
32 #include "parser/smt1/smt1_input.h"
33 #include "parser/smt2/smt2_input.h"
34 #include "parser/smt2/sygus_input.h"
35 #include "parser/tptp/tptp_input.h"
36 #include "smt_util/command.h"
37
38 using namespace std;
39 using namespace CVC4;
40 using namespace CVC4::parser;
41 using namespace CVC4::kind;
42
43 namespace CVC4 {
44 namespace parser {
45
46 // These functions exactly wrap the antlr3 source inconsistencies.
47 // These are the only location CVC4_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
48 // No other sanity checking happens;
49 pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name);
50 pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
51 pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
52 uint32_t size,
53 const std::string& name);
54
55 pANTLR3_INPUT_STREAM
56 newAntlr3BufferedStream(std::istream& input, const std::string& name){
57 pANTLR3_INPUT_STREAM inputStream = NULL;
58 pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
59
60 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
61 inputStream =
62 antlr3LineBufferedStreamNew(input, 0, name_duplicate);
63 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
64 inputStream =
65 antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, name_duplicate);
66 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
67
68 free(name_duplicate);
69 return inputStream;
70 }
71
72 pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
73 pANTLR3_INPUT_STREAM input = NULL;
74 pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
75
76 // libantlr3c v3.2 isn't source-compatible with v3.4
77 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
78 input = antlr3AsciiFileStreamNew(name_duplicate);
79 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
80 input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
81 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
82
83 free(name_duplicate);
84 return input;
85 }
86
87
88 pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
89 uint32_t size,
90 const std::string& name){
91 pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
92 pANTLR3_INPUT_STREAM inputStream = NULL;
93 /* Create an ANTLR input backed by the buffer. */
94 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
95 inputStream =
96 antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
97 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
98 inputStream =
99 antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
100 name_duplicate);
101 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
102 free(name_duplicate);
103 return inputStream;
104 }
105
106 AntlrInputStream::AntlrInputStream(std::string name,
107 pANTLR3_INPUT_STREAM input,
108 bool fileIsTemporary,
109 pANTLR3_UINT8 inputString) :
110 InputStream(name, fileIsTemporary),
111 d_input(input),
112 d_inputString(inputString)
113 {
114 assert( input != NULL );
115 input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
116 }
117
118 AntlrInputStream::~AntlrInputStream() {
119 d_input->free(d_input);
120 if(d_inputString != NULL){
121 free(d_inputString);
122 }
123 }
124
125 pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
126 return d_input;
127 }
128
129
130
131 AntlrInputStream*
132 AntlrInputStream::newFileInputStream(const std::string& name,
133 bool useMmap)
134 throw (InputStreamException) {
135 #ifdef _WIN32
136 if(useMmap) {
137 useMmap = false;
138 }
139 #endif
140 pANTLR3_INPUT_STREAM input = NULL;
141 if(useMmap) {
142 input = MemoryMappedInputBufferNew(name);
143 } else {
144 input = newAntlr3FileStream(name);
145 }
146 if(input == NULL) {
147 throw InputStreamException("Couldn't open file: " + name);
148 }
149 return new AntlrInputStream( name, input, false, NULL );
150 }
151
152
153 AntlrInputStream*
154 AntlrInputStream::newStreamInputStream(std::istream& input,
155 const std::string& name,
156 bool lineBuffered)
157 throw (InputStreamException) {
158
159 pANTLR3_INPUT_STREAM inputStream = NULL;
160 pANTLR3_UINT8 inputStringCopy = NULL;
161
162 if(lineBuffered) {
163 inputStream = newAntlr3BufferedStream(input, name);
164 } else {
165
166 // Since these are all NULL on entry, realloc will be called
167 char *basep = NULL, *boundp = NULL, *cp = NULL;
168 /* 64KB seems like a reasonable default size. */
169 size_t bufSize = 0x10000;
170
171 /* Keep going until we can't go no more. */
172 while( !input.eof() && !input.fail() ) {
173
174 if( cp == boundp ) {
175 /* We ran out of room in the buffer. Realloc at double the size. */
176 ptrdiff_t offset = cp - basep;
177 basep = (char *) realloc(basep, bufSize);
178 if( basep == NULL ) {
179 throw InputStreamException("Failed buffering input stream: " + name);
180 }
181 cp = basep + offset;
182 boundp = basep + bufSize;
183 bufSize *= 2;
184 }
185
186 /* Read as much as we have room for. */
187 input.read( cp, boundp - cp );
188 cp += input.gcount();
189 }
190
191 /* Make sure the fail bit didn't get set. */
192 if( !input.eof() ) {
193 throw InputStreamException("Stream input failed: " + name);
194 }
195 ptrdiff_t offset = cp - basep;
196 assert(offset >= 0);
197 assert(offset <= std::numeric_limits<uint32_t>::max());
198 inputStringCopy = (pANTLR3_UINT8)basep;
199 inputStream = newAntrl3InPlaceStream(inputStringCopy, (uint32_t) offset, name);
200 }
201
202 if( inputStream == NULL ) {
203 throw InputStreamException("Couldn't initialize input: " + name);
204 }
205
206 return new AntlrInputStream( name, inputStream, false, inputStringCopy );
207 }
208
209
210 AntlrInputStream*
211 AntlrInputStream::newStringInputStream(const std::string& input,
212 const std::string& name)
213 throw (InputStreamException) {
214
215 size_t input_size = input.size();
216 assert(input_size <= std::numeric_limits<uint32_t>::max());
217
218 // Ownership of input_duplicate is transferred to the AntlrInputStream.
219 pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
220
221 if( input_duplicate == NULL ) {
222 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
223 }
224
225 pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
226 if( inputStream==NULL ) {
227 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
228 }
229 return new AntlrInputStream( name, inputStream, false, input_duplicate );
230 }
231
232 AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
233 using namespace language::input;
234
235 AntlrInput* input;
236
237 switch(lang) {
238 case LANG_CVC4: {
239 input = new CvcInput(inputStream);
240 break;
241 }
242 case LANG_SMTLIB_V1:
243 input = new Smt1Input(inputStream);
244 break;
245
246 case LANG_SMTLIB_V2_0:
247 case LANG_SMTLIB_V2_5:
248 input = new Smt2Input(inputStream, lang);
249 break;
250
251 case LANG_SYGUS:
252 input = new SygusInput(inputStream);
253 break;
254
255 case LANG_TPTP:
256 input = new TptpInput(inputStream);
257 break;
258
259 default:
260 std::stringstream ss;
261 ss << "internal error: unhandled language " << lang << " in AntlrInput::newInput";
262 throw InputStreamException(ss.str());
263 }
264
265 return input;
266 }
267
268 AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
269 Input(inputStream),
270 d_lookahead(lookahead),
271 d_lexer(NULL),
272 d_parser(NULL),
273 d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
274 d_tokenBuffer(NULL) {
275 }
276
277 /*
278 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
279 Parser(exprManager,name),
280 d_lookahead(lookahead) {
281
282 }
283 */
284
285 /*
286 AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
287 Input(exprManager,name),
288 d_lookahead(lookahead),
289 d_lexer(NULL),
290 d_parser(NULL),
291 d_tokenStream(NULL) {
292
293 char* inputStr = strdup(input.c_str());
294 char* nameStr = strdup(name.c_str());
295 if( inputStr==NULL || nameStr==NULL ) {
296 throw ParserException("Couldn't initialize string input: '" + input + "'");
297 }
298 d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
299 if( d_inputStream == NULL ) {
300 throw ParserException("Couldn't create input stream for string: '" + input + "'");
301 }
302
303 }
304 */
305
306 AntlrInput::~AntlrInput() {
307 BoundedTokenBufferFree(d_tokenBuffer);
308 }
309
310 pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
311 return d_tokenBuffer->commonTstream;
312 }
313
314 void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
315 pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
316 assert(lexer!=NULL);
317 Parser *parser = (Parser*)(lexer->super);
318 assert(parser!=NULL);
319 AntlrInput *input = (AntlrInput*) parser->getInput();
320 assert(input!=NULL);
321
322 /* Call the error display routine *if* there's not already a
323 * parse error pending. If a parser error is pending, this
324 * error is probably less important, so we just drop it. */
325 if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
326 input->parseError("Error finding next token.");
327 }
328 }
329
330 void AntlrInput::warning(const std::string& message) {
331 Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
332 }
333
334
335
336 /**
337 * characters considered part of a simple symbol in SMTLIB.
338 *
339 * TODO: Ideally this code shouldn't be smtlib specific (should work
340 * with CVC language too), but this per-language specialization has
341 * been left for a later point.
342 */
343 inline bool isSimpleChar(char ch) {
344 return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
345 }
346
347 size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
348 size_t st = 0;
349 size_t N = input.size();
350 while(st < N) {
351 while( st < N && (*isWordChar)(input[st]) == false ) st++;
352 size_t en = st;
353 while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
354 if(en - st + 1 == pattern.size()) {
355 bool match = true;
356 for(size_t i = 0; match && i < pattern.size(); ++i) {
357 match &= (pattern[i] == input[st+i]);
358 }
359 if(match == true) {
360 return st;
361 }
362 }
363 st = en + 1;
364 }
365 return string::npos;
366 }
367
368 /**
369 * Gets part of original input and tries to visually hint where the
370 * error might be.
371 *
372 * Something like:
373 *
374 * ...nd (= alpha beta) (= beta delta))
375 * ^
376 *
377 * Implementation (as on 2014/04/24):
378 *
379 * > if suggested pointer by lexer is under a "simple char", move to
380 * start of the word and print pointer there.
381 *
382 * > in the other case, it tries to find the nearest word in the error
383 * message passed along. if it can't find it, we don't add this
384 * visual hint, as experimentally position suggested by lexer was
385 * found to be totally unhelpful. (TODO: fix this upstream to
386 * improve)
387 */
388 std::string parseErrorHelper(const char* lineStart, int charPositionInLine, const std::string& message)
389 {
390 // Is it a multi-line message
391 bool multilineMessage = (message.find('\n') != string::npos);
392 // Useful only if it is a multi-line message
393 int firstLineEnd = message.find('\n');
394
395 std::ostringstream ss, slicess;
396
397 // Keep first line of message
398 if(multilineMessage) {
399 ss << message.substr(0, firstLineEnd) << endl << endl;
400 } else {
401 ss << message << endl << endl;
402 }
403
404 int posSliceStart = (charPositionInLine - 50 <= 0) ? 0 : charPositionInLine - 50 + 5;
405 int posSliceEnd = posSliceStart + 70;
406 int caretPos = 0;
407 int caretPosExtra = 0; // for inital intendation, epilipses etc.
408
409 ss << " "; caretPosExtra += 2;
410 if(posSliceStart > 0) {
411 ss << "..."; caretPosExtra += 3;
412 }
413
414 for(int i = posSliceStart; lineStart[i] != '\n'; ++i) {
415 if(i == posSliceEnd) {
416 ss << "...";
417 break;
418 }
419 if(i < charPositionInLine) { caretPos++; }
420
421 if(!isprint(lineStart[i])) {
422 // non-printable character, something wrong, bail out
423 return message;
424 }
425
426 ss << (lineStart[i]);
427 slicess << (lineStart[i]);
428 }
429
430 // adjust position of caret, based on slice and message
431 {
432 int caretPosOrig = caretPos;
433 string slice = slicess.str();
434 if(isSimpleChar(slice[caretPos])) {
435 // if alphanumeric, try to go to beginning of word/number
436 while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
437 if(caretPos == 0 && posSliceStart > 0) {
438 // reached start and this is not really the start? bail out
439 return message;
440 } else {
441 // likely it is also in original message? if so, very likely
442 // we found the right place
443 string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
444 size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
445 Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
446 if( matchLoc != string::npos ) {
447 Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
448 }
449 }
450 } else {
451 bool foundCaretPos = false;
452
453 for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
454 // go to nearest alphanumeric string (before current position),
455 // see if that word can be found in original message. If so,
456 // point to that, else keep pointer where it was.
457 int nearestWordEn = caretPos - 1;
458 while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
459 --nearestWordEn;
460 }
461 if(isSimpleChar(slice[nearestWordEn])) {
462 int nearestWordSt = nearestWordEn;
463 while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
464 --nearestWordSt;
465 }
466 string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
467 size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
468 Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
469 Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
470 if( matchLoc != string::npos ) {
471 Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
472 << nearestWordSt << std::endl;
473 foundCaretPos = true;
474 }
475 caretPos = nearestWordSt;
476 }
477 }
478 if( !foundCaretPos) {
479 // this doesn't look good. caret generally getting printed
480 // at unhelpful positions. improve upstream?
481 return message;
482 }
483 }
484 caretPos += caretPosExtra;
485 }// end of caret position computation/heuristics
486
487 ss << std::endl;
488 while( caretPos-- > 0 ) {
489 ss << ' ';
490 }
491 ss << '^' << endl;
492 if(multilineMessage) {
493 ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
494 }
495 return ss.str();
496 }
497
498 void AntlrInput::parseError(const std::string& message, bool eofException)
499 throw (ParserException) {
500
501 string updatedMessage = parseErrorHelper((const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
502 d_lexer->getCharPositionInLine(d_lexer),
503 message);
504
505 Debug("parser") << "Throwing exception: "
506 << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
507 << d_lexer->getLine(d_lexer) << "."
508 << d_lexer->getCharPositionInLine(d_lexer) << ": "
509 << updatedMessage << endl;
510 if(eofException) {
511 throw ParserEndOfFileException(message,
512 (const char*)d_lexer->rec->state->tokSource->fileName->chars,
513 d_lexer->getLine(d_lexer),
514 d_lexer->getCharPositionInLine(d_lexer));
515 } else {
516 throw ParserException(updatedMessage,
517 (const char*)d_lexer->rec->state->tokSource->fileName->chars,
518 d_lexer->getLine(d_lexer),
519 d_lexer->getCharPositionInLine(d_lexer));
520 }
521 }
522
523
524 void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
525 d_lexer = pLexer;
526
527 pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
528 if( pTokenFactory != NULL ) {
529 pTokenFactory->close(pTokenFactory);
530 }
531
532 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
533 pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
534 if( pTokenFactory == NULL ) {
535 throw InputStreamException("Couldn't create token factory.");
536 }
537 d_lexer->rec->state->tokFactory = pTokenFactory;
538
539 pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
540 if( buffer == NULL ) {
541 throw InputStreamException("Couldn't create token buffer.");
542 }
543
544 d_tokenBuffer = buffer;
545
546 // Override default lexer error reporting
547 d_lexer->rec->reportError = &lexerError;
548 // Override default nextToken function, just to prevent exceptions escaping.
549 d_lexer->rec->state->tokSource->nextToken = &nextToken;
550 }
551
552 void AntlrInput::setParser(Parser& parser) {
553 // ANTLR isn't using super in the lexer or the parser, AFAICT.
554 // We could also use @lexer/parser::context to add a field to the generated
555 // objects, but then it would have to be declared separately in every
556 // language's grammar and we'd have to in the address of the field anyway.
557 d_lexer->super = &parser;
558 d_parser->super = &parser;
559 }
560
561 void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
562 d_parser = pParser;
563 // d_parser->rec->match = &match;
564 d_parser->rec->reportError = &reportError;
565 /* Don't try to recover from a parse error. */
566 // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
567 d_parser->rec->recoverFromMismatchedToken =
568 (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
569 d_parser->rec->mismatch;
570 }
571
572 }/* CVC4::parser namespace */
573 }/* CVC4 namespace */