Merge branch 'master' into bv-core
[cvc5.git] / src / parser / antlr_input.cpp
1 /********************* */
2 /*! \file antlr_input.cpp
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: mdeters
6 ** Minor contributors (to current version): bobot
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009-2012 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief A super-class for ANTLR-generated input language parsers.
13 **
14 ** A super-class for ANTLR-generated input language parsers
15 **/
16
17 #include <limits.h>
18 #include <antlr3.h>
19 #include <stdint.h>
20
21 #include "parser/antlr_input.h"
22 #include "parser/input.h"
23 #include "parser/bounded_token_buffer.h"
24 #include "parser/bounded_token_factory.h"
25 #include "parser/antlr_line_buffered_input.h"
26 #include "parser/memory_mapped_input_buffer.h"
27 #include "parser/parser_exception.h"
28 #include "parser/parser.h"
29
30 #include "expr/command.h"
31 #include "expr/type.h"
32 #include "parser/cvc/cvc_input.h"
33 #include "parser/smt1/smt1_input.h"
34 #include "parser/smt2/smt2_input.h"
35 #include "parser/tptp/tptp_input.h"
36 #include "util/output.h"
37
38 using namespace std;
39 using namespace CVC4;
40 using namespace CVC4::parser;
41 using namespace CVC4::kind;
42
43 namespace CVC4 {
44 namespace parser {
45
46 AntlrInputStream::AntlrInputStream(std::string name,
47 pANTLR3_INPUT_STREAM input,
48 bool fileIsTemporary) :
49 InputStream(name,fileIsTemporary),
50 d_input(input) {
51 assert( input != NULL );
52 }
53
54 AntlrInputStream::~AntlrInputStream() {
55 d_input->free(d_input);
56 }
57
58 pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
59 return d_input;
60 }
61
62 AntlrInputStream*
63 AntlrInputStream::newFileInputStream(const std::string& name,
64 bool useMmap)
65 throw (InputStreamException) {
66 #ifdef _WIN32
67 if(useMmap) {
68 useMmap = false;
69 }
70 #endif
71 pANTLR3_INPUT_STREAM input = NULL;
72 if(useMmap) {
73 input = MemoryMappedInputBufferNew(name);
74 } else {
75 // libantlr3c v3.2 isn't source-compatible with v3.4
76 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
77 input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str());
78 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
79 input = antlr3FileStreamNew((pANTLR3_UINT8) name.c_str(), ANTLR3_ENC_8BIT);
80 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
81 }
82 if(input == NULL) {
83 throw InputStreamException("Couldn't open file: " + name);
84 }
85 return new AntlrInputStream( name, input );
86 }
87
88 AntlrInputStream*
89 AntlrInputStream::newStreamInputStream(std::istream& input,
90 const std::string& name,
91 bool lineBuffered)
92 throw (InputStreamException) {
93
94 pANTLR3_INPUT_STREAM inputStream = NULL;
95
96 if(lineBuffered) {
97 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
98 inputStream =
99 antlr3LineBufferedStreamNew(input,
100 0,
101 (pANTLR3_UINT8) strdup(name.c_str()));
102 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
103 inputStream =
104 antlr3LineBufferedStreamNew(input,
105 ANTLR3_ENC_8BIT,
106 (pANTLR3_UINT8) strdup(name.c_str()));
107 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
108 } else {
109
110 // Since these are all NULL on entry, realloc will be called
111 char *basep = NULL, *boundp = NULL, *cp = NULL;
112 /* 64KB seems like a reasonable default size. */
113 size_t bufSize = 0x10000;
114
115 /* Keep going until we can't go no more. */
116 while( !input.eof() && !input.fail() ) {
117
118 if( cp == boundp ) {
119 /* We ran out of room in the buffer. Realloc at double the size. */
120 ptrdiff_t offset = cp - basep;
121 basep = (char *) realloc(basep, bufSize);
122 if( basep == NULL ) {
123 throw InputStreamException("Failed buffering input stream: " + name);
124 }
125 cp = basep + offset;
126 boundp = basep + bufSize;
127 bufSize *= 2;
128 }
129
130 /* Read as much as we have room for. */
131 input.read( cp, boundp - cp );
132 cp += input.gcount();
133 }
134
135 /* Make sure the fail bit didn't get set. */
136 if( !input.eof() ) {
137 throw InputStreamException("Stream input failed: " + name);
138 }
139
140 /* Create an ANTLR input backed by the buffer. */
141 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
142 inputStream =
143 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) basep,
144 cp - basep,
145 (pANTLR3_UINT8) strdup(name.c_str()));
146 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
147 inputStream =
148 antlr3StringStreamNew((pANTLR3_UINT8) basep,
149 ANTLR3_ENC_8BIT,
150 cp - basep,
151 (pANTLR3_UINT8) strdup(name.c_str()));
152 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
153
154 }
155
156 if( inputStream == NULL ) {
157 throw InputStreamException("Couldn't initialize input: " + name);
158 }
159
160 return new AntlrInputStream( name, inputStream );
161 }
162
163
164 AntlrInputStream*
165 AntlrInputStream::newStringInputStream(const std::string& input,
166 const std::string& name)
167 throw (InputStreamException) {
168 char* inputStr = strdup(input.c_str());
169 char* nameStr = strdup(name.c_str());
170 assert( inputStr!=NULL && nameStr!=NULL );
171 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
172 pANTLR3_INPUT_STREAM inputStream =
173 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) inputStr,
174 input.size(),
175 (pANTLR3_UINT8) nameStr);
176 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
177 pANTLR3_INPUT_STREAM inputStream =
178 antlr3StringStreamNew((pANTLR3_UINT8) inputStr,
179 ANTLR3_ENC_8BIT,
180 input.size(),
181 (pANTLR3_UINT8) nameStr);
182 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
183 if( inputStream==NULL ) {
184 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
185 }
186 return new AntlrInputStream( name, inputStream );
187 }
188
189 AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
190 using namespace language::input;
191
192 AntlrInput* input;
193
194 switch(lang) {
195 case LANG_CVC4: {
196 input = new CvcInput(inputStream);
197 break;
198 }
199 case LANG_SMTLIB_V1:
200 input = new Smt1Input(inputStream);
201 break;
202
203 case LANG_SMTLIB_V2:
204 input = new Smt2Input(inputStream);
205 break;
206
207 case LANG_TPTP:
208 input = new TptpInput(inputStream);
209 break;
210
211 default:
212 std::stringstream ss;
213 ss << "internal error: unhandled language " << lang << " in AntlrInput::newInput";
214 throw InputStreamException(ss.str());
215 }
216
217 return input;
218 }
219
220 AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
221 Input(inputStream),
222 d_lookahead(lookahead),
223 d_lexer(NULL),
224 d_parser(NULL),
225 d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
226 d_tokenBuffer(NULL) {
227 }
228
229 /*
230 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
231 Parser(exprManager,name),
232 d_lookahead(lookahead) {
233
234 }
235 */
236
237 /*
238 AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
239 Input(exprManager,name),
240 d_lookahead(lookahead),
241 d_lexer(NULL),
242 d_parser(NULL),
243 d_tokenStream(NULL) {
244
245 char* inputStr = strdup(input.c_str());
246 char* nameStr = strdup(name.c_str());
247 if( inputStr==NULL || nameStr==NULL ) {
248 throw ParserException("Couldn't initialize string input: '" + input + "'");
249 }
250 d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
251 if( d_inputStream == NULL ) {
252 throw ParserException("Couldn't create input stream for string: '" + input + "'");
253 }
254
255 }
256 */
257
258 AntlrInput::~AntlrInput() {
259 BoundedTokenBufferFree(d_tokenBuffer);
260 }
261
262 pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
263 return d_tokenBuffer->commonTstream;
264 }
265
266 void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
267 pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
268 assert(lexer!=NULL);
269 Parser *parser = (Parser*)(lexer->super);
270 assert(parser!=NULL);
271 AntlrInput *input = (AntlrInput*) parser->getInput();
272 assert(input!=NULL);
273
274 /* Call the error display routine *if* there's not already a
275 * parse error pending. If a parser error is pending, this
276 * error is probably less important, so we just drop it. */
277 if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
278 input->parseError("Error finding next token.");
279 }
280 }
281
282 void AntlrInput::warning(const std::string& message) {
283 Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
284 }
285
286 void AntlrInput::parseError(const std::string& message, bool eofException)
287 throw (ParserException) {
288 Debug("parser") << "Throwing exception: "
289 << getInputStream()->getName() << ":"
290 << d_lexer->getLine(d_lexer) << "."
291 << d_lexer->getCharPositionInLine(d_lexer) << ": "
292 << message << endl;
293 if(eofException) {
294 throw ParserEndOfFileException(message, getInputStream()->getName(),
295 d_lexer->getLine(d_lexer),
296 d_lexer->getCharPositionInLine(d_lexer));
297 } else {
298 throw ParserException(message, getInputStream()->getName(),
299 d_lexer->getLine(d_lexer),
300 d_lexer->getCharPositionInLine(d_lexer));
301 }
302 }
303
304
305 void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
306 d_lexer = pLexer;
307
308 pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
309 if( pTokenFactory != NULL ) {
310 pTokenFactory->close(pTokenFactory);
311 }
312
313 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
314 pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
315 if( pTokenFactory == NULL ) {
316 throw InputStreamException("Couldn't create token factory.");
317 }
318 d_lexer->rec->state->tokFactory = pTokenFactory;
319
320 pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
321 if( buffer == NULL ) {
322 throw InputStreamException("Couldn't create token buffer.");
323 }
324
325 d_tokenBuffer = buffer;
326
327 // Override default lexer error reporting
328 d_lexer->rec->reportError = &lexerError;
329 // Override default nextToken function, just to prevent exceptions escaping.
330 d_lexer->rec->state->tokSource->nextToken = &nextToken;
331 }
332
333 void AntlrInput::setParser(Parser& parser) {
334 // ANTLR isn't using super in the lexer or the parser, AFAICT.
335 // We could also use @lexer/parser::context to add a field to the generated
336 // objects, but then it would have to be declared separately in every
337 // language's grammar and we'd have to in the address of the field anyway.
338 d_lexer->super = &parser;
339 d_parser->super = &parser;
340 }
341
342 void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
343 d_parser = pParser;
344 // d_parser->rec->match = &match;
345 d_parser->rec->reportError = &reportError;
346 /* Don't try to recover from a parse error. */
347 // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
348 d_parser->rec->recoverFromMismatchedToken =
349 (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
350 d_parser->rec->mismatch;
351 }
352
353 }/* CVC4::parser namespace */
354 }/* CVC4 namespace */