Merge branch '1.2.x'
[cvc5.git] / src / parser / antlr_input.cpp
1 /********************* */
2 /*! \file antlr_input.cpp
3 ** \verbatim
4 ** Original author: Christopher L. Conway
5 ** Major contributors: Morgan Deters
6 ** Minor contributors (to current version): Francois Bobot
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2013 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief A super-class for ANTLR-generated input language parsers.
13 **
14 ** A super-class for ANTLR-generated input language parsers
15 **/
16
17 #include <limits.h>
18 #include <antlr3.h>
19 #include <stdint.h>
20
21 #include "parser/antlr_input.h"
22 #include "parser/input.h"
23 #include "parser/bounded_token_buffer.h"
24 #include "parser/bounded_token_factory.h"
25 #include "parser/antlr_line_buffered_input.h"
26 #include "parser/memory_mapped_input_buffer.h"
27 #include "parser/parser_exception.h"
28 #include "parser/parser.h"
29
30 #include "expr/command.h"
31 #include "expr/type.h"
32 #include "parser/cvc/cvc_input.h"
33 #include "parser/smt1/smt1_input.h"
34 #include "parser/smt2/smt2_input.h"
35 #include "parser/tptp/tptp_input.h"
36 #include "util/output.h"
37
38 using namespace std;
39 using namespace CVC4;
40 using namespace CVC4::parser;
41 using namespace CVC4::kind;
42
43 namespace CVC4 {
44 namespace parser {
45
46 AntlrInputStream::AntlrInputStream(std::string name,
47 pANTLR3_INPUT_STREAM input,
48 bool fileIsTemporary) :
49 InputStream(name, fileIsTemporary),
50 d_input(input) {
51 assert( input != NULL );
52 input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
53 }
54
55 AntlrInputStream::~AntlrInputStream() {
56 d_input->free(d_input);
57 }
58
59 pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
60 return d_input;
61 }
62
63 AntlrInputStream*
64 AntlrInputStream::newFileInputStream(const std::string& name,
65 bool useMmap)
66 throw (InputStreamException) {
67 #ifdef _WIN32
68 if(useMmap) {
69 useMmap = false;
70 }
71 #endif
72 pANTLR3_INPUT_STREAM input = NULL;
73 if(useMmap) {
74 input = MemoryMappedInputBufferNew(name);
75 } else {
76 // libantlr3c v3.2 isn't source-compatible with v3.4
77 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
78 input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str());
79 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
80 input = antlr3FileStreamNew((pANTLR3_UINT8) name.c_str(), ANTLR3_ENC_8BIT);
81 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
82 }
83 if(input == NULL) {
84 throw InputStreamException("Couldn't open file: " + name);
85 }
86 return new AntlrInputStream( name, input );
87 }
88
89 AntlrInputStream*
90 AntlrInputStream::newStreamInputStream(std::istream& input,
91 const std::string& name,
92 bool lineBuffered)
93 throw (InputStreamException) {
94
95 pANTLR3_INPUT_STREAM inputStream = NULL;
96
97 if(lineBuffered) {
98 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
99 inputStream =
100 antlr3LineBufferedStreamNew(input,
101 0,
102 (pANTLR3_UINT8) strdup(name.c_str()));
103 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
104 inputStream =
105 antlr3LineBufferedStreamNew(input,
106 ANTLR3_ENC_8BIT,
107 (pANTLR3_UINT8) strdup(name.c_str()));
108 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
109 } else {
110
111 // Since these are all NULL on entry, realloc will be called
112 char *basep = NULL, *boundp = NULL, *cp = NULL;
113 /* 64KB seems like a reasonable default size. */
114 size_t bufSize = 0x10000;
115
116 /* Keep going until we can't go no more. */
117 while( !input.eof() && !input.fail() ) {
118
119 if( cp == boundp ) {
120 /* We ran out of room in the buffer. Realloc at double the size. */
121 ptrdiff_t offset = cp - basep;
122 basep = (char *) realloc(basep, bufSize);
123 if( basep == NULL ) {
124 throw InputStreamException("Failed buffering input stream: " + name);
125 }
126 cp = basep + offset;
127 boundp = basep + bufSize;
128 bufSize *= 2;
129 }
130
131 /* Read as much as we have room for. */
132 input.read( cp, boundp - cp );
133 cp += input.gcount();
134 }
135
136 /* Make sure the fail bit didn't get set. */
137 if( !input.eof() ) {
138 throw InputStreamException("Stream input failed: " + name);
139 }
140
141 /* Create an ANTLR input backed by the buffer. */
142 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
143 inputStream =
144 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) basep,
145 cp - basep,
146 (pANTLR3_UINT8) strdup(name.c_str()));
147 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
148 inputStream =
149 antlr3StringStreamNew((pANTLR3_UINT8) basep,
150 ANTLR3_ENC_8BIT,
151 cp - basep,
152 (pANTLR3_UINT8) strdup(name.c_str()));
153 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
154
155 }
156
157 if( inputStream == NULL ) {
158 throw InputStreamException("Couldn't initialize input: " + name);
159 }
160
161 return new AntlrInputStream( name, inputStream );
162 }
163
164
165 AntlrInputStream*
166 AntlrInputStream::newStringInputStream(const std::string& input,
167 const std::string& name)
168 throw (InputStreamException) {
169 char* inputStr = strdup(input.c_str());
170 char* nameStr = strdup(name.c_str());
171 assert( inputStr!=NULL && nameStr!=NULL );
172 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
173 pANTLR3_INPUT_STREAM inputStream =
174 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) inputStr,
175 input.size(),
176 (pANTLR3_UINT8) nameStr);
177 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
178 pANTLR3_INPUT_STREAM inputStream =
179 antlr3StringStreamNew((pANTLR3_UINT8) inputStr,
180 ANTLR3_ENC_8BIT,
181 input.size(),
182 (pANTLR3_UINT8) nameStr);
183 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
184 if( inputStream==NULL ) {
185 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
186 }
187 return new AntlrInputStream( name, inputStream );
188 }
189
190 AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
191 using namespace language::input;
192
193 AntlrInput* input;
194
195 switch(lang) {
196 case LANG_CVC4: {
197 input = new CvcInput(inputStream);
198 break;
199 }
200 case LANG_SMTLIB_V1:
201 input = new Smt1Input(inputStream);
202 break;
203
204 case LANG_SMTLIB_V2:
205 input = new Smt2Input(inputStream);
206 break;
207
208 case LANG_TPTP:
209 input = new TptpInput(inputStream);
210 break;
211
212 default:
213 std::stringstream ss;
214 ss << "internal error: unhandled language " << lang << " in AntlrInput::newInput";
215 throw InputStreamException(ss.str());
216 }
217
218 return input;
219 }
220
221 AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
222 Input(inputStream),
223 d_lookahead(lookahead),
224 d_lexer(NULL),
225 d_parser(NULL),
226 d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
227 d_tokenBuffer(NULL) {
228 }
229
230 /*
231 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
232 Parser(exprManager,name),
233 d_lookahead(lookahead) {
234
235 }
236 */
237
238 /*
239 AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
240 Input(exprManager,name),
241 d_lookahead(lookahead),
242 d_lexer(NULL),
243 d_parser(NULL),
244 d_tokenStream(NULL) {
245
246 char* inputStr = strdup(input.c_str());
247 char* nameStr = strdup(name.c_str());
248 if( inputStr==NULL || nameStr==NULL ) {
249 throw ParserException("Couldn't initialize string input: '" + input + "'");
250 }
251 d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
252 if( d_inputStream == NULL ) {
253 throw ParserException("Couldn't create input stream for string: '" + input + "'");
254 }
255
256 }
257 */
258
259 AntlrInput::~AntlrInput() {
260 BoundedTokenBufferFree(d_tokenBuffer);
261 }
262
263 pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
264 return d_tokenBuffer->commonTstream;
265 }
266
267 void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
268 pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
269 assert(lexer!=NULL);
270 Parser *parser = (Parser*)(lexer->super);
271 assert(parser!=NULL);
272 AntlrInput *input = (AntlrInput*) parser->getInput();
273 assert(input!=NULL);
274
275 /* Call the error display routine *if* there's not already a
276 * parse error pending. If a parser error is pending, this
277 * error is probably less important, so we just drop it. */
278 if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
279 input->parseError("Error finding next token.");
280 }
281 }
282
283 void AntlrInput::warning(const std::string& message) {
284 Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
285 }
286
287 void AntlrInput::parseError(const std::string& message, bool eofException)
288 throw (ParserException) {
289 Debug("parser") << "Throwing exception: "
290 << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
291 << d_lexer->getLine(d_lexer) << "."
292 << d_lexer->getCharPositionInLine(d_lexer) << ": "
293 << message << endl;
294 if(eofException) {
295 throw ParserEndOfFileException(message,
296 (const char*)d_lexer->rec->state->tokSource->fileName->chars,
297 d_lexer->getLine(d_lexer),
298 d_lexer->getCharPositionInLine(d_lexer));
299 } else {
300 throw ParserException(message,
301 (const char*)d_lexer->rec->state->tokSource->fileName->chars,
302 d_lexer->getLine(d_lexer),
303 d_lexer->getCharPositionInLine(d_lexer));
304 }
305 }
306
307
308 void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
309 d_lexer = pLexer;
310
311 pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
312 if( pTokenFactory != NULL ) {
313 pTokenFactory->close(pTokenFactory);
314 }
315
316 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
317 pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
318 if( pTokenFactory == NULL ) {
319 throw InputStreamException("Couldn't create token factory.");
320 }
321 d_lexer->rec->state->tokFactory = pTokenFactory;
322
323 pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
324 if( buffer == NULL ) {
325 throw InputStreamException("Couldn't create token buffer.");
326 }
327
328 d_tokenBuffer = buffer;
329
330 // Override default lexer error reporting
331 d_lexer->rec->reportError = &lexerError;
332 // Override default nextToken function, just to prevent exceptions escaping.
333 d_lexer->rec->state->tokSource->nextToken = &nextToken;
334 }
335
336 void AntlrInput::setParser(Parser& parser) {
337 // ANTLR isn't using super in the lexer or the parser, AFAICT.
338 // We could also use @lexer/parser::context to add a field to the generated
339 // objects, but then it would have to be declared separately in every
340 // language's grammar and we'd have to in the address of the field anyway.
341 d_lexer->super = &parser;
342 d_parser->super = &parser;
343 }
344
345 void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
346 d_parser = pParser;
347 // d_parser->rec->match = &match;
348 d_parser->rec->reportError = &reportError;
349 /* Don't try to recover from a parse error. */
350 // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
351 d_parser->rec->recoverFromMismatchedToken =
352 (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
353 d_parser->rec->mismatch;
354 }
355
356 }/* CVC4::parser namespace */
357 }/* CVC4 namespace */