5389f270f966f6043b2a2ddb0406bef16133cb5c
[cvc5.git] / src / parser / antlr_input.cpp
1 /********************* */
2 /*! \file antlr_input.cpp
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): mdeters
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
14 ** \brief A super-class for ANTLR-generated input language parsers.
15 **
16 ** A super-class for ANTLR-generated input language parsers
17 **/
18
19 #include <limits.h>
20 #include <antlr3.h>
21 #include <stdint.h>
22
23 #include "parser/antlr_input.h"
24 #include "parser/input.h"
25 #include "parser/bounded_token_buffer.h"
26 #include "parser/bounded_token_factory.h"
27 #include "parser/antlr_line_buffered_input.h"
28 #include "parser/memory_mapped_input_buffer.h"
29 #include "parser/parser_exception.h"
30 #include "parser/parser.h"
31
32 #include "expr/command.h"
33 #include "expr/type.h"
34 #include "parser/cvc/cvc_input.h"
35 #include "parser/smt/smt_input.h"
36 #include "parser/smt2/smt2_input.h"
37 #include "parser/tptp/tptp_input.h"
38 #include "util/output.h"
39 #include "util/Assert.h"
40
41 using namespace std;
42 using namespace CVC4;
43 using namespace CVC4::parser;
44 using namespace CVC4::kind;
45
46 namespace CVC4 {
47 namespace parser {
48
49 AntlrInputStream::AntlrInputStream(std::string name,
50 pANTLR3_INPUT_STREAM input,
51 bool fileIsTemporary) :
52 InputStream(name,fileIsTemporary),
53 d_input(input) {
54 AlwaysAssert( input != NULL );
55 }
56
57 AntlrInputStream::~AntlrInputStream() {
58 d_input->free(d_input);
59 }
60
61 pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
62 return d_input;
63 }
64
65 AntlrInputStream*
66 AntlrInputStream::newFileInputStream(const std::string& name,
67 bool useMmap)
68 throw (InputStreamException, AssertionException) {
69 pANTLR3_INPUT_STREAM input = NULL;
70 if( useMmap ) {
71 input = MemoryMappedInputBufferNew(name);
72 } else {
73 // libantlr3c v3.2 isn't source-compatible with v3.4
74 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
75 input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str());
76 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
77 input = antlr3FileStreamNew((pANTLR3_UINT8) name.c_str(), ANTLR3_ENC_8BIT);
78 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
79 }
80 if( input == NULL ) {
81 throw InputStreamException("Couldn't open file: " + name);
82 }
83 return new AntlrInputStream( name, input );
84 }
85
86 AntlrInputStream*
87 AntlrInputStream::newStreamInputStream(std::istream& input,
88 const std::string& name,
89 bool lineBuffered)
90 throw (InputStreamException, AssertionException) {
91
92 pANTLR3_INPUT_STREAM inputStream = NULL;
93
94 if(lineBuffered) {
95 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
96 inputStream =
97 antlr3LineBufferedStreamNew(input,
98 0,
99 (pANTLR3_UINT8) strdup(name.c_str()));
100 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
101 inputStream =
102 antlr3LineBufferedStreamNew(input,
103 ANTLR3_ENC_8BIT,
104 (pANTLR3_UINT8) strdup(name.c_str()));
105 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
106 } else {
107
108 // Since these are all NULL on entry, realloc will be called
109 char *basep = NULL, *boundp = NULL, *cp = NULL;
110 /* 64KB seems like a reasonable default size. */
111 size_t bufSize = 0x10000;
112
113 /* Keep going until we can't go no more. */
114 while( !input.eof() && !input.fail() ) {
115
116 if( cp == boundp ) {
117 /* We ran out of room in the buffer. Realloc at double the size. */
118 ptrdiff_t offset = cp - basep;
119 basep = (char *) realloc(basep, bufSize);
120 if( basep == NULL ) {
121 throw InputStreamException("Failed buffering input stream: " + name);
122 }
123 cp = basep + offset;
124 boundp = basep + bufSize;
125 bufSize *= 2;
126 }
127
128 /* Read as much as we have room for. */
129 input.read( cp, boundp - cp );
130 cp += input.gcount();
131 }
132
133 /* Make sure the fail bit didn't get set. */
134 if( !input.eof() ) {
135 throw InputStreamException("Stream input failed: " + name);
136 }
137
138 /* Create an ANTLR input backed by the buffer. */
139 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
140 inputStream =
141 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) basep,
142 cp - basep,
143 (pANTLR3_UINT8) strdup(name.c_str()));
144 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
145 inputStream =
146 antlr3StringStreamNew((pANTLR3_UINT8) basep,
147 ANTLR3_ENC_8BIT,
148 cp - basep,
149 (pANTLR3_UINT8) strdup(name.c_str()));
150 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
151
152 }
153
154 if( inputStream == NULL ) {
155 throw InputStreamException("Couldn't initialize input: " + name);
156 }
157
158 return new AntlrInputStream( name, inputStream );
159 }
160
161
162 AntlrInputStream*
163 AntlrInputStream::newStringInputStream(const std::string& input,
164 const std::string& name)
165 throw (InputStreamException, AssertionException) {
166 char* inputStr = strdup(input.c_str());
167 char* nameStr = strdup(name.c_str());
168 AlwaysAssert( inputStr!=NULL && nameStr!=NULL );
169 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
170 pANTLR3_INPUT_STREAM inputStream =
171 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) inputStr,
172 input.size(),
173 (pANTLR3_UINT8) nameStr);
174 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
175 pANTLR3_INPUT_STREAM inputStream =
176 antlr3StringStreamNew((pANTLR3_UINT8) inputStr,
177 ANTLR3_ENC_8BIT,
178 input.size(),
179 (pANTLR3_UINT8) nameStr);
180 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
181 if( inputStream==NULL ) {
182 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
183 }
184 return new AntlrInputStream( name, inputStream );
185 }
186
187 AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
188 using namespace language::input;
189
190 AntlrInput* input;
191
192 switch(lang) {
193 case LANG_CVC4: {
194 input = new CvcInput(inputStream);
195 break;
196 }
197 case LANG_SMTLIB:
198 input = new SmtInput(inputStream);
199 break;
200
201 case LANG_SMTLIB_V2:
202 input = new Smt2Input(inputStream);
203 break;
204
205 case LANG_TPTP:
206 input = new TptpInput(inputStream);
207 break;
208
209 default:
210 Unhandled(lang);
211 }
212
213 return input;
214 }
215
216 AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
217 Input(inputStream),
218 d_lookahead(lookahead),
219 d_lexer(NULL),
220 d_parser(NULL),
221 d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
222 d_tokenBuffer(NULL) {
223 }
224
225 /*
226 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
227 Parser(exprManager,name),
228 d_lookahead(lookahead) {
229
230 }
231 */
232
233 /*
234 AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
235 Input(exprManager,name),
236 d_lookahead(lookahead),
237 d_lexer(NULL),
238 d_parser(NULL),
239 d_tokenStream(NULL) {
240
241 char* inputStr = strdup(input.c_str());
242 char* nameStr = strdup(name.c_str());
243 if( inputStr==NULL || nameStr==NULL ) {
244 throw ParserException("Couldn't initialize string input: '" + input + "'");
245 }
246 d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
247 if( d_inputStream == NULL ) {
248 throw ParserException("Couldn't create input stream for string: '" + input + "'");
249 }
250
251 }
252 */
253
254 AntlrInput::~AntlrInput() {
255 BoundedTokenBufferFree(d_tokenBuffer);
256 }
257
258 pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
259 return d_tokenBuffer->commonTstream;
260 }
261
262 void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
263 pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
264 AlwaysAssert(lexer!=NULL);
265 Parser *parser = (Parser*)(lexer->super);
266 AlwaysAssert(parser!=NULL);
267 AntlrInput *input = (AntlrInput*) parser->getInput();
268 AlwaysAssert(input!=NULL);
269
270 /* Call the error display routine *if* there's not already a
271 * parse error pending. If a parser error is pending, this
272 * error is probably less important, so we just drop it. */
273 if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
274 input->parseError("Error finding next token.");
275 }
276 }
277
278 void AntlrInput::warning(const std::string& message) {
279 Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
280 }
281
282 void AntlrInput::parseError(const std::string& message)
283 throw (ParserException, AssertionException) {
284 Debug("parser") << "Throwing exception: "
285 << getInputStream()->getName() << ":"
286 << d_lexer->getLine(d_lexer) << "."
287 << d_lexer->getCharPositionInLine(d_lexer) << ": "
288 << message << endl;
289 throw ParserException(message, getInputStream()->getName(),
290 d_lexer->getLine(d_lexer),
291 d_lexer->getCharPositionInLine(d_lexer));
292 }
293
294
295 void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
296 d_lexer = pLexer;
297
298 pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
299 if( pTokenFactory != NULL ) {
300 pTokenFactory->close(pTokenFactory);
301 }
302
303 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
304 pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
305 if( pTokenFactory == NULL ) {
306 throw ParserException("Couldn't create token factory.");
307 }
308 d_lexer->rec->state->tokFactory = pTokenFactory;
309
310 pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
311 if( buffer == NULL ) {
312 throw ParserException("Couldn't create token buffer.");
313 }
314
315 d_tokenBuffer = buffer;
316
317 // Override default lexer error reporting
318 d_lexer->rec->reportError = &lexerError;
319 // Override default nextToken function, just to prevent exceptions escaping.
320 d_lexer->rec->state->tokSource->nextToken = &nextToken;
321 }
322
323 void AntlrInput::setParser(Parser& parser) {
324 // ANTLR isn't using super in the lexer or the parser, AFAICT.
325 // We could also use @lexer/parser::context to add a field to the generated
326 // objects, but then it would have to be declared separately in every
327 // language's grammar and we'd have to in the address of the field anyway.
328 d_lexer->super = &parser;
329 d_parser->super = &parser;
330 }
331
332 void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
333 d_parser = pParser;
334 // d_parser->rec->match = &match;
335 d_parser->rec->reportError = &reportError;
336 /* Don't try to recover from a parse error. */
337 // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
338 d_parser->rec->recoverFromMismatchedToken =
339 (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
340 d_parser->rec->mismatch;
341 }
342
343 }/* CVC4::parser namespace */
344 }/* CVC4 namespace */