oops, one more fix, hopefully the last
[cvc5.git] / src / parser / antlr_input.cpp
1 /********************* */
2 /*! \file antlr_input.cpp
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): mdeters
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
14 ** \brief A super-class for ANTLR-generated input language parsers.
15 **
16 ** A super-class for ANTLR-generated input language parsers
17 **/
18
19 #include <limits.h>
20 #include <antlr3.h>
21 #include <stdint.h>
22
23 #include "antlr_input.h"
24 #include "input.h"
25 #include "bounded_token_buffer.h"
26 #include "bounded_token_factory.h"
27 #include "memory_mapped_input_buffer.h"
28 #include "parser_exception.h"
29 #include "parser.h"
30
31 #include "expr/command.h"
32 #include "expr/type.h"
33 #include "parser/cvc/cvc_input.h"
34 #include "parser/smt/smt_input.h"
35 #include "parser/smt2/smt2_input.h"
36 #include "util/output.h"
37 #include "util/Assert.h"
38
39 using namespace std;
40 using namespace CVC4;
41 using namespace CVC4::parser;
42 using namespace CVC4::kind;
43
44 namespace CVC4 {
45 namespace parser {
46
47 AntlrInputStream::AntlrInputStream(std::string name,
48 pANTLR3_INPUT_STREAM input,
49 bool fileIsTemporary) :
50 InputStream(name,fileIsTemporary),
51 d_input(input) {
52 AlwaysAssert( input != NULL );
53 }
54
55 AntlrInputStream::~AntlrInputStream() {
56 d_input->free(d_input);
57 }
58
59 pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
60 return d_input;
61 }
62
63 AntlrInputStream*
64 AntlrInputStream::newFileInputStream(const std::string& name,
65 bool useMmap)
66 throw (InputStreamException, AssertionException) {
67 pANTLR3_INPUT_STREAM input = NULL;
68 if( useMmap ) {
69 input = MemoryMappedInputBufferNew(name);
70 } else {
71 // libantlr3c v3.2 isn't source-compatible with v3.4
72 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
73 input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str());
74 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
75 input = antlr3FileStreamNew((pANTLR3_UINT8) name.c_str(), ANTLR3_ENC_8BIT);
76 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
77 }
78 if( input == NULL ) {
79 throw InputStreamException("Couldn't open file: " + name);
80 }
81 return new AntlrInputStream( name, input );
82 }
83
84 AntlrInputStream*
85 AntlrInputStream::newStreamInputStream(std::istream& input,
86 const std::string& name)
87 throw (InputStreamException, AssertionException) {
88
89 // Since these are all NULL on entry, realloc will be called
90 char *basep = NULL, *boundp = NULL, *cp = NULL;
91 /* 64KB seems like a reasonable default size. */
92 size_t bufSize = 0x10000;
93
94 /* Keep going until we can't go no more. */
95 while( !input.eof() && !input.fail() ) {
96
97 if( cp == boundp ) {
98 /* We ran out of room in the buffer. Realloc at double the size. */
99 ptrdiff_t offset = cp - basep;
100 basep = (char *) realloc(basep, bufSize);
101 if( basep == NULL ) {
102 throw InputStreamException("Failed buffering input stream: " + name);
103 }
104 cp = basep + offset;
105 boundp = basep + bufSize;
106 bufSize *= 2;
107 }
108
109 /* Read as much as we have room for. */
110 input.read( cp, boundp - cp );
111 cp += input.gcount();
112 }
113
114 /* Make sure the fail bit didn't get set. */
115 if( !input.eof() ) {
116 throw InputStreamException("Stream input failed: " + name);
117 }
118
119 /* Create an ANTLR input backed by the buffer. */
120 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
121 pANTLR3_INPUT_STREAM inputStream =
122 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) basep,
123 cp - basep,
124 (pANTLR3_UINT8) strdup(name.c_str()));
125 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
126 pANTLR3_INPUT_STREAM inputStream =
127 antlr3StringStreamNew((pANTLR3_UINT8) basep,
128 ANTLR3_ENC_8BIT,
129 cp - basep,
130 (pANTLR3_UINT8) strdup(name.c_str()));
131 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
132 if( inputStream==NULL ) {
133 throw InputStreamException("Couldn't initialize input: " + name);
134 }
135 return new AntlrInputStream( name, inputStream );
136 }
137
138
139 AntlrInputStream*
140 AntlrInputStream::newStringInputStream(const std::string& input,
141 const std::string& name)
142 throw (InputStreamException, AssertionException) {
143 char* inputStr = strdup(input.c_str());
144 char* nameStr = strdup(name.c_str());
145 AlwaysAssert( inputStr!=NULL && nameStr!=NULL );
146 #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
147 pANTLR3_INPUT_STREAM inputStream =
148 antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) inputStr,
149 input.size(),
150 (pANTLR3_UINT8) nameStr);
151 #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
152 pANTLR3_INPUT_STREAM inputStream =
153 antlr3StringStreamNew((pANTLR3_UINT8) inputStr,
154 ANTLR3_ENC_8BIT,
155 input.size(),
156 (pANTLR3_UINT8) nameStr);
157 #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
158 if( inputStream==NULL ) {
159 throw InputStreamException("Couldn't initialize string input: '" + input + "'");
160 }
161 return new AntlrInputStream( name, inputStream );
162 }
163
164 AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
165 using namespace language::input;
166
167 AntlrInput* input;
168
169 switch(lang) {
170 case LANG_CVC4: {
171 input = new CvcInput(inputStream);
172 break;
173 }
174 case LANG_SMTLIB:
175 input = new SmtInput(inputStream);
176 break;
177
178 case LANG_SMTLIB_V2:
179 input = new Smt2Input(inputStream);
180 break;
181
182 default:
183 Unhandled(lang);
184 }
185
186 return input;
187 }
188
189 AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
190 Input(inputStream),
191 d_lookahead(lookahead),
192 d_lexer(NULL),
193 d_parser(NULL),
194 d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
195 d_tokenBuffer(NULL) {
196 }
197
198 /*
199 AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
200 Parser(exprManager,name),
201 d_lookahead(lookahead) {
202
203 }
204 */
205
206 /*
207 AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
208 Input(exprManager,name),
209 d_lookahead(lookahead),
210 d_lexer(NULL),
211 d_parser(NULL),
212 d_tokenStream(NULL) {
213
214 char* inputStr = strdup(input.c_str());
215 char* nameStr = strdup(name.c_str());
216 if( inputStr==NULL || nameStr==NULL ) {
217 throw ParserException("Couldn't initialize string input: '" + input + "'");
218 }
219 d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
220 if( d_inputStream == NULL ) {
221 throw ParserException("Couldn't create input stream for string: '" + input + "'");
222 }
223
224 }
225 */
226
227 AntlrInput::~AntlrInput() {
228 BoundedTokenBufferFree(d_tokenBuffer);
229 }
230
231 pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
232 return d_tokenBuffer->commonTstream;
233 }
234
235 void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
236 pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
237 AlwaysAssert(lexer!=NULL);
238 Parser *parser = (Parser*)(lexer->super);
239 AlwaysAssert(parser!=NULL);
240 AntlrInput *input = (AntlrInput*) parser->getInput();
241 AlwaysAssert(input!=NULL);
242
243 /* Call the error display routine *if* there's not already a
244 * parse error pending. If a parser error is pending, this
245 * error is probably less important, so we just drop it. */
246 if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
247 input->parseError("Error finding next token.");
248 }
249 }
250
251 void AntlrInput::parseError(const std::string& message)
252 throw (ParserException, AssertionException) {
253 Debug("parser") << "Throwing exception: "
254 << getInputStream()->getName() << ":"
255 << d_lexer->getLine(d_lexer) << "."
256 << d_lexer->getCharPositionInLine(d_lexer) << ": "
257 << message << endl;
258 throw ParserException(message, getInputStream()->getName(),
259 d_lexer->getLine(d_lexer),
260 d_lexer->getCharPositionInLine(d_lexer));
261 }
262
263
264 void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
265 d_lexer = pLexer;
266
267 pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
268 if( pTokenFactory != NULL ) {
269 pTokenFactory->close(pTokenFactory);
270 }
271
272 /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
273 pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
274 if( pTokenFactory == NULL ) {
275 throw ParserException("Couldn't create token factory.");
276 }
277 d_lexer->rec->state->tokFactory = pTokenFactory;
278
279 pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
280 if( buffer == NULL ) {
281 throw ParserException("Couldn't create token buffer.");
282 }
283
284 d_tokenBuffer = buffer;
285
286 // Override default lexer error reporting
287 d_lexer->rec->reportError = &lexerError;
288 // Override default nextToken function, just to prevent exceptions escaping.
289 d_lexer->rec->state->tokSource->nextToken = &nextTokenStr;
290 }
291
292 void AntlrInput::setParser(Parser& parser) {
293 // ANTLR isn't using super in the lexer or the parser, AFAICT.
294 // We could also use @lexer/parser::context to add a field to the generated
295 // objects, but then it would have to be declared separately in every
296 // language's grammar and we'd have to in the address of the field anyway.
297 d_lexer->super = &parser;
298 d_parser->super = &parser;
299 }
300
301 void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
302 d_parser = pParser;
303 // d_parser->rec->match = &match;
304 d_parser->rec->reportError = &reportError;
305 /* Don't try to recover from a parse error. */
306 // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
307 d_parser->rec->recoverFromMismatchedToken =
308 (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
309 d_parser->rec->mismatch;
310 }
311
312
313 }/* CVC4::parser namespace */
314 }/* CVC4 namespace */