bdf5fe59ee0e97df9892ddb36a72f4ef99779fee
[cvc5.git] / src / parser / antlr_input.h
1 /********************* */
2 /*! \file antlr_input.h
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): dejan, mdeters
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
14 ** \brief Base for ANTLR parser classes.
15 **
16 ** Base for ANTLR parser classes.
17 **/
18
19 #include "cvc4parser_private.h"
20
21 #ifndef __CVC4__PARSER__ANTLR_INPUT_H
22 #define __CVC4__PARSER__ANTLR_INPUT_H
23
24 #include <antlr3.h>
25 #include <iostream>
26 #include <sstream>
27 #include <stdexcept>
28 #include <string>
29 #include <vector>
30
31 #include "parser/bounded_token_buffer.h"
32 #include "parser/parser_exception.h"
33 #include "parser/input.h"
34
35 #include "util/Assert.h"
36 #include "util/bitvector.h"
37 #include "util/integer.h"
38 #include "util/rational.h"
39
40 namespace CVC4 {
41
42 class Command;
43 class Type;
44 class FunctionType;
45
46 namespace parser {
47
48 /** Wrapper around an ANTLR3 input stream. */
49 class AntlrInputStream : public InputStream {
50 pANTLR3_INPUT_STREAM d_input;
51
52 AntlrInputStream(std::string name,
53 pANTLR3_INPUT_STREAM input,
54 bool fileIsTemporary = false);
55
56 /* This is private and unimplemented, because you should never use it. */
57 AntlrInputStream(const AntlrInputStream& inputStream) CVC4_UNUSED;
58
59 /* This is private and unimplemented, because you should never use it. */
60 AntlrInputStream& operator=(const AntlrInputStream& inputStream) CVC4_UNUSED;
61
62 public:
63
64 virtual ~AntlrInputStream();
65
66 pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
67
68 /** Create a file input.
69 *
70 * @param name the path of the file to read
71 * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
72 * input will use the standard ANTLR3 I/O implementation.
73 */
74 static AntlrInputStream* newFileInputStream(const std::string& name,
75 bool useMmap = false)
76 throw (InputStreamException, AssertionException);
77
78 /** Create an input from an istream. */
79 static AntlrInputStream* newStreamInputStream(std::istream& input,
80 const std::string& name,
81 bool lineBuffered = false)
82 throw (InputStreamException, AssertionException);
83
84 /** Create a string input.
85 *
86 * @param input the string to read
87 * @param name the "filename" to use when reporting errors
88 */
89 static AntlrInputStream* newStringInputStream(const std::string& input,
90 const std::string& name)
91 throw (InputStreamException, AssertionException);
92 };/* class AntlrInputStream */
93
94 class Parser;
95
96 /**
97 * An input to be parsed. The static factory methods in this class (e.g.,
98 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
99 * for the given input language and attach it to an input source of the
100 * appropriate type.
101 */
102 class AntlrInput : public Input {
103 /** The token lookahead used to lex and parse the input. This should usually be equal to
104 * <code>K</code> for an LL(k) grammar. */
105 unsigned int d_lookahead;
106
107 /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
108 * must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
109 pANTLR3_LEXER d_lexer;
110
111 /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
112 * must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
113 * The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
114 * <code>reportError</code> will be set to <code>Input::reportError</code>. */
115 pANTLR3_PARSER d_parser;
116
117 /** The ANTLR3 input stream associated with this input. */
118 pANTLR3_INPUT_STREAM d_antlr3InputStream;
119
120 /** The ANTLR3 bounded token buffer associated with this input.
121 * We only need this so we can free it on exit.
122 * This is set by <code>setLexer</code>.
123 * NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
124 pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
125
126 /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
127 static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
128
129 /** Builds a message for a lexer error and calls <code>parseError</code>. */
130 static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
131
132 /** Returns the next available lexer token from the current input stream. */
133 static pANTLR3_COMMON_TOKEN
134 nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
135
136 /* Since we own d_tokenStream and it needs to be freed, we need to prevent
137 * copy construction and assignment.
138 */
139 AntlrInput(const AntlrInput& input);
140 AntlrInput& operator=(const AntlrInput& input);
141
142 public:
143
144 /** Destructor. Frees the token stream and closes the input. */
145 virtual ~AntlrInput();
146
147 /** Create an input for the given AntlrInputStream. NOTE: the new Input
148 * will take ownership of the input stream and delete it at destruction time.
149 *
150 * @param lang the input language
151 * @param inputStream the input stream
152 *
153 * */
154 static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);
155
156 /** Retrieve the text associated with a token. */
157 static std::string tokenText(pANTLR3_COMMON_TOKEN token);
158
159 /** Retrieve a substring of the text associated with a token.
160 *
161 * @param token the token
162 * @param index the index of the starting character of the substring
163 * @param n the size of the substring. If <code>n</code> is 0, then all of the
164 * characters up to the end of the token text will be included. If <code>n</code>
165 * would make the substring span past the end of the token text, only those
166 * characters up to the end of the token text will be included.
167 */
168 static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
169
170 /** Retrieve an unsigned from the text of a token */
171 static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
172
173 /** Retrieve an Integer from the text of a token */
174 static Rational tokenToInteger( pANTLR3_COMMON_TOKEN token );
175
176 /** Retrieve a Rational from the text of a token */
177 static Rational tokenToRational(pANTLR3_COMMON_TOKEN token);
178
179 /** Get a bitvector constant from the text of the number and the size token */
180 static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size);
181
182 /** Retrieve the remaining text in this input. */
183 std::string getUnparsedText();
184
185 protected:
186 /** Create an input. This input takes ownership of the given input stream,
187 * and will delete it at destruction time.
188 *
189 * @param inputStream the input stream to use
190 * @param lookahead the lookahead needed to parse the input (i.e., k for
191 * an LL(k) grammar)
192 */
193 AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
194
195 /** Retrieve the token stream for this parser. Must not be called before
196 * <code>setLexer()</code>. */
197 pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
198
199 /**
200 * Throws a <code>ParserException</code> with the given message.
201 */
202 void parseError(const std::string& msg)
203 throw (ParserException, AssertionException);
204
205 /** Set the ANTLR3 lexer for this input. */
206 void setAntlr3Lexer(pANTLR3_LEXER pLexer);
207
208 /** Set the ANTLR3 parser implementation for this input. */
209 void setAntlr3Parser(pANTLR3_PARSER pParser);
210
211 /** Set the Parser object for this input. */
212 virtual void setParser(Parser& parser);
213 };/* class AntlrInput */
214
215 inline std::string AntlrInput::getUnparsedText() {
216 const char *base = (const char *)d_antlr3InputStream->data;
217 const char *cur = (const char *)d_antlr3InputStream->nextChar;
218
219 return std::string(cur, d_antlr3InputStream->sizeBuf - (cur - base));
220 }
221
222
223 inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
224 if( token->type == ANTLR3_TOKEN_EOF ) {
225 return "<<EOF>>";
226 }
227
228 ANTLR3_MARKER start = token->getStartIndex(token);
229 ANTLR3_MARKER end = token->getStopIndex(token);
230 /* start and end are boundary pointers. The text is a string
231 * of (end-start+1) bytes beginning at start. */
232 std::string txt( (const char *)start, end-start+1 );
233 Debug("parser-extra") << "tokenText: start=" << start << std::endl
234 << "end=" << end << std::endl
235 << "txt='" << txt << "'" << std::endl;
236 return txt;
237 }
238
239 inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
240 size_t index,
241 size_t n) {
242 ANTLR3_MARKER start = token->getStartIndex(token);
243 ANTLR3_MARKER end = token->getStopIndex(token);
244 Assert( start < end );
245 if( index > (size_t) end - start ) {
246 std::stringstream ss;
247 ss << "Out-of-bounds substring index: " << index;
248 throw std::invalid_argument(ss.str());
249 }
250 start += index;
251 if( n==0 || n >= (size_t) end - start ) {
252 return std::string( (const char *)start, end-start+1 );
253 } else {
254 return std::string( (const char *)start, n );
255 }
256 }
257
258 inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
259 unsigned result;
260 std::stringstream ss;
261 ss << tokenText(token);
262 ss >> result;
263 return result;
264 }
265
266 inline Rational AntlrInput::tokenToInteger(pANTLR3_COMMON_TOKEN token) {
267 return Rational( tokenText(token) );
268 }
269
270 inline Rational AntlrInput::tokenToRational(pANTLR3_COMMON_TOKEN token) {
271 return Rational::fromDecimal( tokenText(token) );
272 }
273
274 inline BitVector AntlrInput::tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size) {
275 std::string number_str = tokenTextSubstr(number, 2);
276 return BitVector(tokenToUnsigned(size), Integer(number_str));
277 }
278
279 }/* CVC4::parser namespace */
280 }/* CVC4 namespace */
281
282 #endif /* __CVC4__PARSER__ANTLR_INPUT_H */