Tuesday end-of-day commit.
[cvc5.git] / src / parser / antlr_input.h
1 /********************* */
2 /*! \file antlr_input.h
3 ** \verbatim
4 ** Original author: cconway
5 ** Major contributors: none
6 ** Minor contributors (to current version): mdeters, dejan
7 ** This file is part of the CVC4 prototype.
8 ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys)
9 ** Courant Institute of Mathematical Sciences
10 ** New York University
11 ** See the file COPYING in the top-level source directory for licensing
12 ** information.\endverbatim
13 **
14 ** \brief Base for ANTLR parser classes.
15 **
16 ** Base for ANTLR parser classes.
17 **/
18
19 #include "cvc4parser_private.h"
20
21 #ifndef __CVC4__PARSER__ANTLR_INPUT_H
22 #define __CVC4__PARSER__ANTLR_INPUT_H
23
24 #include <antlr3.h>
25 #include <iostream>
26 #include <sstream>
27 #include <stdexcept>
28 #include <string>
29 #include <vector>
30
31 #include "bounded_token_buffer.h"
32 #include "parser_exception.h"
33 #include "input.h"
34
35 #include "util/Assert.h"
36 #include "util/bitvector.h"
37 #include "util/integer.h"
38 #include "util/rational.h"
39
40 namespace CVC4 {
41
42 class Command;
43 class Type;
44 class FunctionType;
45
46 namespace parser {
47
48 /** Wrapper around an ANTLR3 input stream. */
49 class AntlrInputStream : public InputStream {
50 pANTLR3_INPUT_STREAM d_input;
51
52 AntlrInputStream(std::string name,
53 pANTLR3_INPUT_STREAM input,
54 bool fileIsTemporary = false);
55
56 /* This is private and unimplemented, because you should never use it. */
57 AntlrInputStream(const AntlrInputStream& inputStream) CVC4_UNUSED;
58
59 /* This is private and unimplemented, because you should never use it. */
60 AntlrInputStream& operator=(const AntlrInputStream& inputStream) CVC4_UNUSED;
61
62 public:
63
64 virtual ~AntlrInputStream();
65
66 pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
67
68 /** Create a file input.
69 *
70 * @param name the path of the file to read
71 * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
72 * input will use the standard ANTLR3 I/O implementation.
73 */
74 static AntlrInputStream* newFileInputStream(const std::string& name,
75 bool useMmap = false)
76 throw (InputStreamException, AssertionException);
77
78 /** Create an input from an istream. */
79 static AntlrInputStream* newStreamInputStream(std::istream& input,
80 const std::string& name)
81 throw (InputStreamException, AssertionException);
82
83 /** Create a string input.
84 *
85 * @param input the string to read
86 * @param name the "filename" to use when reporting errors
87 */
88 static AntlrInputStream* newStringInputStream(const std::string& input,
89 const std::string& name)
90 throw (InputStreamException, AssertionException);
91 };
92
93 class Parser;
94
95 /**
96 * An input to be parsed. The static factory methods in this class (e.g.,
97 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
98 * for the given input language and attach it to an input source of the
99 * appropriate type.
100 */
101 class AntlrInput : public Input {
102 /** The token lookahead used to lex and parse the input. This should usually be equal to
103 * <code>K</code> for an LL(k) grammar. */
104 unsigned int d_lookahead;
105
106 /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
107 * must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
108 pANTLR3_LEXER d_lexer;
109
110 /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
111 * must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
112 * The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
113 * <code>reportError</code> will be set to <code>Input::reportError</code>. */
114 pANTLR3_PARSER d_parser;
115
116 /** The ANTLR3 input stream associated with this input. */
117 pANTLR3_INPUT_STREAM d_antlr3InputStream;
118
119 /** The ANTLR3 bounded token buffer associated with this input.
120 * We only need this so we can free it on exit.
121 * This is set by <code>setLexer</code>.
122 * NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
123 pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
124
125 /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
126 static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
127
128 /** Builds a message for a lexer error and calls <code>parseError</code>. */
129 static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
130
131 /** Returns the next available lexer token from the current input stream. */
132 static pANTLR3_COMMON_TOKEN
133 nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
134
135 /* Since we own d_tokenStream and it needs to be freed, we need to prevent
136 * copy construction and assignment.
137 */
138 AntlrInput(const AntlrInput& input);
139 AntlrInput& operator=(const AntlrInput& input);
140
141 public:
142
143 /** Destructor. Frees the token stream and closes the input. */
144 virtual ~AntlrInput();
145
146 /** Create an input for the given AntlrInputStream. NOTE: the new Input
147 * will take ownership of the input stream and delete it at destruction time.
148 *
149 * @param lang the input language
150 * @param inputStream the input stream
151 *
152 * */
153 static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);
154
155 /** Retrieve the text associated with a token. */
156 static std::string tokenText(pANTLR3_COMMON_TOKEN token);
157
158 /** Retrieve a substring of the text associated with a token.
159 *
160 * @param token the token
161 * @param index the index of the starting character of the substring
162 * @param n the size of the substring. If <code>n</code> is 0, then all of the
163 * characters up to the end of the token text will be included. If <code>n</code>
164 * would make the substring span past the end of the token text, only those
165 * characters up to the end of the token text will be included.
166 */
167 static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
168
169 /** Retrieve an unsigned from the text of a token */
170 static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
171
172 /** Retrieve an Integer from the text of a token */
173 static Integer tokenToInteger( pANTLR3_COMMON_TOKEN token );
174
175 /** Retrieve a Rational from the text of a token */
176 static Rational tokenToRational(pANTLR3_COMMON_TOKEN token);
177
178 /** Get a bitvector constant from the text of the number and the size token */
179 static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size);
180
181 /** Retrieve the remaining text in this input. */
182 std::string getUnparsedText();
183
184 protected:
185 /** Create an input. This input takes ownership of the given input stream,
186 * and will delete it at destruction time.
187 *
188 * @param inputStream the input stream to use
189 * @param lookahead the lookahead needed to parse the input (i.e., k for
190 * an LL(k) grammar)
191 */
192 AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
193
194 /** Retrieve the token stream for this parser. Must not be called before
195 * <code>setLexer()</code>. */
196 pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
197
198 /**
199 * Throws a <code>ParserException</code> with the given message.
200 */
201 void parseError(const std::string& msg)
202 throw (ParserException, AssertionException);
203
204 /** Set the ANTLR3 lexer for this input. */
205 void setAntlr3Lexer(pANTLR3_LEXER pLexer);
206
207 /** Set the ANTLR3 parser implementation for this input. */
208 void setAntlr3Parser(pANTLR3_PARSER pParser);
209
210 /** Set the Parser object for this input. */
211 virtual void setParser(Parser& parser);
212 };
213
214 inline std::string AntlrInput::getUnparsedText() {
215 const char *base = (const char *)d_antlr3InputStream->data;
216 const char *cur = (const char *)d_antlr3InputStream->nextChar;
217
218 return std::string(cur, d_antlr3InputStream->sizeBuf - (cur - base));
219 }
220
221
222 inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
223 if( token->type == ANTLR3_TOKEN_EOF ) {
224 return "<<EOF>>";
225 }
226
227 ANTLR3_MARKER start = token->getStartIndex(token);
228 ANTLR3_MARKER end = token->getStopIndex(token);
229 /* start and end are boundary pointers. The text is a string
230 * of (end-start+1) bytes beginning at start. */
231 std::string txt( (const char *)start, end-start+1 );
232 Debug("parser-extra") << "tokenText: start=" << start << std::endl
233 << "end=" << end << std::endl
234 << "txt='" << txt << "'" << std::endl;
235 return txt;
236 }
237
238 inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
239 size_t index,
240 size_t n) {
241 ANTLR3_MARKER start = token->getStartIndex(token);
242 ANTLR3_MARKER end = token->getStopIndex(token);
243 Assert( start < end );
244 if( index > (size_t) end - start ) {
245 std::stringstream ss;
246 ss << "Out-of-bounds substring index: " << index;
247 throw std::invalid_argument(ss.str());
248 }
249 start += index;
250 if( n==0 || n >= (size_t) end - start ) {
251 return std::string( (const char *)start, end-start+1 );
252 } else {
253 return std::string( (const char *)start, n );
254 }
255 }
256
257 inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
258 unsigned result;
259 std::stringstream ss;
260 ss << tokenText(token);
261 ss >> result;
262 return result;
263 }
264
265 inline Integer AntlrInput::tokenToInteger(pANTLR3_COMMON_TOKEN token) {
266 return Integer( tokenText(token) );
267 }
268
269 inline Rational AntlrInput::tokenToRational(pANTLR3_COMMON_TOKEN token) {
270 return Rational::fromDecimal( tokenText(token) );
271 }
272
273 inline BitVector AntlrInput::tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size) {
274 std::string number_str = tokenTextSubstr(number, 2);
275 return BitVector(tokenToUnsigned(size), Integer(number_str));
276 }
277
278 }/* CVC4::parser namespace */
279 }/* CVC4 namespace */
280
281 #endif /* __CVC4__PARSER__ANTLR_INPUT_H */