e75ca1fad2dd81a7472e786b4f7037aeed53ecf8
[cvc5.git] / src / util / regexp.h
1 /********************* */
2 /*! \file regexp.h
3 ** \verbatim
4 ** Original author: Tianyi Liang
5 ** Major contributors: none
6 ** Minor contributors (to current version): Morgan Deters
7 ** This file is part of the CVC4 project.
8 ** Copyright (c) 2009-2014 New York University and The University of Iowa
9 ** See the file COPYING in the top-level source directory for licensing
10 ** information.\endverbatim
11 **
12 ** \brief [[ Add one-line brief description here ]]
13 **
14 ** [[ Add lengthier description here ]]
15 ** \todo document this file
16 **/
17
18 #include "cvc4_public.h"
19
20 #ifndef __CVC4__REGEXP_H
21 #define __CVC4__REGEXP_H
22
23 #include <vector>
24 #include <string>
25 #include <set>
26 #include <sstream>
27 #include "util/exception.h"
28 //#include "util/integer.h"
29 #include "util/hash.h"
30
31 namespace CVC4 {
32
33 class CVC4_PUBLIC String {
34 public:
35 static unsigned int convertCharToUnsignedInt( char c ) {
36 int i = (int)c;
37 i = i-65;
38 return (unsigned int)(i<0 ? i+256 : i);
39 }
40 static char convertUnsignedIntToChar( unsigned int i ){
41 int ii = i+65;
42 return (char)(ii>=256 ? ii-256 : ii);
43 }
44 static bool isPrintable( unsigned int i ){
45 char c = convertUnsignedIntToChar( i );
46 return isprint( (int)c );
47 }
48
49 private:
50 std::vector<unsigned int> d_str;
51
52 bool isVecSame(const std::vector<unsigned int> &a, const std::vector<unsigned int> &b) const {
53 if(a.size() != b.size()) return false;
54 else {
55 for(unsigned int i=0; i<a.size(); ++i)
56 if(a[i] != b[i]) return false;
57 return true;
58 }
59 }
60
61 //guarded
62 char hexToDec(char c) {
63 if(isdigit(c)) {
64 return c - '0';
65 } else if (c >= 'a' && c <= 'f') {
66 return c - 'a' + 10;
67 } else {
68 //Assert(c >= 'A' && c <= 'F');
69 return c - 'A' + 10;
70 }
71 }
72
73 void toInternal(const std::string &s);
74
75 public:
76 String() {}
77
78 String(const std::string &s) {
79 toInternal(s);
80 }
81
82 String(const char* s) {
83 std::string stmp(s);
84 toInternal(stmp);
85 }
86
87 String(const char c) {
88 d_str.push_back( convertCharToUnsignedInt(c) );
89 }
90
91 String(const std::vector<unsigned int> &s) : d_str(s) { }
92
93 ~String() {}
94
95 String& operator =(const String& y) {
96 if(this != &y) d_str = y.d_str;
97 return *this;
98 }
99
100 bool operator ==(const String& y) const {
101 return isVecSame(d_str, y.d_str);
102 }
103
104 bool operator !=(const String& y) const {
105 return ! ( isVecSame(d_str, y.d_str) );
106 }
107
108 String concat (const String& other) const {
109 std::vector<unsigned int> ret_vec(d_str);
110 ret_vec.insert( ret_vec.end(), other.d_str.begin(), other.d_str.end() );
111 return String(ret_vec);
112 }
113
114 bool operator <(const String& y) const {
115 if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size();
116 else {
117 for(unsigned int i=0; i<d_str.size(); ++i)
118 if(d_str[i] != y.d_str[i]) return d_str[i] < y.d_str[i];
119
120 return false;
121 }
122 }
123
124 bool operator >(const String& y) const {
125 if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size();
126 else {
127 for(unsigned int i=0; i<d_str.size(); ++i)
128 if(d_str[i] != y.d_str[i]) return d_str[i] > y.d_str[i];
129
130 return false;
131 }
132 }
133
134 bool operator <=(const String& y) const {
135 if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size();
136 else {
137 for(unsigned int i=0; i<d_str.size(); ++i)
138 if(d_str[i] != y.d_str[i]) return d_str[i] < y.d_str[i];
139
140 return true;
141 }
142 }
143
144 bool operator >=(const String& y) const {
145 if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size();
146 else {
147 for(unsigned int i=0; i<d_str.size(); ++i)
148 if(d_str[i] != y.d_str[i]) return d_str[i] > y.d_str[i];
149
150 return true;
151 }
152 }
153
154 bool strncmp(const String &y, unsigned int n) const {
155 for(unsigned int i=0; i<n; ++i)
156 if(d_str[i] != y.d_str[i]) return false;
157 return true;
158 }
159
160 bool rstrncmp(const String &y, unsigned int n) const {
161 for(unsigned int i=0; i<n; ++i)
162 if(d_str[d_str.size() - i - 1] != y.d_str[y.d_str.size() - i - 1]) return false;
163 return true;
164 }
165
166 bool isEmptyString() const {
167 return ( d_str.size() == 0 );
168 }
169
170 unsigned int operator[] (const unsigned int i) const {
171 //Assert( i < d_str.size() && i >= 0);
172 return d_str[i];
173 }
174 /*
175 * Convenience functions
176 */
177 std::string toString() const;
178
179 unsigned size() const {
180 return d_str.size();
181 }
182
183 char getFirstChar() const {
184 return convertUnsignedIntToChar( d_str[0] );
185 }
186
187 bool isRepeated() const {
188 if(d_str.size() > 1) {
189 unsigned int f = d_str[0];
190 for(unsigned i=1; i<d_str.size(); ++i) {
191 if(f != d_str[i]) return false;
192 }
193 }
194 return true;
195 }
196
197 bool tailcmp(const String &y, int &c) const {
198 int id_x = d_str.size() - 1;
199 int id_y = y.d_str.size() - 1;
200 while(id_x>=0 && id_y>=0) {
201 if(d_str[id_x] != y.d_str[id_y]) {
202 c = id_x;
203 return false;
204 }
205 --id_x; --id_y;
206 }
207 c = id_x == -1 ? ( - (id_y+1) ) : (id_x + 1);
208 return true;
209 }
210
211 std::size_t find(const String &y, const int start = 0) const {
212 if(d_str.size() < y.d_str.size() + (std::size_t) start) return std::string::npos;
213 if(y.d_str.size() == 0) return (std::size_t) start;
214 if(d_str.size() == 0) return std::string::npos;
215 std::size_t ret = std::string::npos;
216 for(int i = start; i <= (int) d_str.size() - (int) y.d_str.size(); i++) {
217 if(d_str[i] == y.d_str[0]) {
218 std::size_t j=0;
219 for(; j<y.d_str.size(); j++) {
220 if(d_str[i+j] != y.d_str[j]) break;
221 }
222 if(j == y.d_str.size()) {
223 ret = (std::size_t) i;
224 break;
225 }
226 }
227 }
228 return ret;
229 }
230
231 String replace(const String &s, const String &t) const {
232 std::size_t ret = find(s);
233 if( ret != std::string::npos ) {
234 std::vector<unsigned int> vec;
235 vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
236 vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
237 vec.insert(vec.end(), d_str.begin() + ret + s.d_str.size(), d_str.end());
238 return String(vec);
239 } else {
240 return *this;
241 }
242 }
243
244 String substr(unsigned i) const {
245 std::vector<unsigned int> ret_vec;
246 std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
247 ret_vec.insert(ret_vec.end(), itr, d_str.end());
248 return String(ret_vec);
249 }
250 String substr(unsigned i, unsigned j) const {
251 std::vector<unsigned int> ret_vec;
252 std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
253 ret_vec.insert( ret_vec.end(), itr, itr + j );
254 return String(ret_vec);
255 }
256
257 String prefix(unsigned i) const {
258 return substr(0, i);
259 }
260 String suffix(unsigned i) const {
261 return substr(d_str.size() - i, i);
262 }
263 std::size_t overlap(String &y) const;
264
265 bool isNumber() const {
266 if(d_str.size() == 0) return false;
267 for(unsigned int i=0; i<d_str.size(); ++i) {
268 char c = convertUnsignedIntToChar( d_str[i] );
269 if(c<'0' || c>'9') {
270 return false;
271 }
272 }
273 return true;
274 }
275 int toNumber() const {
276 if(isNumber()) {
277 int ret=0;
278 for(unsigned int i=0; i<d_str.size(); ++i) {
279 char c = convertUnsignedIntToChar( d_str[i] );
280 ret = ret * 10 + (int)c - (int)'0';
281 }
282 return ret;
283 } else {
284 return -1;
285 }
286 }
287 void getCharSet(std::set<unsigned int> &cset) const;
288 };/* class String */
289
290 namespace strings {
291
292 struct CVC4_PUBLIC StringHashFunction {
293 size_t operator()(const ::CVC4::String& s) const {
294 return __gnu_cxx::hash<const char*>()(s.toString().c_str());
295 }
296 };/* struct StringHashFunction */
297
298 }/* CVC4::strings namespace */
299
300 std::ostream& operator <<(std::ostream& os, const String& s) CVC4_PUBLIC;
301
302 class CVC4_PUBLIC RegExp {
303 protected:
304 int d_type;
305 public:
306 RegExp() : d_type(1) {}
307
308 RegExp(const int t) : d_type(t) {}
309
310 ~RegExp() {}
311
312 bool operator ==(const RegExp& y) const {
313 return d_type == y.d_type ;
314 }
315
316 bool operator !=(const RegExp& y) const {
317 return d_type != y.d_type ;
318 }
319
320 bool operator <(const RegExp& y) const {
321 return d_type < y.d_type;
322 }
323
324 bool operator >(const RegExp& y) const {
325 return d_type > y.d_type ;
326 }
327
328 bool operator <=(const RegExp& y) const {
329 return d_type <= y.d_type;
330 }
331
332 bool operator >=(const RegExp& y) const {
333 return d_type >= y.d_type ;
334 }
335
336 int getType() const { return d_type; }
337 };/* class RegExp */
338
339 /**
340 * Hash function for the RegExp constants.
341 */
342 struct CVC4_PUBLIC RegExpHashFunction {
343 inline size_t operator()(const RegExp& s) const {
344 return (size_t)s.getType();
345 }
346 };/* struct RegExpHashFunction */
347
348 std::ostream& operator <<(std::ostream& os, const RegExp& s) CVC4_PUBLIC;
349
350 }/* CVC4 namespace */
351
352 #endif /* __CVC4__REGEXP_H */