Improvements to strings: work on propagations for reverse normal form processing...
[cvc5.git] / src / util / regexp.h
1 /********************* */
2 /*! \file regexp.h
3 ** \verbatim
4 ** Top contributors (to current version):
5 ** Tianyi Liang, Tim King, Morgan Deters
6 ** This file is part of the CVC4 project.
7 ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS
8 ** in the top-level source directory) and their institutional affiliations.
9 ** All rights reserved. See the file COPYING in the top-level source
10 ** directory for licensing information.\endverbatim
11 **
12 ** \brief [[ Add one-line brief description here ]]
13 **
14 ** [[ Add lengthier description here ]]
15 ** \todo document this file
16 **/
17
18 #include "cvc4_public.h"
19
20 #ifndef __CVC4__REGEXP_H
21 #define __CVC4__REGEXP_H
22
23 #include <vector>
24 #include <string>
25 #include <set>
26 #include <sstream>
27 #include <cassert>
28
29 #include "base/exception.h"
30 #include "util/hash.h"
31
32 namespace CVC4 {
33
34 class CVC4_PUBLIC String {
35 public:
36 static unsigned convertCharToUnsignedInt( unsigned char c ) {
37 unsigned i = c;
38 i = i + 191;
39 return (i>=256 ? i-256 : i);
40 }
41 static unsigned char convertUnsignedIntToChar( unsigned i ){
42 unsigned ii = i+65;
43 return (unsigned char)(ii>=256 ? ii-256 : ii);
44 }
45 static bool isPrintable( unsigned i ){
46 unsigned char c = convertUnsignedIntToChar( i );
47 return (c>=' ' && c<='~');//isprint( (int)c );
48 }
49
50 private:
51 std::vector<unsigned> d_str;
52
53 bool isVecSame(const std::vector<unsigned> &a, const std::vector<unsigned> &b) const {
54 if(a.size() != b.size()) return false;
55 else {
56 return std::equal(a.begin(), a.end(), b.begin());
57 //for(unsigned int i=0; i<a.size(); ++i)
58 //if(a[i] != b[i]) return false;
59 //return true;
60 }
61 }
62
63 //guarded
64 unsigned char hexToDec(unsigned char c) {
65 if(c>='0' && c<='9') {
66 return c - '0';
67 } else if (c >= 'a' && c <= 'f') {
68 return c - 'a' + 10;
69 } else {
70 assert(c >= 'A' && c <= 'F');
71 return c - 'A' + 10;
72 }
73 }
74
75 void toInternal(const std::string &s);
76
77 public:
78 String() {}
79
80 String(const std::string &s) {
81 toInternal(s);
82 }
83
84 String(const char* s) {
85 std::string stmp(s);
86 toInternal(stmp);
87 }
88
89 String(const unsigned char c) {
90 d_str.push_back( convertCharToUnsignedInt(c) );
91 }
92
93 String(const std::vector<unsigned> &s) : d_str(s) { }
94
95 ~String() {}
96
97 String& operator =(const String& y) {
98 if(this != &y) d_str = y.d_str;
99 return *this;
100 }
101
102 bool operator ==(const String& y) const {
103 return isVecSame(d_str, y.d_str);
104 }
105
106 bool operator !=(const String& y) const {
107 return ! ( isVecSame(d_str, y.d_str) );
108 }
109
110 String concat (const String& other) const {
111 std::vector<unsigned int> ret_vec(d_str);
112 ret_vec.insert( ret_vec.end(), other.d_str.begin(), other.d_str.end() );
113 return String(ret_vec);
114 }
115
116 bool operator <(const String& y) const {
117 if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size();
118 else {
119 for(unsigned int i=0; i<d_str.size(); ++i)
120 if(d_str[i] != y.d_str[i]) return convertUnsignedIntToChar(d_str[i]) < convertUnsignedIntToChar(y.d_str[i]);
121
122 return false;
123 }
124 }
125
126 bool operator >(const String& y) const {
127 if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size();
128 else {
129 for(unsigned int i=0; i<d_str.size(); ++i)
130 if(d_str[i] != y.d_str[i]) return convertUnsignedIntToChar(d_str[i]) > convertUnsignedIntToChar(y.d_str[i]);
131
132 return false;
133 }
134 }
135
136 bool operator <=(const String& y) const {
137 if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size();
138 else {
139 for(unsigned int i=0; i<d_str.size(); ++i)
140 if(d_str[i] != y.d_str[i]) return convertUnsignedIntToChar(d_str[i]) < convertUnsignedIntToChar(y.d_str[i]);
141
142 return true;
143 }
144 }
145
146 bool operator >=(const String& y) const {
147 if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size();
148 else {
149 for(unsigned int i=0; i<d_str.size(); ++i)
150 if(d_str[i] != y.d_str[i]) return convertUnsignedIntToChar(d_str[i]) > convertUnsignedIntToChar(y.d_str[i]);
151
152 return true;
153 }
154 }
155
156 bool strncmp(const String &y, const std::size_t np) const {
157 std::size_t n = np;
158 std::size_t b = (d_str.size() >= y.d_str.size()) ? d_str.size() : y.d_str.size();
159 std::size_t s = (d_str.size() <= y.d_str.size()) ? d_str.size() : y.d_str.size();
160 if(n > s) {
161 if(b == s) {
162 n = s;
163 } else {
164 return false;
165 }
166 }
167 for(std::size_t i=0; i<n; ++i)
168 if(d_str[i] != y.d_str[i]) return false;
169 return true;
170 }
171
172 bool rstrncmp(const String &y, const std::size_t np) const {
173 std::size_t n = np;
174 std::size_t b = (d_str.size() >= y.d_str.size()) ? d_str.size() : y.d_str.size();
175 std::size_t s = (d_str.size() <= y.d_str.size()) ? d_str.size() : y.d_str.size();
176 if(n > s) {
177 if(b == s) {
178 n = s;
179 } else {
180 return false;
181 }
182 }
183 for(std::size_t i=0; i<n; ++i)
184 if(d_str[d_str.size() - i - 1] != y.d_str[y.d_str.size() - i - 1]) return false;
185 return true;
186 }
187
188 bool isEmptyString() const {
189 return ( d_str.size() == 0 );
190 }
191
192 /*char operator[] (const std::size_t i) const {
193 assert( i < d_str.size() );
194 return convertUnsignedIntToChar(d_str[i]);
195 }*/
196 /*
197 * Convenience functions
198 */
199 std::string toString() const;
200
201 std::size_t size() const {
202 return d_str.size();
203 }
204
205 unsigned char getFirstChar() const {
206 return convertUnsignedIntToChar( d_str[0] );
207 }
208
209 unsigned char getLastChar() const {
210 assert(d_str.size() != 0);
211 return convertUnsignedIntToChar( d_str[d_str.size() - 1] );
212 }
213
214 bool isRepeated() const {
215 if(d_str.size() > 1) {
216 unsigned int f = d_str[0];
217 for(unsigned i=1; i<d_str.size(); ++i) {
218 if(f != d_str[i]) return false;
219 }
220 }
221 return true;
222 }
223
224 bool tailcmp(const String &y, int &c) const {
225 int id_x = d_str.size() - 1;
226 int id_y = y.d_str.size() - 1;
227 while(id_x>=0 && id_y>=0) {
228 if(d_str[id_x] != y.d_str[id_y]) {
229 c = id_x;
230 return false;
231 }
232 --id_x; --id_y;
233 }
234 c = id_x == -1 ? ( - (id_y+1) ) : (id_x + 1);
235 return true;
236 }
237
238 std::size_t find(const String &y, const std::size_t start = 0) const {
239 if(d_str.size() < y.d_str.size() + start) return std::string::npos;
240 if(y.d_str.size() == 0) return start;
241 if(d_str.size() == 0) return std::string::npos;
242 std::vector<unsigned>::const_iterator itr = std::search(d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
243 if(itr != d_str.end()) {
244 return itr - d_str.begin();
245 }else{
246 return std::string::npos;
247 }
248 }
249
250 std::size_t rfind( String &y, const std::size_t start = 0) {
251 std::reverse( d_str.begin(), d_str.end() );
252 std::reverse( y.d_str.begin(), y.d_str.end() );
253 std::size_t f = find( y, start );
254 std::reverse( d_str.begin(), d_str.end() );
255 std::reverse( y.d_str.begin(), y.d_str.end() );
256 if( f==std::string::npos ){
257 return std::string::npos;
258 }else{
259 return f;
260 }
261 }
262
263 String replace(const String &s, const String &t) const {
264 std::size_t ret = find(s);
265 if( ret != std::string::npos ) {
266 std::vector<unsigned int> vec;
267 vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
268 vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
269 vec.insert(vec.end(), d_str.begin() + ret + s.d_str.size(), d_str.end());
270 return String(vec);
271 } else {
272 return *this;
273 }
274 }
275
276 String substr(std::size_t i) const {
277 assert(i <= d_str.size());
278 std::vector<unsigned int> ret_vec;
279 std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
280 ret_vec.insert(ret_vec.end(), itr, d_str.end());
281 return String(ret_vec);
282 }
283 String substr(std::size_t i, std::size_t j) const {
284 assert(i+j <= d_str.size());
285 std::vector<unsigned int> ret_vec;
286 std::vector<unsigned int>::const_iterator itr = d_str.begin() + i;
287 ret_vec.insert( ret_vec.end(), itr, itr + j );
288 return String(ret_vec);
289 }
290
291 String prefix(std::size_t i) const {
292 return substr(0, i);
293 }
294 String suffix(std::size_t i) const {
295 return substr(d_str.size() - i, i);
296 }
297 // if y=y1...yn and overlap returns m, then this is x1...y1...ym
298 std::size_t overlap(String &y) const;
299 // if y=y1...yn and overlap returns m, then this is y(n+1-m)...yn...xk
300 std::size_t roverlap(String &y) const;
301
302 bool isNumber() const {
303 if(d_str.size() == 0) return false;
304 for(unsigned int i=0; i<d_str.size(); ++i) {
305 unsigned char c = convertUnsignedIntToChar( d_str[i] );
306 if(c<'0' || c>'9') {
307 return false;
308 }
309 }
310 return true;
311 }
312 int toNumber() const {
313 if(isNumber()) {
314 int ret=0;
315 for(unsigned int i=0; i<d_str.size(); ++i) {
316 unsigned char c = convertUnsignedIntToChar( d_str[i] );
317 ret = ret * 10 + (int)c - (int)'0';
318 }
319 return ret;
320 } else {
321 return -1;
322 }
323 }
324
325 void getCharSet(std::set<unsigned char> &cset) const;
326
327 std::vector<unsigned> getVec() const {
328 return d_str;
329 }
330 };/* class String */
331
332 namespace strings {
333
334 struct CVC4_PUBLIC StringHashFunction {
335 size_t operator()(const ::CVC4::String& s) const {
336 return __gnu_cxx::hash<const char*>()(s.toString().c_str());
337 }
338 };/* struct StringHashFunction */
339
340 }/* CVC4::strings namespace */
341
342 std::ostream& operator <<(std::ostream& os, const String& s) CVC4_PUBLIC;
343
344 class CVC4_PUBLIC RegExp {
345 protected:
346 int d_type;
347 public:
348 RegExp() : d_type(1) {}
349
350 RegExp(const int t) : d_type(t) {}
351
352 ~RegExp() {}
353
354 bool operator ==(const RegExp& y) const {
355 return d_type == y.d_type ;
356 }
357
358 bool operator !=(const RegExp& y) const {
359 return d_type != y.d_type ;
360 }
361
362 bool operator <(const RegExp& y) const {
363 return d_type < y.d_type;
364 }
365
366 bool operator >(const RegExp& y) const {
367 return d_type > y.d_type ;
368 }
369
370 bool operator <=(const RegExp& y) const {
371 return d_type <= y.d_type;
372 }
373
374 bool operator >=(const RegExp& y) const {
375 return d_type >= y.d_type ;
376 }
377
378 int getType() const { return d_type; }
379 };/* class RegExp */
380
381 /**
382 * Hash function for the RegExp constants.
383 */
384 struct CVC4_PUBLIC RegExpHashFunction {
385 inline size_t operator()(const RegExp& s) const {
386 return (size_t)s.getType();
387 }
388 };/* struct RegExpHashFunction */
389
390 std::ostream& operator <<(std::ostream& os, const RegExp& s) CVC4_PUBLIC;
391
392 }/* CVC4 namespace */
393
394 #endif /* __CVC4__REGEXP_H */