From a94318b0b1f0c4b8a2a4d6757b073626af06deea Mon Sep 17 00:00:00 2001 From: Tim King Date: Thu, 13 Jul 2017 17:35:00 -0700 Subject: [PATCH] Cleaning up the CVC4::String class. --- src/theory/strings/regexp_operation.cpp | 4 +- src/util/regexp.cpp | 368 +++++++++++++++++----- src/util/regexp.h | 394 +++++------------------- 3 files changed, 378 insertions(+), 388 deletions(-) diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index ec788fa78..a98a2a0ef 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -1326,14 +1326,14 @@ void RegExpOpr::getCharSet( Node r, std::set &pcset, SetNodes &pv Node st = Rewriter::rewrite(r[0]); if(st.isConst()) { CVC4::String s = st.getConst< CVC4::String >(); - s.getCharSet( cset ); + cset.insert(s.getVec().begin(), s.getVec().end()); } else if(st.getKind() == kind::VARIABLE) { vset.insert( st ); } else { for(unsigned i=0; i(); - s.getCharSet( cset ); + cset.insert(s.getVec().begin(), s.getVec().end()); } else { vset.insert( st[i] ); } diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 9915d480d..6afadfd5b 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -17,105 +17,194 @@ #include "util/regexp.h" +#include +#include #include #include +#include + +#include "base/cvc4_assert.h" +#include "base/exception.h" using namespace std; namespace CVC4 { -void String::toInternal(const std::string &s) { - d_str.clear(); - unsigned i=0; - while(i < s.size()) { - if(s[i] == '\\') { +static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values."); + +int String::cmp(const String &y) const { + if (size() != y.size()) { + return size() < y.size() ? 1 : -1; + } + for (unsigned int i = 0; i < size(); ++i) { + if (d_str[i] != y.d_str[i]) { + return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? 1 : -1; + } + } + return 0; +} + +String String::concat(const String &other) const { + std::vector ret_vec(d_str); + ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end()); + return String(ret_vec); +} + +bool String::strncmp(const String &y, const std::size_t np) const { + std::size_t n = np; + std::size_t b = (size() >= y.size()) ? size() : y.size(); + std::size_t s = (size() <= y.size()) ? size() : y.size(); + if (n > s) { + if (b == s) { + n = s; + } else { + return false; + } + } + for (std::size_t i = 0; i < n; ++i) { + if (d_str[i] != y.d_str[i]) return false; + } + return true; +} + +bool String::rstrncmp(const String &y, const std::size_t np) const { + std::size_t n = np; + std::size_t b = (size() >= y.size()) ? size() : y.size(); + std::size_t s = (size() <= y.size()) ? size() : y.size(); + if (n > s) { + if (b == s) { + n = s; + } else { + return false; + } + } + for (std::size_t i = 0; i < n; ++i) { + if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false; + } + return true; +} + +std::vector String::toInternal(const std::string &s) { + std::vector str; + unsigned i = 0; + while (i < s.size()) { + if (s[i] == '\\') { i++; - if(i < s.size()) { - switch(s[i]) { - case 'n': {d_str.push_back( convertCharToUnsignedInt('\n') );i++;} break; - case 't': {d_str.push_back( convertCharToUnsignedInt('\t') );i++;} break; - case 'v': {d_str.push_back( convertCharToUnsignedInt('\v') );i++;} break; - case 'b': {d_str.push_back( convertCharToUnsignedInt('\b') );i++;} break; - case 'r': {d_str.push_back( convertCharToUnsignedInt('\r') );i++;} break; - case 'f': {d_str.push_back( convertCharToUnsignedInt('\f') );i++;} break; - case 'a': {d_str.push_back( convertCharToUnsignedInt('\a') );i++;} break; - case '\\': {d_str.push_back( convertCharToUnsignedInt('\\') );i++;} break; + if (i < s.size()) { + switch (s[i]) { + case 'n': { + str.push_back(convertCharToUnsignedInt('\n')); + i++; + } break; + case 't': { + str.push_back(convertCharToUnsignedInt('\t')); + i++; + } break; + case 'v': { + str.push_back(convertCharToUnsignedInt('\v')); + i++; + } break; + case 'b': { + str.push_back(convertCharToUnsignedInt('\b')); + i++; + } break; + case 'r': { + str.push_back(convertCharToUnsignedInt('\r')); + i++; + } break; + case 'f': { + str.push_back(convertCharToUnsignedInt('\f')); + i++; + } break; + case 'a': { + str.push_back(convertCharToUnsignedInt('\a')); + i++; + } break; + case '\\': { + str.push_back(convertCharToUnsignedInt('\\')); + i++; + } break; case 'x': { - if(i + 2 < s.size()) { - if(isxdigit(s[i+1]) && isxdigit(s[i+2])) { - d_str.push_back( convertCharToUnsignedInt( hexToDec(s[i+1]) * 16 + hexToDec(s[i+2]) ) ); + if (i + 2 < s.size()) { + if (isxdigit(s[i + 1]) && isxdigit(s[i + 2])) { + str.push_back(convertCharToUnsignedInt(hexToDec(s[i + 1]) * 16 + + hexToDec(s[i + 2]))); i += 3; } else { - throw CVC4::Exception( "Illegal String Literal: \"" + s + "\"" ); + throw CVC4::Exception("Illegal String Literal: \"" + s + "\""); } } else { - throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must have two digits after \\x" ); + throw CVC4::Exception("Illegal String Literal: \"" + s + + "\", must have two digits after \\x"); } - } - break; + } break; default: { - if(isdigit(s[i])) { + if (isdigit(s[i])) { int num = (int)s[i] - (int)'0'; bool flag = num < 4; - if(i+1 < s.size() && num < 8 && isdigit(s[i+1]) && s[i+1] < '8') { - num = num * 8 + (int)s[i+1] - (int)'0'; - if(flag && i+2 < s.size() && isdigit(s[i+2]) && s[i+2] < '8') { - num = num * 8 + (int)s[i+2] - (int)'0'; - d_str.push_back( convertCharToUnsignedInt((unsigned char)num) ); + if (i + 1 < s.size() && num < 8 && isdigit(s[i + 1]) && + s[i + 1] < '8') { + num = num * 8 + (int)s[i + 1] - (int)'0'; + if (flag && i + 2 < s.size() && isdigit(s[i + 2]) && + s[i + 2] < '8') { + num = num * 8 + (int)s[i + 2] - (int)'0'; + str.push_back(convertCharToUnsignedInt((unsigned char)num)); i += 3; } else { - d_str.push_back( convertCharToUnsignedInt((unsigned char)num) ); + str.push_back(convertCharToUnsignedInt((unsigned char)num)); i += 2; } } else { - d_str.push_back( convertCharToUnsignedInt((unsigned char)num) ); + str.push_back(convertCharToUnsignedInt((unsigned char)num)); i++; } - } else if((unsigned)s[i] > 127) { - throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must use escaped sequence" ); + } else if ((unsigned)s[i] > 127) { + throw CVC4::Exception("Illegal String Literal: \"" + s + + "\", must use escaped sequence"); } else { - d_str.push_back( convertCharToUnsignedInt(s[i]) ); + str.push_back(convertCharToUnsignedInt(s[i])); i++; } } } } else { - throw CVC4::Exception( "should be handled by lexer: \"" + s + "\"" ); - //d_str.push_back( convertCharToUnsignedInt('\\') ); + throw CVC4::Exception("should be handled by lexer: \"" + s + "\""); + // str.push_back( convertCharToUnsignedInt('\\') ); } - } else if((unsigned)s[i] > 127) { - throw CVC4::Exception( "Illegal String Literal: \"" + s + "\", must use escaped sequence" ); + } else if ((unsigned)s[i] > 127) { + throw CVC4::Exception("Illegal String Literal: \"" + s + + "\", must use escaped sequence"); } else { - d_str.push_back( convertCharToUnsignedInt(s[i]) ); + str.push_back(convertCharToUnsignedInt(s[i])); i++; } } + return str; } -void String::getCharSet(std::set &cset) const { - for(std::vector::const_iterator itr = d_str.begin(); - itr != d_str.end(); itr++) { - cset.insert( convertUnsignedIntToChar(*itr) ); - } +unsigned char String::getUnsignedCharAt(size_t pos) const { + Assert(pos < size()); + return convertUnsignedIntToChar(d_str[pos]); } -std::size_t String::overlap(String &y) const { - std::size_t i = d_str.size() < y.size() ? d_str.size() : y.size(); - for(; i>0; i--) { +std::size_t String::overlap(const String &y) const { + std::size_t i = size() < y.size() ? size() : y.size(); + for (; i > 0; i--) { String s = suffix(i); String p = y.prefix(i); - if(s == p) { + if (s == p) { return i; } } return i; } -std::size_t String::roverlap(String &y) const { - std::size_t i = d_str.size() < y.size() ? d_str.size() : y.size(); - for(; i>0; i--) { + +std::size_t String::roverlap(const String &y) const { + std::size_t i = size() < y.size() ? size() : y.size(); + for (; i > 0; i--) { String s = prefix(i); String p = y.suffix(i); - if(s == p) { + if (s == p) { return i; } } @@ -124,36 +213,53 @@ std::size_t String::roverlap(String &y) const { std::string String::toString() const { std::string str; - for(unsigned int i=0; i( &(std::ostringstream() << (int)c) )->str(); + // std::string s2 = static_cast( + // &(std::ostringstream() << (int)c) )->str(); } } str += s; @@ -162,12 +268,126 @@ std::string String::toString() const { return str; } -std::ostream& operator <<(std::ostream& os, const String& s) { +bool String::isRepeated() const { + if (size() > 1) { + unsigned int f = d_str[0]; + for (unsigned i = 1; i < size(); ++i) { + if (f != d_str[i]) return false; + } + } + return true; +} + +bool String::tailcmp(const String &y, int &c) const { + int id_x = size() - 1; + int id_y = y.size() - 1; + while (id_x >= 0 && id_y >= 0) { + if (d_str[id_x] != y.d_str[id_y]) { + c = id_x; + return false; + } + --id_x; + --id_y; + } + c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1); + return true; +} + +std::size_t String::find(const String &y, const std::size_t start) const { + if (size() < y.size() + start) return std::string::npos; + if (y.empty()) return start; + if (empty()) return std::string::npos; + + std::vector::const_iterator itr = std::search( + d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end()); + if (itr != d_str.end()) { + return itr - d_str.begin(); + } + return std::string::npos; +} + +std::size_t String::rfind(const String &y, const std::size_t start) const { + if (size() < y.size() + start) return std::string::npos; + if (y.empty()) return start; + if (empty()) return std::string::npos; + + std::vector::const_reverse_iterator itr = std::search( + d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend()); + if (itr != d_str.rend()) { + return itr - d_str.rbegin(); + } + return std::string::npos; +} + +String String::replace(const String &s, const String &t) const { + std::size_t ret = find(s); + if (ret != std::string::npos) { + std::vector vec; + vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret); + vec.insert(vec.end(), t.d_str.begin(), t.d_str.end()); + vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end()); + return String(vec); + } else { + return *this; + } +} + +String String::substr(std::size_t i) const { + Assert(i <= size()); + std::vector ret_vec; + std::vector::const_iterator itr = d_str.begin() + i; + ret_vec.insert(ret_vec.end(), itr, d_str.end()); + return String(ret_vec); +} + +String String::substr(std::size_t i, std::size_t j) const { + Assert(i + j <= size()); + std::vector ret_vec; + std::vector::const_iterator itr = d_str.begin() + i; + ret_vec.insert(ret_vec.end(), itr, itr + j); + return String(ret_vec); +} + +bool String::isNumber() const { + for (unsigned character : d_str) { + unsigned char c = convertUnsignedIntToChar(character); + if (c < '0' || c > '9') { + return false; + } + } + return true; +} + +int String::toNumber() const { + if (isNumber()) { + int ret = 0; + for (unsigned int i = 0; i < size(); ++i) { + unsigned char c = convertUnsignedIntToChar(d_str[i]); + ret = ret * 10 + (int)c - (int)'0'; + } + return ret; + } else { + return -1; + } +} + +unsigned char String::hexToDec(unsigned char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } else { + Assert(c >= 'A' && c <= 'F'); + return c - 'A' + 10; + } +} + +std::ostream &operator<<(std::ostream &os, const String &s) { return os << "\"" << s.toString() << "\""; } -std::ostream& operator<<(std::ostream& out, const RegExp& s) { +std::ostream &operator<<(std::ostream &out, const RegExp &s) { return out << "regexp(" << s.getType() << ')'; } -}/* CVC4 namespace */ +} // namespace CVC4 diff --git a/src/util/regexp.h b/src/util/regexp.h index beb0ee097..9fb8aea60 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -20,363 +20,133 @@ #ifndef __CVC4__REGEXP_H #define __CVC4__REGEXP_H -#include +#include +#include #include -#include -#include -#include - -#include "base/exception.h" -#include "util/hash.h" +#include namespace CVC4 { class CVC4_PUBLIC String { -public: - static unsigned convertCharToUnsignedInt( unsigned char c ) { + public: + static unsigned convertCharToUnsignedInt(unsigned char c) { unsigned i = c; i = i + 191; - return (i>=256 ? i-256 : i); + return (i >= 256 ? i - 256 : i); } - static unsigned char convertUnsignedIntToChar( unsigned i ){ - unsigned ii = i+65; - return (unsigned char)(ii>=256 ? ii-256 : ii); + static unsigned char convertUnsignedIntToChar(unsigned i) { + unsigned ii = i + 65; + return (unsigned char)(ii >= 256 ? ii - 256 : ii); } - static bool isPrintable( unsigned i ){ - unsigned char c = convertUnsignedIntToChar( i ); - return (c>=' ' && c<='~');//isprint( (int)c ); + static bool isPrintable(unsigned i) { + unsigned char c = convertUnsignedIntToChar(i); + return (c >= ' ' && c <= '~'); // isprint( (int)c ); } -private: - std::vector d_str; - - bool isVecSame(const std::vector &a, const std::vector &b) const { - if(a.size() != b.size()) return false; - else { - return std::equal(a.begin(), a.end(), b.begin()); - //for(unsigned int i=0; i& s) : d_str(s) {} - //guarded - unsigned char hexToDec(unsigned char c) { - if(c>='0' && c<='9') { - return c - '0'; - } else if (c >= 'a' && c <= 'f') { - return c - 'a' + 10; - } else { - assert(c >= 'A' && c <= 'F'); - return c - 'A' + 10; + String& operator=(const String& y) { + if (this != &y) { + d_str = y.d_str; } - } - - void toInternal(const std::string &s); - -public: - String() {} - - String(const std::string &s) { - toInternal(s); - } - - String(const char* s) { - std::string stmp(s); - toInternal(stmp); - } - - String(const unsigned char c) { - d_str.push_back( convertCharToUnsignedInt(c) ); - } - - String(const std::vector &s) : d_str(s) { } - - ~String() {} - - String& operator =(const String& y) { - if(this != &y) d_str = y.d_str; return *this; } - bool operator ==(const String& y) const { - return isVecSame(d_str, y.d_str); - } + String concat(const String& other) const; - bool operator !=(const String& y) const { - return ! ( isVecSame(d_str, y.d_str) ); - } - - String concat (const String& other) const { - std::vector ret_vec(d_str); - ret_vec.insert( ret_vec.end(), other.d_str.begin(), other.d_str.end() ); - return String(ret_vec); - } + bool operator==(const String& y) const { return cmp(y) == 0; } + bool operator!=(const String& y) const { return cmp(y) != 0; } + bool operator<(const String& y) const { return cmp(y) < 0; } + bool operator>(const String& y) const { return cmp(y) > 0; } + bool operator<=(const String& y) const { return cmp(y) <= 0; } + bool operator>=(const String& y) const { return cmp(y) >= 0; } - bool operator <(const String& y) const { - if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size(); - else { - for(unsigned int i=0; i(const String& y) const { - if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size(); - else { - for(unsigned int i=0; i convertUnsignedIntToChar(y.d_str[i]); - - return false; - } - } - - bool operator <=(const String& y) const { - if(d_str.size() != y.d_str.size()) return d_str.size() < y.d_str.size(); - else { - for(unsigned int i=0; i=(const String& y) const { - if(d_str.size() != y.d_str.size()) return d_str.size() > y.d_str.size(); - else { - for(unsigned int i=0; i convertUnsignedIntToChar(y.d_str[i]); - - return true; - } - } - - bool strncmp(const String &y, const std::size_t np) const { - std::size_t n = np; - std::size_t b = (d_str.size() >= y.d_str.size()) ? d_str.size() : y.d_str.size(); - std::size_t s = (d_str.size() <= y.d_str.size()) ? d_str.size() : y.d_str.size(); - if(n > s) { - if(b == s) { - n = s; - } else { - return false; - } - } - for(std::size_t i=0; i= y.d_str.size()) ? d_str.size() : y.d_str.size(); - std::size_t s = (d_str.size() <= y.d_str.size()) ? d_str.size() : y.d_str.size(); - if(n > s) { - if(b == s) { - n = s; - } else { - return false; - } - } - for(std::size_t i=0; i 1) { - unsigned int f = d_str[0]; - for(unsigned i=1; i=0 && id_y>=0) { - if(d_str[id_x] != y.d_str[id_y]) { - c = id_x; - return false; - } - --id_x; --id_y; - } - c = id_x == -1 ? ( - (id_y+1) ) : (id_x + 1); - return true; - } + String prefix(std::size_t i) const { return substr(0, i); } + String suffix(std::size_t i) const { return substr(size() - i, i); } + // if y=y1...yn and overlap returns m, then this is x1...y1...ym + std::size_t overlap(const String& y) const; + // if y=y1...yn and overlap returns m, then this is y(n+1-m)...yn...xk + std::size_t roverlap(const String& y) const; - std::size_t find(const String &y, const std::size_t start = 0) const { - if(d_str.size() < y.d_str.size() + start) return std::string::npos; - if(y.d_str.size() == 0) return start; - if(d_str.size() == 0) return std::string::npos; - std::vector::const_iterator itr = std::search(d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end()); - if(itr != d_str.end()) { - return itr - d_str.begin(); - }else{ - return std::string::npos; - } - } - - std::size_t rfind( String &y, const std::size_t start = 0) { - std::reverse( d_str.begin(), d_str.end() ); - std::reverse( y.d_str.begin(), y.d_str.end() ); - std::size_t f = find( y, start ); - std::reverse( d_str.begin(), d_str.end() ); - std::reverse( y.d_str.begin(), y.d_str.end() ); - if( f==std::string::npos ){ - return std::string::npos; - }else{ - return f; - } - } + bool isNumber() const; + int toNumber() const; - String replace(const String &s, const String &t) const { - std::size_t ret = find(s); - if( ret != std::string::npos ) { - std::vector vec; - vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret); - vec.insert(vec.end(), t.d_str.begin(), t.d_str.end()); - vec.insert(vec.end(), d_str.begin() + ret + s.d_str.size(), d_str.end()); - return String(vec); - } else { - return *this; - } - } + const std::vector& getVec() const { return d_str; } - String substr(std::size_t i) const { - assert(i <= d_str.size()); - std::vector ret_vec; - std::vector::const_iterator itr = d_str.begin() + i; - ret_vec.insert(ret_vec.end(), itr, d_str.end()); - return String(ret_vec); - } - String substr(std::size_t i, std::size_t j) const { - assert(i+j <= d_str.size()); - std::vector ret_vec; - std::vector::const_iterator itr = d_str.begin() + i; - ret_vec.insert( ret_vec.end(), itr, itr + j ); - return String(ret_vec); - } + private: + // guarded + static unsigned char hexToDec(unsigned char c); - String prefix(std::size_t i) const { - return substr(0, i); - } - String suffix(std::size_t i) const { - return substr(d_str.size() - i, i); - } - // if y=y1...yn and overlap returns m, then this is x1...y1...ym - std::size_t overlap(String &y) const; - // if y=y1...yn and overlap returns m, then this is y(n+1-m)...yn...xk - std::size_t roverlap(String &y) const; - - bool isNumber() const { - if(d_str.size() == 0) return false; - for(unsigned int i=0; i'9') { - return false; - } - } - return true; - } - int toNumber() const { - if(isNumber()) { - int ret=0; - for(unsigned int i=0; i toInternal(const std::string& s); + unsigned char getUnsignedCharAt(size_t pos) const; - void getCharSet(std::set &cset) const; + /** + * Returns a negative number if *this < y, 0 if *this and y are equal and a + * positive number if *this > y. + */ + int cmp(const String& y) const; - std::vector getVec() const { - return d_str; - } -};/* class String */ + std::vector d_str; +}; /* class String */ namespace strings { struct CVC4_PUBLIC StringHashFunction { size_t operator()(const ::CVC4::String& s) const { - return __gnu_cxx::hash()(s.toString().c_str()); + return std::hash()(s.toString()); } -};/* struct StringHashFunction */ +}; /* struct StringHashFunction */ -}/* CVC4::strings namespace */ +} // namespace strings -std::ostream& operator <<(std::ostream& os, const String& s) CVC4_PUBLIC; +std::ostream& operator<<(std::ostream& os, const String& s) CVC4_PUBLIC; class CVC4_PUBLIC RegExp { -protected: - int d_type; -public: + public: RegExp() : d_type(1) {} + explicit RegExp(const int t) : d_type(t) {} - RegExp(const int t) : d_type(t) {} - - ~RegExp() {} - - bool operator ==(const RegExp& y) const { - return d_type == y.d_type ; - } - - bool operator !=(const RegExp& y) const { - return d_type != y.d_type ; - } - - bool operator <(const RegExp& y) const { - return d_type < y.d_type; - } - - bool operator >(const RegExp& y) const { - return d_type > y.d_type ; - } - - bool operator <=(const RegExp& y) const { - return d_type <= y.d_type; - } - - bool operator >=(const RegExp& y) const { - return d_type >= y.d_type ; - } + bool operator==(const RegExp& y) const { return d_type == y.d_type; } + bool operator!=(const RegExp& y) const { return d_type != y.d_type; } + bool operator<(const RegExp& y) const { return d_type < y.d_type; } + bool operator>(const RegExp& y) const { return d_type > y.d_type; } + bool operator<=(const RegExp& y) const { return d_type <= y.d_type; } + bool operator>=(const RegExp& y) const { return d_type >= y.d_type; } int getType() const { return d_type; } -};/* class RegExp */ + + private: + int d_type; +}; /* class RegExp */ /** * Hash function for the RegExp constants. @@ -385,10 +155,10 @@ struct CVC4_PUBLIC RegExpHashFunction { inline size_t operator()(const RegExp& s) const { return (size_t)s.getType(); } -};/* struct RegExpHashFunction */ +}; /* struct RegExpHashFunction */ -std::ostream& operator <<(std::ostream& os, const RegExp& s) CVC4_PUBLIC; +std::ostream& operator<<(std::ostream& os, const RegExp& s) CVC4_PUBLIC; -}/* CVC4 namespace */ +} // namespace CVC4 #endif /* __CVC4__REGEXP_H */ -- 2.30.2