From 0a02fd2b69c0c0f454fc33d8028b24f4fcf431de Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Thu, 9 Aug 2018 15:11:27 -0500 Subject: [PATCH] Fix char overflow issues in regular expression solver (#2275) --- src/api/cvc4cpp.cpp | 2 +- src/theory/strings/regexp_operation.cpp | 139 +++++++++++------- src/theory/strings/regexp_operation.h | 54 +++---- src/theory/strings/theory_strings.cpp | 21 ++- .../strings/theory_strings_rewriter.cpp | 24 ++- src/theory/strings/theory_strings_rewriter.h | 2 + .../strings/theory_strings_type_rules.h | 16 +- src/theory/strings/type_enumerator.h | 9 +- src/util/regexp.cpp | 33 ++++- src/util/regexp.h | 16 +- test/regress/Makefile.tests | 1 + .../strings/nterm-re-inter-sigma.smt2 | 11 ++ 12 files changed, 214 insertions(+), 114 deletions(-) create mode 100644 test/regress/regress1/strings/nterm-re-inter-sigma.smt2 diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp index bc696a057..3bc8a5cf5 100644 --- a/src/api/cvc4cpp.cpp +++ b/src/api/cvc4cpp.cpp @@ -1753,7 +1753,7 @@ Term Solver::mkString(const std::string& s) const Term Solver::mkString(const unsigned char c) const { - return d_exprMgr->mkConst(String(c)); + return d_exprMgr->mkConst(String(std::string(1, c))); } Term Solver::mkString(const std::vector& s) const diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index d17e42ede..477e99b9b 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -18,14 +18,14 @@ #include "expr/kind.h" #include "options/strings_options.h" +#include "theory/strings/theory_strings_rewriter.h" namespace CVC4 { namespace theory { namespace strings { RegExpOpr::RegExpOpr() - : d_lastchar(options::stdPrintASCII() ? '\x7f' : '\xff'), - d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))), + : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))), d_true(NodeManager::currentNM()->mkConst(true)), d_false(NodeManager::currentNM()->mkConst(false)), d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, @@ -35,12 +35,11 @@ RegExpOpr::RegExpOpr() d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))), d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))), RMAXINT(LONG_MAX), - d_char_start(), - d_char_end(), d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, std::vector{})), d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)) { + d_lastchar = TheoryStringsRewriter::getAlphabetCardinality()-1; } RegExpOpr::~RegExpOpr() {} @@ -260,7 +259,8 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { if(tmp == d_emptyString) { ret = 2; } else { - if(tmp.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) { + if (tmp.getConst().front() == c.front()) + { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); } else { @@ -273,7 +273,8 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { if(tmp.getKind() == kind::STRING_CONCAT) { Node t2 = tmp[0]; if(t2.isConst()) { - if(t2.getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) { + if (t2.getConst().front() == c.front()) + { Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) ); std::vector< Node > vec_nodes; @@ -495,7 +496,8 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { if(r[0] == d_emptyString) { retNode = d_emptyRegexp; } else { - if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) { + if (r[0].getConst().front() == c.front()) + { retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) ); } else { @@ -626,14 +628,17 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { return retNode; } -void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pvset ) { +void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset) +{ Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl; - std::map< Node, std::pair< std::set, SetNodes > >::const_iterator itr = d_fset_cache.find(r); + std::map, SetNodes> >::const_iterator itr = + d_fset_cache.find(r); if(itr != d_fset_cache.end()) { pcset.insert((itr->second).first.begin(), (itr->second).first.end()); pvset.insert((itr->second).second.begin(), (itr->second).second.end()); } else { - std::set cset; + // cset is code points + std::set cset; SetNodes vset; int k = r.getKind(); switch( k ) { @@ -641,15 +646,22 @@ void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pv break; } case kind::REGEXP_SIGMA: { - for(unsigned char i='\0'; i<=d_lastchar; i++) { + Assert(d_lastchar < std::numeric_limits::max()); + for (unsigned i = 0; i <= d_lastchar; i++) + { cset.insert(i); } break; } case kind::REGEXP_RANGE: { - unsigned char a = r[0].getConst().getFirstChar(); - unsigned char b = r[1].getConst().getFirstChar(); - for(unsigned char c=a; c<=b; c++) { + unsigned a = r[0].getConst().front(); + a = String::convertUnsignedIntToCode(a); + unsigned b = r[1].getConst().front(); + b = String::convertUnsignedIntToCode(b); + Assert(a < b); + Assert(b < std::numeric_limits::max()); + for (unsigned c = a; c <= b; c++) + { cset.insert(c); } break; @@ -659,18 +671,26 @@ void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pv if(st.isConst()) { CVC4::String s = st.getConst< CVC4::String >(); if(s.size() != 0) { - cset.insert(s.getFirstChar()); + unsigned sc = s.front(); + sc = String::convertUnsignedIntToCode(sc); + cset.insert(sc); } - } else if(st.getKind() == kind::VARIABLE) { - vset.insert( st ); - } else { + } + else if (st.getKind() == kind::STRING_CONCAT) + { if(st[0].isConst()) { - CVC4::String s = st[0].getConst< CVC4::String >(); - cset.insert(s.getFirstChar()); + CVC4::String s = st[0].getConst(); + unsigned sc = s.front(); + sc = String::convertUnsignedIntToCode(sc); + cset.insert(sc); } else { vset.insert( st[0] ); } } + else + { + vset.insert(st); + } break; } case kind::REGEXP_CONCAT: { @@ -714,18 +734,21 @@ void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pv } pcset.insert(cset.begin(), cset.end()); pvset.insert(vset.begin(), vset.end()); - std::pair< std::set, SetNodes > p(cset, vset); + std::pair, SetNodes> p(cset, vset); d_fset_cache[r] = p; } if(Trace.isOn("regexp-fset")) { Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {"; - for(std::set::const_iterator itr = pcset.begin(); - itr != pcset.end(); itr++) { - if(itr != pcset.begin()) { - Trace("regexp-fset") << ","; - } - Trace("regexp-fset") << (*itr); + for (std::set::const_iterator itr = pcset.begin(); + itr != pcset.end(); + itr++) + { + if (itr != pcset.begin()) + { + Trace("regexp-fset") << ","; + } + Trace("regexp-fset") << (*itr); } Trace("regexp-fset") << "}" << std::endl; } @@ -749,6 +772,7 @@ void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes, bool polarity) } void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { std::pair < Node, Node > p(s, r); + NodeManager *nm = NodeManager::currentNM(); std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p); if(itr != d_simpl_neg_cache.end()) { new_nodes.push_back( itr->second ); @@ -766,10 +790,15 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes } case kind::REGEXP_RANGE: { std::vector< Node > vec; - unsigned char a = r[0].getConst().getFirstChar(); - unsigned char b = r[1].getConst().getFirstChar(); - for(unsigned char c=a; c<=b; c++) { - Node tmp = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ) ).negate(); + unsigned a = r[0].getConst().front(); + a = String::convertUnsignedIntToCode(a); + unsigned b = r[1].getConst().front(); + b = String::convertUnsignedIntToCode(b); + for (unsigned c = a; c <= b; c++) + { + std::vector tmpVec; + tmpVec.push_back(String::convertCodeToUnsignedInt(c)); + Node tmp = s.eqNode(nm->mkConst(String(tmpVec))).negate(); vec.push_back( tmp ); } conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec); @@ -923,6 +952,7 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes } void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { std::pair < Node, Node > p(s, r); + NodeManager *nm = NodeManager::currentNM(); std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p); if(itr != d_simpl_cache.end()) { new_nodes.push_back( itr->second ); @@ -941,26 +971,19 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes case kind::REGEXP_RANGE: { conc = s.eqNode( r[0] ); if(r[0] != r[1]) { - unsigned char a = r[0].getConst().getFirstChar(); - unsigned char b = r[1].getConst().getFirstChar(); + unsigned a = r[0].getConst().front(); + unsigned b = r[1].getConst().front(); a += 1; - Node tmp = a!=b? NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, - NodeManager::currentNM()->mkNode(kind::REGEXP_RANGE, - NodeManager::currentNM()->mkConst( CVC4::String(a) ), - r[1])) : - s.eqNode(r[1]); + std::vector anvec; + anvec.push_back(a); + Node an = nm->mkConst(String(anvec)); + Node tmp = a != b + ? nm->mkNode(kind::STRING_IN_REGEXP, + s, + nm->mkNode(kind::REGEXP_RANGE, an, r[1])) + : s.eqNode(r[1]); conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp); } - /* - unsigned char a = r[0].getConst().getFirstChar(); - unsigned char b = r[1].getConst().getFirstChar(); - std::vector< Node > vec; - for(unsigned char c=a; c<=b; c++) { - Node t2 = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) )); - vec.push_back( t2 ); - } - conc = vec.empty()? d_emptySingleton : vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::OR, vec); - */ break; } case kind::STRING_TO_REGEXP: { @@ -1278,8 +1301,8 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > rNode = itrcache->second; } else { Trace("regexp-int-debug") << " ... normal without cache" << std::endl; - std::vector< unsigned char > cset; - std::set< unsigned char > cset1, cset2; + std::vector cset; + std::set cset1, cset2; std::set< Node > vset1, vset2; firstChars(r1, cset1, vset1); firstChars(r2, cset2, vset2); @@ -1302,8 +1325,10 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > } if(Trace.isOn("regexp-int-debug")) { Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; - for(std::vector::const_iterator itr = cset.begin(); - itr != cset.end(); itr++) { + for (std::vector::const_iterator itr = cset.begin(); + itr != cset.end(); + itr++) + { //CVC4::String c( *itr ); if(itr != cset.begin()) { Trace("regexp-int-debug") << ", "; @@ -1313,9 +1338,13 @@ Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > Trace("regexp-int-debug") << std::endl; } std::map< PairNodes, Node > cacheX; - for(std::vector::const_iterator itr = cset.begin(); - itr != cset.end(); itr++) { - CVC4::String c( *itr ); + for (std::vector::const_iterator itr = cset.begin(); + itr != cset.end(); + itr++) + { + std::vector cvec; + cvec.push_back(String::convertCodeToUnsignedInt(*itr)); + String c(cvec); Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; Node r1l = derivativeSingle(r1, c); Node r2l = derivativeSingle(r2, c); diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index ecf294fc6..a646f0e6f 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -38,8 +38,9 @@ class RegExpOpr { typedef std::set< Node > SetNodes; typedef std::pair< Node, Node > PairNodes; -private: - unsigned char d_lastchar; + private: + /** the code point of the last character in the alphabet we are using */ + unsigned d_lastchar; Node d_emptyString; Node d_true; Node d_false; @@ -49,39 +50,40 @@ private: Node d_one; CVC4::Rational RMAXINT; - unsigned char d_char_start; - unsigned char d_char_end; Node d_sigma; Node d_sigma_star; - std::map< PairNodes, Node > d_simpl_cache; - std::map< PairNodes, Node > d_simpl_neg_cache; - std::map< Node, std::pair< int, Node > > d_delta_cache; - std::map< PairNodeStr, Node > d_dv_cache; - std::map< PairNodeStr, std::pair< Node, int > > d_deriv_cache; - std::map< Node, std::pair< Node, int > > d_compl_cache; - std::map< Node, bool > d_cstre_cache; - std::map< Node, std::pair< std::set, std::set > > d_cset_cache; - std::map< Node, std::pair< std::set, std::set > > d_fset_cache; - std::map< PairNodes, Node > d_inter_cache; - std::map< Node, Node > d_rm_inter_cache; - std::map< Node, bool > d_norv_cache; - std::map< Node, std::vector< PairNodes > > d_split_cache; - //bool checkStarPlus( Node t ); - void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ); - void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ); - std::string niceChar( Node r ); - Node mkAllExceptOne( unsigned char c ); - bool isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2); + std::map d_simpl_cache; + std::map d_simpl_neg_cache; + std::map > d_delta_cache; + std::map d_dv_cache; + std::map > d_deriv_cache; + std::map > d_compl_cache; + std::map d_cstre_cache; + std::map, std::set > > d_cset_cache; + std::map, std::set > > d_fset_cache; + std::map d_inter_cache; + std::map d_rm_inter_cache; + std::map d_norv_cache; + std::map > d_split_cache; + void simplifyPRegExp(Node s, Node r, std::vector &new_nodes); + void simplifyNRegExp(Node s, Node r, std::vector &new_nodes); + std::string niceChar(Node r); + Node mkAllExceptOne(unsigned c); + bool isPairNodesInSet(std::set &s, Node n1, Node n2); bool containC2(unsigned cnt, Node n); Node convert1(unsigned cnt, Node n); void convert2(unsigned cnt, Node n, Node &r1, Node &r2); bool testNoRV(Node r); - Node intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ); + Node intersectInternal(Node r1, + Node r2, + std::map cache, + unsigned cnt); Node removeIntersection(Node r); - void firstChars( Node r, std::set &pcset, SetNodes &pvset ); -public: + void firstChars(Node r, std::set &pcset, SetNodes &pvset); + + public: RegExpOpr(); ~RegExpOpr(); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 1d853a754..c777a0976 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -175,11 +175,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); - d_card_size = 256; - if (options::stdPrintASCII()) - { - d_card_size = 128; - } + d_card_size = TheoryStringsRewriter::getAlphabetCardinality(); } TheoryStrings::~TheoryStrings() { @@ -741,6 +737,21 @@ void TheoryStrings::preRegisterTerm(TNode n) { throw LogicException(ss.str()); } if( tn.isString() ) { + // all characters of constants should fall in the alphabet + if (n.isConst()) + { + std::vector vec = n.getConst().getVec(); + for (unsigned u : vec) + { + if (u >= d_card_size) + { + std::stringstream ss; + ss << "Characters in string \"" << n + << "\" are outside of the given alphabet."; + throw LogicException(ss.str()); + } + } + } // if finite model finding is enabled, // then we minimize the length of this term if it is a variable // but not an internally generated Skolem, or a term that does diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 10bce3a29..9a0fad7d8 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -82,7 +82,9 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, else if (rc.getKind() == kind::REGEXP_RANGE || rc.getKind() == kind::REGEXP_SIGMA) { - CVC4::String ss( t==0 ? s.getLastChar() : s.getFirstChar() ); + std::vector ssVec; + ssVec.push_back(t == 0 ? s.back() : s.front()); + CVC4::String ss(ssVec); if( testConstStringInRegExp( ss, 0, rc ) ){ //strip off one character mchildren.pop_back(); @@ -210,6 +212,17 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, return Node::null(); } +unsigned TheoryStringsRewriter::getAlphabetCardinality() +{ + if (options::stdPrintASCII()) + { + Assert(128 <= String::num_codes()); + return 128; + } + Assert(256 <= String::num_codes()); + return 256; +} + Node TheoryStringsRewriter::rewriteEquality(Node node) { Assert(node.getKind() == kind::EQUAL); @@ -813,9 +826,12 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i } case kind::REGEXP_RANGE: { if(s.size() == index_start + 1) { - unsigned char a = r[0].getConst().getFirstChar(); - unsigned char b = r[1].getConst().getFirstChar(); - unsigned char c = s.getLastChar(); + unsigned a = r[0].getConst().front(); + a = String::convertUnsignedIntToCode(a); + unsigned b = r[1].getConst().front(); + b = String::convertUnsignedIntToCode(b); + unsigned c = s.back(); + c = String::convertUnsignedIntToCode(c); return (a <= c && c <= b); } else { return false; diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 1a3f388ba..5937e778f 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -113,6 +113,8 @@ class TheoryStringsRewriter { static inline void init() {} static inline void shutdown() {} + /** get the cardinality of the alphabet used, based on the options */ + static unsigned getAlphabetCardinality(); /** rewrite equality * * This method returns a formula that is equivalent to the equality between diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index a9ced55f3..9357a8f98 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -347,7 +347,7 @@ public: { if( check ) { TNode::iterator it = n.begin(); - unsigned char ch[2]; + unsigned ch[2]; for(int i=0; i<2; ++i) { TypeNode t = (*it).getType(check); @@ -360,18 +360,20 @@ public: if( (*it).getConst().size() != 1 ) { throw TypeCheckingExceptionPrivate(n, "expecting a single constant string term in regexp range"); } - ch[i] = (*it).getConst().getFirstChar(); + unsigned ci = (*it).getConst().front(); + ch[i] = String::convertUnsignedIntToCode(ci); ++it; } if(ch[0] > ch[1]) { throw TypeCheckingExceptionPrivate(n, "expecting the first constant is less or equal to the second one in regexp range"); } - if (options::stdPrintASCII() && ch[1] > '\x7f') + unsigned maxCh = options::stdPrintASCII() ? 127 : 255; + if (ch[1] > maxCh) { - throw TypeCheckingExceptionPrivate(n, - "expecting standard ASCII " - "characters in regexp range when " - "strings-print-ascii is true"); + std::stringstream ss; + ss << "expecting characters whose code point is less than or equal to " + << maxCh; + throw TypeCheckingExceptionPrivate(n, ss.str()); } } return nodeManager->regExpType(); diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h index 30d6c798e..108b1edc4 100644 --- a/src/theory/strings/type_enumerator.h +++ b/src/theory/strings/type_enumerator.h @@ -21,10 +21,11 @@ #include -#include "util/regexp.h" -#include "theory/type_enumerator.h" -#include "expr/type_node.h" #include "expr/kind.h" +#include "expr/type_node.h" +#include "theory/strings/theory_strings_rewriter.h" +#include "theory/type_enumerator.h" +#include "util/regexp.h" namespace CVC4 { namespace theory { @@ -45,7 +46,7 @@ class StringEnumerator : public TypeEnumeratorBase { { Assert(type.getKind() == kind::TYPE_CONSTANT && type.getConst() == STRING_TYPE); - d_cardinality = 256; + d_cardinality = TheoryStringsRewriter::getAlphabetCardinality(); mkCurr(); } Node operator*() override { return d_curr; } diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 93178b948..8d68d4e86 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -58,13 +58,25 @@ unsigned String::convertUnsignedIntToCode(unsigned i) return (i + start_code()) % num_codes(); } +String::String(const std::vector &s) : d_str(s) +{ +#ifdef CVC4_ASSERTIONS + for (unsigned u : d_str) + { + Assert(convertUnsignedIntToCode(u) < num_codes()); + } +#endif +} + int String::cmp(const String &y) const { if (size() != y.size()) { return size() < y.size() ? -1 : 1; } for (unsigned int i = 0; i < size(); ++i) { if (d_str[i] != y.d_str[i]) { - return getUnsignedCharAt(i) < y.getUnsignedCharAt(i) ? -1 : 1; + unsigned cp = convertUnsignedIntToCode(d_str[i]); + unsigned cpy = convertUnsignedIntToCode(y.d_str[i]); + return cp < cpy ? -1 : 1; } } return 0; @@ -207,12 +219,25 @@ std::vector String::toInternal(const std::string &s, i++; } } +#ifdef CVC4_ASSERTIONS + for (unsigned u : str) + { + Assert(convertUnsignedIntToCode(u) < num_codes()); + } +#endif return str; } -unsigned char String::getUnsignedCharAt(size_t pos) const { - Assert(pos < size()); - return convertUnsignedIntToChar(d_str[pos]); +unsigned String::front() const +{ + Assert(!d_str.empty()); + return d_str.front(); +} + +unsigned String::back() const +{ + Assert(!d_str.empty()); + return d_str.back(); } std::size_t String::overlap(const String &y) const { diff --git a/src/util/regexp.h b/src/util/regexp.h index e91ca61e2..2ab6b659c 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -51,6 +51,8 @@ class CVC4_PUBLIC String { * This is the cardinality of the alphabet that is representable by this * class. Notice that this must be greater than or equal to the cardinality * of the alphabet that the string theory reasons about. + * + * This must be strictly less than std::numeric_limits::max(). */ static inline unsigned num_codes() { return 256; } /** @@ -89,9 +91,7 @@ class CVC4_PUBLIC String { : d_str(toInternal(s, useEscSequences)) {} explicit String(const char* s, bool useEscSequences = false) : d_str(toInternal(std::string(s), useEscSequences)) {} - explicit String(const unsigned char c) - : d_str({convertCharToUnsignedInt(c)}) {} - explicit String(const std::vector& s) : d_str(s) {} + explicit String(const std::vector& s); String& operator=(const String& y) { if (this != &y) { @@ -137,9 +137,6 @@ class CVC4_PUBLIC String { /** Return the length of the string */ std::size_t size() const { return d_str.size(); } - unsigned char getFirstChar() const { return getUnsignedCharAt(0); } - unsigned char getLastChar() const { return getUnsignedCharAt(size() - 1); } - bool isRepeated() const; bool tailcmp(const String& y, int& c) const; @@ -187,8 +184,12 @@ class CVC4_PUBLIC String { bool isNumber() const; /** Returns the corresponding rational for the text of this string. */ Rational toNumber() const; - + /** get the internal unsigned representation of this string */ const std::vector& getVec() const { return d_str; } + /** get the internal unsigned value of the first character in this string */ + unsigned front() const; + /** get the internal unsigned value of the last character in this string */ + unsigned back() const; /** is the unsigned a digit? * The input should be the same type as the element type of d_str */ @@ -205,7 +206,6 @@ class CVC4_PUBLIC String { static std::vector toInternal(const std::string& s, bool useEscSequences = true); - unsigned char getUnsignedCharAt(size_t pos) const; /** * Returns a negative number if *this < y, 0 if *this and y are equal and a diff --git a/test/regress/Makefile.tests b/test/regress/Makefile.tests index 0724fc163..804077186 100644 --- a/test/regress/Makefile.tests +++ b/test/regress/Makefile.tests @@ -1472,6 +1472,7 @@ REG1_TESTS = \ regress1/strings/norn-ab.smt2 \ regress1/strings/norn-nel-bug-052116.smt2 \ regress1/strings/norn-simp-rew-sat.smt2 \ + regress1/strings/nterm-re-inter-sigma.smt2 \ regress1/strings/pierre150331.smt2 \ regress1/strings/re-unsound-080718.smt2 \ regress1/strings/regexp001.smt2 \ diff --git a/test/regress/regress1/strings/nterm-re-inter-sigma.smt2 b/test/regress/regress1/strings/nterm-re-inter-sigma.smt2 new file mode 100644 index 000000000..4ac3efff7 --- /dev/null +++ b/test/regress/regress1/strings/nterm-re-inter-sigma.smt2 @@ -0,0 +1,11 @@ +(set-info :smt-lib-version 2.5) +(set-logic ALL) +(set-info :status sat) +(set-option :strings-exp true) +(declare-fun x () String) + +(assert +(not (= (str.in.re x (re.++ (re.++ re.allchar re.allchar ) (re.* re.allchar ))) (not (str.in.re x (re.* (str.to.re "A"))))) +)) + +(check-sat) -- 2.30.2