From 8494e02bf31a08a686e1cf990e512250a9210acc Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 4 Jul 2018 16:34:17 +0100 Subject: [PATCH] More cleanup in strings (#2138) --- src/theory/strings/kinds | 11 - src/theory/strings/regexp_operation.cpp | 8 - src/theory/strings/regexp_operation.h | 1 - .../strings/theory_strings_rewriter.cpp | 311 +----------------- src/theory/strings/theory_strings_rewriter.h | 4 - src/util/regexp.cpp | 17 +- src/util/regexp.h | 9 +- 7 files changed, 17 insertions(+), 344 deletions(-) diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index d931e99bc..de4a013cd 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -28,13 +28,6 @@ operator STRING_ITOS 1 "integer to string" operator STRING_STOI 1 "string to integer (total function)" operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise" -#sort CHAR_TYPE \ -# Cardinality::INTEGERS \ -# well-founded \ -# "NodeManager::currentNM()->mkConst(::CVC4::String())" \ -# "util/regexp.h" \ -# "String type" - sort STRING_TYPE \ Cardinality::INTEGERS \ well-founded \ @@ -53,10 +46,6 @@ enumerator STRING_TYPE \ "::CVC4::theory::strings::StringEnumerator" \ "theory/strings/type_enumerator.h" -#enumerator REGEXP_TYPE \ -# "::CVC4::theory::strings::RegExpEnumerator" \ -# "theory/strings/type_enumerator.h" - constant CONST_STRING \ ::CVC4::String \ ::CVC4::strings::StringHashFunction \ diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index e130d5e4b..d17e42ede 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -45,14 +45,6 @@ RegExpOpr::RegExpOpr() RegExpOpr::~RegExpOpr() {} -int RegExpOpr::gcd ( int a, int b ) { - int c; - while ( a != 0 ) { - c = a; a = b%a; b = c; - } - return b; -} - bool RegExpOpr::checkConstRegExp( Node r ) { Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl; bool ret = true; diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index d33a2b70c..ecf294fc6 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -71,7 +71,6 @@ private: void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ); void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ); std::string niceChar( Node r ); - int gcd ( int a, int b ); Node mkAllExceptOne( unsigned char c ); bool isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index b7cb22329..66514fde2 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -382,302 +382,6 @@ Node TheoryStringsRewriter::rewriteConcat(Node node) return retNode; } - -void TheoryStringsRewriter::mergeInto(std::vector &t, const std::vector &s) { - for(std::vector::const_iterator itr=s.begin(); itr!=s.end(); itr++) { - if(std::find(t.begin(), t.end(), (*itr)) == t.end()) { - t.push_back( *itr ); - } - } -} - -void TheoryStringsRewriter::shrinkConVec(std::vector &vec) { - unsigned i = 0; - Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); - while(i < vec.size()) { - if( vec[i] == emptysingleton ) { - vec.erase(vec.begin() + i); - } else if(vec[i].getKind()==kind::STRING_TO_REGEXP && imkNode(kind::STRING_CONCAT, vec[i][0], vec[i+1][0]); - tmp = rewriteConcat(tmp); - vec[i] = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, tmp); - vec.erase(vec.begin() + i + 1); - } else { - i++; - } - } -} - -Node TheoryStringsRewriter::applyAX( TNode node ) { - Trace("regexp-ax") << "Regexp::AX start " << node << std::endl; - Node retNode = node; - - int k = node.getKind(); - switch( k ) { - case kind::REGEXP_UNION: { - std::vector vec_nodes; - for(unsigned i=0; i nvec; - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); - } else { - retNode = vec_nodes.size() == 1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); - } - break; - } - case kind::REGEXP_CONCAT: { - std::vector< std::vector > vec_nodes; - bool emptyflag = false; - Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); - for(unsigned i=0; i vtmp; - if(tmp[j].getKind() == kind::REGEXP_CONCAT) { - for(unsigned j2=0; j2 vtmp; - for(unsigned j=0; j vtmp; - vtmp.push_back(tmp); - vec_nodes.push_back(vtmp); - } - } else { - //non-empty vec - if(tmp.getKind() == kind::REGEXP_UNION) { - unsigned cnt = vec_nodes.size(); - for(unsigned i2=0; i2 vleft( vec_nodes[i2] ); - for(unsigned j=0; j vt( vec_nodes[i2] ); - if(tmp[j].getKind() != kind::REGEXP_CONCAT) { - vt.push_back( tmp[j] ); - } else { - for(unsigned j2=0; j2 nvec; - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); - } else if(vec_nodes.empty()) { - retNode = emptysingleton; - } else if(vec_nodes.size() == 1) { - shrinkConVec(vec_nodes[0]); - retNode = vec_nodes[0].empty()? emptysingleton - : vec_nodes[0].size()==1? vec_nodes[0][0] - : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes[0]); - } else { - std::vector vtmp; - for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, vec_nodes[i]); - vtmp.push_back(ntmp); - } - } - retNode = vtmp.empty()? emptysingleton - : vtmp.size()==1? vtmp[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vtmp); - } - break; - } - case kind::REGEXP_STAR: { - Node tmp = applyAX(node[0]); - Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); - if(tmp.getKind() == kind::REGEXP_EMPTY || tmp == emptysingleton) { - retNode = emptysingleton; - } else { - if(tmp.getKind() == kind::REGEXP_UNION) { - std::vector vec; - for(unsigned i=0; imkNode( kind::REGEXP_UNION, vec) ; - } - } else if(tmp.getKind() == kind::REGEXP_STAR) { - tmp = tmp[0]; - } - if(tmp != node[0]) { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, tmp ); - } - } - break; - } - case kind::REGEXP_INTER: { - std::vector< std::vector > vec_nodes; - bool emptyflag = false; - bool epsflag = false; - Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); - for(unsigned i=0; i vtmp; - for(unsigned j=0; j vtmp; - if(tmp[j].getKind() == kind::REGEXP_INTER) { - for(unsigned j2=0; j2 vtmp; - vtmp.push_back(tmp); - vec_nodes.push_back(vtmp); - } - } else { - //non-empty vec - if(tmp.getKind() == kind::REGEXP_INTER) { - for(unsigned j=0; j vleft( vec_nodes[i2] ); - for(unsigned j=0; j vt(vec_nodes[i2]); - if(tmp[j].getKind() != kind::REGEXP_INTER) { - if(std::find(vt.begin(), vt.end(), tmp[j]) == vt.end()) { - vt.push_back(tmp[j]); - } - } else { - std::vector vtmp; - for(unsigned j2=0; j2 nvec; - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); - } else if(vec_nodes.empty()) { - //to check? - retNode = emptysingleton; - } else if(vec_nodes.size() == 1) { - retNode = vec_nodes[0].empty() ? emptysingleton : vec_nodes[0].size() == 1 ? vec_nodes[0][0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes[0] ); - } else { - std::vector vtmp; - for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes[i] ); - vtmp.push_back(tmp); - } - retNode = vtmp.size() == 1? vtmp[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vtmp ); - } - break; - } - case kind::REGEXP_SIGMA: { - break; - } - case kind::REGEXP_EMPTY: { - break; - } - //default: { - //to check? - //} - } - - Trace("regexp-ax") << "Regexp::AX end " << node << " to\n " << retNode << std::endl; - return retNode; -} - Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { Assert( node.getKind() == kind::REGEXP_CONCAT ); Trace("strings-prerewrite") << "Strings::prerewriteConcatRegExp start " << node << std::endl; @@ -1023,7 +727,7 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i Node TheoryStringsRewriter::rewriteMembership(TNode node) { Node retNode = node; Node x = node[0]; - Node r = node[1];//applyAX(node[1]); + Node r = node[1]; if(r.getKind() == kind::REGEXP_EMPTY) { retNode = NodeManager::currentNM()->mkConst( false ); @@ -1129,6 +833,7 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { Trace("strings-postrewrite") << "Strings::postRewrite start " << node << std::endl; + NodeManager* nm = NodeManager::currentNM(); Node retNode = node; Node orig = retNode; @@ -1177,7 +882,6 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else if (node.getKind() == kind::STRING_LT) { - NodeManager* nm = NodeManager::currentNM(); // eliminate s < t ---> s != t AND s <= t retNode = nm->mkNode(AND, node[0].eqNode(node[1]).negate(), @@ -1209,16 +913,9 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { if(node[0].isConst()) { CVC4::String s = node[0].getConst(); if(s.isNumber()) { - std::string stmp = s.toString(); - //TODO: leading zeros : when smt2 standard for strings is set, uncomment this if applicable - //if(stmp[0] == '0' && stmp.size() != 1) { - //retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); - //} else { - CVC4::Rational r2(stmp.c_str()); - retNode = NodeManager::currentNM()->mkConst( r2 ); - //} + retNode = nm->mkConst(s.toNumber()); } else { - retNode = NodeManager::currentNM()->mkConst(::CVC4::Rational(-1)); + retNode = nm->mkConst(::CVC4::Rational(-1)); } } else if(node[0].getKind() == kind::STRING_CONCAT) { for(unsigned i=0; i &t, const std::vector &s); - static void shrinkConVec(std::vector &vec); - static Node applyAX( TNode node ); - static Node prerewriteConcatRegExp(TNode node); static Node prerewriteOrRegExp(TNode node); static Node prerewriteAndRegExp(TNode node); diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 8589c6993..93178b948 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -421,18 +421,11 @@ size_t String::maxSize() { return std::numeric_limits::max(); } - -int String::toNumber() const { - if (isNumber()) { - int ret = 0; - for (unsigned int i = 0; i < size(); ++i) { - unsigned char c = convertUnsignedIntToChar(d_str[i]); - ret = ret * 10 + (int)c - (int)'0'; - } - return ret; - } else { - return -1; - } +Rational String::toNumber() const +{ + // when smt2 standard for strings is set, this may change, based on the + // semantics of str.from.int for leading zeros + return Rational(toString()); } unsigned char String::hexToDec(unsigned char c) { diff --git a/src/util/regexp.h b/src/util/regexp.h index e2d07111d..e91ca61e2 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -25,6 +25,7 @@ #include #include #include +#include "util/rational.h" namespace CVC4 { @@ -178,8 +179,14 @@ class CVC4_PUBLIC String { */ std::size_t roverlap(const String& y) const; + /** + * Returns true if this string corresponds in text to a number, for example + * this returns true for strings "7", "12", "004", "0" and false for strings + * "abc", "4a", "-4", "". + */ bool isNumber() const; - int toNumber() const; + /** Returns the corresponding rational for the text of this string. */ + Rational toNumber() const; const std::vector& getVec() const { return d_str; } /** is the unsigned a digit? -- 2.30.2