From 2bc57de07b8132ee584da614ae49e4d132818f67 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Thu, 5 Feb 2015 18:27:47 -0600 Subject: [PATCH] Improved string performance, thanks to Peter's benchmarks. --- src/theory/strings/options | 7 +- src/theory/strings/regexp_operation.cpp | 316 ++++++++++++++++-- src/theory/strings/regexp_operation.h | 2 +- src/theory/strings/theory_strings.cpp | 30 +- src/theory/strings/theory_strings.h | 1 + .../strings/theory_strings_preprocess.cpp | 13 + .../strings/theory_strings_rewriter.cpp | 113 ++++++- .../strings/theory_strings_type_rules.h | 4 + src/theory/theory_model.cpp | 8 +- test/regress/regress0/strings/Makefile.am | 1 - 10 files changed, 445 insertions(+), 50 deletions(-) diff --git a/src/theory/strings/options b/src/theory/strings/options index dc8c50966..11156b5a4 100644 --- a/src/theory/strings/options +++ b/src/theory/strings/options @@ -11,6 +11,9 @@ option stringExp strings-exp --strings-exp bool :default false :read-write option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_equal(2) :predicate-include "smt/smt_engine.h" the strategy of LB rule application: 0-lazy, 1-eager, 2-no +option stdASCII strings-std-ascii --strings-std-ascii bool :default true :predicate less_equal(2) :predicate-include "smt/smt_engine.h" + the alphabet contains only characters from the standard ASCII or the extended one + option stringFMF strings-fmf --strings-fmf bool :default false :read-write the finite model finding used by the theory of strings @@ -26,7 +29,7 @@ option stringOpt2 strings-opt2 --strings-opt2 bool :default false option stringIgnNegMembership strings-inm --strings-inm bool :default false internal for strings: ignore negative membership constraints (fragment checking is needed, left to users for now) -expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" - the cardinality of the characters used by the theory of strings, default 256 +#expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 128 :read-write +# the cardinality of the characters used by the theory of strings, default 128 (for standard ASCII) or 256 (for extended ASCII) endmodule diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index e09f47f0f..20b13f7b1 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -16,14 +16,15 @@ #include "theory/strings/regexp_operation.h" #include "expr/kind.h" +#include "theory/strings/options.h" namespace CVC4 { namespace theory { namespace strings { RegExpOpr::RegExpOpr() - : d_lastchar( '\xff' ), - RMAXINT( LONG_MAX ) + : d_lastchar( options::stdASCII()? '\x7f' : '\xff' ), + RMAXINT( LONG_MAX ) { d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); d_true = NodeManager::currentNM()->mkConst( true ); @@ -46,7 +47,7 @@ int RegExpOpr::gcd ( int a, int b ) { } bool RegExpOpr::checkConstRegExp( Node r ) { - Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with " << mkString( r ) << std::endl; + Trace("strings-regexp-cstre") << "RegExp-CheckConstRegExp starts with /" << mkString( r ) << "/" << std::endl; bool ret = true; if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) { ret = d_cstre_cache[r]; @@ -68,7 +69,7 @@ bool RegExpOpr::checkConstRegExp( Node r ) { // 0-unknown, 1-yes, 2-no int RegExpOpr::delta( Node r, Node &exp ) { - Trace("regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl; + Trace("regexp-delta") << "RegExp-Delta starts with /" << mkString( r ) << "/" << std::endl; int ret = 0; if( d_delta_cache.find( r ) != d_delta_cache.end() ) { ret = d_delta_cache[r].first; @@ -193,6 +194,14 @@ int RegExpOpr::delta( Node r, Node &exp ) { ret = 2; break; } + case kind::REGEXP_LOOP: { + if(r[1] == d_zero) { + ret = 1; + } else { + ret = delta(r[0], exp); + } + break; + } default: { //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl; Unreachable(); @@ -211,7 +220,7 @@ int RegExpOpr::delta( Node r, Node &exp ) { // 0-unknown, 1-yes, 2-no int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { Assert( c.size() < 2 ); - Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl; + Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; int ret = 1; retNode = d_emptyRegexp; @@ -411,6 +420,26 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r )); break; } + case kind::REGEXP_LOOP: { + if(r[1] == r[2] && r[1] == d_zero) { + ret = 2; + //retNode = d_emptyRegexp; + } else { + Node dc; + ret = derivativeS(r[0], c, dc); + if(dc==d_emptyRegexp) { + unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], + NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), + NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); + retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); + } else { + retNode = d_emptyRegexp; + } + } + break; + } default: { //Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); @@ -423,13 +452,13 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { d_deriv_cache[dv] = p; } - Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl; + Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; return ret; } Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { Assert( c.size() < 2 ); - Trace("regexp-derive") << "RegExp-derive starts with R{ " << mkString( r ) << " }, c=" << c << std::endl; + Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; Node retNode = d_emptyRegexp; PairNodeStr dv = std::make_pair( r, c ); if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { @@ -519,7 +548,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { vec_nodes.push_back( dc ); } } - Trace("regexp-derive") << "RegExp-derive OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; + //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl; } retNode = vec_nodes.size() == 0 ? d_emptyRegexp : ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); @@ -559,17 +588,34 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { case kind::REGEXP_STAR: { Node dc = derivativeSingle(r[0], c); if(dc != d_emptyRegexp) { - retNode = dc==NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); + retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); } else { retNode = d_emptyRegexp; } break; } + case kind::REGEXP_LOOP: { + if(r[1] == r[2] && r[1] == d_zero) { + retNode = d_emptyRegexp; + } else { + Node dc = derivativeSingle(r[0], c); + if(dc != d_emptyRegexp) { + unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], + NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), + NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); + retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); + } else { + retNode = d_emptyRegexp; + } + } + //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; + break; + } default: { - //TODO: special sym: sigma, none, all Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; Unreachable(); - //return Node::null(); } } if(retNode != d_emptyRegexp) { @@ -577,7 +623,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { } d_dv_cache[dv] = retNode; } - Trace("regexp-derive") << "RegExp-derive returns : " << mkString( retNode ) << std::endl; + Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; return retNode; } @@ -671,6 +717,14 @@ void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pv } break; } + case kind::REGEXP_RANGE: { + unsigned char a = r[0].getConst().getFirstChar(); + unsigned char b = r[1].getConst().getFirstChar(); + for(unsigned char c=a; c<=b; c++) { + cset.insert(c); + } + break; + } case kind::STRING_TO_REGEXP: { Node st = Rewriter::rewrite(r[0]); if(st.isConst()) { @@ -720,6 +774,10 @@ void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pv firstChars(r[0], cset, vset); break; } + case kind::REGEXP_LOOP: { + firstChars(r[0], cset, vset); + break; + } default: { Trace("regexp-error") << "Unsupported term: " << r << " in firstChars." << std::endl; Unreachable(); @@ -843,7 +901,7 @@ bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< unsigned char > &ve } break; default: { - Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl; + Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in follow of RegExp." << std::endl; //AlwaysAssert( false ); //return Node::null(); return false; @@ -895,6 +953,17 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)).negate(); break; } + case kind::REGEXP_RANGE: { + std::vector< Node > vec; + unsigned char a = r[0].getConst().getFirstChar(); + unsigned char b = r[1].getConst().getFirstChar(); + for(unsigned char c=a; c<=b; c++) { + Node tmp = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) ) ).negate(); + vec.push_back( tmp ); + } + conc = vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::AND, vec); + break; + } case kind::STRING_TO_REGEXP: { conc = s.eqNode(r[0]).negate(); break; @@ -997,8 +1066,42 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes } break; } + case kind::REGEXP_LOOP: { + Assert(r.getNumChildren() == 3); + if(r[1] == r[2]) { + if(r[1] == d_zero) { + conc = s.eqNode(d_emptyString).negate(); + } else if(r[1] == d_one) { + conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r[0]).negate(); + } else { + //unroll for now + unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); + std::vector vec; + for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, vec); + conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r2).negate(); + } + } else { + Assert(r[1] == d_zero); + //unroll for now + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + std::vector vec; + vec.push_back(d_emptySingleton); + std::vector vec2; + for(unsigned i=1; i<=u; i++) { + vec2.push_back(r[0]); + Node r2 = i==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec); + vec.push_back(r2); + } + Node r3 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec); + conc = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, r3).negate(); + } + break; + } default: { - Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl; + Trace("strings-error") << "Unsupported term: " << r << " in simplifyNRegExp." << std::endl; Assert( false, "Unsupported Term" ); } } @@ -1024,6 +1127,29 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes conc = d_one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); break; } + case kind::REGEXP_RANGE: { + conc = s.eqNode( r[0] ); + if(r[0] != r[1]) { + unsigned char a = r[0].getConst().getFirstChar(); + a += 1; + Node tmp = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, s, + NodeManager::currentNM()->mkNode(kind::REGEXP_RANGE, + NodeManager::currentNM()->mkConst( CVC4::String(a) ), + r[1])); + conc = NodeManager::currentNM()->mkNode(kind::OR, conc, tmp); + } + /* + unsigned char a = r[0].getConst().getFirstChar(); + unsigned char b = r[1].getConst().getFirstChar(); + std::vector< Node > vec; + for(unsigned char c=a; c<=b; c++) { + Node t2 = s.eqNode( NodeManager::currentNM()->mkConst( CVC4::String(c) )); + vec.push_back( t2 ); + } + conc = vec.empty()? d_emptySingleton : vec.size()==1? vec[0] : NodeManager::currentNM()->mkNode(kind::OR, vec); + */ + break; + } case kind::STRING_TO_REGEXP: { conc = s.eqNode(r[0]); break; @@ -1112,8 +1238,47 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes } break; } + case kind::REGEXP_LOOP: { + Assert(r.getNumChildren() == 3); + if(r[1] == d_zero) { + if(r[2] == d_zero) { + conc = s.eqNode( d_emptyString ); + } else { + //R{0,n} + if(s != d_emptyString) { + Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); + Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); + Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); + Node sk1ne = sk1.eqNode(d_emptyString).negate(); + Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); + Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, + NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], d_zero, u1)); + conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); + conc = NodeManager::currentNM()->mkNode(kind::OR, + s.eqNode(d_emptyString), conc); + } else { + conc = d_true; + } + } + } else { + //R^n + Node sk1 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); + Node sk2 = NodeManager::currentNM()->mkSkolem( "lps", s.getType(), "created for regular expression loop" ); + Node seq12 = s.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, sk1, sk2)); + Node sk1ne = sk1.eqNode(d_emptyString).negate(); + Node sk1inr = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk1, r[0]); + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + Node u1 = NodeManager::currentNM()->mkConst(CVC4::Rational(u - 1)); + Node sk2inru = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, sk2, + NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], u1, u1)); + conc = NodeManager::currentNM()->mkNode(kind::AND, seq12, sk1ne, sk1inr, sk2inru); + } + break; + } default: { - Trace("strings-regexp") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl; + Trace("strings-error") << "Unsupported term: " << r << " in simplifyPRegExp." << std::endl; Assert( false, "Unsupported Term" ); } } @@ -1193,6 +1358,10 @@ void RegExpOpr::getCharSet( Node r, std::set &pcset, SetNodes &pv getCharSet(r[0], cset, vset); break; } + case kind::REGEXP_LOOP: { + getCharSet(r[0], cset, vset); + break; + } default: { //Trace("strings-error") << "Unsupported term: " << r << " in getCharSet." << std::endl; Unreachable(); @@ -1236,6 +1405,8 @@ bool RegExpOpr::containC2(unsigned cnt, Node n) { } } else if(n.getKind() == kind::REGEXP_STAR) { return containC2(cnt, n[0]); + } else if(n.getKind() == kind::REGEXP_LOOP) { + return containC2(cnt, n[0]); } else if(n.getKind() == kind::REGEXP_UNION) { for( unsigned i=0; i() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::containC2"); + Assert(n[0].getConst() <= RMAXINT, "Exceeded LONG_MAX in RegExpOpr::convert2"); unsigned y = n[0].getConst().getNumerator().toUnsignedInt(); r1 = d_emptySingleton; if(cnt == y) { @@ -1311,6 +1482,11 @@ void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { } else if(n.getKind() == kind::STRING_TO_REGEXP || n.getKind() == kind::REGEXP_SIGMA || n.getKind() == kind::REGEXP_RANGE) { r1 = d_emptySingleton; r2 = n; + } else if(n.getKind() == kind::REGEXP_LOOP) { + //TODO:LOOP + r1 = d_emptySingleton; + r2 = n; + //Unreachable(); } else { //is it possible? Unreachable(); @@ -1479,6 +1655,10 @@ Node RegExpOpr::removeIntersection(Node r) { retNode = r; break; } + case kind::REGEXP_RANGE: { + retNode = r; + break; + } case kind::STRING_TO_REGEXP: { retNode = r; break; @@ -1515,6 +1695,11 @@ Node RegExpOpr::removeIntersection(Node r) { retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, retNode) ); break; } + case kind::REGEXP_LOOP: { + retNode = removeIntersection( r[0] ); + retNode = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, retNode, r[1], r[2]) ); + break; + } default: { Unreachable(); } @@ -1679,6 +1864,58 @@ void RegExpOpr::splitRegExp(Node r, std::vector< PairNodes > &pset) { } break; } + case kind::REGEXP_LOOP: { + unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + if(l == u) { + //R^n + if(l == 0) { + PairNodes tmp1(d_emptySingleton, d_emptySingleton); + pset.push_back(tmp1); + } else if(l == 1) { + splitRegExp(r[0], pset); + } else { + std::vector< PairNodes > tset; + splitRegExp(r[0], tset); + for(unsigned j=0; jmkConst(CVC4::Rational(j)); + Node r1 = j==0? d_emptySingleton : j==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num1, num1); + unsigned j2 = l - j - 1; + Node num2 = NodeManager::currentNM()->mkConst(CVC4::Rational(j2)); + Node r2 = j2==0? d_emptySingleton : j2==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2); + for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, r1, tset[i].first); + r2 = tset[i].second==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r2); + PairNodes tmp2(r1, r2); + pset.push_back(tmp2); + } + } + } + } else { + //R{0,n} + PairNodes tmp1(d_emptySingleton, d_emptySingleton); + pset.push_back(tmp1); + std::vector< PairNodes > tset; + splitRegExp(r[0], tset); + pset.insert(pset.end(), tset.begin(), tset.end()); + for(unsigned k=2; k<=u; k++) { + for(unsigned j=0; jmkConst(CVC4::Rational(j)); + Node r1 = j==0? d_emptySingleton : j==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num1, num1); + unsigned j2 = k - j - 1; + Node num2 = NodeManager::currentNM()->mkConst(CVC4::Rational(j2)); + Node r2 = j2==0? d_emptySingleton : j2==1? r[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2); + for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, r1, tset[i].first); + r2 = tset[i].second==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, tset[i].second, r2); + PairNodes tmp2(r1, r2); + pset.push_back(tmp2); + } + } + } + } + break; + } case kind::REGEXP_PLUS: { std::vector< PairNodes > tset; splitRegExp(r[0], tset); @@ -1741,6 +1978,10 @@ void RegExpOpr::flattenRegExp(Node r, std::vector< std::pair< CVC4::String, unsi //TODO break; } + case kind::REGEXP_LOOP: { + //TODO + break; + } default: { Unreachable(); } @@ -1757,6 +1998,10 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector &vec_or) { vec_or.push_back(r); break; } + case kind::REGEXP_RANGE: { + vec_or.push_back(r); + break; + } case kind::STRING_TO_REGEXP: { vec_or.push_back(r); break; @@ -1801,6 +2046,10 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector &vec_or) { vec_or.push_back(r); break; } + case kind::REGEXP_LOOP: { + vec_or.push_back(r); + break; + } default: { Unreachable(); } @@ -1811,7 +2060,7 @@ void RegExpOpr::disjunctRegExp(Node r, std::vector &vec_or) { std::string RegExpOpr::niceChar(Node r) { if(r.isConst()) { std::string s = r.getConst().toString() ; - return s == "" ? "{E}" : ( s == " " ? "{ }" : s ); + return s == "." ? "\\." : s; } else { std::string ss = "$" + r.toString(); return ss; @@ -1820,16 +2069,16 @@ std::string RegExpOpr::niceChar(Node r) { std::string RegExpOpr::mkString( Node r ) { std::string retStr; if(r.isNull()) { - retStr = "Phi"; + retStr = "\\E"; } else { int k = r.getKind(); switch( k ) { case kind::REGEXP_EMPTY: { - retStr += "Phi"; + retStr += "\\E"; break; } case kind::REGEXP_SIGMA: { - retStr += "{W}"; + retStr += "."; break; } case kind::STRING_TO_REGEXP: { @@ -1846,16 +2095,12 @@ std::string RegExpOpr::mkString( Node r ) { break; } case kind::REGEXP_UNION: { - if(r == d_sigma) { - retStr += "{A}"; - } else { - retStr += "("; - for(unsigned i=0; i().toString(); + retStr += ","; + if(r.getNumChildren() == 3) { + retStr += r[2].getConst().toString(); + } + retStr += "}"; + break; + } case kind::REGEXP_RV: { retStr += "<"; retStr += r[0].getConst().getNumerator().toString(); diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index da84ed04c..012a573c1 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -39,7 +39,7 @@ class RegExpOpr { typedef std::pair< Node, Node > PairNodes; private: - const unsigned char d_lastchar; + unsigned char d_lastchar; Node d_emptyString; Node d_true; Node d_false; diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index a2eb58cdc..3e705d213 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -87,6 +87,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); + d_card_size = 128; //d_opt_regexp_gcd = true; } @@ -255,6 +256,10 @@ Node TheoryStrings::explain( TNode literal ){ void TheoryStrings::presolve() { Debug("strings-presolve") << "TheoryStrings::Presolving : get fmf options " << (options::stringFMF() ? "true" : "false") << std::endl; d_opt_fmf = options::stringFMF(); + + if(!options::stdASCII()) { + d_card_size = 256; + } } @@ -2332,8 +2337,8 @@ bool TheoryStrings::checkLengthsEqc() { } bool TheoryStrings::checkCardinality() { - int cardinality = options::stringCharCardinality(); - Trace("strings-solve-debug2") << "get cardinality: " << cardinality << endl; + //int cardinality = options::stringCharCardinality(); + //Trace("strings-solve-debug2") << "get cardinality: " << cardinality << endl; std::vector< Node > eqcs; getEquivalenceClasses( eqcs ); @@ -2347,9 +2352,9 @@ bool TheoryStrings::checkCardinality() { Trace("strings-card") << "Number of strings with length equal to " << lr << " is " << cols[i].size() << std::endl; if( cols[i].size() > 1 ) { // size > c^k - double k = 1.0 + std::log((double) cols[i].size() - 1) / log((double) cardinality); + double k = 1.0 + std::log((double) cols[i].size() - 1) / log((double) d_card_size); unsigned int int_k = (unsigned int) k; - //double c_k = pow ( (double)cardinality, (double)lr ); + //double c_k = pow ( (double)d_card_size, (double)lr ); bool allDisequal = true; for( std::vector< Node >::iterator itr1 = cols[i].begin(); @@ -2905,7 +2910,7 @@ bool TheoryStrings::checkMemberships() { } //} - Trace("regexp-debug") << "... No Intersec Conflict in Memberships " << std::endl; + Trace("regexp-debug") << "... No Intersec Conflict in Memberships, addedLemma: " << addedLemma << std::endl; if(!addedLemma) { for( unsigned i=0; i nvec; - d_regexp_opr.simplify(atom, nvec, polarity); + + /*if(xr.isConst()) { + Node tmp = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, xr, r) ); + if(tmp==d_true || tmp==d_false) { + if(!polarity) { + tmp = tmp==d_true? d_false : d_true; + } + nvec.push_back( tmp ); + } + }*/ + + if(nvec.empty()) { + d_regexp_opr.simplify(atom, nvec, polarity); + } Node antec = assertion; if(d_regexp_ant.find(assertion) != d_regexp_ant.end()) { antec = d_regexp_ant[assertion]; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 623647942..2003bcd54 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -128,6 +128,7 @@ private: Node d_zero; Node d_one; CVC4::Rational RMAXINT; + unsigned d_card_size; // Options bool d_opt_fmf; bool d_opt_regexp_gcd; diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 71db11fe1..a8cbcfac0 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -43,6 +43,14 @@ void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret ret.push_back( eq ); break; } + case kind::REGEXP_RANGE: { + Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s)); + ret.push_back( eq ); + eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + break; + } case kind::STRING_TO_REGEXP: { Node eq = s.eqNode( r[0] ); ret.push_back( eq ); @@ -95,6 +103,11 @@ void StringsPreprocess::processRegExp( Node s, Node r, std::vector< Node > &ret } break; } + case kind::REGEXP_LOOP: { + Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r ); + ret.push_back( eq ); + break; + } default: { Trace("strings-error") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl; Assert( false, "Unsupported Term" ); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 6960659e8..3396cc1a9 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -737,6 +737,56 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i return false; } } + case kind::REGEXP_LOOP: { + unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); + if(s.size() == index_start) { + return l==0? true : testConstStringInRegExp(s, index_start, r[0]); + } else if(l==0 && r[1]==r[2]) { + return false; + } else { + Assert(r.getNumChildren() == 3, "String rewriter error: LOOP has 2 childrens"); + if(l==0) { + //R{0,u} + unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); + for(unsigned len=s.size() - index_start; len>=1; len--) { + CVC4::String t = s.substr(index_start, len); + if(testConstStringInRegExp(t, 0, r[0])) { + if(len + index_start == s.size()) { + return true; + } else { + Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(u - 1) ); + Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], r[1], num2); + if(testConstStringInRegExp(s, index_start+len, r2)) { + return true; + } + } + } + } + return false; + } else { + //R{l,l} + Assert(r[1]==r[2], "String rewriter error: LOOP nums are not equal"); + if(l>s.size() - index_start) { + if(testConstStringInRegExp(s, s.size(), r[0])) { + l = s.size() - index_start; + } else { + return false; + } + } + for(unsigned len=1; len<=s.size() - index_start; len++) { + CVC4::String t = s.substr(index_start, len); + if(testConstStringInRegExp(t, 0, r[0])) { + Node num2 = NodeManager::currentNM()->mkConst( CVC4::Rational(l - 1) ); + Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], num2, num2); + if(testConstStringInRegExp(s, index_start+len, r2)) { + return true; + } + } + } + return false; + } + } + } default: { Trace("strings-error") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl; Unreachable(); @@ -1129,7 +1179,10 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ), node[0]); } else if(node.getKind() == kind::REGEXP_RANGE) { - std::vector< Node > vec_nodes; + if(node[0] == node[1]) { + retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, node[0] ); + } + /*std::vector< Node > vec_nodes; unsigned char c = node[0].getConst().getFirstChar(); unsigned char end = node[1].getConst().getFirstChar(); for(; c<=end; ++c) { @@ -1140,18 +1193,64 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { retNode = vec_nodes[0]; } else { retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); - } + }*/ } else if(node.getKind() == kind::REGEXP_LOOP) { Node r = node[0]; if(r.getKind() == kind::REGEXP_STAR) { retNode = r; } else { + Node n1 = Rewriter::rewrite( node[1] ); + if(!n1.isConst()) { + throw LogicException("re.loop contains non-constant integer (1)."); + } + CVC4::Rational rz(0); + CVC4::Rational RMAXINT(LONG_MAX); + AlwaysAssert(rz <= n1.getConst(), "Negative integer in string REGEXP_LOOP (1)"); + Assert(n1.getConst() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (1)"); + unsigned l = n1.getConst().getNumerator().toUnsignedInt(); + if(node.getNumChildren() == 3) { + Node n2 = Rewriter::rewrite( node[2] ); + if(!n2.isConst()) { + throw LogicException("re.loop contains non-constant integer (2)."); + } + if(n1 == n2) { + if(l == 0) { + retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, + NodeManager::currentNM()->mkConst(CVC4::String(""))); + } else if(l == 1) { + retNode = node[0]; + } + } else { + AlwaysAssert(rz <= n2.getConst(), "Negative integer in string REGEXP_LOOP (2)"); + Assert(n2.getConst() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)"); + unsigned u = n2.getConst().getNumerator().toUnsignedInt(); + AlwaysAssert(l <= u, "REGEXP_LOOP (1) > REGEXP_LOOP (2)"); + if(l != 0) { + Node zero = NodeManager::currentNM()->mkConst( CVC4::Rational(0) ); + Node num = NodeManager::currentNM()->mkConst( CVC4::Rational(u - l) ); + Node t1 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], n1, n1); + Node t2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], zero, num); + retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, t1, t2); + } + } + } else { + retNode = l==0? NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, node[0]) : + NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, + NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, node[0], n1, n1), + NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, node[0])); + } + } + /*else { TNode n1 = Rewriter::rewrite( node[1] ); + // if(!n1.isConst()) { throw LogicException("re.loop contains non-constant integer (1)."); } + CVC4::Rational rz(0); CVC4::Rational RMAXINT(LONG_MAX); + AlwaysAssert(rz <= n1.getConst(), "Negative integer in string REGEXP_LOOP (1)"); Assert(n1.getConst() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (1)"); + // unsigned l = n1.getConst().getNumerator().toUnsignedInt(); std::vector< Node > vec_nodes; for(unsigned i=0; imkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(CVC4::String(""))) : vec_nodes.size()==1 ? r : prerewriteConcatRegExp(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes)); - Assert(n2.getConst() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)"); + //Assert(n2.getConst() <= RMAXINT, "Exceeded LONG_MAX in string REGEXP_LOOP (2)"); unsigned u = n2.getConst().getNumerator().toUnsignedInt(); if(u <= l) { retNode = n; @@ -1185,7 +1284,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { :NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes), rest) ); } - } + }*/ Trace("strings-lp") << "Strings::lp " << node << " => " << retNode << std::endl; } diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index e053f48da..13f3b3b73 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -15,6 +15,7 @@ **/ #include "cvc4_private.h" +#include "theory/strings/options.h" #ifndef __CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H #define __CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H @@ -368,6 +369,9 @@ public: if(ch[0] > ch[1]) { throw TypeCheckingExceptionPrivate(n, "expecting the first constant is less or equal to the second one in regexp range"); } + if(options::stdASCII() && ch[1] > '\x7f') { + throw TypeCheckingExceptionPrivate(n, "expecting standard ASCII characters in regexp range, or please set the option strings-std-ascii to be false"); + } } return nodeManager->regexpType(); } diff --git a/src/theory/theory_model.cpp b/src/theory/theory_model.cpp index 7d385c398..435abc568 100644 --- a/src/theory/theory_model.cpp +++ b/src/theory/theory_model.cpp @@ -852,11 +852,11 @@ Node TheoryEngineModelBuilder::normalize(TheoryModel* m, TNode r, std::map< Node itMap = constantReps.find(m->d_equalityEngine->getRepresentative(ri)); if (itMap != constantReps.end()) { ri = (*itMap).second; - recurse = false; + recurse = false; } else if (!evalOnly) { - recurse = false; - } + recurse = false; + } } if (recurse) { ri = normalize(m, ri, constantReps, evalOnly); @@ -870,7 +870,7 @@ Node TheoryEngineModelBuilder::normalize(TheoryModel* m, TNode r, std::map< Node retNode = NodeManager::currentNM()->mkNode( r.getKind(), children ); if (childrenConst) { retNode = Rewriter::rewrite(retNode); - Assert(retNode.getKind() == kind::APPLY_UF || retNode.isConst()); + Assert(retNode.getKind()==kind::APPLY_UF || retNode.getKind()==kind::REGEXP_RANGE || retNode.isConst()); } } d_normalizedCache[r] = retNode; diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index 414904919..32876bccd 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -22,7 +22,6 @@ TESTS = \ at001.smt2 \ bug001.smt2 \ bug002.smt2 \ - cardinality.smt2 \ escchar.smt2 \ escchar_25.smt2 \ str001.smt2 \ -- 2.30.2