From 03034910cedc64a1a3a5d83715c79294cb35bee8 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Tue, 25 Mar 2014 01:08:29 -0500 Subject: [PATCH] adds intersection --- src/theory/strings/options | 3 + src/theory/strings/regexp_operation.cpp | 264 ++++++++++++++++++++++-- src/theory/strings/regexp_operation.h | 51 +++-- src/theory/strings/theory_strings.cpp | 82 +++++++- src/theory/strings/theory_strings.h | 12 +- src/util/regexp.cpp | 7 + src/util/regexp.h | 3 +- 7 files changed, 376 insertions(+), 46 deletions(-) diff --git a/src/theory/strings/options b/src/theory/strings/options index fb28f7e32..f6d765fc4 100644 --- a/src/theory/strings/options +++ b/src/theory/strings/options @@ -14,6 +14,9 @@ option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_eq option stringFMF strings-fmf --strings-fmf bool :default false :read-write the finite model finding used by the theory of strings +option stringEIT strings-eit --strings-eit bool :default false + the eager intersection used by the theory of strings + expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h" the cardinality of the characters used by the theory of strings, default 256 diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 9d5c1c7e4..743130727 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -20,9 +20,9 @@ ** - ** \brief Regular Expresion Operations + ** \brief Symbolic Regular Expresion Operations ** - ** Regular Expresion Operations + ** Symbolic Regular Expresion Operations **/ #include "theory/strings/regexp_operation.h" @@ -38,6 +38,7 @@ RegExpOpr::RegExpOpr() { d_false = NodeManager::currentNM()->mkConst( false ); d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); + d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); std::vector< Node > nvec; d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); // All Charactors = all printable ones 32-126 @@ -46,6 +47,7 @@ RegExpOpr::RegExpOpr() { //d_sigma = mkAllExceptOne( '\0' ); d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec ); d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ); + d_card = 256; } int RegExpOpr::gcd ( int a, int b ) { @@ -185,7 +187,7 @@ int RegExpOpr::delta( Node r ) { Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { Assert( c.size() < 2 ); - Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl; + Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl; Node retNode = d_emptyRegexp; PairDvStr dv = std::make_pair( r, c ); if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { @@ -268,7 +270,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { vec_nodes.push_back( dc ); } } - Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; + Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; } retNode = vec_nodes.size() == 0 ? d_emptyRegexp : ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); @@ -326,7 +328,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { } d_dv_cache[dv] = retNode; } - Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << mkString( retNode ) << std::endl; + Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl; return retNode; } @@ -400,16 +402,89 @@ bool RegExpOpr::guessLength( Node r, int &co ) { } } -void RegExpOpr::firstChar( Node r ) { - Trace("strings-regexp-firstchar") << "Head characters: "; - for(char ch = d_char_start; ch <= d_char_end; ++ch) { - CVC4::String c(ch); - Node dc = derivativeSingle(r, ch); - if(!dc.isNull()) { - Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl; +void RegExpOpr::firstChars( Node r, std::set &pcset, SetNodes &pvset ) { + std::map< Node, std::pair< std::set, SetNodes > >::const_iterator itr = d_fset_cache.find(r); + if(itr != d_fset_cache.end()) { + pcset.insert((itr->second).first.begin(), (itr->second).first.end()); + pvset.insert((itr->second).second.begin(), (itr->second).second.end()); + } else { + std::set cset; + SetNodes vset; + int k = r.getKind(); + switch( k ) { + case kind::REGEXP_EMPTY: { + break; + } + case kind::REGEXP_SIGMA: { + for(unsigned i=0; i(); + if(s.size() != 0) { + cset.insert(s[0]); + } + } else if(st.getKind() == kind::VARIABLE) { + vset.insert( st ); + } else { + if(st[0].isConst()) { + CVC4::String s = st[0].getConst< CVC4::String >(); + cset.insert(s[0]); + } else { + vset.insert( st[0] ); + } + } + break; + } + case kind::REGEXP_CONCAT: { + for(unsigned i=0; i, SetNodes > p(cset, vset); + d_fset_cache[r] = p; + + Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { "; + for(std::set::const_iterator itr = cset.begin(); + itr != cset.end(); itr++) { + Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ","; + } + Trace("regexp-fset") << " }" << std::endl; } - Trace("strings-regexp-firstchar") << std::endl; } bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) { @@ -567,7 +642,6 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p); if(itr != d_simpl_neg_cache.end()) { new_nodes.push_back( itr->second ); - return; } else { int k = r.getKind(); Node conc; @@ -701,7 +775,6 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p); if(itr != d_simpl_cache.end()) { new_nodes.push_back( itr->second ); - return; } else { int k = r.getKind(); Node conc; @@ -813,6 +886,159 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes } } +void RegExpOpr::getCharSet( Node r, std::set &pcset, SetNodes &pvset ) { + std::map< Node, std::pair< std::set, SetNodes > >::const_iterator itr = d_cset_cache.find(r); + if(itr != d_cset_cache.end()) { + pcset.insert((itr->second).first.begin(), (itr->second).first.end()); + pvset.insert((itr->second).second.begin(), (itr->second).second.end()); + } else { + std::set cset; + SetNodes vset; + int k = r.getKind(); + switch( k ) { + case kind::REGEXP_EMPTY: { + break; + } + case kind::REGEXP_SIGMA: { + for(unsigned i=0; i(); + s.getCharSet( cset ); + } else if(st.getKind() == kind::VARIABLE) { + vset.insert( st ); + } else { + for(unsigned i=0; i(); + s.getCharSet( cset ); + } else { + vset.insert( st[i] ); + } + } + } + break; + } + case kind::REGEXP_CONCAT: { + for(unsigned i=0; i, SetNodes > p(cset, vset); + d_cset_cache[r] = p; + + Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { "; + for(std::set::const_iterator itr = cset.begin(); + itr != cset.end(); itr++) { + Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ","; + } + Trace("regexp-cset") << " }" << std::endl; + } +} + +Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache ) { + std::pair < Node, Node > p(r1, r2); + std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p); + Node rNode; + if(itr != d_inter_cache.end()) { + //Trace("regexp-intersect") << "INTERSECT Case 0: Cached " << std::endl; + rNode = itr->second; + } else { + if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) { + Trace("regexp-intersect") << "INTERSECT Case 1: one empty RE" << std::endl; + rNode = d_emptyRegexp; + } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) { + Trace("regexp-intersect") << "INTERSECT Case 2: one empty Singleton" << std::endl; + int r = delta(r1 == d_emptySingleton ? r2 : r1); + if(r == 0) { + //TODO: variable + AlwaysAssert( false, "Unsupported Yet, 892 REO" ); + } else if(r == 1) { + rNode = d_emptySingleton; + } else { + rNode = d_emptyRegexp; + } + } else { + Trace("regexp-intersect") << "INTERSECT Case 3: must compare" << std::endl; + std::set< unsigned > cset, cset2; + std::vector< unsigned > cdiff; + std::set< Node > vset; + getCharSet(r1, cset, vset); + getCharSet(r2, cset2, vset); + if(vset.empty()) { + std::set_symmetric_difference(cset.begin(), cset.end(), cset2.begin(), cset2.end(), std::back_inserter(cdiff)); + if(cdiff.empty()) { + Trace("regexp-intersect") << "INTERSECT Case 3.1: compare" << std::endl; + cset.clear(); + firstChars(r1, cset, vset); + std::vector< Node > vec_nodes; + for(std::set::const_iterator itr = cset.begin(); + itr != cset.end(); itr++) { + CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) ); + std::pair< Node, Node > p(r1, r2); + if(cache[ *itr ].find(p) == cache[ *itr ].end()) { + Node r1l = derivativeSingle(r1, c); + Node r2l = derivativeSingle(r2, c); + std::map< unsigned, std::set< PairNodes > > cache2(cache); + PairNodes p(r1l, r2l); + cache2[ *itr ].insert( p ); + Node rt = intersectInternal(r1l, r2l, cache2); + rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, + NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) ); + vec_nodes.push_back(rt); + } + } + rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] : + NodeManager::currentNM()->mkNode(kind::OR, vec_nodes); + rNode = Rewriter::rewrite( rNode ); + } else { + Trace("regexp-intersect") << "INTERSECT Case 3.2: diff cset" << std::endl; + rNode = d_emptyRegexp; + } + } else { + //TODO: non-empty var set + AlwaysAssert( false, "Unsupported Yet, 927 REO" ); + } + } + d_inter_cache[p] = rNode; + } + Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl; + return rNode; +} +Node RegExpOpr::intersect(Node r1, Node r2) { + std::map< unsigned, std::set< PairNodes > > cache; + return intersectInternal(r1, r2, cache); +} //printing std::string RegExpOpr::niceChar( Node r ) { if(r.isConst()) { @@ -842,12 +1068,12 @@ std::string RegExpOpr::mkString( Node r ) { break; } case kind::REGEXP_CONCAT: { - retStr += "("; + //retStr += "("; for(unsigned i=0; i +#include +#include #include "util/hash.h" +#include "util/regexp.h" #include "theory/theory.h" #include "theory/rewriter.h" //#include "context/cdhashmap.h" @@ -31,11 +34,15 @@ namespace strings { class RegExpOpr { typedef std::pair< Node, CVC4::String > PairDvStr; + typedef std::set< Node > SetNodes; + typedef std::pair< Node, Node > PairNodes; private: + unsigned d_card; Node d_emptyString; Node d_true; Node d_false; + Node d_emptySingleton; Node d_emptyRegexp; Node d_zero; Node d_one; @@ -45,12 +52,15 @@ private: Node d_sigma; Node d_sigma_star; - std::map< std::pair< Node, Node >, Node > d_simpl_cache; - std::map< std::pair< Node, Node >, Node > d_simpl_neg_cache; + std::map< PairNodes, Node > d_simpl_cache; + std::map< PairNodes, Node > d_simpl_neg_cache; std::map< Node, Node > d_compl_cache; std::map< Node, int > d_delta_cache; std::map< PairDvStr, Node > d_dv_cache; std::map< Node, bool > d_cstre_cache; + std::map< Node, std::pair< std::set, std::set > > d_cset_cache; + std::map< Node, std::pair< std::set, std::set > > d_fset_cache; + std::map< PairNodes, Node > d_inter_cache; //bool checkStarPlus( Node t ); void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes ); void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes ); @@ -58,6 +68,13 @@ private: int gcd ( int a, int b ); Node mkAllExceptOne( char c ); + void getCharSet( Node r, std::set &pcset, SetNodes &pvset ); + Node intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache ); + void firstChars( Node r, std::set &pcset, SetNodes &pvset ); + + //TODO: for intersection + bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars ); + public: RegExpOpr(); @@ -66,8 +83,8 @@ public: int delta( Node r ); Node derivativeSingle( Node r, CVC4::String c ); bool guessLength( Node r, int &co ); - void firstChar( Node r ); - bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars ); + Node intersect(Node r1, Node r2); + std::string mkString( Node r ); }; diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index c44d2d334..7998669cf 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -55,6 +55,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu d_regexp_memberships(c), d_regexp_ucached(u), d_regexp_ccached(c), + d_str_re_map(c), + d_inter_cache(c), d_regexp_ant(c), d_input_vars(u), d_input_var_lsum(u), @@ -552,7 +554,7 @@ void TheoryStrings::check(Effort e) { atom = polarity ? fact : fact[0]; //must record string in regular expressions if ( atom.getKind() == kind::STRING_IN_REGEXP ) { - d_regexp_memberships.push_back( assertion ); + addMembership(assertion); //d_equalityEngine.assertPredicate(atom, polarity, fact); } else if (atom.getKind() == kind::STRING_STRCTN) { if(polarity) { @@ -2298,7 +2300,43 @@ bool TheoryStrings::checkMemberships() { bool addedLemma = false; std::vector< Node > processed; std::vector< Node > cprocessed; - for( unsigned i=0; isize() > 1) { + Node r = (*lst)[0]; + NodeList::const_iterator itr_lst = lst->begin(); + ++itr_lst; + for(;itr_lst != lst->end(); ++itr_lst) { + Node r2 = *itr_lst; + r = d_regexp_opr.intersect(r, r2); + if(r == d_emptyRegexp) { + std::vector< Node > vec_nodes; + Node x = (*itr_xr).first; + ++itr_lst; + for(NodeList::const_iterator itr2 = lst->begin(); + itr2 != itr_lst; ++itr2) { + Node n = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, *itr2); + vec_nodes.push_back( n ); + } + Node antec = vec_nodes.size() == 1? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes); + Node conc; + sendLemma(antec, conc, "INTERSEC CONFLICT"); + addedLemma = true; + break; + } + if(d_conflict) { + break; + } + } + } + } + } + + if(!addedLemma) { + for( unsigned i=0; igetCMM()) NodeList( true, getSatContext(), false, + ContextMemoryAllocator(getSatContext()->getCMM()) ); + d_str_re_map.insertDataFromContextMemory( x, lst ); + } else { + lst = (*itr_xr).second; + } + //check + for( NodeList::const_iterator itr = lst->begin(); itr != lst->end(); ++itr ) { + if( r == *itr ) { + return; + } + } + lst->push_back( r ); + //TODO: make it smarter + /* + unsigned size = lst->size(); + if(size == 1) { + d_inter_cache[x] = r; + } else { + Node r1 = (*lst)[size - 2]; + Node rr = d_regexp_opr.intersect(r1, r); + d_inter_cache[x] = rr; + }*/ + } + } +} + //// Finite Model Finding Node TheoryStrings::getNextDecisionRequest() { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index e07c61a19..355c536dd 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -118,12 +118,6 @@ private: */ Node d_ufSubstr; - /** - * Function symbol used to implement uninterpreted undefined string - * semantics. Needed to deal with partial str2int function. - */ - Node d_ufS2I; - // Constants Node d_emptyString; Node d_emptyRegexp; @@ -308,12 +302,15 @@ private: NodeSet d_pos_ctn_cached; NodeSet d_neg_ctn_cached; - // Regular Expression + // Symbolic Regular Expression private: // regular expression memberships NodeList d_regexp_memberships; NodeSet d_regexp_ucached; NodeSet d_regexp_ccached; + // intersection + NodeListMap d_str_re_map; + NodeNodeMap d_inter_cache; // antecedant for why regexp membership must be true NodeNodeMap d_regexp_ant; // membership length @@ -324,6 +321,7 @@ private: CVC4::String getHeadConst( Node x ); bool splitRegExp( Node x, Node r, Node ant ); bool addMembershipLength(Node atom); + void addMembership(Node assertion); // Finite Model Finding diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index 684a480fb..fd5dff994 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -23,6 +23,13 @@ using namespace std; namespace CVC4 { +void String::getCharSet(std::set &cset) const { + for(std::vector::const_iterator itr = d_str.begin(); + itr != d_str.end(); itr++) { + cset.insert( *itr ); + } +} + std::string String::toString() const { std::string str; for(unsigned int i=0; i #include #include +#include #include #include "util/exception.h" //#include "util/integer.h" @@ -31,7 +32,6 @@ namespace CVC4 { class CVC4_PUBLIC String { - public: static unsigned int convertCharToUnsignedInt( char c ) { int i = (int)c; @@ -342,6 +342,7 @@ public: return -1; } } + void getCharSet(std::set &cset) const; };/* class String */ namespace strings { -- 2.30.2