\r
**
\r
- ** \brief Regular Expresion Operations\r
+ ** \brief Symbolic Regular Expresion Operations\r
**\r
- ** Regular Expresion Operations\r
+ ** Symbolic Regular Expresion Operations\r
**/\r
\r
#include "theory/strings/regexp_operation.h"\r
d_false = NodeManager::currentNM()->mkConst( false );\r
d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );\r
d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );\r
+ d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );\r
std::vector< Node > nvec;\r
d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );\r
// All Charactors = all printable ones 32-126\r
//d_sigma = mkAllExceptOne( '\0' );\r
d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );\r
d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );\r
+ d_card = 256;\r
}\r
\r
int RegExpOpr::gcd ( int a, int b ) {\r
\r
Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {\r
Assert( c.size() < 2 );\r
- Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;\r
+ Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;\r
Node retNode = d_emptyRegexp;\r
PairDvStr dv = std::make_pair( r, c );\r
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {\r
vec_nodes.push_back( dc );\r
}\r
}\r
- Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;\r
+ Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;\r
}\r
retNode = vec_nodes.size() == 0 ? d_emptyRegexp :\r
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );\r
}\r
d_dv_cache[dv] = retNode;\r
}\r
- Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << mkString( retNode ) << std::endl;\r
+ Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;\r
return retNode;\r
}\r
\r
}\r
}\r
\r
-void RegExpOpr::firstChar( Node r ) {\r
- Trace("strings-regexp-firstchar") << "Head characters: ";\r
- for(char ch = d_char_start; ch <= d_char_end; ++ch) {\r
- CVC4::String c(ch);\r
- Node dc = derivativeSingle(r, ch);\r
- if(!dc.isNull()) {\r
- Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;\r
+void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {\r
+ std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);\r
+ if(itr != d_fset_cache.end()) {\r
+ pcset.insert((itr->second).first.begin(), (itr->second).first.end());\r
+ pvset.insert((itr->second).second.begin(), (itr->second).second.end());\r
+ } else {\r
+ std::set<unsigned> cset;\r
+ SetNodes vset;\r
+ int k = r.getKind();\r
+ switch( k ) {\r
+ case kind::REGEXP_EMPTY: {\r
+ break;\r
+ }\r
+ case kind::REGEXP_SIGMA: {\r
+ for(unsigned i=0; i<d_card; i++) {\r
+ cset.insert(i);\r
+ }\r
+ break;\r
+ }\r
+ case kind::STRING_TO_REGEXP: {\r
+ Node st = Rewriter::rewrite(r[0]);\r
+ if(st.isConst()) {\r
+ CVC4::String s = st.getConst< CVC4::String >();\r
+ if(s.size() != 0) {\r
+ cset.insert(s[0]);\r
+ }\r
+ } else if(st.getKind() == kind::VARIABLE) {\r
+ vset.insert( st );\r
+ } else {\r
+ if(st[0].isConst()) {\r
+ CVC4::String s = st[0].getConst< CVC4::String >();\r
+ cset.insert(s[0]);\r
+ } else {\r
+ vset.insert( st[0] );\r
+ }\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_CONCAT: {\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ firstChars(r[i], cset, vset);\r
+ Node n = r[i];\r
+ int r = delta( n );\r
+ if(r != 1) {\r
+ break;\r
+ }\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_UNION: {\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ firstChars(r[i], cset, vset);\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_INTER: {\r
+ //TODO: Overapproximation for now\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ firstChars(r[i], cset, vset);\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_STAR: {\r
+ firstChars(r[0], cset, vset);\r
+ break;\r
+ }\r
+ default: {\r
+ Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;\r
+ Assert( false, "Unsupported Term" );\r
+ }\r
}\r
+ pcset.insert(cset.begin(), cset.end());\r
+ pvset.insert(vset.begin(), vset.end());\r
+ std::pair< std::set<unsigned>, SetNodes > p(cset, vset);\r
+ d_fset_cache[r] = p;\r
+\r
+ Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";\r
+ for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+ itr != cset.end(); itr++) {\r
+ Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";\r
+ }\r
+ Trace("regexp-fset") << " }" << std::endl;\r
}\r
- Trace("strings-regexp-firstchar") << std::endl;\r
}\r
\r
bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {\r
std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);\r
if(itr != d_simpl_neg_cache.end()) {\r
new_nodes.push_back( itr->second );\r
- return;\r
} else {\r
int k = r.getKind();\r
Node conc;\r
std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);\r
if(itr != d_simpl_cache.end()) {\r
new_nodes.push_back( itr->second );\r
- return;\r
} else {\r
int k = r.getKind();\r
Node conc;\r
}\r
}\r
\r
+void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {\r
+ std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);\r
+ if(itr != d_cset_cache.end()) {\r
+ pcset.insert((itr->second).first.begin(), (itr->second).first.end());\r
+ pvset.insert((itr->second).second.begin(), (itr->second).second.end());\r
+ } else {\r
+ std::set<unsigned> cset;\r
+ SetNodes vset;\r
+ int k = r.getKind();\r
+ switch( k ) {\r
+ case kind::REGEXP_EMPTY: {\r
+ break;\r
+ }\r
+ case kind::REGEXP_SIGMA: {\r
+ for(unsigned i=0; i<d_card; i++) {\r
+ cset.insert(i);\r
+ }\r
+ break;\r
+ }\r
+ case kind::STRING_TO_REGEXP: {\r
+ Node st = Rewriter::rewrite(r[0]);\r
+ if(st.isConst()) {\r
+ CVC4::String s = st.getConst< CVC4::String >();\r
+ s.getCharSet( cset );\r
+ } else if(st.getKind() == kind::VARIABLE) {\r
+ vset.insert( st );\r
+ } else {\r
+ for(unsigned i=0; i<st.getNumChildren(); i++) {\r
+ if(st[i].isConst()) {\r
+ CVC4::String s = st[i].getConst< CVC4::String >();\r
+ s.getCharSet( cset );\r
+ } else {\r
+ vset.insert( st[i] );\r
+ }\r
+ }\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_CONCAT: {\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ getCharSet(r[i], cset, vset);\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_UNION: {\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ getCharSet(r[i], cset, vset);\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_INTER: {\r
+ //TODO: Overapproximation for now\r
+ for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+ getCharSet(r[i], cset, vset);\r
+ }\r
+ break;\r
+ }\r
+ case kind::REGEXP_STAR: {\r
+ getCharSet(r[0], cset, vset);\r
+ break;\r
+ }\r
+ default: {\r
+ Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;\r
+ Assert( false, "Unsupported Term" );\r
+ }\r
+ }\r
+ pcset.insert(cset.begin(), cset.end());\r
+ pvset.insert(vset.begin(), vset.end());\r
+ std::pair< std::set<unsigned>, SetNodes > p(cset, vset);\r
+ d_cset_cache[r] = p;\r
+\r
+ Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";\r
+ for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+ itr != cset.end(); itr++) {\r
+ Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";\r
+ }\r
+ Trace("regexp-cset") << " }" << std::endl;\r
+ }\r
+}\r
+\r
+Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache ) {\r
+ std::pair < Node, Node > p(r1, r2);\r
+ std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);\r
+ Node rNode;\r
+ if(itr != d_inter_cache.end()) {\r
+ //Trace("regexp-intersect") << "INTERSECT Case 0: Cached " << std::endl;\r
+ rNode = itr->second;\r
+ } else {\r
+ if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {\r
+ Trace("regexp-intersect") << "INTERSECT Case 1: one empty RE" << std::endl;\r
+ rNode = d_emptyRegexp;\r
+ } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {\r
+ Trace("regexp-intersect") << "INTERSECT Case 2: one empty Singleton" << std::endl;\r
+ int r = delta(r1 == d_emptySingleton ? r2 : r1);\r
+ if(r == 0) {\r
+ //TODO: variable\r
+ AlwaysAssert( false, "Unsupported Yet, 892 REO" );\r
+ } else if(r == 1) {\r
+ rNode = d_emptySingleton;\r
+ } else {\r
+ rNode = d_emptyRegexp;\r
+ }\r
+ } else {\r
+ Trace("regexp-intersect") << "INTERSECT Case 3: must compare" << std::endl;\r
+ std::set< unsigned > cset, cset2;\r
+ std::vector< unsigned > cdiff;\r
+ std::set< Node > vset;\r
+ getCharSet(r1, cset, vset);\r
+ getCharSet(r2, cset2, vset);\r
+ if(vset.empty()) {\r
+ std::set_symmetric_difference(cset.begin(), cset.end(), cset2.begin(), cset2.end(), std::back_inserter(cdiff));\r
+ if(cdiff.empty()) {\r
+ Trace("regexp-intersect") << "INTERSECT Case 3.1: compare" << std::endl;\r
+ cset.clear();\r
+ firstChars(r1, cset, vset);\r
+ std::vector< Node > vec_nodes;\r
+ for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+ itr != cset.end(); itr++) {\r
+ CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );\r
+ std::pair< Node, Node > p(r1, r2);\r
+ if(cache[ *itr ].find(p) == cache[ *itr ].end()) {\r
+ Node r1l = derivativeSingle(r1, c);\r
+ Node r2l = derivativeSingle(r2, c);\r
+ std::map< unsigned, std::set< PairNodes > > cache2(cache);\r
+ PairNodes p(r1l, r2l);\r
+ cache2[ *itr ].insert( p );\r
+ Node rt = intersectInternal(r1l, r2l, cache2);\r
+ rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, \r
+ NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );\r
+ vec_nodes.push_back(rt);\r
+ }\r
+ }\r
+ rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :\r
+ NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);\r
+ rNode = Rewriter::rewrite( rNode );\r
+ } else {\r
+ Trace("regexp-intersect") << "INTERSECT Case 3.2: diff cset" << std::endl;\r
+ rNode = d_emptyRegexp;\r
+ }\r
+ } else {\r
+ //TODO: non-empty var set\r
+ AlwaysAssert( false, "Unsupported Yet, 927 REO" );\r
+ }\r
+ }\r
+ d_inter_cache[p] = rNode;\r
+ }\r
+ Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;\r
+ return rNode;\r
+}\r
+Node RegExpOpr::intersect(Node r1, Node r2) {\r
+ std::map< unsigned, std::set< PairNodes > > cache;\r
+ return intersectInternal(r1, r2, cache);\r
+}\r
//printing\r
std::string RegExpOpr::niceChar( Node r ) {\r
if(r.isConst()) {\r
break;\r
}\r
case kind::REGEXP_CONCAT: {\r
- retStr += "(";\r
+ //retStr += "(";\r
for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
- if(i != 0) retStr += ".";\r
+ //if(i != 0) retStr += ".";\r
retStr += mkString( r[i] );\r
}\r
- retStr += ")";\r
+ //retStr += ")";\r
break;\r
}\r
case kind::REGEXP_UNION: {\r
}\r
default:\r
Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;\r
- //AlwaysAssert( false );\r
+ //Assert( false );\r
//return Node::null();\r
}\r
}\r
-/********************* */
-/*! \file regexp_operation.h
- ** \verbatim
- ** Original author: Tianyi Liang
- ** Major contributors: none
- ** Minor contributors (to current version): none
- ** This file is part of the CVC4 project.
- ** Copyright (c) 2009-2013 New York University and The University of Iowa
- ** See the file COPYING in the top-level source directory for licensing
- ** information.\endverbatim
- **
- ** \brief Regular Expression Operations\r
+/********************* */\r
+/*! \file regexp_operation.h\r
+ ** \verbatim\r
+ ** Original author: Tianyi Liang\r
+ ** Major contributors: none\r
+ ** Minor contributors (to current version): none\r
+ ** This file is part of the CVC4 project.\r
+ ** Copyright (c) 2009-2013 New York University and The University of Iowa\r
+ ** See the file COPYING in the top-level source directory for licensing\r
+ ** information.\endverbatim\r
**\r
- ** Regular Expression Operations\r
+ ** \brief Symbolic Regular Expresion Operations\r
+ **\r
+ ** Symbolic Regular Expression Operations\r
**/\r
\r
#include "cvc4_private.h"\r
#define __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H\r
\r
#include <vector>\r
+#include <set>\r
+#include <algorithm>\r
#include "util/hash.h"\r
+#include "util/regexp.h"\r
#include "theory/theory.h"\r
#include "theory/rewriter.h"\r
//#include "context/cdhashmap.h"\r
\r
class RegExpOpr {\r
typedef std::pair< Node, CVC4::String > PairDvStr;\r
+ typedef std::set< Node > SetNodes;\r
+ typedef std::pair< Node, Node > PairNodes;\r
\r
private:\r
+ unsigned d_card;\r
Node d_emptyString;\r
Node d_true;\r
Node d_false;\r
+ Node d_emptySingleton;\r
Node d_emptyRegexp;\r
Node d_zero;\r
Node d_one;\r
Node d_sigma;\r
Node d_sigma_star;\r
\r
- std::map< std::pair< Node, Node >, Node > d_simpl_cache;\r
- std::map< std::pair< Node, Node >, Node > d_simpl_neg_cache;\r
+ std::map< PairNodes, Node > d_simpl_cache;\r
+ std::map< PairNodes, Node > d_simpl_neg_cache;\r
std::map< Node, Node > d_compl_cache;\r
std::map< Node, int > d_delta_cache;\r
std::map< PairDvStr, Node > d_dv_cache;\r
std::map< Node, bool > d_cstre_cache;\r
+ std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_cset_cache;\r
+ std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_fset_cache;\r
+ std::map< PairNodes, Node > d_inter_cache;\r
//bool checkStarPlus( Node t );\r
void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes );\r
void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes );\r
int gcd ( int a, int b );\r
Node mkAllExceptOne( char c );\r
\r
+ void getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset );\r
+ Node intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache );\r
+ void firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset );\r
+\r
+ //TODO: for intersection\r
+ bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );\r
+\r
public:\r
RegExpOpr();\r
\r
int delta( Node r );\r
Node derivativeSingle( Node r, CVC4::String c );\r
bool guessLength( Node r, int &co );\r
- void firstChar( Node r );\r
- bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );\r
+ Node intersect(Node r1, Node r2);\r
+\r
std::string mkString( Node r );\r
};\r
\r
d_regexp_memberships(c),
d_regexp_ucached(u),
d_regexp_ccached(c),
+ d_str_re_map(c),
+ d_inter_cache(c),
d_regexp_ant(c),
d_input_vars(u),
d_input_var_lsum(u),
atom = polarity ? fact : fact[0];
//must record string in regular expressions
if ( atom.getKind() == kind::STRING_IN_REGEXP ) {
- d_regexp_memberships.push_back( assertion );
+ addMembership(assertion);
//d_equalityEngine.assertPredicate(atom, polarity, fact);
} else if (atom.getKind() == kind::STRING_STRCTN) {
if(polarity) {
bool addedLemma = false;
std::vector< Node > processed;
std::vector< Node > cprocessed;
- for( unsigned i=0; i<d_regexp_memberships.size(); i++ ){
+
+ if(options::stringEIT()) {
+ for(NodeListMap::const_iterator itr_xr = d_str_re_map.begin();
+ itr_xr != d_str_re_map.end(); ++itr_xr ) {
+ NodeList* lst = (*itr_xr).second;
+ if(lst->size() > 1) {
+ Node r = (*lst)[0];
+ NodeList::const_iterator itr_lst = lst->begin();
+ ++itr_lst;
+ for(;itr_lst != lst->end(); ++itr_lst) {
+ Node r2 = *itr_lst;
+ r = d_regexp_opr.intersect(r, r2);
+ if(r == d_emptyRegexp) {
+ std::vector< Node > vec_nodes;
+ Node x = (*itr_xr).first;
+ ++itr_lst;
+ for(NodeList::const_iterator itr2 = lst->begin();
+ itr2 != itr_lst; ++itr2) {
+ Node n = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, *itr2);
+ vec_nodes.push_back( n );
+ }
+ Node antec = vec_nodes.size() == 1? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
+ Node conc;
+ sendLemma(antec, conc, "INTERSEC CONFLICT");
+ addedLemma = true;
+ break;
+ }
+ if(d_conflict) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if(!addedLemma) {
+ for( unsigned i=0; i<d_regexp_memberships.size(); i++ ) {
//check regular expression membership
Node assertion = d_regexp_memberships[i];
if( d_regexp_ucached.find(assertion) == d_regexp_ucached.end()
if(d_conflict) {
break;
}
+ }
}
if( addedLemma ){
if( !d_conflict ){
}
}
+void TheoryStrings::addMembership(Node assertion) {
+ d_regexp_memberships.push_back( assertion );
+
+ if(options::stringEIT()) {
+ bool polarity = assertion.getKind() != kind::NOT;
+ if(polarity) {
+ TNode atom = polarity ? assertion : assertion[0];
+ Node x = atom[0];
+ Node r = atom[1];
+ NodeList* lst;
+ NodeListMap::iterator itr_xr = d_str_re_map.find( x );
+ if( itr_xr == d_str_re_map.end() ){
+ lst = new(getSatContext()->getCMM()) NodeList( true, getSatContext(), false,
+ ContextMemoryAllocator<TNode>(getSatContext()->getCMM()) );
+ d_str_re_map.insertDataFromContextMemory( x, lst );
+ } else {
+ lst = (*itr_xr).second;
+ }
+ //check
+ for( NodeList::const_iterator itr = lst->begin(); itr != lst->end(); ++itr ) {
+ if( r == *itr ) {
+ return;
+ }
+ }
+ lst->push_back( r );
+ //TODO: make it smarter
+ /*
+ unsigned size = lst->size();
+ if(size == 1) {
+ d_inter_cache[x] = r;
+ } else {
+ Node r1 = (*lst)[size - 2];
+ Node rr = d_regexp_opr.intersect(r1, r);
+ d_inter_cache[x] = rr;
+ }*/
+ }
+ }
+}
+
//// Finite Model Finding
Node TheoryStrings::getNextDecisionRequest() {