adds intersection
authorTianyi Liang <tianyi-liang@uiowa.edu>
Tue, 25 Mar 2014 06:08:29 +0000 (01:08 -0500)
committerTianyi Liang <tianyi-liang@uiowa.edu>
Thu, 27 Mar 2014 21:56:14 +0000 (16:56 -0500)
src/theory/strings/options
src/theory/strings/regexp_operation.cpp
src/theory/strings/regexp_operation.h
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings.h
src/util/regexp.cpp
src/util/regexp.h

index fb28f7e32fa761a116bdfef39cfe7c1efaf5cec6..f6d765fc4837e29f96aaf95725d4fc21fbde71bb 100644 (file)
@@ -14,6 +14,9 @@ option stringLB strings-lb --strings-lb=N unsigned :default 0 :predicate less_eq
 option stringFMF strings-fmf --strings-fmf bool :default false :read-write
  the finite model finding used by the theory of strings
 
+option stringEIT strings-eit --strings-eit bool :default false
+ the eager intersection used by the theory of strings
+
 expert-option stringCharCardinality strings-alphabet-card --strings-alphabet-card=N int16_t :default 256 :predicate CVC4::smt::beforeSearch :predicate-include "smt/smt_engine.h"
  the cardinality of the characters used by the theory of strings, default 256
 
index 9d5c1c7e4eb78fb7772ec083f7a0b0f809b4585f..743130727f13fc091a76361adba74c26deb61359 100644 (file)
@@ -20,9 +20,9 @@
  \r
  **
  \r
- ** \brief Regular Expresion Operations\r
+ ** \brief Symbolic Regular Expresion Operations\r
  **\r
- ** Regular Expresion Operations\r
+ ** Symbolic Regular Expresion Operations\r
  **/\r
 \r
 #include "theory/strings/regexp_operation.h"\r
@@ -38,6 +38,7 @@ RegExpOpr::RegExpOpr() {
     d_false = NodeManager::currentNM()->mkConst( false );\r
        d_zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) );\r
        d_one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );\r
+       d_emptySingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );\r
        std::vector< Node > nvec;\r
     d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec );\r
        // All Charactors = all printable ones 32-126\r
@@ -46,6 +47,7 @@ RegExpOpr::RegExpOpr() {
        //d_sigma = mkAllExceptOne( '\0' );\r
        d_sigma = NodeManager::currentNM()->mkNode( kind::REGEXP_SIGMA, nvec );\r
        d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );\r
+       d_card = 256;\r
 }\r
 \r
 int RegExpOpr::gcd ( int a, int b ) {\r
@@ -185,7 +187,7 @@ int RegExpOpr::delta( Node r ) {
 \r
 Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {\r
        Assert( c.size() < 2 );\r
-       Trace("strings-regexp-derivative") << "RegExp-derivative starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;\r
+       Trace("regexp-deriv") << "RegExp-deriv starts with R{ " << mkString( r ) << " }, c=" << c << std::endl;\r
        Node retNode = d_emptyRegexp;\r
        PairDvStr dv = std::make_pair( r, c );\r
        if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {\r
@@ -268,7 +270,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
                                                        vec_nodes.push_back( dc );\r
                                                }\r
                                        }\r
-                                       Trace("strings-regexp-derivative") << "RegExp-derivative OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;\r
+                                       Trace("regexp-deriv") << "RegExp-deriv OR R[" << i << "]{ " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;\r
                                }\r
                                retNode = vec_nodes.size() == 0 ? d_emptyRegexp :\r
                                                        ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );\r
@@ -326,7 +328,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
                }\r
                d_dv_cache[dv] = retNode;\r
        }\r
-       Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << mkString( retNode ) << std::endl;\r
+       Trace("regexp-deriv") << "RegExp-deriv returns : " << mkString( retNode ) << std::endl;\r
        return retNode;\r
 }\r
 \r
@@ -400,16 +402,89 @@ bool RegExpOpr::guessLength( Node r, int &co ) {
        }\r
 }\r
 \r
-void RegExpOpr::firstChar( Node r ) {\r
-       Trace("strings-regexp-firstchar") << "Head characters: ";\r
-       for(char ch = d_char_start; ch <= d_char_end; ++ch) {\r
-               CVC4::String c(ch);\r
-               Node dc = derivativeSingle(r, ch);\r
-               if(!dc.isNull()) {\r
-                       Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;\r
+void RegExpOpr::firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {\r
+       std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_fset_cache.find(r);\r
+       if(itr != d_fset_cache.end()) {\r
+               pcset.insert((itr->second).first.begin(), (itr->second).first.end());\r
+               pvset.insert((itr->second).second.begin(), (itr->second).second.end());\r
+       } else {\r
+               std::set<unsigned> cset;\r
+               SetNodes vset;\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::REGEXP_EMPTY: {\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_SIGMA: {\r
+                               for(unsigned i=0; i<d_card; i++) {\r
+                                       cset.insert(i);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::STRING_TO_REGEXP: {\r
+                               Node st = Rewriter::rewrite(r[0]);\r
+                               if(st.isConst()) {\r
+                                       CVC4::String s = st.getConst< CVC4::String >();\r
+                                       if(s.size() != 0) {\r
+                                               cset.insert(s[0]);\r
+                                       }\r
+                               } else if(st.getKind() == kind::VARIABLE) {\r
+                                       vset.insert( st );\r
+                               } else {\r
+                                       if(st[0].isConst()) {\r
+                                               CVC4::String s = st[0].getConst< CVC4::String >();\r
+                                               cset.insert(s[0]);\r
+                                       } else {\r
+                                               vset.insert( st[0] );\r
+                                       }\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_CONCAT: {\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       firstChars(r[i], cset, vset);\r
+                                       Node n = r[i];\r
+                                       int r = delta( n );\r
+                                       if(r != 1) {\r
+                                               break;\r
+                                       }\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_UNION: {\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       firstChars(r[i], cset, vset);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_INTER: {\r
+                               //TODO: Overapproximation for now\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       firstChars(r[i], cset, vset);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_STAR: {\r
+                               firstChars(r[0], cset, vset);\r
+                               break;\r
+                       }\r
+                       default: {\r
+                               Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;\r
+                               Assert( false, "Unsupported Term" );\r
+                       }\r
                }\r
+               pcset.insert(cset.begin(), cset.end());\r
+               pvset.insert(vset.begin(), vset.end());\r
+               std::pair< std::set<unsigned>, SetNodes > p(cset, vset);\r
+               d_fset_cache[r] = p;\r
+\r
+               Trace("regexp-fset") << "FSET( " << mkString(r) << " ) = { ";\r
+               for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+                       itr != cset.end(); itr++) {\r
+                               Trace("regexp-fset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";\r
+                       }\r
+               Trace("regexp-fset") << " }" << std::endl;\r
        }\r
-       Trace("strings-regexp-firstchar") << std::endl;\r
 }\r
 \r
 bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {\r
@@ -567,7 +642,6 @@ void RegExpOpr::simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes
        std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_neg_cache.find(p);\r
        if(itr != d_simpl_neg_cache.end()) {\r
                new_nodes.push_back( itr->second );\r
-               return;\r
        } else {\r
                int k = r.getKind();\r
                Node conc;\r
@@ -701,7 +775,6 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
        std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);\r
        if(itr != d_simpl_cache.end()) {\r
                new_nodes.push_back( itr->second );\r
-               return;\r
        } else {\r
                int k = r.getKind();\r
                Node conc;\r
@@ -813,6 +886,159 @@ void RegExpOpr::simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes
        }\r
 }\r
 \r
+void RegExpOpr::getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset ) {\r
+       std::map< Node, std::pair< std::set<unsigned>, SetNodes > >::const_iterator itr = d_cset_cache.find(r);\r
+       if(itr != d_cset_cache.end()) {\r
+               pcset.insert((itr->second).first.begin(), (itr->second).first.end());\r
+               pvset.insert((itr->second).second.begin(), (itr->second).second.end());\r
+       } else {\r
+               std::set<unsigned> cset;\r
+               SetNodes vset;\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::REGEXP_EMPTY: {\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_SIGMA: {\r
+                               for(unsigned i=0; i<d_card; i++) {\r
+                                       cset.insert(i);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::STRING_TO_REGEXP: {\r
+                               Node st = Rewriter::rewrite(r[0]);\r
+                               if(st.isConst()) {\r
+                                       CVC4::String s = st.getConst< CVC4::String >();\r
+                                       s.getCharSet( cset );\r
+                               } else if(st.getKind() == kind::VARIABLE) {\r
+                                       vset.insert( st );\r
+                               } else {\r
+                                       for(unsigned i=0; i<st.getNumChildren(); i++) {\r
+                                               if(st[i].isConst()) {\r
+                                                       CVC4::String s = st[i].getConst< CVC4::String >();\r
+                                                       s.getCharSet( cset );\r
+                                               } else {\r
+                                                       vset.insert( st[i] );\r
+                                               }\r
+                                       }\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_CONCAT: {\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       getCharSet(r[i], cset, vset);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_UNION: {\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       getCharSet(r[i], cset, vset);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_INTER: {\r
+                               //TODO: Overapproximation for now\r
+                               for(unsigned i=0; i<r.getNumChildren(); i++) {\r
+                                       getCharSet(r[i], cset, vset);\r
+                               }\r
+                               break;\r
+                       }\r
+                       case kind::REGEXP_STAR: {\r
+                               getCharSet(r[0], cset, vset);\r
+                               break;\r
+                       }\r
+                       default: {\r
+                               Trace("strings-regexp") << "Unsupported term: " << r << " in getCharSet." << std::endl;\r
+                               Assert( false, "Unsupported Term" );\r
+                       }\r
+               }\r
+               pcset.insert(cset.begin(), cset.end());\r
+               pvset.insert(vset.begin(), vset.end());\r
+               std::pair< std::set<unsigned>, SetNodes > p(cset, vset);\r
+               d_cset_cache[r] = p;\r
+\r
+               Trace("regexp-cset") << "CSET( " << mkString(r) << " ) = { ";\r
+               for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+                       itr != cset.end(); itr++) {\r
+                               Trace("regexp-cset") << CVC4::String::convertUnsignedIntToChar(*itr) << ",";\r
+                       }\r
+               Trace("regexp-cset") << " }" << std::endl;\r
+       }\r
+}\r
+\r
+Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache ) {\r
+       std::pair < Node, Node > p(r1, r2);\r
+       std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_inter_cache.find(p);\r
+       Node rNode;\r
+       if(itr != d_inter_cache.end()) {\r
+               //Trace("regexp-intersect") << "INTERSECT Case 0: Cached " << std::endl;\r
+               rNode = itr->second;\r
+       } else {\r
+               if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {\r
+               Trace("regexp-intersect") << "INTERSECT Case 1: one empty RE" << std::endl;\r
+                       rNode = d_emptyRegexp;\r
+               } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {\r
+                       Trace("regexp-intersect") << "INTERSECT Case 2: one empty Singleton" << std::endl;\r
+                       int r = delta(r1 == d_emptySingleton ? r2 : r1);\r
+                       if(r == 0) {\r
+                               //TODO: variable\r
+                               AlwaysAssert( false, "Unsupported Yet, 892 REO" );\r
+                       } else if(r == 1) {\r
+                               rNode = d_emptySingleton;\r
+                       } else {\r
+                               rNode = d_emptyRegexp;\r
+                       }\r
+               } else {\r
+                       Trace("regexp-intersect") << "INTERSECT Case 3: must compare" << std::endl;\r
+                       std::set< unsigned > cset, cset2;\r
+                       std::vector< unsigned > cdiff;\r
+                       std::set< Node > vset;\r
+                       getCharSet(r1, cset, vset);\r
+                       getCharSet(r2, cset2, vset);\r
+                       if(vset.empty()) {\r
+                               std::set_symmetric_difference(cset.begin(), cset.end(), cset2.begin(), cset2.end(), std::back_inserter(cdiff));\r
+                               if(cdiff.empty()) {\r
+                                       Trace("regexp-intersect") << "INTERSECT Case 3.1: compare" << std::endl;\r
+                                       cset.clear();\r
+                                       firstChars(r1, cset, vset);\r
+                                       std::vector< Node > vec_nodes;\r
+                                       for(std::set<unsigned>::const_iterator itr = cset.begin();\r
+                                               itr != cset.end(); itr++) {\r
+                                               CVC4::String c( CVC4::String::convertUnsignedIntToChar(*itr) );\r
+                                               std::pair< Node, Node > p(r1, r2);\r
+                                               if(cache[ *itr ].find(p) == cache[ *itr ].end()) {\r
+                                                       Node r1l = derivativeSingle(r1, c);\r
+                                                       Node r2l = derivativeSingle(r2, c);\r
+                                                       std::map< unsigned, std::set< PairNodes > > cache2(cache);\r
+                                                       PairNodes p(r1l, r2l);\r
+                                                       cache2[ *itr ].insert( p );\r
+                                                       Node rt = intersectInternal(r1l, r2l, cache2);\r
+                                                       rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, \r
+                                                               NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );\r
+                                                       vec_nodes.push_back(rt);\r
+                                               }\r
+                                       }\r
+                                       rNode = vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :\r
+                                                       NodeManager::currentNM()->mkNode(kind::OR, vec_nodes);\r
+                                       rNode = Rewriter::rewrite( rNode );\r
+                               } else {\r
+                                       Trace("regexp-intersect") << "INTERSECT Case 3.2: diff cset" << std::endl;\r
+                                       rNode = d_emptyRegexp;\r
+                               }\r
+                       } else {\r
+                               //TODO: non-empty var set\r
+                               AlwaysAssert( false, "Unsupported Yet, 927 REO" );\r
+                       }\r
+               }\r
+               d_inter_cache[p] = rNode;\r
+       }\r
+       Trace("regexp-intersect") << "INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;\r
+       return rNode;\r
+}\r
+Node RegExpOpr::intersect(Node r1, Node r2) {\r
+       std::map< unsigned, std::set< PairNodes > > cache;\r
+       return intersectInternal(r1, r2, cache);\r
+}\r
 //printing\r
 std::string RegExpOpr::niceChar( Node r ) {\r
        if(r.isConst()) {\r
@@ -842,12 +1068,12 @@ std::string RegExpOpr::mkString( Node r ) {
                                break;\r
                        }\r
                        case kind::REGEXP_CONCAT: {\r
-                               retStr += "(";\r
+                               //retStr += "(";\r
                                for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
-                                       if(i != 0) retStr += ".";\r
+                                       //if(i != 0) retStr += ".";\r
                                        retStr += mkString( r[i] );\r
                                }\r
-                               retStr += ")";\r
+                               //retStr += ")";\r
                                break;\r
                        }\r
                        case kind::REGEXP_UNION: {\r
@@ -897,7 +1123,7 @@ std::string RegExpOpr::mkString( Node r ) {
                        }\r
                        default:\r
                                Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;\r
-                               //AlwaysAssert( false );\r
+                               //Assert( false );\r
                                //return Node::null();\r
                }\r
        }\r
index 7b41c176419ff5df8f3bb4c8ee47360db75dd028..9bd694f5c0863b36121f8db9d57a20d526656f71 100644 (file)
@@ -1,17 +1,17 @@
-/*********************                                                        */
-/*! \file regexp_operation.h
- ** \verbatim
- ** Original author: Tianyi Liang
- ** Major contributors: none
- ** Minor contributors (to current version): none
- ** This file is part of the CVC4 project.
- ** Copyright (c) 2009-2013  New York University and The University of Iowa
- ** See the file COPYING in the top-level source directory for licensing
- ** information.\endverbatim
- **
- ** \brief Regular Expression Operations\r
+/*********************                                                        */\r
+/*! \file regexp_operation.h\r
+ ** \verbatim\r
+ ** Original author: Tianyi Liang\r
+ ** Major contributors: none\r
+ ** Minor contributors (to current version): none\r
+ ** This file is part of the CVC4 project.\r
+ ** Copyright (c) 2009-2013  New York University and The University of Iowa\r
+ ** See the file COPYING in the top-level source directory for licensing\r
+ ** information.\endverbatim\r
  **\r
- ** Regular Expression Operations\r
+ ** \brief Symbolic Regular Expresion Operations\r
+ **\r
+ ** Symbolic Regular Expression Operations\r
  **/\r
 \r
 #include "cvc4_private.h"\r
 #define __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H\r
 \r
 #include <vector>\r
+#include <set>\r
+#include <algorithm>\r
 #include "util/hash.h"\r
+#include "util/regexp.h"\r
 #include "theory/theory.h"\r
 #include "theory/rewriter.h"\r
 //#include "context/cdhashmap.h"\r
@@ -31,11 +34,15 @@ namespace strings {
 \r
 class RegExpOpr {\r
        typedef std::pair< Node, CVC4::String > PairDvStr;\r
+       typedef std::set< Node > SetNodes;\r
+       typedef std::pair< Node, Node > PairNodes;\r
 \r
 private:\r
+       unsigned d_card;\r
     Node d_emptyString;\r
     Node d_true;\r
     Node d_false;\r
+       Node d_emptySingleton;\r
        Node d_emptyRegexp;\r
        Node d_zero;\r
        Node d_one;\r
@@ -45,12 +52,15 @@ private:
        Node d_sigma;\r
        Node d_sigma_star;\r
        \r
-       std::map< std::pair< Node, Node >, Node > d_simpl_cache;\r
-       std::map< std::pair< Node, Node >, Node > d_simpl_neg_cache;\r
+       std::map< PairNodes, Node > d_simpl_cache;\r
+       std::map< PairNodes, Node > d_simpl_neg_cache;\r
        std::map< Node, Node > d_compl_cache;\r
        std::map< Node, int > d_delta_cache;\r
        std::map< PairDvStr, Node > d_dv_cache;\r
        std::map< Node, bool > d_cstre_cache;\r
+       std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_cset_cache;\r
+       std::map< Node, std::pair< std::set<unsigned>, std::set<Node> > > d_fset_cache;\r
+       std::map< PairNodes, Node > d_inter_cache;\r
        //bool checkStarPlus( Node t );\r
        void simplifyPRegExp( Node s, Node r, std::vector< Node > &new_nodes );\r
        void simplifyNRegExp( Node s, Node r, std::vector< Node > &new_nodes );\r
@@ -58,6 +68,13 @@ private:
        int gcd ( int a, int b );\r
        Node mkAllExceptOne( char c );\r
 \r
+       void getCharSet( Node r, std::set<unsigned> &pcset, SetNodes &pvset );\r
+       Node intersectInternal( Node r1, Node r2, std::map< unsigned, std::set< PairNodes > > cache );\r
+       void firstChars( Node r, std::set<unsigned> &pcset, SetNodes &pvset );\r
+\r
+       //TODO: for intersection\r
+       bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );\r
+\r
 public:\r
        RegExpOpr();\r
 \r
@@ -66,8 +83,8 @@ public:
        int delta( Node r );\r
        Node derivativeSingle( Node r, CVC4::String c );\r
        bool guessLength( Node r, int &co );\r
-       void firstChar( Node r );\r
-       bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );\r
+       Node intersect(Node r1, Node r2);\r
+\r
        std::string mkString( Node r );\r
 };\r
 \r
index c44d2d33431ef425b0c081e8a8a178da597a7dbb..7998669cf719ca2c6fc9162301e38fa38cdc7f83 100644 (file)
@@ -55,6 +55,8 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
        d_regexp_memberships(c),
        d_regexp_ucached(u),
        d_regexp_ccached(c),
+       d_str_re_map(c),
+       d_inter_cache(c),
        d_regexp_ant(c),
        d_input_vars(u),
        d_input_var_lsum(u),
@@ -552,7 +554,7 @@ void TheoryStrings::check(Effort e) {
     atom = polarity ? fact : fact[0];
        //must record string in regular expressions
        if ( atom.getKind() == kind::STRING_IN_REGEXP ) {
-               d_regexp_memberships.push_back( assertion );
+               addMembership(assertion);
                //d_equalityEngine.assertPredicate(atom, polarity, fact);
        } else if (atom.getKind() == kind::STRING_STRCTN) {
                if(polarity) {
@@ -2298,7 +2300,43 @@ bool TheoryStrings::checkMemberships() {
        bool addedLemma = false;
        std::vector< Node > processed;
        std::vector< Node > cprocessed;
-       for( unsigned i=0; i<d_regexp_memberships.size(); i++ ){
+
+       if(options::stringEIT()) {
+               for(NodeListMap::const_iterator itr_xr = d_str_re_map.begin();
+                       itr_xr != d_str_re_map.end(); ++itr_xr ) {
+                       NodeList* lst = (*itr_xr).second;
+                       if(lst->size() > 1) {
+                               Node r = (*lst)[0];
+                               NodeList::const_iterator itr_lst = lst->begin();
+                               ++itr_lst;
+                               for(;itr_lst != lst->end(); ++itr_lst) {
+                                       Node r2 = *itr_lst;
+                                       r = d_regexp_opr.intersect(r, r2);
+                                       if(r == d_emptyRegexp) {
+                                               std::vector< Node > vec_nodes;
+                                               Node x = (*itr_xr).first;
+                                               ++itr_lst;
+                                               for(NodeList::const_iterator itr2 = lst->begin();
+                                                       itr2 != itr_lst; ++itr2) {
+                                                       Node n = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, *itr2);
+                                                       vec_nodes.push_back( n );
+                                               }
+                                               Node antec = vec_nodes.size() == 1? vec_nodes[0] : NodeManager::currentNM()->mkNode(kind::AND, vec_nodes);
+                                               Node conc;
+                                               sendLemma(antec, conc, "INTERSEC CONFLICT");
+                                               addedLemma = true;
+                                               break;
+                                       }
+                                       if(d_conflict) {
+                                               break;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       if(!addedLemma) {
+         for( unsigned i=0; i<d_regexp_memberships.size(); i++ ) {
                //check regular expression membership
                Node assertion = d_regexp_memberships[i];
                if( d_regexp_ucached.find(assertion) == d_regexp_ucached.end() 
@@ -2449,6 +2487,7 @@ bool TheoryStrings::checkMemberships() {
                if(d_conflict) {
                        break;
                }
+         }
        }
        if( addedLemma ){
                if( !d_conflict ){
@@ -2735,6 +2774,45 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) {
        }
 }
 
+void TheoryStrings::addMembership(Node assertion) {
+       d_regexp_memberships.push_back( assertion );
+
+       if(options::stringEIT()) {
+               bool polarity = assertion.getKind() != kind::NOT;
+               if(polarity) {
+                       TNode atom = polarity ? assertion : assertion[0];
+                       Node x = atom[0];
+                       Node r = atom[1];
+                       NodeList* lst;
+                       NodeListMap::iterator itr_xr = d_str_re_map.find( x );
+                       if( itr_xr == d_str_re_map.end() ){
+                         lst = new(getSatContext()->getCMM()) NodeList( true, getSatContext(), false,
+                                                                                                                                       ContextMemoryAllocator<TNode>(getSatContext()->getCMM()) );
+                         d_str_re_map.insertDataFromContextMemory( x, lst );
+                       } else {
+                         lst = (*itr_xr).second;
+                       }
+                       //check
+                   for( NodeList::const_iterator itr = lst->begin(); itr != lst->end(); ++itr ) {
+                               if( r == *itr ) {
+                                       return;
+                               }
+                       }
+                       lst->push_back( r );
+                       //TODO: make it smarter
+                       /*
+                       unsigned size = lst->size();
+                       if(size == 1) {
+                               d_inter_cache[x] = r;
+                       } else {
+                               Node r1 = (*lst)[size - 2];
+                               Node rr = d_regexp_opr.intersect(r1, r);
+                               d_inter_cache[x] = rr;
+                       }*/
+               }
+       }
+}
+
 //// Finite Model Finding
 
 Node TheoryStrings::getNextDecisionRequest() {
index e07c61a19fbc024778a37c8910487c875f773220..355c536dddccb296afbcdb39cca68773a805929d 100644 (file)
@@ -118,12 +118,6 @@ private:
         */
        Node d_ufSubstr;
 
-       /**
-        * Function symbol used to implement uninterpreted undefined string
-        * semantics.  Needed to deal with partial str2int function.
-        */
-       Node d_ufS2I;
-
        // Constants
     Node d_emptyString;
        Node d_emptyRegexp;
@@ -308,12 +302,15 @@ private:
        NodeSet d_pos_ctn_cached;
        NodeSet d_neg_ctn_cached;
 
-       // Regular Expression
+       // Symbolic Regular Expression
 private:
        // regular expression memberships
        NodeList d_regexp_memberships;
        NodeSet d_regexp_ucached;
        NodeSet d_regexp_ccached;
+       // intersection
+       NodeListMap d_str_re_map;
+       NodeNodeMap d_inter_cache;
        // antecedant for why regexp membership must be true
        NodeNodeMap d_regexp_ant;
        // membership length
@@ -324,6 +321,7 @@ private:
        CVC4::String getHeadConst( Node x );
        bool splitRegExp( Node x, Node r, Node ant );
        bool addMembershipLength(Node atom);
+       void addMembership(Node assertion);
 
 
        // Finite Model Finding
index 684a480fb2631f26f0ae454becce433e39c89b95..fd5dff994eb0e8e3cd437ee28cc1cd09380f1d4b 100644 (file)
@@ -23,6 +23,13 @@ using namespace std;
 \r
 namespace CVC4 {\r
 \r
+void String::getCharSet(std::set<unsigned int> &cset) const {\r
+       for(std::vector<unsigned int>::const_iterator itr = d_str.begin();\r
+               itr != d_str.end(); itr++) {\r
+                       cset.insert( *itr );\r
+               }\r
+}\r
+\r
 std::string String::toString() const {\r
        std::string str;\r
        for(unsigned int i=0; i<d_str.size(); ++i) {\r
index 7985f407232d8007460744dbd50e9c736953c189..512c2eff0d62c6945aa40c7cbb3fe71778a47c5f 100644 (file)
@@ -23,6 +23,7 @@
 #include <iostream>
 #include <iomanip>
 #include <string>
+#include <set>
 #include <sstream>
 #include "util/exception.h"
 //#include "util/integer.h"
@@ -31,7 +32,6 @@
 namespace CVC4 {
 
 class CVC4_PUBLIC String {
-
 public:
   static unsigned int convertCharToUnsignedInt( char c ) {
        int i = (int)c;
@@ -342,6 +342,7 @@ public:
                 return -1;
         }
   }
+  void getCharSet(std::set<unsigned int> &cset) const;
 };/* class String */
 
 namespace strings {