add seperate regular expression files
authorTianyi Liang <tianyi-liang@uiowa.edu>
Wed, 6 Nov 2013 16:43:06 +0000 (10:43 -0600)
committerTianyi Liang <tianyi-liang@uiowa.edu>
Wed, 6 Nov 2013 16:59:42 +0000 (10:59 -0600)
src/theory/strings/Makefile.am
src/theory/strings/regexp_operation.cpp [new file with mode: 0644]
src/theory/strings/regexp_operation.h [new file with mode: 0644]
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings.h

index 38efa33f31f36c162ef1be733e8d9e3fc0ba26cc..f7a6fa0a2d93a22ddd0486236c291f24f57c884e 100644 (file)
@@ -13,7 +13,8 @@ libstrings_la_SOURCES = \
        theory_strings_type_rules.h \
        type_enumerator.h \
        theory_strings_preprocess.h \
-       theory_strings_preprocess.cpp
+       theory_strings_preprocess.cpp \
+       regexp_operation.cpp
 
 EXTRA_DIST = \
        kinds
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
new file mode 100644 (file)
index 0000000..0a77018
--- /dev/null
@@ -0,0 +1,656 @@
+/*********************                                                        */\r
+/*! \file regexp_operation.CPP\r
+ ** \verbatim\r
+ ** Original author: Tianyi Liang\r
+ ** Major contributors: Tianyi Liang, Andrew Reynolds\r
+ ** Minor contributors (to current version): none\r
+ ** This file is part of the CVC4 prototype.\r
+ ** Copyright (c) 2013-2013  New York University and The University of Iowa\r
+ ** See the file COPYING in the top-level source directory for licensing\r
+ ** information.\endverbatim\r
+ **\r
+ ** \brief Regular Expresion Operations\r
+ **\r
+ ** Regular Expresion Operations\r
+ **/\r
+\r
+#include "theory/strings/regexp_operation.h"\r
+#include "expr/kind.h"\r
+\r
+namespace CVC4 {\r
+namespace theory {\r
+namespace strings {\r
+\r
+RegExpOpr::RegExpOpr() {\r
+    d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );\r
+       // All Charactors = all printable ones 32-126\r
+       d_char_start = 'a';//' ';\r
+       d_char_end = 'c';//'~';\r
+       d_sigma = mkAllExceptOne( '\0' );\r
+       d_sigma_star = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma );\r
+}\r
+\r
+bool RegExpOpr::checkConstRegExp( Node r ) {\r
+       bool ret = true;\r
+       if( d_cstre_cache.find( r ) != d_cstre_cache.end() ) {\r
+               ret = d_cstre_cache[r];\r
+       } else {\r
+               if(r.getKind() == kind::STRING_TO_REGEXP) {\r
+                       Node tmp = Rewriter::rewrite( r[0] );\r
+                       ret = tmp.isConst();\r
+               } else {\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               if(!checkConstRegExp(r[i])) {\r
+                                       ret = false; break;\r
+                               }\r
+                       }\r
+               }\r
+               d_cstre_cache[r] = ret;\r
+       }\r
+       return ret;\r
+}\r
+\r
+// 0-unknown, 1-yes, 2-no\r
+int RegExpOpr::delta( Node r ) {\r
+       Trace("strings-regexp-delta") << "RegExp-Delta starts with " << mkString( r ) << std::endl;\r
+       int ret = 0;\r
+       if( d_delta_cache.find( r ) != d_delta_cache.end() ) {\r
+               ret = d_delta_cache[r];\r
+       } else {\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::STRING_TO_REGEXP:\r
+                       {\r
+                               if(r[0].isConst()) {\r
+                                       if(r[0] == d_emptyString) {\r
+                                               ret = 1;\r
+                                       } else {\r
+                                               ret = 2;\r
+                                       }\r
+                               } else {\r
+                                       ret = 0;\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_CONCAT:\r
+                       {\r
+                               bool flag = false;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       int tmp = delta( r[i] );\r
+                                       if(tmp == 2) {\r
+                                               ret = 2;\r
+                                               break;\r
+                                       } else if(tmp == 0) {\r
+                                               flag = true;\r
+                                       }\r
+                               }\r
+                               if(!flag && ret != 2) {\r
+                                       ret = 1;\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OR:\r
+                       {\r
+                               bool flag = false;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       int tmp = delta( r[i] );\r
+                                       if(tmp == 1) {\r
+                                               ret = 1;\r
+                                               break;\r
+                                       } else if(tmp == 0) {\r
+                                               flag = true;\r
+                                       }\r
+                               }\r
+                               if(!flag && ret != 1) {\r
+                                       ret = 2;\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_INTER:\r
+                       {\r
+                               bool flag = true;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       int tmp = delta( r[i] );\r
+                                       if(tmp == 2) {\r
+                                               ret = 2; flag=false;\r
+                                               break;\r
+                                       } else if(tmp == 0) {\r
+                                               flag=false;\r
+                                               break;\r
+                                       }\r
+                               }\r
+                               if(flag) {\r
+                                       ret = 1;\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_STAR:\r
+                       {\r
+                               ret = 1;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_PLUS:\r
+                       {\r
+                               ret = delta( r[0] );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OPT:\r
+                       {\r
+                               ret = 1;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_RANGE:\r
+                       {\r
+                               ret = 2;\r
+                       }\r
+                               break;\r
+                       default:\r
+                               //TODO: special sym: sigma, none, all\r
+                               Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;\r
+                               //AlwaysAssert( false );\r
+                               //return Node::null();\r
+               }\r
+               d_delta_cache[r] = ret;\r
+       }\r
+       Trace("strings-regexp-delta") << "RegExp-Delta returns : " << ret << std::endl;\r
+       return ret;\r
+}\r
+\r
+//null - no solution\r
+Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {\r
+       Assert( c.size() < 2 );\r
+       Trace("strings-regexp-derivative") << "RegExp-derivative starts with " << mkString( r ) << ", c=" << c << std::endl;\r
+       Node retNode = Node::null();\r
+       PairDvStr dv = std::make_pair( r, c );\r
+       if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {\r
+               retNode = d_dv_cache[dv];\r
+       } else if( c.isEmptyString() ){\r
+               int tmp = delta( r );\r
+               if(tmp == 0) {\r
+                       retNode = Node::null();\r
+                       // TODO variable\r
+               } else if(tmp == 1) {\r
+                       retNode = r;\r
+               } else {\r
+                       retNode = Node::null();\r
+               }\r
+       } else {\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::STRING_TO_REGEXP:\r
+                       {\r
+                               if(r[0].isConst()) {\r
+                                       if(r[0] == d_emptyString) {\r
+                                               retNode = Node::null();\r
+                                       } else {\r
+                                               if(r[0].getConst< CVC4::String >().getFirstChar() == c.getFirstChar()) {\r
+                                                       retNode = r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, \r
+                                                                               NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );\r
+                                               } else {\r
+                                                       retNode = Node::null();\r
+                                               }\r
+                                       }\r
+                               } else {\r
+                                       retNode = Node::null();\r
+                                       // TODO variable\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_CONCAT:\r
+                       {\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node dc = derivativeSingle(r[i], c);\r
+                                       if(!dc.isNull()) {\r
+                                               std::vector< Node > vec_nodes2;\r
+                                               if(dc != d_emptyString) {\r
+                                                       vec_nodes2.push_back( dc );\r
+                                               }\r
+                                               for(unsigned j=i+1; j<r.getNumChildren(); ++j) {\r
+                                                       if(r[j] != d_emptyString) {\r
+                                                               vec_nodes2.push_back( r[j] );\r
+                                                       }\r
+                                               }\r
+                                               Node tmp = vec_nodes2.size()==0 ? d_emptyString :\r
+                                                                       ( vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ) );\r
+                                               if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {\r
+                                                       vec_nodes.push_back( tmp );\r
+                                               }\r
+                                       }\r
+\r
+                                       if( delta( r[i] ) != 1 ) {\r
+                                               break;\r
+                                       }\r
+                               }\r
+                               retNode = vec_nodes.size() == 0 ? Node::null() :\r
+                                                       ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OR:\r
+                       {\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node dc = derivativeSingle(r[i], c);\r
+                                       if(!dc.isNull()) {\r
+                                               if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {\r
+                                                       vec_nodes.push_back( dc );\r
+                                               }\r
+                                       }\r
+\r
+                               }\r
+                               retNode = vec_nodes.size() == 0 ? Node::null() :\r
+                                                       ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes ) );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_INTER:\r
+                       {\r
+                               bool flag = true;\r
+                               bool flag_sg = false;\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node dc = derivativeSingle(r[i], c);\r
+                                       if(!dc.isNull()) {\r
+                                               if(dc == d_sigma_star) {\r
+                                                       flag_sg = true;\r
+                                               } else {\r
+                                                       if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {\r
+                                                               vec_nodes.push_back( dc );\r
+                                                       }\r
+                                               }\r
+                                       } else {\r
+                                               flag = false;\r
+                                               break;\r
+                                       }\r
+                               }\r
+                               if(flag) {\r
+                                       if(vec_nodes.size() == 0 && flag_sg) {\r
+                                               retNode = d_sigma_star;\r
+                                       } else {\r
+                                               retNode = vec_nodes.size() == 0 ? Node::null() :\r
+                                                                       ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );\r
+                                       }\r
+                               } else {\r
+                                       retNode = Node::null();\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_STAR:\r
+                       {\r
+                               Node dc = derivativeSingle(r[0], c);\r
+                               if(!dc.isNull()) {\r
+                                       retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );\r
+                               } else {\r
+                                       retNode = Node::null();\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_PLUS:\r
+                       {\r
+                               Node dc = derivativeSingle(r[0], c);\r
+                               if(!dc.isNull()) {\r
+                                       retNode = dc==d_emptyString ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );\r
+                               } else {\r
+                                       retNode = Node::null();\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OPT:\r
+                       {\r
+                               return derivativeSingle(r[0], c);\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_RANGE:\r
+                       {\r
+                               char ch = c.getFirstChar();\r
+                               if (ch >= r[0].getConst< CVC4::String >().getFirstChar() && ch <= r[1].getConst< CVC4::String >().getFirstChar() ) {\r
+                                       return d_emptyString;\r
+                               } else {\r
+                                       return Node::null();\r
+                               }\r
+                       }\r
+                               break;\r
+                       default:\r
+                               //TODO: special sym: sigma, none, all\r
+                               Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;\r
+                               //AlwaysAssert( false );\r
+                               //return Node::null();\r
+               }\r
+               d_dv_cache[dv] = retNode;\r
+       }\r
+       Trace("strings-regexp-derivative") << "RegExp-derivative returns : " << ( retNode.isNull() ? "NULL" : mkString( retNode ) )  << std::endl;\r
+       return retNode;\r
+}\r
+\r
+void RegExpOpr::firstChar( Node r ) {\r
+       Trace("strings-regexp-firstchar") << "Head characters: ";\r
+       for(char ch = d_char_start; ch <= d_char_end; ++ch) {\r
+               CVC4::String c(ch);\r
+               Node dc = derivativeSingle(r, ch);\r
+               if(!dc.isNull()) {\r
+                       Trace("strings-regexp-firstchar") << c << " (" << mkString(dc) << ")" << std::endl;\r
+               }\r
+       }\r
+       Trace("strings-regexp-firstchar") << std::endl;\r
+}\r
+\r
+bool RegExpOpr::follow( Node r, CVC4::String c, std::vector< char > &vec_chars ) {\r
+       int k = r.getKind();\r
+       switch( k ) {\r
+               case kind::STRING_TO_REGEXP:\r
+               {\r
+                       if(r[0].isConst()) {\r
+                               if(r[0] != d_emptyString) {\r
+                                       char t1 = r[0].getConst< CVC4::String >().getFirstChar();\r
+                                       if(c.isEmptyString()) {\r
+                                               vec_chars.push_back( t1 );\r
+                                               return true;\r
+                                       } else {\r
+                                               char t2 = c.getFirstChar();\r
+                                               if(t1 != t2) {\r
+                                                       return false;\r
+                                               } else {\r
+                                                       if(c.size() >= 2) {\r
+                                                               vec_chars.push_back( c.substr(1,1).getFirstChar() );\r
+                                                       } else {\r
+                                                               vec_chars.push_back( '\0' );\r
+                                                       }\r
+                                                       return true;\r
+                                               }\r
+                                       }\r
+                               } else {\r
+                                       return false;\r
+                               }\r
+                       } else {\r
+                               return false;\r
+                       }\r
+               }\r
+                       break;\r
+               case kind::REGEXP_CONCAT:\r
+               {\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               if( follow(r[i], c, vec_chars) ) {\r
+                                       if(vec_chars[vec_chars.size() - 1] == '\0') {\r
+                                               vec_chars.pop_back();\r
+                                               c = d_emptyString.getConst< CVC4::String >();\r
+                                       }\r
+                               } else {\r
+                                       return false;\r
+                               }\r
+                       }\r
+                       vec_chars.push_back( '\0' );\r
+                       return true;\r
+               }\r
+                       break;\r
+               case kind::REGEXP_OR:\r
+               {\r
+                       bool flag = false;\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               if( follow(r[i], c, vec_chars) ) {\r
+                                       flag=true;\r
+                               }\r
+                       }\r
+                       return flag;\r
+               }\r
+                       break;\r
+               case kind::REGEXP_INTER:\r
+               {\r
+                       std::vector< char > vt2;\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               std::vector< char > v_tmp;\r
+                               if( !follow(r[i], c, v_tmp) ) {\r
+                                       return false;\r
+                               }\r
+                               std::vector< char > vt3(vt2);\r
+                               vt2.clear();\r
+                               std::set_intersection( vt3.begin(), vt3.end(), v_tmp.begin(), v_tmp.end(), vt2.begin() );\r
+                               if(vt2.size() == 0) {\r
+                                       return false;\r
+                               }\r
+                       }\r
+                       vec_chars.insert( vec_chars.end(), vt2.begin(), vt2.end() );\r
+                       return true;\r
+               }\r
+                       break;\r
+               case kind::REGEXP_STAR:\r
+               {\r
+                       if(follow(r[0], c, vec_chars)) {\r
+                               if(vec_chars[vec_chars.size() - 1] == '\0') {\r
+                                       if(c.isEmptyString()) {\r
+                                               return true;\r
+                                       } else {\r
+                                               vec_chars.pop_back();\r
+                                               c = d_emptyString.getConst< CVC4::String >();\r
+                                               return follow(r[0], c, vec_chars);\r
+                                       }\r
+                               } else {\r
+                                       return true;\r
+                               }\r
+                       } else {\r
+                               vec_chars.push_back( '\0' );\r
+                               return true;\r
+                       }\r
+               }\r
+                       break;\r
+                       /*\r
+               case kind::REGEXP_PLUS:\r
+               {\r
+                       ret = delta( r[0] );\r
+               }\r
+                       break;\r
+               case kind::REGEXP_OPT:\r
+               {\r
+                       ret = 1;\r
+               }\r
+                       break;\r
+               case kind::REGEXP_RANGE:\r
+               {\r
+                       ret = 2;\r
+               }\r
+                       break;*/\r
+               default:\r
+                       //TODO: special sym: sigma, none, all\r
+                       Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in delta of RegExp." << std::endl;\r
+                       //AlwaysAssert( false );\r
+                       //return Node::null();\r
+                       return false;\r
+       }\r
+}\r
+\r
+Node RegExpOpr::mkAllExceptOne( char exp_c ) {\r
+       std::vector< Node > vec_nodes;\r
+       for(char c=d_char_start; c<=d_char_end; ++c) {\r
+               if(c != exp_c ) {\r
+                       Node n = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String( c ) ) );\r
+                       vec_nodes.push_back( n );\r
+               }\r
+       }\r
+       return NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );\r
+}\r
+\r
+Node RegExpOpr::complement( Node r ) {\r
+       Trace("strings-regexp-compl") << "RegExp-Compl starts with " << mkString( r ) << std::endl;\r
+       Node retNode = r;\r
+       if( d_compl_cache.find( r ) != d_compl_cache.end() ) {\r
+               retNode = d_compl_cache[r];\r
+       } else {\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::STRING_TO_REGEXP:\r
+                       {\r
+                               if(r[0].isConst()) {\r
+                                       if(r[0] == d_emptyString) {\r
+                                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );\r
+                                       } else {\r
+                                               std::vector< Node > vec_nodes;\r
+                                               vec_nodes.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ) );\r
+                                               CVC4::String s = r[0].getConst<String>();\r
+                                               for(unsigned i=0; i<s.size(); ++i) {\r
+                                                       char c = s.substr(i, 1).getFirstChar();\r
+                                                       Node tmp = mkAllExceptOne( c );\r
+                                                       if(i != 0 ) {\r
+                                                               Node tmph = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, \r
+                                                                                               NodeManager::currentNM()->mkConst( s.substr(0, i) ) );\r
+                                                               tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmph, tmp );\r
+                                                       }\r
+                                                       tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp, d_sigma_star );\r
+                                                       vec_nodes.push_back( tmp );\r
+                                               }\r
+                                               Node tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, d_sigma, d_sigma_star );\r
+                                               vec_nodes.push_back( tmp );\r
+                                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );\r
+                                       }\r
+                               } else {\r
+                                       Trace("strings-error") << "Unsupported string variable: " << r[0] << " in complement of RegExp." << std::endl;\r
+                                       //AlwaysAssert( false );\r
+                                       //return Node::null();\r
+                               }\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_CONCAT:\r
+                       {\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node tmp = complement( r[i] );\r
+                                       for(unsigned j=0; j<i; ++j) {\r
+                                               tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r[j], tmp );\r
+                                       }\r
+                                       if(i != r.getNumChildren() - 1) {\r
+                                               tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, tmp,\r
+                                                               NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, d_sigma ) );\r
+                                       }\r
+                                       vec_nodes.push_back( tmp );\r
+                               }\r
+                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OR:\r
+                       {\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node tmp = complement( r[i] );\r
+                                       vec_nodes.push_back( tmp );\r
+                               }\r
+                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_INTER:\r
+                       {\r
+                               std::vector< Node > vec_nodes;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       Node tmp = complement( r[i] );\r
+                                       vec_nodes.push_back( tmp );\r
+                               }\r
+                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR, vec_nodes );\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_STAR:\r
+                       {\r
+                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, d_sigma, d_sigma_star );\r
+                               Node tmp = complement( r[0] );\r
+                               tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, r, tmp );\r
+                               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, retNode, tmp );\r
+                       }\r
+                               break;\r
+                       default:\r
+                               //TODO: special sym: sigma, none, all\r
+                               Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in complement of RegExp." << std::endl;\r
+                               //AlwaysAssert( false );\r
+                               //return Node::null();\r
+               }\r
+               d_compl_cache[r] = retNode;\r
+       }\r
+       Trace("strings-regexp-compl") << "RegExp-Compl returns : " << mkString( retNode ) << std::endl;\r
+       return retNode;\r
+}\r
+\r
+std::string RegExpOpr::niceChar( Node r ) {\r
+       if(r.isConst()) {\r
+               std::string s = r.getConst<CVC4::String>().toString() ;\r
+               return s == "" ? "{E}" : ( s == " " ? "{ }" : s );\r
+       } else {\r
+               return r.toString();\r
+       }\r
+}\r
+std::string RegExpOpr::mkString( Node r ) {\r
+       std::string retStr;\r
+       int k = r.getKind();\r
+       switch( k ) {\r
+               case kind::STRING_TO_REGEXP:\r
+               {\r
+                       retStr += niceChar( r[0] );\r
+               }\r
+                       break;\r
+               case kind::REGEXP_CONCAT:\r
+               {\r
+                       retStr += "(";\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               if(i != 0) retStr += ".";\r
+                               retStr += mkString( r[i] );\r
+                       }\r
+                       retStr += ")";\r
+               }\r
+                       break;\r
+               case kind::REGEXP_OR:\r
+               {\r
+                       if(r == d_sigma) {\r
+                               retStr += "{A}";\r
+                       } else {\r
+                               retStr += "(";\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       if(i != 0) retStr += "|";\r
+                                       retStr += mkString( r[i] );\r
+                               }\r
+                               retStr += ")";\r
+                       }\r
+               }\r
+                       break;\r
+               case kind::REGEXP_INTER:\r
+               {\r
+                       retStr += "(";\r
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                               if(i != 0) retStr += "&";\r
+                               retStr += mkString( r[i] );\r
+                       }\r
+                       retStr += ")";\r
+               }\r
+                       break;\r
+               case kind::REGEXP_STAR:\r
+               {\r
+                       retStr += mkString( r[0] );\r
+                       retStr += "*";\r
+               }\r
+                       break;\r
+               case kind::REGEXP_PLUS:\r
+               {\r
+                       retStr += mkString( r[0] );\r
+                       retStr += "+";\r
+               }\r
+                       break;\r
+               case kind::REGEXP_OPT:\r
+               {\r
+                       retStr += mkString( r[0] );\r
+                       retStr += "?";\r
+               }\r
+                       break;\r
+               case kind::REGEXP_RANGE:\r
+               {\r
+                       retStr += "[";\r
+                       retStr += niceChar( r[0] );\r
+                       retStr += "-";\r
+                       retStr += niceChar( r[1] );\r
+                       retStr += "]";\r
+               }\r
+                       break;\r
+               default:\r
+                       //TODO: special sym: sigma, none, all\r
+                       Trace("strings-error") << "Unsupported term: " << r << " in RegExp." << std::endl;\r
+                       //AlwaysAssert( false );\r
+                       //return Node::null();\r
+       }\r
+\r
+       return retStr;\r
+}\r
+\r
+}/* CVC4::theory::strings namespace */\r
+}/* CVC4::theory namespace */\r
+}/* CVC4 namespace */\r
diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h
new file mode 100644 (file)
index 0000000..5f0eab2
--- /dev/null
@@ -0,0 +1,65 @@
+/*********************                                                        */\r
+/*! \file regexp_operation.h\r
+ ** \verbatim\r
+ ** Original author: Tianyi Liang\r
+ ** Major contributors: Tianyi Liang, Andrew Reynolds\r
+ ** Minor contributors (to current version): none\r
+ ** This file is part of the CVC4 prototype.\r
+ ** Copyright (c) 2013-2013  New York University and The University of Iowa\r
+ ** See the file COPYING in the top-level source directory for licensing\r
+ ** information.\endverbatim\r
+ **\r
+ ** \brief Regular Expresion Operations\r
+ **\r
+ ** Regular Expresion Operations\r
+ **/\r
+\r
+#include "cvc4_private.h"\r
+\r
+#ifndef __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H\r
+#define __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H\r
+\r
+#include <vector>\r
+#include "util/hash.h"\r
+#include "theory/theory.h"\r
+#include "theory/rewriter.h"\r
+\r
+namespace CVC4 {\r
+namespace theory {\r
+namespace strings {\r
+\r
+class RegExpOpr {\r
+       typedef std::pair< Node, CVC4::String > PairDvStr;\r
+private:\r
+    Node d_emptyString;\r
+       char d_char_start;\r
+       char d_char_end;\r
+       Node d_sigma;\r
+       Node d_sigma_star;\r
+       \r
+       std::map< Node, Node > d_compl_cache;\r
+       std::map< Node, int > d_delta_cache;\r
+       std::map< PairDvStr, Node > d_dv_cache;\r
+       std::map< Node, bool > d_cstre_cache;\r
+       //bool checkStarPlus( Node t );\r
+       //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn );\r
+       //Node simplify( Node t, std::vector< Node > &new_nodes );\r
+       std::string niceChar( Node r );\r
+public:\r
+       RegExpOpr();\r
+       bool checkConstRegExp( Node r );\r
+    //void simplify(std::vector< Node > &vec_node);\r
+       Node mkAllExceptOne( char c );\r
+       Node complement( Node r );\r
+       int delta( Node r );\r
+       Node derivativeSingle( Node r, CVC4::String c );\r
+       void firstChar( Node r );\r
+       bool follow( Node r, CVC4::String c, std::vector< char > &vec_chars );\r
+       std::string mkString( Node r );\r
+};\r
+\r
+}/* CVC4::theory::strings namespace */\r
+}/* CVC4::theory namespace */\r
+}/* CVC4 namespace */\r
+\r
+#endif /* __CVC4__THEORY__STRINGS__REGEXP__OPERATION_H */\r
index a50c295da6ebba4345e33415b0cb857817d08abb..ab6ff9d686b0f384dec61fa9ddd9fa729b20c7b1 100644 (file)
@@ -60,9 +60,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
     d_true = NodeManager::currentNM()->mkConst( true );
     d_false = NodeManager::currentNM()->mkConst( false );
 
-       //option
-       //d_regexp_unroll_depth = options::stringRegExpUnrollDepth();
-       //d_fmf = options::stringFMF();
 }
 
 TheoryStrings::~TheoryStrings() {
@@ -394,7 +391,27 @@ void TheoryStrings::check(Effort e) {
        //must record string in regular expressions
        if ( atom.getKind() == kind::STRING_IN_REGEXP ) {
                //if(fact[0].getKind() != kind::CONST_STRING) {
-                d_reg_exp_mem.push_back( assertion );
+               //if(polarity) {
+                       d_reg_exp_mem.push_back( assertion );
+               /*} else {
+                       Node r = Rewriter::rewrite( atom[1] );
+                       r = d_regexp_opr.complement( r );
+                       r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r );
+                       std::vector< Node > vec_r;
+                       vec_r.push_back( r );
+
+                       StringsPreprocess spp;
+                       spp.simplify( vec_r );
+                       for( unsigned i=1; i<vec_r.size(); i++ ){
+                               if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) {
+                                       d_reg_exp_mem.push_back( vec_r[i] );
+                               } else if(vec_r[i].getKind() == kind::EQUAL) {
+                                       d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]);
+                               } else {
+                                       Assert(false);
+                               }
+                       }
+               }*/
                //}
        }else if (atom.getKind() == kind::EQUAL) {
       d_equalityEngine.assertEquality(atom, polarity, fact);
@@ -1869,7 +1886,7 @@ bool TheoryStrings::unrollStar( Node atom ) {
        Node r = atom[1];
        int depth = d_reg_exp_unroll_depth.find( atom )==d_reg_exp_unroll_depth.end() ? 0 : d_reg_exp_unroll_depth[atom];
        if( depth <= options::stringRegExpUnrollDepth() ) {
-               Trace("strings-regex") << "Strings::regex: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl;
+               Trace("strings-regexp") << "Strings::regexp: Unroll " << atom << " for " << ( depth + 1 ) << " times." << std::endl;
                d_reg_exp_unroll[atom] = true;
                //add lemma?
                Node xeqe = x.eqNode( d_emptyString );
@@ -1930,7 +1947,7 @@ bool TheoryStrings::unrollStar( Node atom ) {
                sendLemma( ant, lem, "Unroll" );
                return true;
        }else{
-               Trace("strings-regex") << "Strings::regex: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl;
+               Trace("strings-regexp") << "Strings::regexp: Stop unrolling " << atom << " the max (" << depth << ") is reached." << std::endl;
                return false;
        }
 }
@@ -1941,7 +1958,7 @@ bool TheoryStrings::checkMemberships() {
        for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){
                //check regular expression membership
                Node assertion = d_reg_exp_mem[i];
-               Trace("strings-regex") << "We have regular expression assertion : " << assertion << std::endl;
+               Trace("strings-regexp") << "We have regular expression assertion : " << assertion << std::endl;
                Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion;
                bool polarity = assertion.getKind()!=kind::NOT;
                if( polarity ){
@@ -1952,21 +1969,49 @@ bool TheoryStrings::checkMemberships() {
                                //TODO
                                Assert( r.getKind()==kind::REGEXP_STAR );
                                if( !areEqual( x, d_emptyString ) ){
-                                       if( unrollStar( atom ) ){
+                                       //if(splitRegExp( x, r, atom )) {
+                                       //      addedLemma = true;
+                                       //} else 
+                                       if( unrollStar( atom ) ) {
                                                addedLemma = true;
-                                       }else{
-                                               Trace("strings-regex") << "RegEx is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl;
+                                       } else {
+                                               Trace("strings-regexp") << "RegExp is incomplete due to " << assertion << ", depth = " << options::stringRegExpUnrollDepth() << std::endl;
                                                is_unk = true;
                                        }
                                }else{
-                                       Trace("strings-regex") << "...is satisfied." << std::endl;
+                                       Trace("strings-regexp") << "...is satisfied." << std::endl;
                                }
                        }else{
-                               Trace("strings-regex") << "...Already unrolled." << std::endl;
+                               Trace("strings-regexp") << "...Already unrolled." << std::endl;
                        }
                }else{
                        //TODO: negative membership
-                       Trace("strings-regex") << "RegEx is incomplete due to " << assertion << "." << std::endl;
+                       //Node r = Rewriter::rewrite( atom[1] );
+                       //r = d_regexp_opr.complement( r );
+                       //Trace("strings-regexp-test") << "Compl( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.mkString( r ) << std::endl;
+                       //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( atom[1] ) << " ) is " << d_regexp_opr.delta( atom[1] ) << std::endl;
+                       //Trace("strings-regexp-test") << "Delta( " << d_regexp_opr.mkString( r ) << " ) is " << d_regexp_opr.delta( r ) << std::endl;
+                       //Trace("strings-regexp-test") << "Deriv( " << d_regexp_opr.mkString( r ) << ", c='b' ) is " << d_regexp_opr.mkString( d_regexp_opr.derivativeSingle( r, ::CVC4::String("b") ) ) << std::endl;
+                       //Trace("strings-regexp-test") << "FHC( " << d_regexp_opr.mkString( r ) <<" ) is " << std::endl;
+                       //d_regexp_opr.firstChar( r );
+                       //r = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, atom[0], r );
+                       /*
+                       std::vector< Node > vec_r;
+                       vec_r.push_back( r );
+
+                       StringsPreprocess spp;
+                       spp.simplify( vec_r );
+                       for( unsigned i=1; i<vec_r.size(); i++ ){
+                               if(vec_r[i].getKind() == kind::STRING_IN_REGEXP) {
+                                       d_reg_exp_mem.push_back( vec_r[i] );
+                               } else if(vec_r[i].getKind() == kind::EQUAL) {
+                                       d_equalityEngine.assertEquality(vec_r[i], true, vec_r[i]);
+                               } else {
+                                       Assert(false);
+                               }
+                       }
+                       */
+                       Trace("strings-regexp") << "RegEx is incomplete due to " << assertion << "." << std::endl;
                        is_unk = true;
                }
        }
@@ -1975,13 +2020,71 @@ bool TheoryStrings::checkMemberships() {
                return true;
        }else{
                if( is_unk ){
-                       Trace("strings-regex") << "SET INCOMPLETE" << std::endl;
+                       Trace("strings-regexp") << "SET INCOMPLETE" << std::endl;
                        d_out->setIncomplete();
                }
                return false;
        }
 }
 
+CVC4::String TheoryStrings::getHeadConst( Node x ) {
+       if( x.isConst() && x != d_emptyString ) {
+               return x.getConst< String >();
+       } else if( x.getKind() == kind::STRING_CONCAT ) {
+               if( x[0].isConst() && x[0] != d_emptyString ) {
+                       return x.getConst< String >();
+               } else {
+                       return d_emptyString.getConst< String >();
+               }
+       } else {
+               return d_emptyString.getConst< String >();
+       }
+}
+
+bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) {
+       x =  Rewriter::rewrite( x );
+       if(x == d_emptyString) {
+               //if(d_regexp_opr.delta() == 1) {
+               //}
+               return false;
+       } else {
+               CVC4::String s = getHeadConst( x );
+               if( !s.isEmptyString() && d_regexp_opr.checkConstRegExp( r ) ) {
+                       Node conc = Node::null();
+                       Node dc = r;
+                       bool flag = true;
+                       for(unsigned i=0; i<s.size(); ++i) {
+                               CVC4::String c = s.substr(i, 1);
+                               dc = d_regexp_opr.derivativeSingle(dc, c);
+                               if(dc.isNull()) {
+                                       // CONFLICT
+                                       flag = false;
+                                       break;
+                               }
+                       }
+                       // send lemma
+                       if(flag) {
+                               Node left = Node::null();
+                               if(x.isConst()) {
+                                       left = d_emptyString;
+                                       if(d_regexp_opr.delta(dc)) {
+                                               //TODO yes
+                                       } else {
+                                               // TODO conflict
+                                       }
+                               } else {
+                                       //TODO find x rest
+                                       conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, dc );
+                               }
+                       }
+                       sendLemma(ant, conc, "RegExp Const Split");
+               } else {
+                       return false;
+               }
+       }
+       return false;
+}
+
 Node TheoryStrings::getNextDecisionRequest() {
        if(options::stringFMF() && !d_conflict) {
                //Trace("strings-fmf-debug") << "Strings::FMF: Assertion Level = " << d_valuation.getAssertionLevel() << std::endl;
index 8f21888a270e9dd42c257973e15ca0dd206ccb17..1291fc94ed25a55848f5a40d7c704f3c63d89198 100644 (file)
@@ -22,6 +22,7 @@
 #include "theory/theory.h"
 #include "theory/uf/equality_engine.h"
 #include "theory/strings/theory_strings_preprocess.h"
+#include "theory/strings/regexp_operation.h"
 
 #include "context/cdchunk_list.h"
 
@@ -252,6 +253,12 @@ protected:
   void seperateByLength( std::vector< Node >& n, std::vector< std::vector< Node > >& col, std::vector< Node >& lts );
   void printConcat( std::vector< Node >& n, const char * c );
 
+       // Regular Expression
+private:
+       RegExpOpr d_regexp_opr;
+       CVC4::String getHeadConst( Node x );
+       bool splitRegExp( Node x, Node r, Node ant );
+
 private:
        // Finite Model Finding
        //bool d_fmf;