for merging
authorTianyi Liang <tianyi-liang@uiowa.edu>
Wed, 26 Feb 2014 17:29:38 +0000 (11:29 -0600)
committerTianyi Liang <tianyi-liang@uiowa.edu>
Wed, 26 Feb 2014 17:34:08 +0000 (11:34 -0600)
src/theory/strings/kinds
src/theory/strings/regexp_operation.cpp
src/theory/strings/theory_strings_rewriter.cpp
src/theory/strings/theory_strings_type_rules.h
src/util/Makefile.am
src/util/regexp.h

index 7fbefe251e57880198901ad35c2d9fb5c46115c8..f8f8b95556f5fe7e6acc1e9621d6f7eb96565911 100644 (file)
@@ -78,11 +78,17 @@ operator REGEXP_PLUS 1 "regexp +"
 operator REGEXP_OPT 1 "regexp ?"
 operator REGEXP_RANGE 2 "regexp range"
 
-#constant REGEXP_EMPTY \
-#      ::CVC4::RegExp \
-#      ::CVC4::RegExpHashFunction \
-#      "util/string.h" \
-#      "a regexp contains nothing"
+constant REGEXP_EMPTY \
+       ::CVC4::RegExpEmpty \
+       ::CVC4::RegExpHashFunction \
+       "util/regexp.h" \
+       "a regexp contains nothing"
+
+constant REGEXP_SIGMA \
+       ::CVC4::RegExpSigma \
+       ::CVC4::RegExpHashFunction \
+       "util/regexp.h" \
+       "a regexp contains an arbitrary charactor"
 
 #constant REGEXP_ALL \
 #      ::CVC4::RegExp \
@@ -90,12 +96,6 @@ operator REGEXP_RANGE 2 "regexp range"
 #      "util/string.h" \
 #      "a regexp contains all strings"
 
-#constant REGEXP_SIGMA \
-#      ::CVC4::RegExp \
-#      ::CVC4::RegExpHashFunction \
-#      "util/string.h" \
-#      "a regexp contains an arbitrary charactor"
-
 typerule REGEXP_CONCAT ::CVC4::theory::strings::RegExpConcatTypeRule
 typerule REGEXP_OR ::CVC4::theory::strings::RegExpOrTypeRule
 typerule REGEXP_INTER ::CVC4::theory::strings::RegExpInterTypeRule
@@ -122,4 +122,7 @@ typerule STRING_STOI ::CVC4::theory::strings::StringStrToIntTypeRule
 
 typerule STRING_IN_REGEXP ::CVC4::theory::strings::StringInRegExpTypeRule
 
+typerule REGEXP_EMPTY ::CVC4::theory::strings::EmptyRegExpTypeRule
+typerule REGEXP_SIGMA ::CVC4::theory::strings::SigmaRegExpTypeRule
+
 endtheory
index 3b81ae4a5684ae7b93018669d93420e810e574e4..1f4e86846f038219516dd8f687ab805b45f3d18c 100644 (file)
@@ -1,14 +1,25 @@
-/*********************                                                        */
+/*********************                                                        */\r
+
 /*! \file regexp_operation.cpp
+\r
  ** \verbatim
+\r
  ** Original author: Tianyi Liang
\r
  ** Major contributors: none
\r
  ** Minor contributors (to current version): none
\r
  ** This file is part of the CVC4 project.
\r
  ** Copyright (c) 2009-2013  New York University and The University of Iowa
\r
  ** See the file COPYING in the top-level source directory for licensing
\r
  ** information.\endverbatim
\r
  **
\r
  ** \brief Regular Expresion Operations\r
  **\r
  ** Regular Expresion Operations\r
@@ -661,6 +672,21 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes )
        } else {\r
                int k = r.getKind();\r
                switch( k ) {\r
+                       case kind::REGEXP_EMPTY:\r
+                       {\r
+                               Node eq = NodeManager::currentNM()->mkConst( false );\r
+                               new_nodes.push_back( eq );\r
+                               d_simpl_cache[p] = eq;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_SIGMA:\r
+                       {\r
+                               Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) );\r
+                               Node eq = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s));\r
+                               new_nodes.push_back( eq );\r
+                               d_simpl_cache[p] = eq;\r
+                       }\r
+                               break;\r
                        case kind::STRING_TO_REGEXP:\r
                        {\r
                                Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] );\r
@@ -671,18 +697,34 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes )
                        case kind::REGEXP_CONCAT:\r
                        {\r
                                std::vector< Node > cc;\r
+                               bool emptyflag = false;\r
+                               Node ff = NodeManager::currentNM()->mkConst( false );\r
                                for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
                                        if(r[i].getKind() == kind::STRING_TO_REGEXP) {\r
                                                cc.push_back( r[i][0] );\r
+                                       } if(r[i].getKind() == kind::STRING_TO_REGEXP) {\r
+                                               emptyflag = true;\r
+                                               break;\r
                                        } else {\r
                                                Node sk = NodeManager::currentNM()->mkSkolem( "rcon_$$", s.getType(), "created for regular expression concat" );\r
                                                simplifyRegExp( sk, r[i], new_nodes );\r
+                                               if(new_nodes.size() != 0) {\r
+                                                       if(new_nodes[new_nodes.size() - 1] == ff) {\r
+                                                               emptyflag = true;\r
+                                                               break;\r
+                                                       }\r
+                                               }\r
                                                cc.push_back( sk );\r
                                        }\r
                                }\r
-                               Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );\r
-                               new_nodes.push_back( cc_eq );\r
-                               d_simpl_cache[p] = cc_eq;\r
+                               if(emptyflag) {\r
+                                       new_nodes.push_back( ff );\r
+                                       d_simpl_cache[p] = ff;\r
+                               } else {\r
+                                       Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );\r
+                                       new_nodes.push_back( cc_eq );\r
+                                       d_simpl_cache[p] = cc_eq;\r
+                               }\r
                        }\r
                                break;\r
                        case kind::REGEXP_OR:\r
@@ -697,13 +739,28 @@ void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes )
                        }\r
                                break;\r
                        case kind::REGEXP_INTER:\r
+                       {\r
+                               Node nfalse = NodeManager::currentNM()->mkConst( false );\r
                                for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
                                        simplifyRegExp( s, r[i], new_nodes );\r
+                                       if(new_nodes.size() != 0) {\r
+                                               if(new_nodes[new_nodes.size() - 1] == nfalse) {\r
+                                                       break;\r
+                                               }\r
+                                       }\r
                                }\r
+                       }\r
                                break;\r
                        case kind::REGEXP_STAR:\r
                        {\r
-                               Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );\r
+                               Node eq;\r
+                               if(r[0].getKind() == kind::REGEXP_EMPTY) {\r
+                                       eq = NodeManager::currentNM()->mkConst( false );\r
+                               } else if(r[0].getKind() == kind::REGEXP_SIGMA) {\r
+                                       eq = NodeManager::currentNM()->mkConst( true );\r
+                               } else {\r
+                                       eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );\r
+                               }\r
                                new_nodes.push_back( eq );\r
                                d_simpl_cache[p] = eq;\r
                        }\r
@@ -729,10 +786,20 @@ std::string RegExpOpr::niceChar( Node r ) {
 std::string RegExpOpr::mkString( Node r ) {\r
        std::string retStr;\r
        if(r.isNull()) {\r
-               retStr = "EmptySet";\r
+               retStr = "Empty";\r
        } else {\r
                int k = r.getKind();\r
                switch( k ) {\r
+                       case kind::REGEXP_EMPTY:\r
+                       {\r
+                               retStr += "Empty";\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_SIGMA:\r
+                       {\r
+                               retStr += "{W}";\r
+                       }\r
+                               break;\r
                        case kind::STRING_TO_REGEXP:\r
                        {\r
                                retStr += niceChar( r[0] );\r
index c459d7d7eb81dd051b3b26e007f8b330dc3da83d..c0d66ab22fdd89f66b3adb3357f7cb6d426a8483 100644 (file)
@@ -28,9 +28,13 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) {
     Node retNode = node;
     std::vector<Node> node_vec;
     Node preNode = Node::null();
+       bool emptyflag = false;
     for(unsigned int i=0; i<node.getNumChildren(); ++i) {
         Node tmpNode = node[i];
-        if(node[i].getKind() == kind::STRING_CONCAT) {
+               if( tmpNode.getKind() == kind::REGEXP_EMPTY ) {
+                       emptyflag = true;
+                       break;
+               } else if(node[i].getKind() == kind::STRING_CONCAT) {
             tmpNode = rewriteConcatString(node[i]);
             if(tmpNode.getKind() == kind::STRING_CONCAT) {
                 unsigned int j=0;
@@ -71,14 +75,18 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) {
             }
         }
     }
-    if(preNode != Node::null()) {
-        node_vec.push_back( preNode );
-    }
-    if(node_vec.size() > 1) {
-        retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec);
-    } else {
-        retNode = node_vec[0];
-    }
+       if(emptyflag) {
+               retNode = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY );
+       } else {
+               if(preNode != Node::null()) {
+                       node_vec.push_back( preNode );
+               }
+               if(node_vec.size() > 1) {
+                       retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec);
+               } else {
+                       retNode = node_vec[0];
+               }
+       }
     Trace("strings-prerewrite") << "Strings::rewriteConcatString end " << retNode << std::endl;
     return retNode;
 }
@@ -89,6 +97,7 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) {
     Node retNode = node;
     std::vector<Node> node_vec;
     Node preNode = Node::null();
+       bool emptyflag = false;
     for(unsigned int i=0; i<node.getNumChildren(); ++i) {
                Trace("strings-prerewrite") << "Strings::prerewriteConcatRegExp preNode: " << preNode << std::endl;
         Node tmpNode = node[i];
@@ -123,7 +132,10 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) {
                                preNode = rewriteConcatString(
                                                        NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, preNode, tmpNode[0] ) );
             }
-        } else {
+        } else if( tmpNode.getKind() == kind::REGEXP_EMPTY ) {
+                       emptyflag = true;
+                       break;
+               } else {
             if(!preNode.isNull()) {
                 if(preNode.getKind() == kind::CONST_STRING && preNode.getConst<String>().isEmptyString() ) {
                     preNode = Node::null();
@@ -135,14 +147,18 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) {
             node_vec.push_back( tmpNode );
         }
     }
-    if(!preNode.isNull()) {
-        node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) );
-    }
-    if(node_vec.size() > 1) {
-        retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, node_vec);
-    } else {
-        retNode = node_vec[0];
-    }
+       if(emptyflag) {
+               retNode = NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY );
+       } else {
+               if(!preNode.isNull()) {
+                       node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) );
+               }
+               if(node_vec.size() > 1) {
+                       retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, node_vec);
+               } else {
+                       retNode = node_vec[0];
+               }
+       }
     Trace("strings-prerewrite") << "Strings::prerewriteConcatRegExp end " << retNode << std::endl;
     return retNode;
 }
@@ -161,11 +177,14 @@ Node TheoryStringsRewriter::prerewriteOrRegExp(TNode node) {
                        }
                        flag = false;
                } else {
-                       node_vec.push_back( node[i] );
+                       if(node[i].getKind() != kind::REGEXP_EMPTY) {
+                               node_vec.push_back( node[i] );
+                       }
                }
        }
        if(flag) {
-               retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_OR, node_vec);
+               retNode = node_vec.size() == 0 ? NodeManager::currentNM()->mkConst( kind::REGEXP_EMPTY ) :
+                                       node_vec.size() == 1 ? node_vec[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_OR, node_vec);
        }
        Trace("strings-prerewrite") << "Strings::prerewriteOrRegExp end " << retNode << std::endl;
     return retNode;
@@ -267,6 +286,18 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i
                                return true;
                        }
                }
+               case kind::REGEXP_EMPTY:
+               {
+                       return false;
+               }
+               case kind::REGEXP_SIGMA:
+               {
+                       if(s.size() == 1) {
+                               return true;
+                       } else {
+                               return false;
+                       }
+               }
                default:
                        //TODO: special sym: sigma, none, all
                        Trace("strings-postrewrite") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl;
@@ -282,7 +313,9 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) {
                x = rewriteConcatString(node[0]);
        }
 
-       if( x.getKind() == kind::CONST_STRING && checkConstRegExp(node[1]) ) {
+       if(node[1].getKind() == kind::REGEXP_EMPTY) {
+               retNode = NodeManager::currentNM()->mkConst( false );
+       } else if( x.getKind() == kind::CONST_STRING && checkConstRegExp(node[1]) ) {
                //test whether x in node[1]
                CVC4::String s = x.getConst<String>();
                retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) );
index 0c365e9938cd2d551b1c143a36eedf5674d39893..4ac4c26b4cfc58dc8a1125d91bd048705894aef5 100644 (file)
@@ -425,6 +425,25 @@ public:
   }
 };
 
+class EmptyRegExpTypeRule {
+public:
+  inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check)
+    throw (TypeCheckingExceptionPrivate, AssertionException) {
+
+    Assert(n.getKind() == kind::REGEXP_EMPTY);
+    return nodeManager->regexpType();
+  }
+};
+
+class SigmaRegExpTypeRule {
+public:
+  inline static TypeNode computeType(NodeManager* nodeManager, TNode n, bool check)
+    throw (TypeCheckingExceptionPrivate, AssertionException) {
+
+    Assert(n.getKind() == kind::REGEXP_SIGMA);
+    return nodeManager->regexpType();
+  }
+};
 
 }/* CVC4::theory::strings namespace */
 }/* CVC4::theory namespace */
index 0717df90724c8aef58bc1a94911472aac1eb2e1b..5c16db2a757ae858931e43cb607fa2c2c70a5118 100644 (file)
@@ -93,7 +93,8 @@ libutil_la_SOURCES = \
        model.cpp \
        sort_inference.h \
        sort_inference.cpp \
-       regexp.h
+       regexp.h \
+       regexp.cpp
 
 libstatistics_la_SOURCES = \
        statistics_registry.h \
index 0cd92810bf99f45825cdcc35ffa5ec6f7e7e66c7..dadc5996ccfee8c0ae07856b5df094f154462e4d 100644 (file)
@@ -237,42 +237,7 @@ public:
   /*
    * Convenience functions
    */
-  std::string toString() const {
-    std::string str;
-    for(unsigned int i=0; i<d_str.size(); ++i) {
-         char c = convertUnsignedIntToChar( d_str[i] );
-         if(isprint( c )) {
-        if(c == '\\') {
-                       str += "\\\\";
-               } else if(c == '\"') {
-                       str += "\\\"";
-               } else {
-                       str += c;
-               }
-         } else {
-                 std::string s;
-                 switch(c) {
-                       case '\a': s = "\\a"; break;
-                       case '\b': s = "\\b"; break;
-                       case '\t': s = "\\t"; break;
-                       case '\r': s = "\\r"; break;
-                       case '\v': s = "\\v"; break;
-                       case '\f': s = "\\f"; break;
-                       case '\n': s = "\\n"; break;
-                       case '\e': s = "\\e"; break;
-                       default  : {
-                               std::string s2 = static_cast<std::ostringstream*>( &(std::ostringstream() << (int)c) )->str();
-                               if(s2.size() == 1) {
-                                       s2 = "0" + s2;
-                               }
-                               s = "\\x" + s2;
-                       }
-                 }
-                 str += s;
-         }
-    }
-    return str;
-  }
+  std::string toString() const;
 
   unsigned size() const {
     return d_str.size();
@@ -389,86 +354,43 @@ struct CVC4_PUBLIC StringHashFunction {
 
 }/* CVC4::strings namespace */
 
-inline std::ostream& operator <<(std::ostream& os, const String& s) CVC4_PUBLIC;
-inline std::ostream& operator <<(std::ostream& os, const String& s) {
-  os << '"';
-  std::string str = s.toString();
-  for(std::string::iterator i = str.begin(); i != str.end(); ++i) {
-    if(*i == '\\' || *i == '"') {
-      os << '\\';
-    } else if(!isprint(*i)) {
-      os << "\\x" << std::ios::hex << std::setw(2) << (unsigned(*i) % 0x100);
-      continue;
-    }
-    os << *i;
-  }
-  return os << '"';
-}
+std::ostream& operator <<(std::ostream& os, const String& s) CVC4_PUBLIC;
 
 class CVC4_PUBLIC RegExp {
-
-private:
-  std::string d_str;
-
+protected:
+  int d_type;
 public:
-  RegExp() {}
+  RegExp() : d_type(1) {}
 
-  RegExp(const std::string s)
-      : d_str(s) {}
+  RegExp(const int t) : d_type(t) {}
 
   ~RegExp() {}
 
-  RegExp& operator =(const RegExp& y) {
-    if(this != &y) d_str = y.d_str;
-    return *this;
-  }
-
   bool operator ==(const RegExp& y) const {
-    return d_str == y.d_str ;
+    return d_type == y.d_type ;
   }
 
   bool operator !=(const RegExp& y) const {
-    return d_str != y.d_str ;
-  }
-
-  String concat (const RegExp& other) const {
-    return String(d_str + other.d_str);
+    return d_type != y.d_type ;
   }
 
   bool operator <(const RegExp& y) const {
-    return d_str < y.d_str;
+    return d_type < y.d_type;
   }
 
   bool operator >(const RegExp& y) const {
-    return d_str > y.d_str ;
+    return d_type > y.d_type ;
   }
 
   bool operator <=(const RegExp& y) const {
-    return d_str <= y.d_str;
+    return d_type <= y.d_type;
   }
 
   bool operator >=(const RegExp& y) const {
-    return d_str >= y.d_str ;
-  }
-
-  /*
-   * Convenience functions
-   */
-
-  size_t hash() const {
-    unsigned int h = 1;
-
-    for (size_t i = 0; i < d_str.length(); ++i) {
-        h = (h << 5)  + d_str[i];
-    }
-
-    return h;
-  }
-
-  std::string toString() const {
-    return d_str;
+    return d_type >= y.d_type ;
   }
 
+  int getType() const { return d_type; }
 };/* class RegExp */
 
 /**
@@ -476,14 +398,21 @@ public:
  */
 struct CVC4_PUBLIC RegExpHashFunction {
   inline size_t operator()(const RegExp& s) const {
-    return s.hash();
+    return (size_t)s.getType();
   }
 };/* struct RegExpHashFunction */
 
-inline std::ostream& operator <<(std::ostream& os, const RegExp& s) CVC4_PUBLIC;
-inline std::ostream& operator <<(std::ostream& os, const RegExp& s) {
-  return os << s.toString();
-}
+std::ostream& operator <<(std::ostream& os, const RegExp& s) CVC4_PUBLIC;
+
+class CVC4_PUBLIC RegExpEmpty : public RegExp {
+public:
+       RegExpEmpty() : RegExp(0) {}
+};
+
+class CVC4_PUBLIC RegExpSigma : public RegExp {
+public:
+       RegExpSigma() : RegExp(2) {}
+};
 
 }/* CVC4 namespace */