add constant membership
authorTianyi Liang <tianyi-liang@uiowa.edu>
Fri, 11 Oct 2013 08:32:33 +0000 (03:32 -0500)
committerTianyi Liang <tianyi-liang@uiowa.edu>
Fri, 11 Oct 2013 08:33:28 +0000 (03:33 -0500)
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings.h
src/theory/strings/theory_strings_preprocess.cpp
src/theory/strings/theory_strings_rewriter.cpp
src/theory/strings/theory_strings_rewriter.h
src/util/regexp.h

index a0058954d01315cd7f5c674c2c32d72009750500..f01da389b945a213dc3e5ee4c6353cbf42d5d90c 100644 (file)
@@ -49,6 +49,7 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
     d_ind_map_lemma(c),
        //d_lit_to_decide_index( c, 0 ),
        //d_lit_to_decide( c ),
+       d_reg_exp_mem( c ),
        d_lit_to_unroll( c )
 {
     // The kinds we are treating as function application in congruence
@@ -374,6 +375,11 @@ void TheoryStrings::check(Effort e) {
     } else {
       d_equalityEngine.assertPredicate(atom, polarity, fact);
     }
+       if ( atom.getKind() == kind::STRING_IN_REGEXP ) {
+               if(fact[0].getKind() != kind::CONST_STRING) {
+                       d_reg_exp_mem.push_back( assertion );
+               }
+       }
 #ifdef STR_UNROLL_INDUCTION
        //check if it is a literal to unroll?
        if( d_lit_to_unroll.find( atom )!=d_lit_to_unroll.end() ){
@@ -400,6 +406,10 @@ void TheoryStrings::check(Effort e) {
                                  if( !d_conflict && !addedLemma ){
                                        addedLemma = checkInductiveEquations();
                                        Trace("strings-process") << "Done check inductive equations, addedLemma = " << addedLemma << ", d_conflict = " << d_conflict << std::endl;
+                                         if( !d_conflict && !addedLemma ){
+                                               addedLemma = checkMemberships();
+                                               Trace("strings-process") << "Done check membership constraints, addedLemma = " << addedLemma << ", d_conflict = " << d_conflict << std::endl;
+                                         }
                                  }
                          }
                  }
@@ -1851,14 +1861,14 @@ bool TheoryStrings::checkInductiveEquations() {
                                ++i2;
                                ++ie;
                                //++il;
-                               if( !d_equalityEngine.hasTerm( d_emptyString ) || !d_equalityEngine.areEqual( y, d_emptyString ) || !d_equalityEngine.areEqual( x, d_emptyString ) ){
+                               if( !areEqual( y, d_emptyString ) || !areEqual( x, d_emptyString ) ){
                                        hasEq = true;
                                }
                        }
                }
        }
     if( hasEq ){
-               Trace("strings-ind") << "It is incomplete." << std::endl;
+               Trace("strings-ind") << "Induction is incomplete." << std::endl;
         d_out->setIncomplete();
     }else{
                Trace("strings-ind") << "We can answer SAT." << std::endl;
@@ -1956,6 +1966,36 @@ void TheoryStrings::printConcat( std::vector< Node >& n, const char * c ) {
        }
 }
 
+
+bool TheoryStrings::checkMemberships() {
+       for( unsigned i=0; i<d_reg_exp_mem.size(); i++ ){
+               //check regular expression membership
+               Node assertion = d_reg_exp_mem[i];
+               Node atom = assertion.getKind()==kind::NOT ? assertion[0] : assertion;
+               bool polarity = assertion.getKind()!=kind::NOT;
+               bool is_unk = false;
+               if( polarity ){
+                       Assert( atom.getKind()==kind::STRING_IN_REGEXP );
+                       Node x = atom[0];
+                       Node r = atom[1];
+                       //TODO
+                       Assert( r.getKind()==kind::REGEXP_STAR );
+                       if( !areEqual( x, d_emptyString ) ){
+                               //add lemma?
+                               is_unk = true;
+                       }
+               }else{
+                       //TODO: negative membership
+                       is_unk = true;
+               }
+               if( is_unk ){
+                       Trace("strings-regex") << "RegEx is incomplete due to " << assertion << "." << std::endl;
+                       //d_out->setIncomplete();
+               }
+       }
+       return false;
+}
+
 /*
 Node TheoryStrings::getNextDecisionRequest() {
        if( d_lit_to_decide_index.get()<d_lit_to_decide.size() ){
index 16c3d48764e11f5040aeda823e0864105d639b96..c906d7f917f874532698fcd581c0b9223756f61a 100644 (file)
@@ -142,10 +142,13 @@ class TheoryStrings : public Theory {
   bool isNormalFormPair( Node n1, Node n2 );
   bool isNormalFormPair2( Node n1, Node n2 );
 
+  //inductive equations
   NodeListMap d_ind_map1;
   NodeListMap d_ind_map2;
   NodeListMap d_ind_map_exp;
   NodeListMap d_ind_map_lemma;
+  //regular expression memberships
+  NodeList d_reg_exp_mem;
   bool addInductiveEquation( Node x, Node y, Node z, Node exp, const char * c );
 
   //for unrolling inductive equations
@@ -203,6 +206,7 @@ class TheoryStrings : public Theory {
        bool checkLengthsEqc();
     bool checkCardinality();
     bool checkInductiveEquations();
+       bool checkMemberships();
        int gcd(int a, int b);
   public:
   void preRegisterTerm(TNode n);
index 64425ea03567d139487e4b0a7c398948cc7e3f89..472a6d89cbdd04408b704369ee2bdc18716ecf2b 100644 (file)
@@ -58,6 +58,7 @@ void StringsPreprocess::simplifyRegExp( Node s, Node r, std::vector< Node > &ret
                                simplifyRegExp( s, r[i], ret );
                        }
                        break;
+               /*
                case kind::REGEXP_OPT:
                {
                        Node eq_empty = NodeManager::currentNM()->mkNode( kind::EQUAL, s, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) );
@@ -67,10 +68,13 @@ void StringsPreprocess::simplifyRegExp( Node s, Node r, std::vector< Node > &ret
                        ret.push_back( NodeManager::currentNM()->mkNode( kind::OR, eq_empty, nrr) );
                }
                        break;
-               //TODO:
+               //case kind::REGEXP_PLUS:
+               */
                case kind::REGEXP_STAR:
-               case kind::REGEXP_PLUS:
-                       Assert( false );
+               {
+                       Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
+                       ret.push_back( eq );
+               }
                        break;
                default:
                        //TODO: special sym: sigma, none, all
@@ -95,11 +99,12 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
       return (*i).second.isNull() ? t : (*i).second;
     }
 
+       /*
        if( t.getKind() == kind::STRING_IN_REGEXP ){
                // t0 in t1
                Node t0 = simplify( t[0], new_nodes );
                
-               if(!checkStarPlus( t[1] )) {
+               //if(!checkStarPlus( t[1] )) {
                        //rewrite it
                        std::vector< Node > ret;
                        simplifyRegExp( t0, t[1], ret );
@@ -107,11 +112,13 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                        Node n = ret.size() == 1 ? ret[0] : NodeManager::currentNM()->mkNode( kind::AND, ret );
                        d_cache[t] = (t == n) ? Node::null() : n;
                        return n;
-               } else {
+               //} else {
                        // TODO
-                       return (t0 == t[0]) ? t : NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, t0, t[1] );
-               }
-       }else if( t.getKind() == kind::STRING_SUBSTR ){
+               //      return (t0 == t[0]) ? t : NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, t0, t[1] );
+               //}
+       }else 
+       */
+       if( t.getKind() == kind::STRING_SUBSTR ){
                Node sk1 = NodeManager::currentNM()->mkSkolem( "st1sym_$$", t.getType(), "created for substr" );
                Node sk2 = NodeManager::currentNM()->mkSkolem( "st2sym_$$", t.getType(), "created for substr" );
                Node sk3 = NodeManager::currentNM()->mkSkolem( "st3sym_$$", t.getType(), "created for substr" );
index 3a7ad1ee0cf0b0d5516c3c8925e2fafffc403998..29e35981dfa609fe307c2bd07ecf95fc5d74d0c2 100644 (file)
@@ -53,7 +53,7 @@ Node TheoryStringsRewriter::rewriteConcatString(TNode node) {
         }
         if(!tmpNode.isConst()) {
             if(preNode != Node::null()) {
-                if(preNode.getKind() == kind::CONST_STRING && preNode.getConst<String>().toString()=="" ) {
+                if(preNode.getKind() == kind::CONST_STRING && preNode.getConst<String>().isEmptyString() ) {
                     preNode = Node::null();
                 } else {
                     node_vec.push_back( preNode );
@@ -81,6 +81,179 @@ Node TheoryStringsRewriter::rewriteConcatString(TNode node) {
     return retNode;
 }
 
+void TheoryStringsRewriter::simplifyRegExp( Node s, Node r, std::vector< Node > &ret ) {
+       int k = r.getKind();
+       switch( k ) {
+               case kind::STRING_TO_REGEXP:
+               {
+                       Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] );
+                       ret.push_back( eq );
+               }
+                       break;
+               case kind::REGEXP_CONCAT:
+               {
+                       std::vector< Node > cc;
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {
+                               Node sk = NodeManager::currentNM()->mkSkolem( "recsym_$$", s.getType(), "created for regular expression concat" );
+                               simplifyRegExp( sk, r[i], ret );
+                               cc.push_back( sk );
+                       }
+                       Node cc_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, 
+                                               NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, cc ) );
+                       ret.push_back( cc_eq );
+               }
+                       break;
+               case kind::REGEXP_OR:
+               {
+                       std::vector< Node > c_or;
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {
+                               simplifyRegExp( s, r[i], c_or );
+                       }
+                       Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or );
+                       ret.push_back( eq );
+               }
+                       break;
+               case kind::REGEXP_INTER:
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {
+                               simplifyRegExp( s, r[i], ret );
+                       }
+                       break;
+               case kind::REGEXP_STAR:
+               {
+                       Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );
+                       ret.push_back( eq );
+               }
+                       break;
+               default:
+                       //TODO: special sym: sigma, none, all
+                       Trace("strings-prerewrite") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl;
+                       Assert( false );
+                       break;
+       }
+}
+bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned int index_start, Node r ) {
+       Assert( index_start <= s.size() );
+       int k = r.getKind();
+       switch( k ) {
+               case kind::STRING_TO_REGEXP:
+               {
+                       CVC4::String s2 = s.substr( index_start, s.size() - index_start );
+                       CVC4::String t = r[0].getConst<String>();
+                       return s2 == r[0].getConst<String>();
+               }
+               case kind::REGEXP_CONCAT:
+               {
+                       if( s.size() != index_start ) {
+                               std::vector<int> vec_k( r.getNumChildren(), -1 );
+                               int start = 0;
+                               int left = (int) s.size();
+                               int i=0;
+                               while( i<(int) r.getNumChildren() ) {
+                                       bool flag = true;
+                                       if( i == (int) r.getNumChildren() - 1 ) {
+                                               if( testConstStringInRegExp( s, index_start + start, r[i] ) ) {
+                                                       return true;
+                                               }
+                                       } else if( i == -1 ) {
+                                               return false;
+                                       } else {
+                                               for(vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) {
+                                                       CVC4::String t = s.substr(index_start + start, vec_k[i]);
+                                                       if( testConstStringInRegExp( t, 0, r[i] ) ) {
+                                                               start += vec_k[i]; left -= vec_k[i]; flag = false;
+                                                               ++i; vec_k[i] = -1;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+
+                                       if(flag) {
+                                               --i;
+                                               if(i >= 0) {
+                                                       start -= vec_k[i]; left += vec_k[i];
+                                               }
+                                       }
+                               }
+                               return false;
+                       } else {
+                               return true;
+                       }
+               }
+               case kind::REGEXP_OR:
+               {
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {
+                               if(testConstStringInRegExp( s, index_start, r[i] )) return true;
+                       }
+                       return false;
+               }
+               case kind::REGEXP_INTER:
+               {
+                       for(unsigned i=0; i<r.getNumChildren(); ++i) {
+                               if(!testConstStringInRegExp( s, index_start, r[i] )) return false;
+                       }
+                       return true;
+               }
+               case kind::REGEXP_STAR:
+               {
+                       if( s.size() != index_start ) {
+                               for(unsigned int k=s.size() - index_start; k>0; --k) {
+                                       CVC4::String t = s.substr(index_start, k);
+                                       if( testConstStringInRegExp( t, 0, r[0] ) ) {
+                                               if( index_start + k == s.size() || testConstStringInRegExp( s, index_start + k, r[0] ) ) {
+                                                       return true;
+                                               }
+                                       }
+                               }
+                               return false;
+                       } else {
+                               return true;
+                       }
+               }
+               default:
+                       //TODO: special sym: sigma, none, all
+                       Trace("strings-postrewrite") << "Unsupported term: " << r << " in testConstStringInRegExp." << std::endl;
+                       Assert( false );
+                       return false;
+       }
+}
+Node TheoryStringsRewriter::rewriteMembership(TNode node) {
+       Node retNode;
+
+    //Trace("strings-postrewrite") << "Strings::rewriteMembership start " << node << std::endl;
+       
+       Node x;
+       if(node[0].getKind() == kind::STRING_CONCAT) {
+               x = rewriteConcatString(node[0]);
+       } else {
+               x = node[0];
+       }
+
+       if( x.getKind() == kind::CONST_STRING ) {
+               //test whether x in node[1]
+               CVC4::String s = x.getConst<String>();
+               retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) );
+       } else {
+               if( node[1].getKind() == kind::REGEXP_STAR ) {
+                       if( x == node[0] ) {
+                               retNode = node;
+                       } else {
+                               retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] );
+                       }
+               } else {
+                       std::vector<Node> node_vec;
+                       simplifyRegExp( x, node[1], node_vec );
+
+                       if(node_vec.size() > 1) {
+                               retNode = NodeManager::currentNM()->mkNode(kind::AND, node_vec);
+                       } else {
+                               retNode = node_vec[0];
+                       }
+               }
+       }
+    //Trace("strings-prerewrite") << "Strings::rewriteMembership end " << retNode << std::endl;
+    return retNode;
+}
+
 RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
   Trace("strings-postrewrite") << "Strings::postRewrite start " << node << std::endl;
   Node retNode = node;
@@ -106,17 +279,6 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
         } else if( leftNode != node[0] || rightNode != node[1]) {
             retNode = NodeManager::currentNM()->mkNode(kind::EQUAL, leftNode, rightNode);
         }
-    } else if(node.getKind() == kind::STRING_IN_REGEXP) {
-        Node leftNode  = node[0];
-        if(node[0].getKind() == kind::STRING_CONCAT) {
-            leftNode = rewriteConcatString(node[0]);
-        }
-        // TODO: right part
-        Node rightNode = node[1];
-        // merge
-        if( leftNode != node[0] || rightNode != node[1]) {
-            retNode = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, leftNode, rightNode);
-        }
     } else if(node.getKind() == kind::STRING_LENGTH) {
         if(node[0].isConst()) {
             retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( node[0].getConst<String>().size() ) );
@@ -150,9 +312,11 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
                } else {
                        //handled by preprocess
                }
+       } else if(node.getKind() == kind::STRING_IN_REGEXP) {
+               retNode = rewriteMembership(node);
        }
 
-  Trace("strings-postrewrite") << "Strings::postRewrite returning " << node << std::endl;
+  Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl;
   return RewriteResponse(REWRITE_DONE, retNode);
 }
 
@@ -162,7 +326,14 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) {
 
     if(node.getKind() == kind::STRING_CONCAT) {
         retNode = rewriteConcatString(node);
-    }
+    } else if(node.getKind() == kind::REGEXP_PLUS) {
+               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, node[0],
+                                       NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, node[0]));
+       } else if(node.getKind() == kind::REGEXP_OPT) {
+               retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_OR,
+                                       NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ),
+                                       node[0]);
+       }
 
     Trace("strings-prerewrite") << "Strings::preRewrite returning " << retNode << std::endl;
     return RewriteResponse(REWRITE_DONE, retNode);
index 3bccd91de2684fac8677b7ac3cd2fac1ffff8606..ecc863a756e3d5a38537e0f2f7031c84996f4048 100644 (file)
@@ -29,9 +29,12 @@ namespace theory {
 namespace strings {
 
 class TheoryStringsRewriter {
-
 public:
+  static bool testConstStringInRegExp( CVC4::String &s, unsigned int index_start, Node r );
+  static void simplifyRegExp( Node s, Node r, std::vector< Node > &ret );
+
   static Node rewriteConcatString(TNode node);
+  static Node rewriteMembership(TNode node);
 
   static RewriteResponse postRewrite(TNode node);
 
index d4ad38b0ffc77a9660bf4c6f6f17f98d94f879b9..31a39e6f9f4e25616322398250d6dfaa03d6f988 100644 (file)
@@ -22,7 +22,7 @@
 
 #include <iostream>
 #include <string>
-//#include "util/exception.h"
+//#include "util/cvc4_assert.h"
 //#include "util/integer.h"
 #include "util/hash.h"
 
@@ -126,6 +126,15 @@ public:
       return true;
   }
 
+
+  bool isEmptyString() const {
+         return ( d_str.size() == 0 );
+  }
+
+  unsigned int operator[] (const unsigned int i) const {
+       //Assert( i < d_str.size() && i >= 0);
+    return d_str[i];
+  }
   /*
    * Convenience functions
    */