reorganize substr, fix some potential bugs, adds cache for preprocessing
authorTianyi Liang <tianyi-liang@uiowa.edu>
Fri, 21 Feb 2014 21:19:05 +0000 (15:19 -0600)
committerTianyi Liang <tianyi-liang@uiowa.edu>
Fri, 21 Feb 2014 21:20:11 +0000 (15:20 -0600)
src/theory/strings/regexp_operation.cpp
src/theory/strings/regexp_operation.h
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings.h
src/theory/strings/theory_strings_preprocess.cpp

index e7079081bf45a1e95f47070c6148458c1947d5e2..3b81ae4a5684ae7b93018669d93420e810e574e4 100644 (file)
@@ -645,6 +645,79 @@ Node RegExpOpr::complement( Node r ) {
        return retNode;\r
 }\r
 \r
+//simplify\r
+void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes) {\r
+       Assert(t.getKind() == kind::STRING_IN_REGEXP);\r
+       Node str = Rewriter::rewrite(t[0]);\r
+       Node re  = Rewriter::rewrite(t[1]);\r
+       simplifyRegExp( str, re, new_nodes );\r
+}\r
+void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) {\r
+       std::pair < Node, Node > p(s, r);\r
+       std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p);\r
+       if(itr != d_simpl_cache.end()) {\r
+               new_nodes.push_back( itr->second );\r
+               return;\r
+       } else {\r
+               int k = r.getKind();\r
+               switch( k ) {\r
+                       case kind::STRING_TO_REGEXP:\r
+                       {\r
+                               Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] );\r
+                               new_nodes.push_back( eq );\r
+                               d_simpl_cache[p] = eq;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_CONCAT:\r
+                       {\r
+                               std::vector< Node > cc;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       if(r[i].getKind() == kind::STRING_TO_REGEXP) {\r
+                                               cc.push_back( r[i][0] );\r
+                                       } else {\r
+                                               Node sk = NodeManager::currentNM()->mkSkolem( "rcon_$$", s.getType(), "created for regular expression concat" );\r
+                                               simplifyRegExp( sk, r[i], new_nodes );\r
+                                               cc.push_back( sk );\r
+                                       }\r
+                               }\r
+                               Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) );\r
+                               new_nodes.push_back( cc_eq );\r
+                               d_simpl_cache[p] = cc_eq;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_OR:\r
+                       {\r
+                               std::vector< Node > c_or;\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       simplifyRegExp( s, r[i], c_or );\r
+                               }\r
+                               Node eq = NodeManager::currentNM()->mkNode( kind::OR, c_or );\r
+                               new_nodes.push_back( eq );\r
+                               d_simpl_cache[p] = eq;\r
+                       }\r
+                               break;\r
+                       case kind::REGEXP_INTER:\r
+                               for(unsigned i=0; i<r.getNumChildren(); ++i) {\r
+                                       simplifyRegExp( s, r[i], new_nodes );\r
+                               }\r
+                               break;\r
+                       case kind::REGEXP_STAR:\r
+                       {\r
+                               Node eq = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, s, r );\r
+                               new_nodes.push_back( eq );\r
+                               d_simpl_cache[p] = eq;\r
+                       }\r
+                               break;\r
+                       default:\r
+                               //TODO: special sym: sigma, none, all\r
+                               Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl;\r
+                               Assert( false );\r
+                               break;\r
+               }\r
+       }\r
+}\r
+\r
+//printing\r
 std::string RegExpOpr::niceChar( Node r ) {\r
        if(r.isConst()) {\r
                std::string s = r.getConst<CVC4::String>().toString() ;\r
index 9ed4be0c3c2e78b6fed0775c7ed5cdb2f4089050..d7dde018a8e760496462f255f704a88f089070ca 100644 (file)
@@ -37,20 +37,20 @@ private:
        Node d_sigma;\r
        Node d_sigma_star;\r
        \r
+       std::map< std::pair< Node, Node >, Node > d_simpl_cache;\r
        std::map< Node, Node > d_compl_cache;\r
        std::map< Node, int > d_delta_cache;\r
        std::map< PairDvStr, Node > d_dv_cache;\r
        std::map< Node, bool > d_cstre_cache;\r
        //bool checkStarPlus( Node t );\r
-       //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn );\r
-       //Node simplify( Node t, std::vector< Node > &new_nodes );\r
+       void simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes );\r
        std::string niceChar( Node r );\r
        int gcd ( int a, int b );\r
 \r
 public:\r
        RegExpOpr();\r
        bool checkConstRegExp( Node r );\r
-    //void simplify(std::vector< Node > &vec_node);\r
+    void simplify(Node t, std::vector< Node > &new_nodes);\r
        Node mkAllExceptOne( char c );\r
        Node complement( Node r );\r
        int delta( Node r );\r
index 3c9f5902eef9063aa597e987dbc936552b1f52cc..ba36aa3718dbdd750535760289f2236a2061b2ea 100644 (file)
@@ -46,7 +46,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu
        //d_var_lmax( c ),
        d_str_pos_ctn( c ),
        d_str_neg_ctn( c ),
-       d_int_to_str( c ),
        d_reg_exp_mem( c ),
        d_curr_cardinality( c, 0 )
 {
@@ -286,6 +285,7 @@ void TheoryStrings::collectModelInfo( TheoryModel* m, bool fullModel ) {
                }
        }
        ////step 2 : assign arbitrary values for unknown lengths?
+       // confirmed by calculus invariant, see paper
        //for( unsigned i=0; i<col.size(); i++ ){
        //      if(
        //}
@@ -404,15 +404,29 @@ void TheoryStrings::preRegisterTerm(TNode n) {
        //do not add trigger here
     //d_equalityEngine.addTriggerPredicate(n);
     break;
-  case kind::CONST_STRING:
-  case kind::STRING_CONCAT:
   case kind::STRING_SUBSTR_TOTAL:
-  case kind::STRING_ITOS:
-  case kind::STRING_STOI:
-       d_equalityEngine.addTerm(n);
-       break;
+       {
+               Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ, 
+                                                       NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n[0] ),
+                                                       NodeManager::currentNM()->mkNode( kind::PLUS, n[1], n[2] ) );
+               Node t1geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[1], d_zero);
+               Node t2geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[2], d_zero);
+               Node cond = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, lenxgti, t1geq0, t2geq0 ));
+               Node sk1 = NodeManager::currentNM()->mkSkolem( "ss1_$$", NodeManager::currentNM()->stringType(), "created for charat/substr" );
+               Node sk3 = NodeManager::currentNM()->mkSkolem( "ss3_$$", NodeManager::currentNM()->stringType(), "created for charat/substr" );
+               d_statistics.d_new_skolems += 2;
+               Node x_eq_123 = n[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, n, sk3 ) );
+               Node len_sk1_eq_i = n[1].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
+               Node lenc = n[2].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n ) );
+               Node lemma = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::ITE, cond, 
+                       NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i, lenc ),
+                       n.eqNode(NodeManager::currentNM()->mkConst( ::CVC4::String("") )) ) );
+               Trace("strings-lemma") << "Strings::Lemma SUBSTR : " << lemma << std::endl;
+               d_out->lemma(lemma);
+       }
+       //d_equalityEngine.addTerm(n);
   default:
-    if(n.getType().isString() ) {
+    if(n.getType().isString() && n.getKind()!=kind::STRING_CONCAT && n.getKind()!=kind::CONST_STRING ) {
          if( std::find( d_length_intro_vars.begin(), d_length_intro_vars.end(), n )==d_length_intro_vars.end() ) {
                  Node n_len = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n);
                  Node n_len_eq_z = n_len.eqNode( d_zero );
@@ -1762,8 +1776,7 @@ bool TheoryStrings::checkSimple() {
                        //if n is concat, and
                        //if n has not instantiatied the concat..length axiom
                        //then, add lemma
-                       if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT 
-                               || n.getKind() == kind::STRING_SUBSTR_TOTAL ) {
+                       if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT ) {
                                if( d_length_nodes.find(n)==d_length_nodes.end() ) {
                                        if( d_length_inst.find(n)==d_length_inst.end() ) {
                                                //Node nr = d_equalityEngine.getRepresentative( n );
@@ -1790,25 +1803,6 @@ bool TheoryStrings::checkSimple() {
                                                        } else if( n.getKind() == kind::CONST_STRING ) {
                                                                //add lemma
                                                                lsum = NodeManager::currentNM()->mkConst( ::CVC4::Rational( n.getConst<String>().size() ) );
-                                                       } else if( n.getKind() == kind::STRING_SUBSTR_TOTAL ) {
-                                                               Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ,
-                                                                                                       NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n[0] ),
-                                                                                                       NodeManager::currentNM()->mkNode( kind::PLUS, n[1], n[2] ) );
-                                                               Node t1geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[1], d_zero);
-                                                               Node t2geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[2], d_zero);
-                                                               Node cond = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, lenxgti, t1geq0, t2geq0 ));
-                                                               lsum = NodeManager::currentNM()->mkNode( kind::ITE, cond, n[2], d_zero );
-                                                               /*
-                                                               Node sk1 = NodeManager::currentNM()->mkSkolem( "st1_$$", NodeManager::currentNM()->stringType(), "created for substr" );
-                                                               Node sk3 = NodeManager::currentNM()->mkSkolem( "st3_$$", NodeManager::currentNM()->stringType(), "created for substr" );
-                                                               d_statistics.d_new_skolems += 2;
-                                                               Node x_eq_123 = n[0].eqNode(NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk, sk3 ));
-                                                               Node len_sk1_eq_i = NodeManager::currentNM()->mkNode( kind::EQUAL, n[1],
-                                                                                                               NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) );
-                                                               Node lemma = NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i );
-                                                               lemma = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::IMPLIES, cond, lemma ) );
-                                                               Trace("strings-lemma") << "Strings::Lemma SUBSTR : " << lemma << std::endl;
-                                                               d_out->lemma(lemma);*/
                                                        }
                                                        Node ceq = NodeManager::currentNM()->mkNode( kind::EQUAL, skl, lsum );
                                                        ceq = Rewriter::rewrite(ceq);
@@ -2240,21 +2234,14 @@ bool TheoryStrings::unrollStar( Node atom ) {
                        cc.push_back(unr0);
                } else {
                        std::vector< Node > urc;
-                       urc.push_back( unr1 );
-
-                       StringsPreprocess spp;
-                       spp.simplify( urc );
-                       for( unsigned i=1; i<urc.size(); i++ ){
-                               //add the others as lemmas
-                               sendLemma( d_true, urc[i], "RegExp Definition");
-                       }
+                       d_regexp_opr.simplify(unr1, urc);
                        Node unr0 = sk_s.eqNode( d_emptyString ).negate();
-                       cc.push_back(unr0);     cc.push_back(urc[0]);
+                       cc.push_back(unr0);     //cc.push_back(urc1);
+                       cc.insert(cc.end(), urc.begin(), urc.end());
                }
 
                Node unr2 = x.eqNode( mkConcat( sk_s, sk_xp ) );
                Node unr3 = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, sk_xp, r );
-               unr3 = Rewriter::rewrite( unr3 );
                d_reg_exp_unroll_depth[unr3] = depth + 1;
                if( d_reg_exp_ant.find( atom )!=d_reg_exp_ant.end() ){
                        d_reg_exp_ant[unr3] = d_reg_exp_ant[atom];
@@ -2575,15 +2562,6 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) {
                // send lemma
                if(flag) {
                        if(x.isConst()) {
-                               /*
-                               left = d_emptyString;
-                               if(d_regexp_opr.delta(dc) == 1) {
-                                       //TODO yes possible?
-                               } else if(d_regexp_opr.delta(dc) == 0) {
-                                       //TODO possible?
-                               } else {
-                                       // TODO conflict possible?
-                               }*/
                                Assert(false, "Impossible: TheoryStrings::splitRegExp: const string in const regular expression.");
                                return false;
                        } else {
@@ -2597,16 +2575,12 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) {
                                conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, left, dc );
 
                                std::vector< Node > sdc;
-                               sdc.push_back( conc );
-
-                               StringsPreprocess spp;
-                               spp.simplify( sdc );
-                               for( unsigned i=1; i<sdc.size(); i++ ){
-                                       //add the others as lemmas
-                                       sendLemma( d_true, sdc[i], "RegExp-DEF");
+                               d_regexp_opr.simplify(conc, sdc);
+                               if(sdc.size() == 1) {
+                                       conc = sdc[0];
+                               } else {
+                                       conc = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, conc));
                                }
-
-                               conc = sdc[0];
                        }
                }
                sendLemma(ant, conc, "RegExp-CST-SP");
index 87cc3330a1104e95ecd873913c0463372b791ff3..ea92865b23ee910c536d458ef90305346577e094 100644 (file)
@@ -290,12 +290,10 @@ private:
        // Special String Functions
        NodeList d_str_pos_ctn;
        NodeList d_str_neg_ctn;
-       NodeList d_int_to_str;
        std::map< Node, bool > d_str_ctn_eqlen;
        std::map< Node, bool > d_str_neg_ctn_ulen;
        std::map< Node, bool > d_str_pos_ctn_rewritten;
        std::map< Node, bool > d_str_neg_ctn_rewritten;
-       std::map< std::pair <Node, int>, bool > d_int_to_str_rewritten;
 
        // Regular Expression
 private:
index ca4a4e75e1799272d0867915ee795ad59f19132c..964f5d8e1df3e5e2531255ab9d67a22925c17cf7 100644 (file)
@@ -227,7 +227,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                        d_cache[t] = skk;
                        retNode = skk;
                } else {
-                       throw LogicException("string indexof not supported in this release");
+                       throw LogicException("string indexof not supported in default mode, try --string-exp");
                }
        } else if( t.getKind() == kind::STRING_ITOS ) {
                if(options::stringExp()) {
@@ -333,7 +333,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                        d_cache[t] = t;
                        retNode = t;
                } else {
-                       throw LogicException("string int.to.str not supported in this release");
+                       throw LogicException("string int.to.str not supported in default mode, try --string-exp");
                }
        } else if( t.getKind() == kind::STRING_STOI ) {
                if(options::stringExp()) {
@@ -463,7 +463,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                        d_cache[t] = t;
                        retNode = t;
                } else {
-                       throw LogicException("string int.to.str not supported in this release");
+                       throw LogicException("string int.to.str not supported in default mode, try --string-exp");
                }
        } else if( t.getKind() == kind::STRING_STRREPL ) {
                if(options::stringExp()) {
@@ -485,7 +485,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
                        d_cache[t] = skw;
                        retNode = skw;
                } else {
-                       throw LogicException("string replace not supported in this release");
+                       throw LogicException("string replace not supported in default mode, try --string-exp");
                }
        } else{
                d_cache[t] = Node::null();
@@ -525,6 +525,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
 }
 
 Node StringsPreprocess::decompose(Node t, std::vector< Node > & new_nodes) {
+    std::hash_map<TNode, Node, TNodeHashFunction>::const_iterator i = d_cache.find(t);
+    if(i != d_cache.end()) {
+      return (*i).second.isNull() ? t : (*i).second;
+    }
+
        unsigned num = t.getNumChildren();
        if(num == 0) {
                return simplify(t, new_nodes);