From 9b1c6e2f22f3699f582a52877bafb1a172d8bc3f Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Fri, 21 Feb 2014 15:19:05 -0600 Subject: [PATCH] reorganize substr, fix some potential bugs, adds cache for preprocessing --- src/theory/strings/regexp_operation.cpp | 73 +++++++++++++++ src/theory/strings/regexp_operation.h | 6 +- src/theory/strings/theory_strings.cpp | 88 +++++++------------ src/theory/strings/theory_strings.h | 2 - .../strings/theory_strings_preprocess.cpp | 13 ++- 5 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index e7079081b..3b81ae4a5 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -645,6 +645,79 @@ Node RegExpOpr::complement( Node r ) { return retNode; } +//simplify +void RegExpOpr::simplify(Node t, std::vector< Node > &new_nodes) { + Assert(t.getKind() == kind::STRING_IN_REGEXP); + Node str = Rewriter::rewrite(t[0]); + Node re = Rewriter::rewrite(t[1]); + simplifyRegExp( str, re, new_nodes ); +} +void RegExpOpr::simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ) { + std::pair < Node, Node > p(s, r); + std::map < std::pair< Node, Node >, Node >::const_iterator itr = d_simpl_cache.find(p); + if(itr != d_simpl_cache.end()) { + new_nodes.push_back( itr->second ); + return; + } else { + int k = r.getKind(); + switch( k ) { + case kind::STRING_TO_REGEXP: + { + Node eq = NodeManager::currentNM()->mkNode( kind::EQUAL, s, r[0] ); + new_nodes.push_back( eq ); + d_simpl_cache[p] = eq; + } + break; + case kind::REGEXP_CONCAT: + { + std::vector< Node > cc; + for(unsigned i=0; imkSkolem( "rcon_$$", s.getType(), "created for regular expression concat" ); + simplifyRegExp( sk, r[i], new_nodes ); + cc.push_back( sk ); + } + } + Node cc_eq = s.eqNode( NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, cc) ); + new_nodes.push_back( cc_eq ); + d_simpl_cache[p] = cc_eq; + } + break; + case kind::REGEXP_OR: + { + std::vector< Node > c_or; + for(unsigned i=0; imkNode( kind::OR, c_or ); + new_nodes.push_back( eq ); + d_simpl_cache[p] = eq; + } + break; + case kind::REGEXP_INTER: + for(unsigned i=0; imkNode( kind::STRING_IN_REGEXP, s, r ); + new_nodes.push_back( eq ); + d_simpl_cache[p] = eq; + } + break; + default: + //TODO: special sym: sigma, none, all + Trace("strings-preprocess") << "Unsupported term: " << r << " in simplifyRegExp." << std::endl; + Assert( false ); + break; + } + } +} + +//printing std::string RegExpOpr::niceChar( Node r ) { if(r.isConst()) { std::string s = r.getConst().toString() ; diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index 9ed4be0c3..d7dde018a 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -37,20 +37,20 @@ private: Node d_sigma; Node d_sigma_star; + std::map< std::pair< Node, Node >, Node > d_simpl_cache; std::map< Node, Node > d_compl_cache; std::map< Node, int > d_delta_cache; std::map< PairDvStr, Node > d_dv_cache; std::map< Node, bool > d_cstre_cache; //bool checkStarPlus( Node t ); - //void simplifyRegExp( Node s, Node r, std::vector< Node > &ret, std::vector< Node > &nn ); - //Node simplify( Node t, std::vector< Node > &new_nodes ); + void simplifyRegExp( Node s, Node r, std::vector< Node > &new_nodes ); std::string niceChar( Node r ); int gcd ( int a, int b ); public: RegExpOpr(); bool checkConstRegExp( Node r ); - //void simplify(std::vector< Node > &vec_node); + void simplify(Node t, std::vector< Node > &new_nodes); Node mkAllExceptOne( char c ); Node complement( Node r ); int delta( Node r ); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 3c9f5902e..ba36aa371 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -46,7 +46,6 @@ TheoryStrings::TheoryStrings(context::Context* c, context::UserContext* u, Outpu //d_var_lmax( c ), d_str_pos_ctn( c ), d_str_neg_ctn( c ), - d_int_to_str( c ), d_reg_exp_mem( c ), d_curr_cardinality( c, 0 ) { @@ -286,6 +285,7 @@ void TheoryStrings::collectModelInfo( TheoryModel* m, bool fullModel ) { } } ////step 2 : assign arbitrary values for unknown lengths? + // confirmed by calculus invariant, see paper //for( unsigned i=0; imkNode( kind::GEQ, + NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n[0] ), + NodeManager::currentNM()->mkNode( kind::PLUS, n[1], n[2] ) ); + Node t1geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[1], d_zero); + Node t2geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[2], d_zero); + Node cond = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, lenxgti, t1geq0, t2geq0 )); + Node sk1 = NodeManager::currentNM()->mkSkolem( "ss1_$$", NodeManager::currentNM()->stringType(), "created for charat/substr" ); + Node sk3 = NodeManager::currentNM()->mkSkolem( "ss3_$$", NodeManager::currentNM()->stringType(), "created for charat/substr" ); + d_statistics.d_new_skolems += 2; + Node x_eq_123 = n[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, n, sk3 ) ); + Node len_sk1_eq_i = n[1].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) ); + Node lenc = n[2].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n ) ); + Node lemma = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::ITE, cond, + NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i, lenc ), + n.eqNode(NodeManager::currentNM()->mkConst( ::CVC4::String("") )) ) ); + Trace("strings-lemma") << "Strings::Lemma SUBSTR : " << lemma << std::endl; + d_out->lemma(lemma); + } + //d_equalityEngine.addTerm(n); default: - if(n.getType().isString() ) { + if(n.getType().isString() && n.getKind()!=kind::STRING_CONCAT && n.getKind()!=kind::CONST_STRING ) { if( std::find( d_length_intro_vars.begin(), d_length_intro_vars.end(), n )==d_length_intro_vars.end() ) { Node n_len = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n); Node n_len_eq_z = n_len.eqNode( d_zero ); @@ -1762,8 +1776,7 @@ bool TheoryStrings::checkSimple() { //if n is concat, and //if n has not instantiatied the concat..length axiom //then, add lemma - if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT - || n.getKind() == kind::STRING_SUBSTR_TOTAL ) { + if( n.getKind() == kind::CONST_STRING || n.getKind() == kind::STRING_CONCAT ) { if( d_length_nodes.find(n)==d_length_nodes.end() ) { if( d_length_inst.find(n)==d_length_inst.end() ) { //Node nr = d_equalityEngine.getRepresentative( n ); @@ -1790,25 +1803,6 @@ bool TheoryStrings::checkSimple() { } else if( n.getKind() == kind::CONST_STRING ) { //add lemma lsum = NodeManager::currentNM()->mkConst( ::CVC4::Rational( n.getConst().size() ) ); - } else if( n.getKind() == kind::STRING_SUBSTR_TOTAL ) { - Node lenxgti = NodeManager::currentNM()->mkNode( kind::GEQ, - NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n[0] ), - NodeManager::currentNM()->mkNode( kind::PLUS, n[1], n[2] ) ); - Node t1geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[1], d_zero); - Node t2geq0 = NodeManager::currentNM()->mkNode(kind::GEQ, n[2], d_zero); - Node cond = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::AND, lenxgti, t1geq0, t2geq0 )); - lsum = NodeManager::currentNM()->mkNode( kind::ITE, cond, n[2], d_zero ); - /* - Node sk1 = NodeManager::currentNM()->mkSkolem( "st1_$$", NodeManager::currentNM()->stringType(), "created for substr" ); - Node sk3 = NodeManager::currentNM()->mkSkolem( "st3_$$", NodeManager::currentNM()->stringType(), "created for substr" ); - d_statistics.d_new_skolems += 2; - Node x_eq_123 = n[0].eqNode(NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk, sk3 )); - Node len_sk1_eq_i = NodeManager::currentNM()->mkNode( kind::EQUAL, n[1], - NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) ); - Node lemma = NodeManager::currentNM()->mkNode( kind::AND, x_eq_123, len_sk1_eq_i ); - lemma = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::IMPLIES, cond, lemma ) ); - Trace("strings-lemma") << "Strings::Lemma SUBSTR : " << lemma << std::endl; - d_out->lemma(lemma);*/ } Node ceq = NodeManager::currentNM()->mkNode( kind::EQUAL, skl, lsum ); ceq = Rewriter::rewrite(ceq); @@ -2240,21 +2234,14 @@ bool TheoryStrings::unrollStar( Node atom ) { cc.push_back(unr0); } else { std::vector< Node > urc; - urc.push_back( unr1 ); - - StringsPreprocess spp; - spp.simplify( urc ); - for( unsigned i=1; imkNode( kind::STRING_IN_REGEXP, sk_xp, r ); - unr3 = Rewriter::rewrite( unr3 ); d_reg_exp_unroll_depth[unr3] = depth + 1; if( d_reg_exp_ant.find( atom )!=d_reg_exp_ant.end() ){ d_reg_exp_ant[unr3] = d_reg_exp_ant[atom]; @@ -2575,15 +2562,6 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { // send lemma if(flag) { if(x.isConst()) { - /* - left = d_emptyString; - if(d_regexp_opr.delta(dc) == 1) { - //TODO yes possible? - } else if(d_regexp_opr.delta(dc) == 0) { - //TODO possible? - } else { - // TODO conflict possible? - }*/ Assert(false, "Impossible: TheoryStrings::splitRegExp: const string in const regular expression."); return false; } else { @@ -2597,16 +2575,12 @@ bool TheoryStrings::splitRegExp( Node x, Node r, Node ant ) { conc = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, left, dc ); std::vector< Node > sdc; - sdc.push_back( conc ); - - StringsPreprocess spp; - spp.simplify( sdc ); - for( unsigned i=1; imkNode(kind::AND, conc)); } - - conc = sdc[0]; } } sendLemma(ant, conc, "RegExp-CST-SP"); diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 87cc3330a..ea92865b2 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -290,12 +290,10 @@ private: // Special String Functions NodeList d_str_pos_ctn; NodeList d_str_neg_ctn; - NodeList d_int_to_str; std::map< Node, bool > d_str_ctn_eqlen; std::map< Node, bool > d_str_neg_ctn_ulen; std::map< Node, bool > d_str_pos_ctn_rewritten; std::map< Node, bool > d_str_neg_ctn_rewritten; - std::map< std::pair , bool > d_int_to_str_rewritten; // Regular Expression private: diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index ca4a4e75e..964f5d8e1 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -227,7 +227,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = skk; retNode = skk; } else { - throw LogicException("string indexof not supported in this release"); + throw LogicException("string indexof not supported in default mode, try --string-exp"); } } else if( t.getKind() == kind::STRING_ITOS ) { if(options::stringExp()) { @@ -333,7 +333,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = t; retNode = t; } else { - throw LogicException("string int.to.str not supported in this release"); + throw LogicException("string int.to.str not supported in default mode, try --string-exp"); } } else if( t.getKind() == kind::STRING_STOI ) { if(options::stringExp()) { @@ -463,7 +463,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = t; retNode = t; } else { - throw LogicException("string int.to.str not supported in this release"); + throw LogicException("string int.to.str not supported in default mode, try --string-exp"); } } else if( t.getKind() == kind::STRING_STRREPL ) { if(options::stringExp()) { @@ -485,7 +485,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { d_cache[t] = skw; retNode = skw; } else { - throw LogicException("string replace not supported in this release"); + throw LogicException("string replace not supported in default mode, try --string-exp"); } } else{ d_cache[t] = Node::null(); @@ -525,6 +525,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { } Node StringsPreprocess::decompose(Node t, std::vector< Node > & new_nodes) { + std::hash_map::const_iterator i = d_cache.find(t); + if(i != d_cache.end()) { + return (*i).second.isNull() ? t : (*i).second; + } + unsigned num = t.getNumChildren(); if(num == 0) { return simplify(t, new_nodes); -- 2.30.2