From: Andrew Reynolds Date: Fri, 14 Sep 2018 19:57:35 +0000 (-0500) Subject: Add Skolem cache for strings, refactor length registration (#2457) X-Git-Tag: cvc5-1.0.0~4649 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2111c86973b8a80e20a3fdf3cbd0b2ff0dc7010;p=cvc5.git Add Skolem cache for strings, refactor length registration (#2457) This PR is in preparation for doing more aggressive caching of skolems (for example, in the strings preprocessor). It refactors sendLengthLemma -> registerLength, which unifies how length lemmas for skolems and other string terms are handled. --- diff --git a/src/Makefile.am b/src/Makefile.am index a8edbf1fd..e2109cf1e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -602,6 +602,8 @@ libcvc4_la_SOURCES = \ theory/strings/regexp_elim.h \ theory/strings/regexp_operation.cpp \ theory/strings/regexp_operation.h \ + theory/strings/skolem_cache.cpp \ + theory/strings/skolem_cache.h \ theory/strings/theory_strings.cpp \ theory/strings/theory_strings.h \ theory/strings/theory_strings_preprocess.cpp \ diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp new file mode 100644 index 000000000..db673dafe --- /dev/null +++ b/src/theory/strings/skolem_cache.cpp @@ -0,0 +1,50 @@ +/********************* */ +/*! \file skolem_cache.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of a cache of skolems for theory of strings. + **/ + +#include "theory/strings/skolem_cache.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +SkolemCache::SkolemCache() {} + +Node SkolemCache::mkSkolemCached(Node a, Node b, SkolemId id, const char* c) +{ + std::map::iterator it = d_skolemCache[a][b].find(id); + if (it == d_skolemCache[a][b].end()) + { + Node sk = mkSkolem(c); + d_skolemCache[a][b][id] = sk; + return sk; + } + return it->second; +} + +Node SkolemCache::mkSkolem(const char* c) +{ + NodeManager* nm = NodeManager::currentNM(); + Node n = nm->mkSkolem(c, nm->stringType(), "string skolem"); + d_allSkolems.insert(n); + return n; +} + +bool SkolemCache::isSkolem(Node n) const +{ + return d_allSkolems.find(n) != d_allSkolems.end(); +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h new file mode 100644 index 000000000..2984ccfe4 --- /dev/null +++ b/src/theory/strings/skolem_cache.h @@ -0,0 +1,103 @@ +/********************* */ +/*! \file skolem_cache.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief A cache of skolems for theory of strings. + **/ + +#include "cvc4_private.h" + +#ifndef __CVC4__THEORY__STRINGS__SKOLEM_CACHE_H +#define __CVC4__THEORY__STRINGS__SKOLEM_CACHE_H + +#include +#include +#include "expr/node.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** + * A cache of all string skolems generated by the TheoryStrings class. This + * cache is used to ensure that duplicate skolems are not generated when + * possible, and helps identify what skolems were allocated in the current run. + */ +class SkolemCache +{ + public: + SkolemCache(); + /** Identifiers for skolem types + * + * The comments below document the properties of each skolem introduced by + * inference in the strings solver, where by skolem we mean the fresh + * string variable that witnesses each of "exists k". + * + * The skolems with _REV suffixes are used for the reverse version of the + * preconditions below, e.g. where we are considering a' ++ a = b' ++ b. + */ + enum SkolemId + { + // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' => + // exists k. a = "cccc" + k + SK_ID_C_SPT, + SK_ID_C_SPT_REV, + // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => + // exists k. a = "c" ++ k + SK_ID_VC_SPT, + SK_ID_VC_SPT_REV, + // a != "" ^ b = "cccccccc" ^ len(a)!=len(b) a ++ a' = b = b' => + // exists k. a = "cccc" ++ k OR ( len(k) > 0 ^ "cccc" = a ++ k ) + SK_ID_VC_BIN_SPT, + SK_ID_VC_BIN_SPT_REV, + // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => + // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k ) + SK_ID_V_SPT, + SK_ID_V_SPT_REV, + // contains( a, b ) => + // exists k_pre, k_post. a = k_pre ++ b ++ k_post + SK_ID_CTN_PRE, + SK_ID_CTN_POST, + // a != "" ^ b = "c" ^ a ++ a' != b ++ b' => + // exists k, k_rem. + // len( k ) = 1 ^ + // ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) ) + SK_ID_DC_SPT, + SK_ID_DC_SPT_REM, + // a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' => + // exists k_x k_y k_z. + // ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0 + // ( a = k_x ++ k_z OR b = k_y ++ k_z ) ) + SK_ID_DEQ_X, + SK_ID_DEQ_Y, + SK_ID_DEQ_Z, + }; + /** + * Returns a skolem of type string that is cached for (a,b,id) and has + * name c. + */ + Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c); + /** Returns a (uncached) skolem of type string with name c */ + Node mkSkolem(const char* c); + /** Returns true if n is a skolem allocated by this class */ + bool isSkolem(Node n) const; + + private: + /** map from node pairs and identifiers to skolems */ + std::map > > d_skolemCache; + /** the set of all skolems we have generated */ + std::unordered_set d_allSkolems; +}; + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* __CVC4__THEORY__STRINGS__SKOLEM_CACHE_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 902ce460e..628ffbba7 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -112,7 +112,6 @@ TheoryStrings::TheoryStrings(context::Context* c, d_pregistered_terms_cache(u), d_registered_terms_cache(u), d_length_lemma_terms_cache(u), - d_skolem_ne_reg_cache(u), d_preproc(u), d_preproc_cache(u), d_extf_infer_cache(c), @@ -443,8 +442,10 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) { Node x = n[0]; Node s = n[1]; //positive contains reduces to a equality - Node sk1 = mkSkolemCached( x, s, sk_id_ctn_pre, "sc1" ); - Node sk2 = mkSkolemCached( x, s, sk_id_ctn_post, "sc2" ); + Node sk1 = d_sk_cache.mkSkolemCached( + x, s, SkolemCache::SK_ID_CTN_PRE, "sc1"); + Node sk2 = d_sk_cache.mkSkolemCached( + x, s, SkolemCache::SK_ID_CTN_POST, "sc2"); Node eq = Rewriter::rewrite( x.eqNode( mkConcat( sk1, s, sk2 ) ) ); std::vector< Node > exp_vec; exp_vec.push_back( n ); @@ -759,7 +760,7 @@ void TheoryStrings::preRegisterTerm(TNode n) { // but not an internally generated Skolem, or a term that does // not belong to this theory. if (options::stringFMF() - && (n.isVar() ? d_all_skolems.find(n) == d_all_skolems.end() + && (n.isVar() ? !d_sk_cache.isSkolem(n) : kindToTheoryId(k) != THEORY_STRINGS)) { d_input_vars.insert(n); @@ -2624,47 +2625,55 @@ void TheoryStrings::processNEqc( std::vector< std::vector< Node > > &normal_form } } } - if( !pinfer.empty() ){ - //now, determine which of the possible inferences we want to add - unsigned use_index = 0; - bool set_use_index = false; - Trace("strings-solve") << "Possible inferences (" << pinfer.size() << ") : " << std::endl; - unsigned min_id = 9; - unsigned max_index = 0; - for (unsigned i = 0, size = pinfer.size(); i < size; i++) + if (pinfer.empty()) + { + return; + } + // now, determine which of the possible inferences we want to add + unsigned use_index = 0; + bool set_use_index = false; + Trace("strings-solve") << "Possible inferences (" << pinfer.size() + << ") : " << std::endl; + unsigned min_id = 9; + unsigned max_index = 0; + for (unsigned i = 0, size = pinfer.size(); i < size; i++) + { + Trace("strings-solve") << "From " << pinfer[i].d_i << " / " << pinfer[i].d_j + << " (rev=" << pinfer[i].d_rev << ") : "; + Trace("strings-solve") << pinfer[i].d_conc << " by " << pinfer[i].d_id + << std::endl; + if (!set_use_index || pinfer[i].d_id < min_id + || (pinfer[i].d_id == min_id && pinfer[i].d_index > max_index)) { - Trace("strings-solve") << "From " << pinfer[i].d_i << " / " << pinfer[i].d_j << " (rev=" << pinfer[i].d_rev << ") : "; - Trace("strings-solve") - << pinfer[i].d_conc << " by " << pinfer[i].d_id << std::endl; - if (!set_use_index || pinfer[i].d_id < min_id - || (pinfer[i].d_id == min_id && pinfer[i].d_index > max_index)) - { - min_id = pinfer[i].d_id; - max_index = pinfer[i].d_index; - use_index = i; - set_use_index = true; - } + min_id = pinfer[i].d_id; + max_index = pinfer[i].d_index; + use_index = i; + set_use_index = true; } - //send the inference - if( !pinfer[use_index].d_nf_pair[0].isNull() ){ - Assert( !pinfer[use_index].d_nf_pair[1].isNull() ); - addNormalFormPair( pinfer[use_index].d_nf_pair[0], pinfer[use_index].d_nf_pair[1] ); - } - std::stringstream ssi; - ssi << pinfer[use_index].d_id; - sendInference(pinfer[use_index].d_ant, - pinfer[use_index].d_antn, - pinfer[use_index].d_conc, - ssi.str().c_str(), - pinfer[use_index].sendAsLemma()); - for( std::map< int, std::vector< Node > >::iterator it = pinfer[use_index].d_new_skolem.begin(); it != pinfer[use_index].d_new_skolem.end(); ++it ){ - for( unsigned i=0; isecond.size(); i++ ){ - if( it->first==0 ){ - sendLengthLemma( it->second[i] ); - }else if( it->first==1 ){ - registerNonEmptySkolem( it->second[i] ); - } - } + } + // send the inference + if (!pinfer[use_index].d_nf_pair[0].isNull()) + { + Assert(!pinfer[use_index].d_nf_pair[1].isNull()); + addNormalFormPair(pinfer[use_index].d_nf_pair[0], + pinfer[use_index].d_nf_pair[1]); + } + std::stringstream ssi; + ssi << pinfer[use_index].d_id; + sendInference(pinfer[use_index].d_ant, + pinfer[use_index].d_antn, + pinfer[use_index].d_conc, + ssi.str().c_str(), + pinfer[use_index].sendAsLemma()); + // Register the new skolems from this inference. We register them here + // (lazily), since the code above has now decided to use the inference + // at use_index that involves them. + for (const std::pair >& sks : + pinfer[use_index].d_new_skolem) + { + for (const Node& n : sks.second) + { + registerLength(n, sks.first); } } } @@ -2894,11 +2903,16 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal getExplanationVectorForPrefixEq( normal_forms, normal_form_src, normal_forms_exp, normal_forms_exp_depend, const_k, nconst_k, index_c_k, index_nc_k, isRev, info.d_ant ); Node prea = p==stra.size() ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( p ) : stra.prefix( p ) ); - Node sk = mkSkolemCached( other_str, prea, isRev ? sk_id_c_spt_rev : sk_id_c_spt, "c_spt", -1 ); + Node sk = d_sk_cache.mkSkolemCached( + other_str, + prea, + isRev ? SkolemCache::SK_ID_C_SPT_REV + : SkolemCache::SK_ID_C_SPT, + "c_spt"); Trace("strings-csp") << "Const Split: " << prea << " is removed from " << stra << " due to " << strb << ", p=" << p << std::endl; //set info info.d_conc = other_str.eqNode( isRev ? mkConcat( sk, prea ) : mkConcat(prea, sk) ); - info.d_new_skolem[0].push_back( sk ); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); info.d_id = INFER_SSPLIT_CST_PROP; info_valid = true; } @@ -2924,22 +2938,32 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal if( options::stringBinaryCsp() && stra.size()>3 ){ //split string in half Node c_firstHalf = NodeManager::currentNM()->mkConst( isRev ? stra.substr( stra.size()/2 ) : stra.substr(0, stra.size()/2 ) ); - Node sk = mkSkolemCached( other_str, c_firstHalf , isRev ? sk_id_vc_bin_spt_rev : sk_id_vc_bin_spt, "cb_spt", -1 ); + Node sk = d_sk_cache.mkSkolemCached( + other_str, + c_firstHalf, + isRev ? SkolemCache::SK_ID_VC_BIN_SPT_REV + : SkolemCache::SK_ID_VC_BIN_SPT, + "cb_spt"); Trace("strings-csp") << "Const Split: " << c_firstHalf << " is removed from " << const_str << " (binary) " << std::endl; info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, other_str.eqNode( isRev ? mkConcat( sk, c_firstHalf ) : mkConcat( c_firstHalf, sk ) ), NodeManager::currentNM()->mkNode( kind::AND, sk.eqNode( d_emptyString ).negate(), c_firstHalf.eqNode( isRev ? mkConcat( sk, other_str ) : mkConcat( other_str, sk ) ) ) ); - info.d_new_skolem[0].push_back( sk ); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); info.d_id = INFER_SSPLIT_CST_BINARY; info_valid = true; }else{ // normal v/c split Node firstChar = stra.size() == 1 ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( 1 ) : stra.prefix( 1 ) ); - Node sk = mkSkolemCached( other_str, firstChar, isRev ? sk_id_vc_spt_rev : sk_id_vc_spt, "c_spt", -1 ); + Node sk = d_sk_cache.mkSkolemCached( + other_str, + firstChar, + isRev ? SkolemCache::SK_ID_VC_SPT_REV + : SkolemCache::SK_ID_VC_SPT, + "c_spt"); Trace("strings-csp") << "Const Split: " << firstChar << " is removed from " << const_str << " (serial) " << std::endl; info.d_conc = other_str.eqNode( isRev ? mkConcat( sk, firstChar ) : mkConcat(firstChar, sk) ); - info.d_new_skolem[0].push_back( sk ); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); info.d_id = INFER_SSPLIT_CST; info_valid = true; } @@ -2981,9 +3005,14 @@ void TheoryStrings::processSimpleNEq( std::vector< std::vector< Node > > &normal info.d_antn.push_back( xgtz ); } } - Node sk = mkSkolemCached( normal_forms[i][index], normal_forms[j][index], isRev ? sk_id_v_spt_rev : sk_id_v_spt, "v_spt", -1 ); - //must add length requirement - info.d_new_skolem[1].push_back( sk ); + Node sk = d_sk_cache.mkSkolemCached( + normal_forms[i][index], + normal_forms[j][index], + isRev ? SkolemCache::SK_ID_V_SPT_REV + : SkolemCache::SK_ID_V_SPT, + "v_spt"); + // must add length requirement + info.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); Node eq1 = normal_forms[i][index].eqNode( isRev ? mkConcat(sk, normal_forms[j][index]) : mkConcat(normal_forms[j][index], sk) ); Node eq2 = normal_forms[j][index].eqNode( isRev ? mkConcat(sk, normal_forms[i][index]) : mkConcat(normal_forms[i][index], sk) ); @@ -3200,9 +3229,10 @@ bool TheoryStrings::processLoop( std::vector< std::vector< Node > > &normal_form Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." << std::endl; // right - Node sk_w = mkSkolemS("w_loop"); - Node sk_y = mkSkolemS("y_loop", 1); - Node sk_z = mkSkolemS("z_loop"); + Node sk_w = d_sk_cache.mkSkolem("w_loop"); + Node sk_y = d_sk_cache.mkSkolem("y_loop"); + registerLength(sk_y, LENGTH_GEQ_ONE); + Node sk_z = d_sk_cache.mkSkolem("z_loop"); // t1 * ... * tn = y * z Node conc1 = t_yz.eqNode(mkConcat(sk_y, sk_z)); // s1 * ... * sk = z * y * r @@ -3301,8 +3331,14 @@ void TheoryStrings::processDeq( Node ni, Node nj ) { } } }else{ - Node sk = mkSkolemCached( nconst_k, firstChar, sk_id_dc_spt, "dc_spt", 2 ); - Node skr = mkSkolemCached( nconst_k, firstChar, sk_id_dc_spt_rem, "dc_spt_rem" ); + Node sk = d_sk_cache.mkSkolemCached( + nconst_k, firstChar, SkolemCache::SK_ID_DC_SPT, "dc_spt"); + registerLength(sk, LENGTH_ONE); + Node skr = + d_sk_cache.mkSkolemCached(nconst_k, + firstChar, + SkolemCache::SK_ID_DC_SPT_REM, + "dc_spt_rem"); Node eq1 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk, skr ) ); eq1 = Rewriter::rewrite( eq1 ); Node eq2 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, firstChar, skr ) ); @@ -3331,9 +3367,13 @@ void TheoryStrings::processDeq( Node ni, Node nj ) { } antec_new_lits.push_back( li.eqNode( lj ).negate() ); std::vector< Node > conc; - Node sk1 = mkSkolemCached( i, j, sk_id_deq_x, "x_dsplit" ); - Node sk2 = mkSkolemCached( i, j, sk_id_deq_y, "y_dsplit" ); - Node sk3 = mkSkolemCached( i, j, sk_id_deq_z, "z_dsplit", 1 ); + Node sk1 = d_sk_cache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); + Node sk2 = d_sk_cache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); + Node sk3 = d_sk_cache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); + registerLength(sk3, LENGTH_GEQ_ONE); //Node nemp = sk3.eqNode(d_emptyString).negate(); //conc.push_back(nemp); Node lsk1 = mkLength( sk1 ); @@ -3525,86 +3565,95 @@ void TheoryStrings::registerTerm( Node n, int effort ) { } else { - do_register = effort > 0 || n.getKind() != kind::STRING_CONCAT; + do_register = effort > 0 || n.getKind() != STRING_CONCAT; } } - if( do_register ){ - if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) { - d_registered_terms_cache.insert(n); - Debug("strings-register") << "TheoryStrings::registerTerm() " << n << ", effort = " << effort << std::endl; - if (tn.isString()) + if (!do_register) + { + return; + } + if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end()) + { + return; + } + d_registered_terms_cache.insert(n); + NodeManager* nm = NodeManager::currentNM(); + Debug("strings-register") << "TheoryStrings::registerTerm() " << n + << ", effort = " << effort << std::endl; + if (tn.isString()) + { + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + Node lsum; + if (n.getKind() != STRING_CONCAT && n.getKind() != CONST_STRING) + { + Node lsumb = nm->mkNode(STRING_LENGTH, n); + lsum = Rewriter::rewrite(lsumb); + // can register length term if it does not rewrite + if (lsum == lsumb) { - //register length information: - // for variables, split on empty vs positive length - // for concat/const/replace, introduce proxy var and state length relation - Node lsum; - bool processed = false; - if( n.getKind()!=kind::STRING_CONCAT && n.getKind()!=kind::CONST_STRING ) { - if( d_length_lemma_terms_cache.find( n )==d_length_lemma_terms_cache.end() ){ - Node lsumb = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n ); - lsum = Rewriter::rewrite( lsumb ); - // can register length term if it does not rewrite - if( lsum==lsumb ){ - sendLengthLemma( n ); - processed = true; - } - }else{ - processed = true; - } - } - if( !processed ){ - Node sk = mkSkolemS( "lsym", -1 ); - StringsProxyVarAttribute spva; - sk.setAttribute(spva,true); - Node eq = Rewriter::rewrite( sk.eqNode(n) ); - Trace("strings-lemma") << "Strings::Lemma LENGTH Term : " << eq << std::endl; - d_proxy_var[n] = sk; - Trace("strings-assert") << "(assert " << eq << ")" << std::endl; - d_out->lemma(eq); - Node skl = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk ); - if( n.getKind()==kind::STRING_CONCAT ){ - std::vector node_vec; - for( unsigned i=0; imkNode( kind::STRING_LENGTH, n[i] ); - node_vec.push_back(lni); - } - } - lsum = NodeManager::currentNM()->mkNode( kind::PLUS, node_vec ); - lsum = Rewriter::rewrite( lsum ); - }else if( n.getKind()==kind::CONST_STRING ){ - lsum = NodeManager::currentNM()->mkConst( ::CVC4::Rational( n.getConst().size() ) ); - } - Assert( !lsum.isNull() ); - d_proxy_var_to_length[sk] = lsum; - Node ceq = Rewriter::rewrite( skl.eqNode( lsum ) ); - Trace("strings-lemma") << "Strings::Lemma LENGTH : " << ceq << std::endl; - Trace("strings-lemma-debug") << " prerewrite : " << skl.eqNode( lsum ) << std::endl; - Trace("strings-assert") << "(assert " << ceq << ")" << std::endl; - d_out->lemma(ceq); - } + registerLength(n, LENGTH_SPLIT); + return; } - else if (n.getKind() == kind::STRING_CODE) + } + Node sk = d_sk_cache.mkSkolem("lsym"); + StringsProxyVarAttribute spva; + sk.setAttribute(spva, true); + Node eq = Rewriter::rewrite(sk.eqNode(n)); + Trace("strings-lemma") << "Strings::Lemma LENGTH Term : " << eq + << std::endl; + d_proxy_var[n] = sk; + Trace("strings-assert") << "(assert " << eq << ")" << std::endl; + d_out->lemma(eq); + Node skl = nm->mkNode(STRING_LENGTH, sk); + if (n.getKind() == STRING_CONCAT) + { + std::vector node_vec; + for (unsigned i = 0; i < n.getNumChildren(); i++) { - d_has_str_code = true; - NodeManager* nm = NodeManager::currentNM(); - // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 ) - Node code_len = mkLength(n[0]).eqNode(d_one); - Node code_eq_neg1 = n.eqNode(d_neg_one); - Node code_range = nm->mkNode( - kind::AND, - nm->mkNode(kind::GEQ, n, d_zero), - nm->mkNode( - kind::LT, n, nm->mkConst(Rational(CVC4::String::num_codes())))); - Node lem = nm->mkNode(kind::ITE, code_len, code_range, code_eq_neg1); - Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl; - Trace("strings-assert") << "(assert " << lem << ")" << std::endl; - d_out->lemma(lem); + if (n[i].getAttribute(StringsProxyVarAttribute())) + { + Assert(d_proxy_var_to_length.find(n[i]) + != d_proxy_var_to_length.end()); + node_vec.push_back(d_proxy_var_to_length[n[i]]); + } + else + { + Node lni = nm->mkNode(STRING_LENGTH, n[i]); + node_vec.push_back(lni); + } } + lsum = nm->mkNode(PLUS, node_vec); + lsum = Rewriter::rewrite(lsum); } + else if (n.getKind() == CONST_STRING) + { + lsum = nm->mkConst(Rational(n.getConst().size())); + } + Assert(!lsum.isNull()); + d_proxy_var_to_length[sk] = lsum; + Node ceq = Rewriter::rewrite(skl.eqNode(lsum)); + Trace("strings-lemma") << "Strings::Lemma LENGTH : " << ceq << std::endl; + Trace("strings-lemma-debug") + << " prerewrite : " << skl.eqNode(lsum) << std::endl; + Trace("strings-assert") << "(assert " << ceq << ")" << std::endl; + d_out->lemma(ceq); + } + else if (n.getKind() == STRING_CODE) + { + d_has_str_code = true; + // ite( str.len(s)==1, 0 <= str.code(s) < num_codes, str.code(s)=-1 ) + Node code_len = mkLength(n[0]).eqNode(d_one); + Node code_eq_neg1 = n.eqNode(d_neg_one); + Node code_range = nm->mkNode( + AND, + nm->mkNode(GEQ, n, d_zero), + nm->mkNode(LT, n, nm->mkConst(Rational(CVC4::String::num_codes())))); + Node lem = nm->mkNode(ITE, code_len, code_range, code_eq_neg1); + Trace("strings-lemma") << "Strings::Lemma CODE : " << lem << std::endl; + Trace("strings-assert") << "(assert " << lem << ")" << std::endl; + d_out->lemma(lem); } } @@ -3723,11 +3772,41 @@ bool TheoryStrings::sendSplit(Node a, Node b, const char* c, bool preq) return false; } +void TheoryStrings::registerLength(Node n, LengthStatus s) +{ + if (d_length_lemma_terms_cache.find(n) != d_length_lemma_terms_cache.end()) + { + return; + } + d_length_lemma_terms_cache.insert(n); + + NodeManager* nm = NodeManager::currentNM(); + Node n_len = nm->mkNode(kind::STRING_LENGTH, n); + + if (s == LENGTH_GEQ_ONE) + { + Node neq_empty = n.eqNode(d_emptyString).negate(); + Node len_n_gt_z = nm->mkNode(GT, n_len, d_zero); + Node len_geq_one = nm->mkNode(AND, neq_empty, len_n_gt_z); + Trace("strings-lemma") << "Strings::Lemma SK-GEQ-ONE : " << len_geq_one + << std::endl; + Trace("strings-assert") << "(assert " << len_geq_one << ")" << std::endl; + d_out->lemma(len_geq_one); + return; + } + + if (s == LENGTH_ONE) + { + Node len_one = n_len.eqNode(d_one); + Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one + << std::endl; + Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; + d_out->lemma(len_one); + return; + } + Assert(s == LENGTH_SPLIT); -void TheoryStrings::sendLengthLemma( Node n ){ - Node n_len = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n); if( options::stringSplitEmp() || !options::stringLenGeqZ() ){ - NodeManager* nm = NodeManager::currentNM(); Node n_len_eq_z = n_len.eqNode( d_zero ); Node n_len_eq_z_2 = n.eqNode( d_emptyString ); Node case_empty = nm->mkNode(AND, n_len_eq_z, n_len_eq_z_2); @@ -3765,9 +3844,10 @@ void TheoryStrings::sendLengthLemma( Node n ){ Assert(false); } } - //AJR: probably a good idea + + // additionally add len( x ) >= 0 ? if( options::stringLenGeqZ() ){ - Node n_len_geq = NodeManager::currentNM()->mkNode( kind::GEQ, n_len, d_zero); + Node n_len_geq = nm->mkNode(kind::GEQ, n_len, d_zero); n_len_geq = Rewriter::rewrite( n_len_geq ); d_out->lemma( n_len_geq ); } @@ -3842,49 +3922,6 @@ Node TheoryStrings::mkLength( Node t ) { return Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t ) ); } -Node TheoryStrings::mkSkolemCached( Node a, Node b, int id, const char * c, int isLenSplit ){ - //return mkSkolemS( c, isLenSplit ); - std::map< int, Node >::iterator it = d_skolem_cache[a][b].find( id ); - if( it==d_skolem_cache[a][b].end() ){ - Node sk = mkSkolemS( c, isLenSplit ); - d_skolem_cache[a][b][id] = sk; - return sk; - }else{ - return it->second; - } -} - -//isLenSplit: -1-ignore, 0-no restriction, 1-greater than one, 2-one -Node TheoryStrings::mkSkolemS( const char *c, int isLenSplit ) { - Node n = NodeManager::currentNM()->mkSkolem( c, NodeManager::currentNM()->stringType(), "string sko" ); - d_all_skolems.insert(n); - d_length_lemma_terms_cache.insert( n ); - ++(d_statistics.d_new_skolems); - if( isLenSplit==0 ){ - sendLengthLemma( n ); - } else if( isLenSplit == 1 ){ - registerNonEmptySkolem( n ); - }else if( isLenSplit==2 ){ - Node len_one = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, n ).eqNode( d_one ); - Trace("strings-lemma") << "Strings::Lemma SK-ONE : " << len_one << std::endl; - Trace("strings-assert") << "(assert " << len_one << ")" << std::endl; - d_out->lemma( len_one ); - } - return n; -} - -void TheoryStrings::registerNonEmptySkolem( Node n ) { - if( d_skolem_ne_reg_cache.find( n )==d_skolem_ne_reg_cache.end() ){ - d_skolem_ne_reg_cache.insert( n ); - d_equalityEngine.assertEquality(n.eqNode(d_emptyString), false, d_true); - Node len_n_gt_z = NodeManager::currentNM()->mkNode(kind::GT, - NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n), d_zero); - Trace("strings-lemma") << "Strings::Lemma SK-NON-ZERO : " << len_n_gt_z << std::endl; - Trace("strings-assert") << "(assert " << len_n_gt_z << ")" << std::endl; - d_out->lemma(len_n_gt_z); - } -} - Node TheoryStrings::mkExplain( std::vector< Node >& a ) { std::vector< Node > an; return mkExplain( a, an ); @@ -4332,18 +4369,16 @@ Node TheoryStrings::ppRewrite(TNode atom) { } // Stats -TheoryStrings::Statistics::Statistics(): - d_splits("theory::strings::NumOfSplitOnDemands", 0), - d_eq_splits("theory::strings::NumOfEqSplits", 0), - d_deq_splits("theory::strings::NumOfDiseqSplits", 0), - d_loop_lemmas("theory::strings::NumOfLoops", 0), - d_new_skolems("theory::strings::NumOfNewSkolems", 0) +TheoryStrings::Statistics::Statistics() + : d_splits("theory::strings::NumOfSplitOnDemands", 0), + d_eq_splits("theory::strings::NumOfEqSplits", 0), + d_deq_splits("theory::strings::NumOfDiseqSplits", 0), + d_loop_lemmas("theory::strings::NumOfLoops", 0) { smtStatisticsRegistry()->registerStat(&d_splits); smtStatisticsRegistry()->registerStat(&d_eq_splits); smtStatisticsRegistry()->registerStat(&d_deq_splits); smtStatisticsRegistry()->registerStat(&d_loop_lemmas); - smtStatisticsRegistry()->registerStat(&d_new_skolems); } TheoryStrings::Statistics::~Statistics(){ @@ -4351,7 +4386,6 @@ TheoryStrings::Statistics::~Statistics(){ smtStatisticsRegistry()->unregisterStat(&d_eq_splits); smtStatisticsRegistry()->unregisterStat(&d_deq_splits); smtStatisticsRegistry()->unregisterStat(&d_loop_lemmas); - smtStatisticsRegistry()->unregisterStat(&d_new_skolems); } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index c1bb1e0a0..5bc6b019f 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -24,6 +24,7 @@ #include "expr/attribute.h" #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" +#include "theory/strings/skolem_cache.h" #include "theory/strings/theory_strings_preprocess.h" #include "theory/theory.h" #include "theory/uf/equality_engine.h" @@ -276,7 +277,6 @@ private: NodeSet d_pregistered_terms_cache; NodeSet d_registered_terms_cache; NodeSet d_length_lemma_terms_cache; - NodeSet d_skolem_ne_reg_cache; // preprocess cache StringsPreprocess d_preproc; NodeBoolMap d_preproc_cache; @@ -392,22 +392,53 @@ private: bool d_model_active; }; std::map< Node, ExtfInfoTmp > d_extf_info_tmp; -private: - class InferInfo { - public: + + private: + /** Length status, used for the registerLength function below */ + enum LengthStatus + { + LENGTH_SPLIT, + LENGTH_ONE, + LENGTH_GEQ_ONE + }; + /** register length + * + * This method is called on non-constant string terms n. It sends a lemma + * on the output channel that ensures that the length n satisfies its assigned + * status (given by argument s). + * + * If the status is LENGTH_ONE, we send the lemma len( n ) = 1. + * + * If the status is LENGTH_GEQ, we send a lemma n != "" ^ len( n ) > 0. + * + * If the status is LENGTH_SPLIT, we send a send a lemma of the form: + * ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0 + * This method also ensures that, when applicable, the left branch is taken + * first via calls to requirePhase. + */ + void registerLength(Node n, LengthStatus s); + + //------------------------- candidate inferences + class InferInfo + { + public: unsigned d_i; unsigned d_j; bool d_rev; - std::vector< Node > d_ant; - std::vector< Node > d_antn; - std::map< int, std::vector< Node > > d_new_skolem; + std::vector d_ant; + std::vector d_antn; + std::map > d_new_skolem; Node d_conc; Inference d_id; - std::map< Node, bool > d_pending_phase; + std::map d_pending_phase; unsigned d_index; Node d_nf_pair[2]; bool sendAsLemma(); }; + //------------------------- end candidate inferences + /** cache of all skolems */ + SkolemCache d_sk_cache; + void checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc ); void checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int effort ); Node getSymbolicDefinition( Node n, std::vector< Node >& exp ); @@ -532,48 +563,13 @@ private: void sendLemma(Node ant, Node conc, const char* c); void sendInfer(Node eq_exp, Node eq, const char* c); bool sendSplit(Node a, Node b, const char* c, bool preq = true); - /** send length lemma - * - * This method is called on non-constant string terms n. It sends a lemma - * on the output channel that ensures that len( n ) >= 0. In particular, the - * this lemma is typically of the form: - * ( n = "" ^ len( n ) = 0 ) OR len( n ) > 0 - * This method also ensures that, when applicable, the left branch is taken - * first via calls to requirePhase. - */ - void sendLengthLemma(Node n); + /** mkConcat **/ inline Node mkConcat(Node n1, Node n2); inline Node mkConcat(Node n1, Node n2, Node n3); inline Node mkConcat(const std::vector& c); inline Node mkLength(Node n); - // mkSkolem - enum - { - sk_id_c_spt, - sk_id_vc_spt, - sk_id_vc_bin_spt, - sk_id_v_spt, - sk_id_c_spt_rev, - sk_id_vc_spt_rev, - sk_id_vc_bin_spt_rev, - sk_id_v_spt_rev, - sk_id_ctn_pre, - sk_id_ctn_post, - sk_id_dc_spt, - sk_id_dc_spt_rem, - sk_id_deq_x, - sk_id_deq_y, - sk_id_deq_z, - }; - std::map > > d_skolem_cache; - /** the set of all skolems we have generated */ - std::unordered_set d_all_skolems; - Node mkSkolemCached( - Node a, Node b, int id, const char* c, int isLenSplit = 0); - inline Node mkSkolemS(const char* c, int isLenSplit = 0); - void registerNonEmptySkolem(Node sk); - // inline Node mkSkolemI(const char * c); + /** mkExplain **/ Node mkExplain(std::vector& a); Node mkExplain(std::vector& a, std::vector& an); @@ -652,7 +648,6 @@ private: IntStat d_eq_splits; IntStat d_deq_splits; IntStat d_loop_lemmas; - IntStat d_new_skolems; Statistics(); ~Statistics(); };/* class TheoryStrings::Statistics */