From d2b692cb2c054199d75a05f0f700e54fcb4f6c3c Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 17 Sep 2018 15:40:28 -0500 Subject: [PATCH] More aggressive skolem caching for strings, document and clean preprocessor (#2478) --- src/options/strings_options.toml | 9 -- src/theory/strings/skolem_cache.cpp | 5 + src/theory/strings/skolem_cache.h | 22 ++++ src/theory/strings/theory_strings.cpp | 4 +- .../strings/theory_strings_preprocess.cpp | 124 +++++++----------- .../strings/theory_strings_preprocess.h | 73 +++++++---- 6 files changed, 126 insertions(+), 111 deletions(-) diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index c48583bb2..77056e279 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -182,15 +182,6 @@ header = "options/strings_options.h" read_only = true help = "use model guessing to avoid string extended function reductions" -[[option]] - name = "stringUfReduct" - category = "regular" - long = "strings-uf-reduct" - type = "bool" - default = "false" - read_only = true - help = "use uninterpreted functions when applying extended function reductions" - [[option]] name = "stringBinaryCsp" category = "regular" diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index db673dafe..4a78a11ff 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -32,6 +32,11 @@ Node SkolemCache::mkSkolemCached(Node a, Node b, SkolemId id, const char* c) return it->second; } +Node SkolemCache::mkSkolemCached(Node a, SkolemId id, const char* c) +{ + return mkSkolemCached(a, Node::null(), id, c); +} + Node SkolemCache::mkSkolem(const char* c) { NodeManager* nm = NodeManager::currentNM(); diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 2984ccfe4..c9b9fbe5a 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -42,9 +42,13 @@ class SkolemCache * * The skolems with _REV suffixes are used for the reverse version of the * preconditions below, e.g. where we are considering a' ++ a = b' ++ b. + * + * All skolems assume a and b are strings unless otherwise stated. */ enum SkolemId { + // exists k. k = a + SK_PURIFY, // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' => // exists k. a = "cccc" + k SK_ID_C_SPT, @@ -78,12 +82,30 @@ class SkolemCache SK_ID_DEQ_X, SK_ID_DEQ_Y, SK_ID_DEQ_Z, + // contains( a, b ) => + // exists k_pre, k_post. a = k_pre ++ b ++ k_post ^ + // ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b) + SK_FIRST_CTN_PRE, + SK_FIRST_CTN_POST, + // For integer b, + // len( a ) > b => + // exists k. a = k ++ a' ^ len( k ) = b + SK_PREFIX, + // For integer b, + // b > 0 => + // exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 ) + SK_SUFFIX_REM, }; /** * Returns a skolem of type string that is cached for (a,b,id) and has * name c. */ Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c); + /** + * Returns a skolem of type string that is cached for (a,[null],id) and has + * name c. + */ + Node mkSkolemCached(Node a, SkolemId id, const char* c); /** Returns a (uncached) skolem of type string with name c */ Node mkSkolem(const char* c); /** Returns true if n is a skolem allocated by this class */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 2e1d6c2c7..f007ae1df 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -112,7 +112,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_pregistered_terms_cache(u), d_registered_terms_cache(u), d_length_lemma_terms_cache(u), - d_preproc(u), + d_preproc(&d_sk_cache, u), d_preproc_cache(u), d_extf_infer_cache(c), d_extf_infer_cache_u(u), @@ -3649,7 +3649,7 @@ void TheoryStrings::registerTerm( Node n, int effort ) { return; } } - Node sk = d_sk_cache.mkSkolem("lsym"); + Node sk = d_sk_cache.mkSkolemCached(n, SkolemCache::SK_PURIFY, "lsym"); StringsProxyVarAttribute spva; sk.setAttribute(spva, true); Node eq = Rewriter::rewrite(sk.eqNode(n)); diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index dfab0dd83..2d5bef519 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -30,7 +30,9 @@ namespace CVC4 { namespace theory { namespace strings { -StringsPreprocess::StringsPreprocess( context::UserContext* u ){ +StringsPreprocess::StringsPreprocess(SkolemCache *sc, context::UserContext *u) + : d_sc(sc) +{ //Constants d_zero = NodeManager::currentNM()->mkConst(Rational(0)); d_one = NodeManager::currentNM()->mkConst(Rational(1)); @@ -41,36 +43,6 @@ StringsPreprocess::~StringsPreprocess(){ } -Node StringsPreprocess::getUfForNode( Kind k, Node n, unsigned id ) { - std::map< unsigned, Node >::iterator it = d_uf[k].find( id ); - if( it==d_uf[k].end() ){ - std::vector< TypeNode > types; - for( unsigned i=0; imkFunctionType( types, n.getType() ); - Node f = NodeManager::currentNM()->mkSkolem( "sop", typ, "op created for string op" ); - d_uf[k][id] = f; - return f; - }else{ - return it->second; - } -} - -//pro: congruence possible, con: introduces UF/requires theory combination -// currently hurts performance -//TODO: for all skolems below -Node StringsPreprocess::getUfAppForNode( Kind k, Node n, unsigned id ) { - std::vector< Node > children; - children.push_back( getUfForNode( k, n, id ) ); - for( unsigned i=0; imkNode( kind::APPLY_UF, children ); -} - -//returns an n such that t can be replaced by n, under the assumption of lemmas in new_nodes - Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { unsigned prev_new_nodes = new_nodes.size(); Trace("strings-preprocess-debug") << "StringsPreprocess::simplify: " << t << std::endl; @@ -78,48 +50,54 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { NodeManager *nm = NodeManager::currentNM(); if( t.getKind() == kind::STRING_SUBSTR ) { - Node skt; - if( options::stringUfReduct() ){ - skt = getUfAppForNode( kind::STRING_SUBSTR, t ); - }else{ - skt = NodeManager::currentNM()->mkSkolem( "sst", NodeManager::currentNM()->stringType(), "created for substr" ); - } - Node t12 = NodeManager::currentNM()->mkNode( kind::PLUS, t[1], t[2] ); - Node lt0 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ); + // processing term: substr( s, n, m ) + Node s = t[0]; + Node n = t[1]; + Node m = t[2]; + Node skt = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); + Node t12 = nm->mkNode(PLUS, n, m); + t12 = Rewriter::rewrite(t12); + Node lt0 = nm->mkNode(STRING_LENGTH, s); //start point is greater than or equal zero - Node c1 = NodeManager::currentNM()->mkNode( kind::GEQ, t[1], d_zero ); + Node c1 = nm->mkNode(GEQ, n, d_zero); //start point is less than end of string - Node c2 = NodeManager::currentNM()->mkNode( kind::GT, lt0, t[1] ); + Node c2 = nm->mkNode(GT, lt0, n); //length is positive - Node c3 = NodeManager::currentNM()->mkNode( kind::GT, t[2], d_zero ); - Node cond = NodeManager::currentNM()->mkNode( kind::AND, c1, c2, c3 ); - - Node sk1 = NodeManager::currentNM()->mkSkolem( "ss1", NodeManager::currentNM()->stringType(), "created for substr" ); - Node sk2 = NodeManager::currentNM()->mkSkolem( "ss2", NodeManager::currentNM()->stringType(), "created for substr" ); - Node b11 = t[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, skt, sk2 ) ); + Node c3 = nm->mkNode(GT, m, d_zero); + Node cond = nm->mkNode(AND, c1, c2, c3); + + Node sk1 = d_sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); + Node sk2 = + d_sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); + Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); //length of first skolem is second argument - Node b12 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ).eqNode( t[1] ); + Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); //length of second skolem is abs difference between end point and end of string - Node b13 = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 ).eqNode( - NodeManager::currentNM()->mkNode( kind::ITE, NodeManager::currentNM()->mkNode( kind::GEQ, lt0, t12 ), - NodeManager::currentNM()->mkNode( kind::MINUS, lt0, t12 ), d_zero ) ); + Node b13 = nm->mkNode(STRING_LENGTH, sk2) + .eqNode(nm->mkNode(ITE, + nm->mkNode(GEQ, lt0, t12), + nm->mkNode(MINUS, lt0, t12), + d_zero)); + + Node b1 = nm->mkNode(AND, b11, b12, b13); + Node b2 = skt.eqNode(d_empty_str); + Node lemma = nm->mkNode(ITE, cond, b1, b2); - Node b1 = NodeManager::currentNM()->mkNode( kind::AND, b11, b12, b13 ); - Node b2 = skt.eqNode( NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ); - Node lemma = NodeManager::currentNM()->mkNode( kind::ITE, cond, b1, b2 ); + // assert: + // IF n >=0 AND n < len( s ) AND m > 0 + // THEN: s = sk1 ++ skt ++ sk2 AND + // len( sk1 ) = n AND + // len( sk2 ) = ite( len( s ) >= n+m, len( s )-(n+m), 0 ) + // ELSE: skt = "" new_nodes.push_back( lemma ); + + // Thus, substr( s, n, m ) = skt retNode = skt; } else if (t.getKind() == kind::STRING_STRIDOF) { // processing term: indexof( x, y, n ) - - Node skk; - if( options::stringUfReduct() ){ - skk = getUfAppForNode( kind::STRING_STRIDOF, t ); - }else{ - skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof"); - } + Node skk = nm->mkSkolem("iok", nm->integerType(), "created for indexof"); Node negone = nm->mkConst(::CVC4::Rational(-1)); Node krange = nm->mkNode(kind::GEQ, skk, negone); @@ -196,12 +174,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // NodeManager::currentNM()->mkNode(kind::GEQ, t[0], d_zero), // t[0], NodeManager::currentNM()->mkNode(kind::UMINUS, t[0]))); Node num = t[0]; - Node pret; - if( options::stringUfReduct() ){ - pret = NodeManager::currentNM()->mkNode(kind::STRING_ITOS, num); - }else{ - pret = NodeManager::currentNM()->mkSkolem( "itost", NodeManager::currentNM()->stringType(), "created for itos" ); - } + Node pret = nm->mkSkolem("itost", nm->stringType(), "created for itos"); Node lenp = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, pret); Node nonneg = NodeManager::currentNM()->mkNode(kind::GEQ, t[0], d_zero); @@ -298,14 +271,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { retNode = pret; } else if( t.getKind() == kind::STRING_STOI ) { Node str = t[0]; - Node pret; - if( options::stringUfReduct() ){ - pret = getUfAppForNode( kind::STRING_STOI, t ); - }else{ - pret = NodeManager::currentNM()->mkSkolem( "stoit", NodeManager::currentNM()->integerType(), "created for stoi" ); - } - //Node pret = NodeManager::currentNM()->mkNode(kind::STRING_STOI, str); - //Node pret = getUfAppForNode( kind::STRING_STOI, t ); + Node pret = nm->mkSkolem("stoit", nm->integerType(), "created for stoi"); Node lenp = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, str); Node negone = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); @@ -410,9 +376,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node y = t[1]; Node z = t[2]; TypeNode tn = t[0].getType(); - Node rp1 = nm->mkSkolem("rp1", tn, "created for replace"); - Node rp2 = nm->mkSkolem("rp2", tn, "created for replace"); - Node rpw = nm->mkSkolem("rpw", tn, "created for replace"); + Node rp1 = + d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); + Node rp2 = + d_sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); + Node rpw = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); // y = "" Node cond1 = y.eqNode(nm->mkConst(CVC4::String(""))); diff --git a/src/theory/strings/theory_strings_preprocess.h b/src/theory/strings/theory_strings_preprocess.h index 78e92cd51..c670a5483 100644 --- a/src/theory/strings/theory_strings_preprocess.h +++ b/src/theory/strings/theory_strings_preprocess.h @@ -20,36 +20,65 @@ #define __CVC4__THEORY__STRINGS__PREPROCESS_H #include -#include "util/hash.h" -#include "theory/theory.h" -#include "theory/rewriter.h" #include "context/cdhashmap.h" +#include "theory/rewriter.h" +#include "theory/strings/skolem_cache.h" +#include "theory/theory.h" +#include "util/hash.h" namespace CVC4 { namespace theory { namespace strings { +/** Strings preprocessor + * + * This class is responsible for extended function reductions. It is used as + * a preprocessor when --no-strings-lazy-pp is enabled. By default, it is + * used for reducing extended functions on-demand during the "extended function + * reductions" inference schema of TheoryStrings. + */ class StringsPreprocess { - //Constants - Node d_zero; - Node d_one; - Node d_empty_str; - //mapping from kinds to UF - std::map< Kind, std::map< unsigned, Node > > d_uf; - //get UF for node - Node getUfForNode( Kind k, Node n, unsigned id = 0 ); - Node getUfAppForNode( Kind k, Node n, unsigned id = 0 ); - //recursive simplify - Node simplifyRec( Node t, std::vector< Node > &new_nodes, std::map< Node, Node >& visited ); public: - StringsPreprocess( context::UserContext* u ); - ~StringsPreprocess(); - //returns a node that is equivalent to t under assumptions in new_nodes - Node simplify( Node t, std::vector< Node > &new_nodes ); - //process assertion: guarentees to remove all extf - Node processAssertion( Node n, std::vector< Node > &new_nodes ); - //proces assertions: guarentees to remove all extf, rewrite in place - void processAssertions( std::vector< Node > &vec_node ); + StringsPreprocess(SkolemCache *sc, context::UserContext *u); + ~StringsPreprocess(); + /** + * Returns a node t' such that + * (exists k) new_nodes => t = t' + * is valid, where k are the free skolems introduced when constructing + * new_nodes. + */ + Node simplify(Node t, std::vector &new_nodes); + /** + * Applies simplifyRec on t until a fixed point is reached, and returns + * the resulting term t', which is such that + * (exists k) new_nodes => t = t' + * is valid, where k are the free skolems introduced when constructing + * new_nodes. + */ + Node processAssertion(Node t, std::vector &new_nodes); + /** + * Replaces all formulas t in vec_node with an equivalent formula t' that + * contains no free instances of extended functions (that is, extended + * functions may only appear beneath quantifiers). This applies simplifyRec + * on each assertion in vec_node until a fixed point is reached. + */ + void processAssertions(std::vector &vec_node); + +private: + /** commonly used constants */ + Node d_zero; + Node d_one; + Node d_empty_str; + /** pointer to the skolem cache used by this class */ + SkolemCache *d_sc; + /** + * Applies simplify to all top-level extended function subterms of t. New + * assertions created in this reduction are added to new_nodes. The argument + * visited stores a cache of previous results. + */ + Node simplifyRec(Node t, + std::vector &new_nodes, + std::map &visited); }; }/* CVC4::theory::strings namespace */ -- 2.30.2