From: Andrew Reynolds Date: Tue, 26 May 2020 14:51:13 +0000 (-0500) Subject: (proof-new) Updates to strings skolem cache. (#4524) X-Git-Tag: cvc5-1.0.0~3289 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=978f45596117f815fee943edceb9f8edf9c26c32;p=cvc5.git (proof-new) Updates to strings skolem cache. (#4524) This PR makes strings skolem cache call the ProofSkolemCache. This is required for proper proof checking, as it makes all skolems be associated with their formal definition, encapsulated by a witness term. It furthermore makes skolem sharing more uniform and aggressive by noting that most string skolems correspond to purification variables (typically for some substr term). A purification variable for a term that rewrites to a constant can be replaced by the constant itself. It also introduces an attribute IndexVarAttribute which is used to ensure reductions involving universal variables are deterministic. --- diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index aca43e8c8..604abb1d7 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1510,11 +1510,11 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi, } } SkolemCache* skc = d_termReg.getSkolemCache(); - Node sk = skc->mkSkolemCached( - x, - y, - isRev ? SkolemCache::SK_ID_V_SPT_REV : SkolemCache::SK_ID_V_SPT, - "v_spt"); + Node sk = skc->mkSkolemCached(x, + y, + isRev ? SkolemCache::SK_ID_V_UNIFIED_SPT_REV + : SkolemCache::SK_ID_V_UNIFIED_SPT, + "v_spt"); iinfo.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); Node eq1 = x.eqNode(isRev ? utils::mkNConcat(sk, y) : utils::mkNConcat(y, sk)); @@ -1979,7 +1979,9 @@ void CoreSolver::processDeq(Node ni, Node nj) Node sk2 = skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); Node sk3 = - skc->mkSkolemCached(x, y, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); + skc->mkSkolemCached(y, x, SkolemCache::SK_ID_V_SPT, "z_dsplit"); + Node sk4 = + skc->mkSkolemCached(x, y, SkolemCache::SK_ID_V_SPT, "w_dsplit"); d_termReg.registerTermAtomic(sk3, LENGTH_GEQ_ONE); Node sk1Len = utils::mkNLength(sk1); conc.push_back(sk1Len.eqNode(xLenTerm)); @@ -1987,7 +1989,7 @@ void CoreSolver::processDeq(Node ni, Node nj) conc.push_back(sk2Len.eqNode(yLenTerm)); conc.push_back(nm->mkNode(OR, y.eqNode(utils::mkNConcat(sk1, sk3)), - x.eqNode(utils::mkNConcat(sk2, sk3)))); + x.eqNode(utils::mkNConcat(sk2, sk4)))); d_im.sendInference(antec, antecNewLits, nm->mkNode(AND, conc), diff --git a/src/theory/strings/skolem_cache.cpp b/src/theory/strings/skolem_cache.cpp index 4d92c372f..4af75f1cc 100644 --- a/src/theory/strings/skolem_cache.cpp +++ b/src/theory/strings/skolem_cache.cpp @@ -14,8 +14,11 @@ #include "theory/strings/skolem_cache.h" +#include "expr/attribute.h" #include "theory/rewriter.h" #include "theory/strings/arith_entail.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" #include "util/rational.h" using namespace CVC4::kind; @@ -24,7 +27,17 @@ namespace CVC4 { namespace theory { namespace strings { -SkolemCache::SkolemCache() +/** + * A bound variable corresponding to the universally quantified integer + * variable used to range over the valid positions in a string, used + * for axiomatizing the behavior of some term. + */ +struct IndexVarAttributeId +{ +}; +typedef expr::Attribute IndexVarAttribute; + +SkolemCache::SkolemCache(bool useOpts) : d_useOpts(useOpts) { NodeManager* nm = NodeManager::currentNM(); d_strType = nm->stringType(); @@ -44,22 +57,69 @@ Node SkolemCache::mkSkolemCached(Node a, SkolemId id, const char* c) Node SkolemCache::mkTypedSkolemCached( TypeNode tn, Node a, Node b, SkolemId id, const char* c) { + Trace("skolem-cache") << "mkTypedSkolemCached start: (" << id << ", " << a + << ", " << b << ")" << std::endl; + SkolemId idOrig = id; a = a.isNull() ? a : Rewriter::rewrite(a); b = b.isNull() ? b : Rewriter::rewrite(b); - if (tn == d_strType) + std::tie(id, a, b) = normalizeStringSkolem(id, a, b); + + // optimization: if we aren't asking for the purification skolem for constant + // a, and the skolem is equivalent to a, then we just return a. + if (d_useOpts && idOrig != SK_PURIFY && id == SK_PURIFY && a.isConst()) { - std::tie(id, a, b) = normalizeStringSkolem(id, a, b); + Trace("skolem-cache") << "...optimization: return constant " << a + << std::endl; + return a; } std::map::iterator it = d_skolemCache[a][b].find(id); - if (it == d_skolemCache[a][b].end()) + if (it != d_skolemCache[a][b].end()) + { + // already cached + return it->second; + } + + NodeManager* nm = NodeManager::currentNM(); + Node sk; + switch (id) { - Node sk = mkTypedSkolem(tn, c); - d_skolemCache[a][b][id] = sk; - return sk; + // exists k. k = a + case SK_PURIFY: + sk = ProofSkolemCache::mkPurifySkolem(a, c, "string purify skolem"); + break; + // these are eliminated by normalizeStringSkolem + case SK_ID_V_SPT: + case SK_ID_V_SPT_REV: + case SK_ID_VC_SPT: + case SK_ID_VC_SPT_REV: + case SK_FIRST_CTN_POST: + case SK_ID_C_SPT: + case SK_ID_C_SPT_REV: + case SK_ID_DC_SPT: + case SK_ID_DC_SPT_REM: + case SK_ID_DEQ_X: + case SK_ID_DEQ_Y: + case SK_FIRST_CTN_PRE: + case SK_PREFIX: + case SK_SUFFIX_REM: + Unhandled() << "Expected to eliminate Skolem ID " << id << std::endl; + break; + case SK_NUM_OCCUR: + case SK_OCCUR_INDEX: + default: + { + Notice() << "Don't know how to handle Skolem ID " << id << std::endl; + Node v = nm->mkBoundVar(tn); + Node cond = nm->mkConst(true); + sk = ProofSkolemCache::mkSkolem(v, cond, c, "string skolem"); + } + break; } - return it->second; + d_allSkolems.insert(sk); + d_skolemCache[a][b][id] = sk; + return sk; } Node SkolemCache::mkTypedSkolemCached(TypeNode tn, Node a, @@ -71,12 +131,8 @@ Node SkolemCache::mkTypedSkolemCached(TypeNode tn, Node SkolemCache::mkSkolem(const char* c) { - return mkTypedSkolem(d_strType, c); -} - -Node SkolemCache::mkTypedSkolem(TypeNode tn, const char* c) -{ - Node n = NodeManager::currentNM()->mkSkolem(c, tn, "string skolem"); + // TODO: eliminate this + Node n = NodeManager::currentNM()->mkSkolem(c, d_strType, "string skolem"); d_allSkolems.insert(n); return n; } @@ -89,26 +145,31 @@ bool SkolemCache::isSkolem(Node n) const std::tuple SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) { - Trace("skolem-cache") << "normalizeStringSkolem start: (" << id << ", " << a - << ", " << b << ")" << std::endl; NodeManager* nm = NodeManager::currentNM(); + // eliminate in terms of prefix/suffix_rem if (id == SK_FIRST_CTN_POST) { // SK_FIRST_CTN_POST(x, y) ---> // SK_SUFFIX_REM(x, (+ (str.len SK_FIRST_CTN_PRE(x, y)) (str.len y))) id = SK_SUFFIX_REM; Node pre = mkSkolemCached(a, b, SK_FIRST_CTN_PRE, "pre"); - b = Rewriter::rewrite(nm->mkNode( - PLUS, nm->mkNode(STRING_LENGTH, pre), nm->mkNode(STRING_LENGTH, b))); + b = nm->mkNode( + PLUS, nm->mkNode(STRING_LENGTH, pre), nm->mkNode(STRING_LENGTH, b)); } - - if (id == SK_ID_C_SPT) + else if (id == SK_ID_V_SPT || id == SK_ID_C_SPT) { - // SK_ID_C_SPT(x, y) ---> SK_SUFFIX_REM(x, (str.len y)) + // SK_ID_*_SPT(x, y) ---> SK_SUFFIX_REM(x, (str.len y)) id = SK_SUFFIX_REM; - b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, b)); + b = nm->mkNode(STRING_LENGTH, b); + } + else if (id == SK_ID_V_SPT_REV || id == SK_ID_C_SPT_REV) + { + // SK_ID_*_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) (str.len y))) + id = SK_PREFIX; + b = nm->mkNode( + MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkNode(STRING_LENGTH, b)); } else if (id == SK_ID_VC_SPT) { @@ -120,8 +181,8 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) { // SK_ID_VC_SPT_REV(x, y) ---> SK_PREFIX(x, (- (str.len x) 1)) id = SK_PREFIX; - b = Rewriter::rewrite(nm->mkNode( - MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkConst(Rational(1)))); + b = nm->mkNode( + MINUS, nm->mkNode(STRING_LENGTH, a), nm->mkConst(Rational(1))); } else if (id == SK_ID_DC_SPT) { @@ -141,52 +202,74 @@ SkolemCache::normalizeStringSkolem(SkolemId id, Node a, Node b) id = SK_PREFIX; Node aOld = a; a = b; - b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, aOld)); + b = nm->mkNode(STRING_LENGTH, aOld); } else if (id == SK_ID_DEQ_Y) { // SK_ID_DEQ_Y(x, y) ---> SK_PREFIX(x, (str.len y)) id = SK_PREFIX; - b = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, b)); + b = nm->mkNode(STRING_LENGTH, b); } - - if (id == SK_PURIFY && a.getKind() == kind::STRING_SUBSTR) + else if (id == SK_FIRST_CTN_PRE) { - Node s = a[0]; - Node n = a[1]; - Node m = a[2]; + // SK_FIRST_CTN_PRE(x,y) ---> SK_PREFIX(x, indexof(x,y,0)) + id = SK_PREFIX; + b = nm->mkNode(STRING_STRIDOF, a, b, d_zero); + } - if (n == d_zero) - { - // SK_PURIFY((str.substr x 0 m)) ---> SK_PREFIX(x, m) - id = SK_PREFIX; - a = s; - b = m; - } - else if (ArithEntail::check(nm->mkNode(PLUS, n, m), - nm->mkNode(STRING_LENGTH, s))) - { - // SK_PURIFY((str.substr x n m)) ---> SK_SUFFIX_REM(x, n) - // if n + m >= (str.len x) - id = SK_SUFFIX_REM; - a = s; - b = n; - } + if (id == SK_ID_V_UNIFIED_SPT || id == SK_ID_V_UNIFIED_SPT_REV) + { + bool isRev = (id == SK_ID_V_UNIFIED_SPT_REV); + Node la = nm->mkNode(STRING_LENGTH, a); + Node lb = nm->mkNode(STRING_LENGTH, b); + Node ta = isRev ? utils::mkPrefix(a, nm->mkNode(MINUS, la, lb)) + : utils::mkSuffix(a, lb); + Node tb = isRev ? utils::mkPrefix(b, nm->mkNode(MINUS, lb, la)) + : utils::mkSuffix(b, la); + id = SK_PURIFY; + // SK_ID_V_UNIFIED_SPT(x,y) ---> + // ite(len(x) >= len(y), substr(x,0,str.len(y)), substr(y,0,str.len(x)) + a = nm->mkNode(ITE, nm->mkNode(GEQ, la, lb), ta, tb); + b = Node::null(); } - if (id == SK_PREFIX && b.getKind() == kind::STRING_STRIDOF && a == b[0] - && b[2] == d_zero) + // now, eliminate prefix/suffix_rem in terms of purify + if (id == SK_PREFIX) + { + // SK_PREFIX(x,y) ---> SK_PURIFY(substr(x,0,y)) + id = SK_PURIFY; + a = utils::mkPrefix(a, b); + b = Node::null(); + } + else if (id == SK_SUFFIX_REM) { - // SK_PREFIX(x, (str.indexof x y 0)) ---> SK_FIRST_CTN_PRE(x, y) - id = SK_FIRST_CTN_PRE; - b = b[1]; + // SK_SUFFIX_REM(x,y) ---> SK_PURIFY(substr(x,y,str.len(x)-y)) + id = SK_PURIFY; + a = utils::mkSuffix(a, b); + b = Node::null(); } + a = a.isNull() ? a : Rewriter::rewrite(a); + b = b.isNull() ? b : Rewriter::rewrite(b); + Trace("skolem-cache") << "normalizeStringSkolem end: (" << id << ", " << a << ", " << b << ")" << std::endl; return std::make_tuple(id, a, b); } +Node SkolemCache::mkIndexVar(Node t) +{ + IndexVarAttribute iva; + if (t.hasAttribute(iva)) + { + return t.getAttribute(iva); + } + NodeManager* nm = NodeManager::currentNM(); + Node v = nm->mkBoundVar(nm->integerType()); + t.setAttribute(iva, v); + return v; +} + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/skolem_cache.h b/src/theory/strings/skolem_cache.h index 8fcf46c14..0ebbb3277 100644 --- a/src/theory/strings/skolem_cache.h +++ b/src/theory/strings/skolem_cache.h @@ -22,6 +22,7 @@ #include #include "expr/node.h" +#include "expr/proof_skolem_cache.h" namespace CVC4 { namespace theory { @@ -35,7 +36,13 @@ namespace strings { class SkolemCache { public: - SkolemCache(); + /** + * Constructor. + * + * useOpts determines if we aggressively share Skolems or return the constants + * they are entailed to be equal to. + */ + SkolemCache(bool useOpts = true); /** Identifiers for skolem types * * The comments below document the properties of each skolem introduced by @@ -52,7 +59,7 @@ class SkolemCache // exists k. k = a SK_PURIFY, // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' => - // exists k. a = "cccc" + k + // exists k. a = "cccc" ++ k SK_ID_C_SPT, SK_ID_C_SPT_REV, // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => @@ -60,9 +67,15 @@ class SkolemCache SK_ID_VC_SPT, SK_ID_VC_SPT_REV, // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => - // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k ) + // exists k1 k2. len( k1 )>0 ^ len( k2 )>0 ^ + // ( a ++ k1 = b OR a = b ++ k2 ) + // k1 is the variable for (a,b) and k2 is the skolem for (b,a). SK_ID_V_SPT, SK_ID_V_SPT_REV, + // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' => + // exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k ) + SK_ID_V_UNIFIED_SPT, + SK_ID_V_UNIFIED_SPT_REV, // a != "" ^ b = "c" ^ a ++ a' != b ++ b' => // exists k, k_rem. // len( k ) = 1 ^ @@ -75,7 +88,6 @@ class SkolemCache // ( a = k_x ++ k_z OR b = k_y ++ k_z ) ) SK_ID_DEQ_X, SK_ID_DEQ_Y, - SK_ID_DEQ_Z, // contains( a, b ) => // exists k_pre, k_post. a = k_pre ++ b ++ k_post ^ // ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b) @@ -126,10 +138,18 @@ class SkolemCache Node mkTypedSkolemCached(TypeNode tn, Node a, SkolemId id, const char* c); /** Returns a (uncached) skolem of type string with name c */ Node mkSkolem(const char* c); - /** Same as above, but for custom type tn */ - Node mkTypedSkolem(TypeNode tn, const char* c); /** Returns true if n is a skolem allocated by this class */ bool isSkolem(Node n) const; + /** Make index variable + * + * This returns an integer variable of kind BOUND_VARIABLE that is used + * for axiomatizing the behavior of a term or predicate t. Notice that this + * index variable does *not* necessarily refer to indices in the term t + * itself. Instead, it refers to indices in the relevant string in the + * reduction of t. For example, the index variable for the term str.to_int(s) + * is used to quantify over the positions in string term s. + */ + static Node mkIndexVar(Node t); private: /** @@ -149,7 +169,8 @@ class SkolemCache std::tuple normalizeStringSkolem(SkolemId id, Node a, Node b); - + /** whether we are using optimizations */ + bool d_useOpts; /** string type */ TypeNode d_strType; /** Constant node zero */ diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index b64a0df01..e80607acf 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -90,10 +90,12 @@ void flattenOp(Kind k, Node n, std::vector& conj) visited.insert(cur); if (cur.getKind() == k) { - for (const Node& cn : cur) - { - visit.push_back(cn); - } + // Add in reverse order, so that we traverse left to right. + // This is important so that explantaions aren't reversed when they + // are flattened, which is important for proofs involving substitutions. + std::vector newChildren; + newChildren.insert(newChildren.end(), cur.begin(), cur.end()); + visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend()); } else if (std::find(conj.begin(), conj.end(), cur) == conj.end()) { @@ -157,6 +159,19 @@ Node mkNLength(Node t) return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t)); } +Node mkPrefix(Node t, Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n); +} + +Node mkSuffix(Node t, Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + return nm->mkNode( + STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n)); +} + Node getConstantComponent(Node t) { if (t.getKind() == STRING_TO_REGEXP) diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 9fc23499a..fd6e5122b 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -82,6 +82,16 @@ Node mkNConcat(const std::vector& c, TypeNode tn); */ Node mkNLength(Node t); +/** + * Returns (pre t n), which is (str.substr t 0 n). + */ +Node mkPrefix(Node t, Node n); + +/** + * Returns (suf t n), which is (str.substr t n (- (str.len t) n)). + */ +Node mkSuffix(Node t, Node n); + /** * Get constant component. Returns the string constant represented by the * string or regular expression t. For example: