From ed2ed9dffb709c9120890f665983abc594bdc0e5 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 31 Jul 2019 00:24:25 -0500 Subject: [PATCH] Eager conflict detection in strings based on constant prefix/suffix (#3110) --- src/theory/strings/theory_strings.cpp | 191 +++++++++++++++++++- src/theory/strings/theory_strings.h | 47 +++++ src/theory/strings/theory_strings_utils.cpp | 25 +++ src/theory/strings/theory_strings_utils.h | 20 ++ src/util/regexp.cpp | 37 ++++ src/util/regexp.h | 4 + 6 files changed, 320 insertions(+), 4 deletions(-) diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 164d22723..250d0f369 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -90,6 +90,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine(d_notify, c, "theory::strings", true), d_im(*this, c, u, d_equalityEngine, out), d_conflict(c, false), + d_pendingConflict(c), d_nf_pairs(c), d_pregistered_terms_cache(u), d_registered_terms_cache(u), @@ -1084,10 +1085,116 @@ TheoryStrings::EqcInfo::EqcInfo(context::Context* c) : d_length_term(c), d_code_term(c), d_cardinality_lem_k(c), - d_normalized_length(c) + d_normalized_length(c), + d_prefixC(c), + d_suffixC(c) { } +Node TheoryStrings::EqcInfo::addEndpointConst(Node t, Node c, bool isSuf) +{ + // check conflict + Node prev = isSuf ? d_suffixC : d_prefixC; + if (!prev.isNull()) + { + Trace("strings-eager-pconf-debug") << "Check conflict " << prev << ", " << t + << " post=" << isSuf << std::endl; + Node prevC = utils::getConstantEndpoint(prev, isSuf); + Assert(!prevC.isNull()); + Assert(prevC.getKind() == CONST_STRING); + if (c.isNull()) + { + c = utils::getConstantEndpoint(t, isSuf); + Assert(!c.isNull()); + } + Assert(c.getKind() == CONST_STRING); + bool conflict = false; + // if the constant prefixes are different + if (c != prevC) + { + // conflicts between constants should be handled by equality engine + Assert(!t.isConst() || !prev.isConst()); + Trace("strings-eager-pconf-debug") + << "Check conflict constants " << prevC << ", " << c << std::endl; + const String& ps = prevC.getConst(); + const String& cs = c.getConst(); + unsigned pvs = ps.size(); + unsigned cvs = cs.size(); + if (pvs == cvs || (pvs > cvs && t.isConst()) + || (cvs > pvs && prev.isConst())) + { + // If equal length, cannot be equal due to node check above. + // If one is fully constant and has less length than the other, then the + // other will not fit and we are in conflict. + conflict = true; + } + else + { + const String& larges = pvs > cvs ? ps : cs; + const String& smalls = pvs > cvs ? cs : ps; + if (isSuf) + { + conflict = !larges.hasSuffix(smalls); + } + else + { + conflict = !larges.hasPrefix(smalls); + } + } + if (!conflict && (pvs > cvs || prev.isConst())) + { + // current is subsumed, either shorter prefix or the other is a full + // constant + return Node::null(); + } + } + else if (!t.isConst()) + { + // current is subsumed since the other may be a full constant + return Node::null(); + } + if (conflict) + { + Trace("strings-eager-pconf") + << "Conflict for " << prevC << ", " << c << std::endl; + std::vector ccs; + Node r[2]; + for (unsigned i = 0; i < 2; i++) + { + Node tp = i == 0 ? t : prev; + if (tp.getKind() == STRING_IN_REGEXP) + { + ccs.push_back(tp); + r[i] = tp[0]; + } + else + { + r[i] = tp; + } + } + if (r[0] != r[1]) + { + ccs.push_back(r[0].eqNode(r[1])); + } + Assert(!ccs.empty()); + Node ret = + ccs.size() == 1 ? ccs[0] : NodeManager::currentNM()->mkNode(AND, ccs); + Trace("strings-eager-pconf") + << "String: eager prefix conflict: " << ret << std::endl; + return ret; + } + } + if (isSuf) + { + d_suffixC = t; + } + else + { + d_prefixC = t; + } + return Node::null(); +} + TheoryStrings::EqcInfo * TheoryStrings::getOrMakeEqcInfo( Node eqc, bool doMake ) { std::map< Node, EqcInfo* >::iterator eqc_i = d_eqc_info.find( eqc ); if( eqc_i==d_eqc_info.end() ){ @@ -1123,7 +1230,7 @@ void TheoryStrings::eqNotifyNewClass(TNode t){ { Trace("strings-debug") << "New length eqc : " << t << std::endl; Node r = d_equalityEngine.getRepresentative(t[0]); - EqcInfo * ei = getOrMakeEqcInfo( r, true ); + EqcInfo* ei = getOrMakeEqcInfo(r); if (k == kind::STRING_LENGTH) { ei->d_length_term = t[0]; @@ -1134,8 +1241,42 @@ void TheoryStrings::eqNotifyNewClass(TNode t){ } //we care about the length of this string registerTerm( t[0], 1 ); - }else{ - //getExtTheory()->registerTerm( t ); + return; + } + else if (k == CONST_STRING) + { + EqcInfo* ei = getOrMakeEqcInfo(t); + ei->d_prefixC = t; + ei->d_suffixC = t; + return; + } + else if (k == STRING_CONCAT) + { + addEndpointsToEqcInfo(t, t, t); + } +} + +void TheoryStrings::addEndpointsToEqcInfo(Node t, Node concat, Node eqc) +{ + Assert(concat.getKind() == STRING_CONCAT + || concat.getKind() == REGEXP_CONCAT); + EqcInfo* ei = nullptr; + // check each side + for (unsigned r = 0; r < 2; r++) + { + unsigned index = r == 0 ? 0 : concat.getNumChildren() - 1; + Node c = utils::getConstantComponent(concat[index]); + if (!c.isNull()) + { + if (ei == nullptr) + { + ei = getOrMakeEqcInfo(eqc); + } + Trace("strings-eager-pconf-debug") + << "New term: " << concat << " for " << t << " with prefix " << c + << " (" << (r == 1) << ")" << std::endl; + setPendingConflictWhen(ei->addEndpointConst(t, c, r == 1)); + } } } @@ -1152,6 +1293,16 @@ void TheoryStrings::eqNotifyPreMerge(TNode t1, TNode t2){ { e1->d_code_term.set(e2->d_code_term); } + if (!e2->d_prefixC.get().isNull()) + { + setPendingConflictWhen( + e1->addEndpointConst(e2->d_prefixC, Node::null(), false)); + } + if (!e2->d_suffixC.get().isNull()) + { + setPendingConflictWhen( + e1->addEndpointConst(e2->d_suffixC, Node::null(), true)); + } if( e2->d_cardinality_lem_k.get()>e1->d_cardinality_lem_k.get() ) { e1->d_cardinality_lem_k.set( e2->d_cardinality_lem_k ); } @@ -1297,6 +1448,30 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) { Trace("strings-pending-debug") << " Finished assert equality" << std::endl; } else { d_equalityEngine.assertPredicate( atom, polarity, exp ); + if (atom.getKind() == STRING_IN_REGEXP) + { + if (polarity && atom[1].getKind() == REGEXP_CONCAT) + { + Node eqc = d_equalityEngine.getRepresentative(atom[0]); + addEndpointsToEqcInfo(atom, atom[1], eqc); + } + } + } + // process the conflict + if (!d_conflict) + { + if (!d_pendingConflict.get().isNull()) + { + std::vector a; + a.push_back(d_pendingConflict.get()); + Trace("strings-pending") << "Process pending conflict " + << d_pendingConflict.get() << std::endl; + Node conflictNode = mkExplain(a); + d_conflict = true; + Trace("strings-conflict") + << "CONFLICT: Eager prefix : " << conflictNode << std::endl; + d_out->conflict(conflictNode); + } } Trace("strings-pending-debug") << " Now collect terms" << std::endl; // Collect extended function terms in the atom. Notice that we must register @@ -1306,6 +1481,14 @@ void TheoryStrings::assertPendingFact(Node atom, bool polarity, Node exp) { Trace("strings-pending-debug") << " Finished collect terms" << std::endl; } +void TheoryStrings::setPendingConflictWhen(Node conf) +{ + if (!conf.isNull() && d_pendingConflict.get().isNull()) + { + d_pendingConflict = conf; + } +} + void TheoryStrings::addToExplanation( Node a, Node b, std::vector< Node >& exp ) { if( a!=b ){ Debug("strings-explain") << "Add to explanation : " << a << " == " << b << std::endl; diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index e3bb2e719..3d4b28e7f 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -256,6 +256,8 @@ class TheoryStrings : public Theory { InferenceManager d_im; /** Are we in conflict */ context::CDO d_conflict; + /** Pending conflict */ + context::CDO d_pendingConflict; /** map from terms to their normal forms */ std::map d_normal_form; /** get normal form */ @@ -360,6 +362,29 @@ private: context::CDO d_code_term; context::CDO< unsigned > d_cardinality_lem_k; context::CDO< Node > d_normalized_length; + /** + * A node that explains the longest constant prefix known of this + * equivalence class. This can either be: + * (1) A term from this equivalence class, including a constant "ABC" or + * concatenation term (str.++ "ABC" ...), or + * (2) A membership of the form (str.in.re x R) where x is in this + * equivalence class and R is a regular expression of the form + * (str.to.re "ABC") or (re.++ (str.to.re "ABC") ...). + */ + context::CDO d_prefixC; + /** same as above, for suffix. */ + context::CDO d_suffixC; + /** add prefix constant + * + * This informs this equivalence class info that a term t in its + * equivalence class has a constant prefix (if isSuf=true) or suffix + * (if isSuf=false). The constant c (if non-null) is the value of that + * constant, if it has been computed already. + * + * If this method returns a non-null node ret, then ret is a conjunction + * corresponding to a conflict that holds in the current context. + */ + Node addEndpointConst(Node t, Node c, bool isSuf); }; /** map from representatives to information necessary for equivalence classes */ std::map< Node, EqcInfo* > d_eqc_info; @@ -659,6 +684,28 @@ private: * of atom, including calls to registerTerm. */ void assertPendingFact(Node atom, bool polarity, Node exp); + /** add endpoints to eqc info + * + * This method is called when term t is the explanation for why equivalence + * class eqc may have a constant endpoint due to a concatentation term concat. + * For example, we may call this method on: + * t := (str.++ x y), concat := (str.++ x y), eqc + * for some eqc that is currently equal to t. Another example is: + * t := (str.in.re z (re.++ r s)), concat := (re.++ r s), eqc + * for some eqc that is currently equal to z. + */ + void addEndpointsToEqcInfo(Node t, Node concat, Node eqc); + /** set pending conflict + * + * If conf is non-null, this is called when conf is a conjunction of literals + * that hold in the current context that are unsatisfiable. It is set as the + * "pending conflict" to be processed as a conflict lemma on the output + * channel of this class. It is not sent out immediately since it may require + * explanation from the equality engine, and may be called at any time, e.g. + * during a merge operation, when the equality engine is not in a state to + * provide explanations. + */ + void setPendingConflictWhen(Node conf); /** * This processes the infer info ii as an inference. In more detail, it calls * the inference manager to process the inference, it introduces Skolems, and diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index c8951e10a..d3ae5384e 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -109,6 +109,31 @@ Node mkConcat(Kind k, std::vector& c) : (c.size() == 1 ? c[0] : nm->mkConst(String(""))); } +Node getConstantComponent(Node t) +{ + Kind tk = t.getKind(); + if (tk == STRING_TO_REGEXP) + { + return t[0].isConst() ? t[0] : Node::null(); + } + return tk == CONST_STRING ? t : Node::null(); +} + +Node getConstantEndpoint(Node e, bool isSuf) +{ + Kind ek = e.getKind(); + if (ek == STRING_IN_REGEXP) + { + e = e[1]; + ek = e.getKind(); + } + if (ek == STRING_CONCAT || ek == REGEXP_CONCAT) + { + return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]); + } + return getConstantComponent(e); +} + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 69400d005..57d882625 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -58,6 +58,26 @@ void getConcat(Node n, std::vector& c); */ Node mkConcat(Kind k, std::vector& c); +/** + * Get constant component. Returns the string constant represented by the + * string or regular expression t. For example: + * "ABC" -> "ABC", (str.to.re "ABC") -> "ABC", (str.++ x "ABC") -> null + */ +Node getConstantComponent(Node t); + +/** + * Get constant prefix / suffix from expression. For example, if isSuf=false: + * "ABC" -> "ABC" + * (str.++ "ABC" x) -> "ABC" + * (str.to.re "ABC") -> "ABC" + * (re.++ (str.to.re "ABC") ...) -> "ABC" + * (re.in x (str.to.re "ABC")) -> "ABC" + * (re.in x (re.++ (str.to.re "ABC") ...)) -> "ABC" + * (str.++ x "ABC") -> null + * (re.in x (re.++ (re.* "D") (str.to.re "ABC"))) -> null + */ +Node getConstantEndpoint(Node e, bool isSuf); + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index ed9455e6d..a76e9084b 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -394,6 +394,43 @@ std::size_t String::rfind(const String &y, const std::size_t start) const { return std::string::npos; } +bool String::hasPrefix(const String& y) const +{ + size_t s = size(); + size_t ys = y.size(); + if (ys > s) + { + return false; + } + for (size_t i = 0; i < ys; i++) + { + if (d_str[i] != y.d_str[i]) + { + return false; + } + } + return true; +} + +bool String::hasSuffix(const String& y) const +{ + size_t s = size(); + size_t ys = y.size(); + if (ys > s) + { + return false; + } + size_t idiff = s - ys; + for (size_t i = 0; i < ys; i++) + { + if (d_str[i + idiff] != y.d_str[i]) + { + return false; + } + } + return true; +} + String String::replace(const String &s, const String &t) const { std::size_t ret = find(s); if (ret != std::string::npos) { diff --git a/src/util/regexp.h b/src/util/regexp.h index f7c6fb2ae..d1cb197fb 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -142,6 +142,10 @@ class CVC4_PUBLIC String { std::size_t find(const String& y, const std::size_t start = 0) const; std::size_t rfind(const String& y, const std::size_t start = 0) const; + /** Returns true if y is a prefix of this */ + bool hasPrefix(const String& y) const; + /** Returns true if y is a suffix of this */ + bool hasSuffix(const String& y) const; String replace(const String& s, const String& t) const; String substr(std::size_t i) const; -- 2.30.2