From 21419712aaa764031ad8731e777117c258e49ce6 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Thu, 10 Feb 2022 13:43:09 -0600 Subject: [PATCH] Use witness terms to represent values for large strings in models (#8089) This makes it so that we don't prematurely fail on some Facebook benchmarks. --- src/options/strings_options.toml | 9 +++ src/theory/strings/theory_strings.cpp | 56 +++++++++++++------ src/theory/strings/theory_strings_utils.cpp | 24 +++++++- src/theory/strings/theory_strings_utils.h | 7 +++ src/theory/theory_model.cpp | 9 ++- .../regress/regress0/strings/large-model.smt2 | 6 +- 6 files changed, 87 insertions(+), 24 deletions(-) diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 74112b286..610a1089f 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -241,3 +241,12 @@ name = "Strings Theory" [[option.mode.NONE]] name = "none" help = "do not use array-inspired solver for sequence updates" + +[[option]] + name = "stringsModelMaxLength" + category = "regular" + long = "strings-model-max-len=N" + type = "uint64_t" + default = "1000" + maximum = "65536" + help = "The maximum size of string values in models" diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 94393c41b..f4833ca11 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -306,6 +306,8 @@ bool TheoryStrings::collectModelInfoType( d_state.separateByLength(repVec, colT, ltsT); const std::vector >& col = colT[tn]; const std::vector& lts = ltsT[tn]; + // indices in col that have lengths that are too big to represent + std::unordered_set oobIndices; NodeManager* nm = NodeManager::currentNM(); std::map< Node, Node > processed; @@ -313,14 +315,9 @@ bool TheoryStrings::collectModelInfoType( std::vector< Node > lts_values; std::map values_used; std::vector len_splits; - for( unsigned i=0; i0 ) { - Trace("strings-model") << ", "; - } - Trace("strings-model") << col[i][j]; - } + for (size_t i = 0, csize = col.size(); i < csize; i++) + { + Trace("strings-model") << "Checking length for {" << col[i]; Trace("strings-model") << " } (length is " << lts[i] << ")" << std::endl; Node len_value; if( lts[i].isConst() ) { @@ -335,16 +332,19 @@ bool TheoryStrings::collectModelInfoType( { lts_values.push_back(Node::null()); } + else if (len_value.getConst() > options().strings.stringsModelMaxLength) + { + // note that we give a warning instead of throwing logic exception if we + // cannot construct the string, these are then assigned witness terms + // below + warning() << "The model was computed to have strings of length " + << len_value << ". Based on the current value of option --strings-model-max-len, we only allow strings up to length " + << options().strings.stringsModelMaxLength << std::endl; + oobIndices.insert(i); + lts_values.push_back(len_value); + } else { - // must throw logic exception if we cannot construct the string - if (len_value.getConst() > String::maxSize()) - { - std::stringstream ss; - ss << "The model was computed to have strings of length " << len_value - << ". We only allow strings up to length " << String::maxSize(); - throw LogicException(ss.str()); - } std::size_t lvalue = len_value.getConst().getNumerator().toUnsignedInt(); auto itvu = values_used.find(lvalue); @@ -371,7 +371,9 @@ bool TheoryStrings::collectModelInfoType( conSeq = &d_asolver.getConnectedSequences(); } //step 3 : assign values to equivalence classes that are pure variables - for( unsigned i=0; i pure_eq; Node lenValue = lts_values[i]; Trace("strings-model") << "Considering (" << col[i].size() @@ -390,6 +392,7 @@ bool TheoryStrings::collectModelInfoType( // in the term set and, as a result, are skipped when the equality // engine is asserted to the theory model. m->getEqualityEngine()->addTerm(eqc); + Trace("strings-model") << "-> constant" << std::endl; continue; } NormalForm& nfe = d_csolver.getNormalForm(eqc); @@ -398,6 +401,25 @@ bool TheoryStrings::collectModelInfoType( // will be assigned via a concatenation of normal form eqc continue; } + // check if the length is too big to represent + if (wasOob) + { + processed[eqc] = eqc; + Assert(!lenValue.isNull() && lenValue.isConst()); + // make the abstract value (witness ((x String)) (= (str.len x) + // lenValue)) + Node w = utils::mkAbstractStringValueForLength(eqc, lenValue); + Trace("strings-model") + << "-> length out of bounds, assign abstract " << w << std::endl; + if (!m->assertEquality(eqc, w, true)) + { + Unreachable() << "TheoryStrings::collectModelInfoType: Inconsistent " + "abstract equality" + << std::endl; + return false; + } + continue; + } // ensure we have decided on length value at this point if (lenValue.isNull()) { diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index a65f98e7e..9404298c4 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -18,10 +18,10 @@ #include #include "expr/attribute.h" +#include "expr/bound_var_manager.h" #include "expr/skolem_manager.h" #include "options/strings_options.h" #include "theory/quantifiers/fmf/bounded_integers.h" -#include "theory/quantifiers/quantifiers_attributes.h" #include "theory/rewriter.h" #include "theory/strings/arith_entail.h" #include "theory/strings/strings_entail.h" @@ -409,6 +409,28 @@ Node mkForallInternal(Node bvl, Node body) return quantifiers::BoundedIntegers::mkBoundedForall(bvl, body); } +/** + * Mapping to the variable used for binding the witness term for the abstract + * value below. + */ +struct StringValueForLengthVarAttributeId +{ +}; +typedef expr::Attribute + StringValueForLengthVarAttribute; + +Node mkAbstractStringValueForLength(Node n, Node len) +{ + NodeManager* nm = NodeManager::currentNM(); + BoundVarManager* bvm = nm->getBoundVarManager(); + Node cacheVal = BoundVarManager::getCacheValue(n, len); + Node v = bvm->mkBoundVar( + cacheVal, "s", n.getType()); + Node pred = nm->mkNode(STRING_LENGTH, v).eqNode(len); + // return (witness ((v String)) (= (str.len v) len)) + return nm->mkNode(WITNESS, nm->mkNode(BOUND_VAR_LIST, v), pred); +} + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index f97391df8..6aff3742a 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -204,6 +204,13 @@ unsigned getLoopMinOccurrences(TNode node); */ Node mkForallInternal(Node bvl, Node body); +/** + * Make abstract value for string-like term n whose length is given by len. + * This is used for constructing models for strings whose lengths are too large + * to represent in memory. + */ +Node mkAbstractStringValueForLength(Node n, Node len); + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/theory/theory_model.cpp b/src/theory/theory_model.cpp index 6a0d50ac0..eb0945ce5 100644 --- a/src/theory/theory_model.cpp +++ b/src/theory/theory_model.cpp @@ -805,8 +805,13 @@ std::string TheoryModel::debugPrintModelEqc() const bool TheoryModel::isValue(TNode node) { - return node.isConst() || node.getKind() == Kind::REAL_ALGEBRAIC_NUMBER - || node.getKind() == Kind::LAMBDA; + if (node.isConst()) + { + return true; + } + Kind k = node.getKind(); + return k == kind::REAL_ALGEBRAIC_NUMBER || k == kind::LAMBDA + || k == kind::WITNESS; } } // namespace theory diff --git a/test/regress/regress0/strings/large-model.smt2 b/test/regress/regress0/strings/large-model.smt2 index f3aa7f8f2..bfcb71d8b 100644 --- a/test/regress/regress0/strings/large-model.smt2 +++ b/test/regress/regress0/strings/large-model.smt2 @@ -1,7 +1,5 @@ -; COMMAND-LINE: --lang=smt2.6 --check-models -; SCRUBBER: sed -E 's/of length [0-9]+/of length LENGTH/' -; EXPECT: (error "The model was computed to have strings of length LENGTH. We only allow strings up to length 4294967295") -; EXIT: 1 +; COMMAND-LINE: -q +; EXPECT: sat (set-logic SLIA) (declare-fun x () String) (assert (> (str.len x) 100000000000000000000000000000000000000000000000000)) -- 2.30.2