From: Andrew Reynolds Date: Fri, 17 Dec 2021 14:27:26 +0000 (-0600) Subject: Implement model construction for the array extension of sequences (#7815) X-Git-Tag: cvc5-1.0.0~645 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0adba8bf8e9769da251bee670fb9b8dde4012927;p=cvc5.git Implement model construction for the array extension of sequences (#7815) The array solver is still not usable, the only remaining changes from seqArray are to connect the array solver to the strings theory solver via the strategy + the regressions. --- diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp index 868e855ab..eb3c70a83 100644 --- a/src/theory/strings/core_solver.cpp +++ b/src/theory/strings/core_solver.cpp @@ -1986,6 +1986,15 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, void CoreSolver::processDeq(Node ni, Node nj) { + // If using the sequence update solver, we always apply extensionality. + // This is required for model soundness currently, although we could + // investigate determine cases where the disequality is already + // satisfied (for optimization). + if (options().strings.seqArray != options::SeqArrayMode::NONE) + { + processDeqExtensionality(ni, nj); + return; + } NodeManager* nm = NodeManager::currentNM(); NormalForm& nfni = getNormalForm(ni); NormalForm& nfnj = getNormalForm(nj); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index ee068fe16..4c60b5141 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -78,6 +78,8 @@ TheoryStrings::TheoryStrings(Env& env, OutputChannel& out, Valuation valuation) d_csolver, d_extTheory, d_statistics), + d_asolver( + env, d_state, d_im, d_termReg, d_csolver, d_esolver, d_extTheory), d_rsolver( env, d_state, d_im, d_termReg, d_csolver, d_esolver, d_statistics), d_regexp_elim(options().strings.regExpElimAgg, d_pnm, userContext()), @@ -211,7 +213,6 @@ void TheoryStrings::presolve() { Debug("strings-presolve") << "Finished presolve" << std::endl; } - ///////////////////////////////////////////////////////////////////////////// // MODEL GENERATION ///////////////////////////////////////////////////////////////////////////// @@ -231,7 +232,7 @@ bool TheoryStrings::collectModelValues(TheoryModel* m, Trace("strings-model") << "TheoryStrings::collectModelValues" << std::endl; // Collects representatives by types and orders sequence types by how nested // they are - std::map > repSet; + std::map> repSet; std::unordered_set toProcess; // Generate model // get the relevant string equivalence classes @@ -259,6 +260,23 @@ bool TheoryStrings::collectModelValues(TheoryModel* m, return true; } +/** + * Object to sort by the value of pairs in the write model returned by the + * sequences array solver. + */ +struct SortSeqIndex +{ + SortSeqIndex() {} + /** the comparison */ + bool operator()(std::pair i, std::pair j) + { + Assert(i.first.isConst() && i.first.getType().isInteger() + && j.first.isConst() && j.first.getType().isInteger()); + Assert(i.first != j.first); + return i.first.getConst() < j.first.getConst(); + } +}; + bool TheoryStrings::collectModelInfoType( TypeNode tn, std::unordered_set& toProcess, @@ -278,6 +296,7 @@ bool TheoryStrings::collectModelInfoType( } toProcess.erase(tn); + SEnumLenSet sels; // get partition of strings of equal lengths for the representatives of the // current type std::map > > colT; @@ -343,116 +362,212 @@ bool TheoryStrings::collectModelInfoType( // confirmed by calculus invariant, see paper Trace("strings-model") << "Assign to equivalence classes..." << std::endl; std::map pure_eq_assign; + // if we are using the sequences array solver, get the connected sequences + const std::map* conSeq = nullptr; + std::map::const_iterator itcs; + if (options().strings.seqArray != options::SeqArrayMode::NONE) + { + conSeq = &d_asolver.getConnectedSequences(); + } //step 3 : assign values to equivalence classes that are pure variables for( unsigned i=0; i pure_eq; - Trace("strings-model") << "The (" << col[i].size() - << ") equivalence classes "; + Node lenValue = lts_values[i]; + Trace("strings-model") << "Considering (" << col[i].size() + << ") equivalence classes for length " << lenValue + << std::endl; for (const Node& eqc : col[i]) { - Trace("strings-model") << eqc << " "; + Trace("strings-model") << "- eqc: " << eqc << std::endl; //check if col[i][j] has only variables - if (!eqc.isConst()) + if (eqc.isConst()) + { + processed[eqc] = eqc; + // Make sure that constants are asserted to the theory model that we + // are building. It is possible that new constants were introduced by + // the eager evaluation in the equality engine. These terms are missing + // in the term set and, as a result, are skipped when the equality + // engine is asserted to the theory model. + m->getEqualityEngine()->addTerm(eqc); + continue; + } + NormalForm& nfe = d_csolver.getNormalForm(eqc); + if (nfe.d_nf.size() != 1) + { + // will be assigned via a concatenation of normal form eqc + continue; + } + // ensure we have decided on length value at this point + if (lenValue.isNull()) + { + // start with length two (other lengths have special precendence) + size_t lvalue = 2; + while (values_used.find(lvalue) != values_used.end()) + { + lvalue++; + } + Trace("strings-model") + << "*** Decide to make length of " << lvalue << std::endl; + lenValue = nm->mkConstInt(Rational(lvalue)); + values_used[lvalue] = Node::null(); + } + // is it an equivalence class with a seq.unit term? + Node assignedValue; + if (nfe.d_nf[0].getKind() == SEQ_UNIT) + { + Node argVal; + if (nfe.d_nf[0][0].getType().isStringLike()) + { + // By this point, we should have assigned model values for the + // elements of this sequence type because of the check in the + // beginning of this method + argVal = m->getRepresentative(nfe.d_nf[0][0]); + } + else + { + // Otherwise, we use the term itself. We cannot get the model + // value of this term, since it might not be available yet, as + // it may belong to a theory that has not built its model yet. + // Hence, we assign a (non-constant) skeleton (seq.unit argVal). + argVal = nfe.d_nf[0][0]; + } + Assert(!argVal.isNull()) << "No value for " << nfe.d_nf[0][0]; + assignedValue = Rewriter::rewrite(nm->mkNode(SEQ_UNIT, argVal)); + Trace("strings-model") + << "-> assign via seq.unit: " << assignedValue << std::endl; + } + else if (d_termReg.hasStringCode() && lenValue == d_one) + { + // It has a code and the length of these equivalence classes are one. + // Note this code is solely for strings, not sequences. + EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false); + if (eip && !eip->d_codeTerm.get().isNull()) + { + // its value must be equal to its code + Node ct = nm->mkNode(kind::STRING_TO_CODE, eip->d_codeTerm.get()); + Node ctv = d_valuation.getModelValue(ct); + unsigned cvalue = + ctv.getConst().getNumerator().toUnsignedInt(); + Trace("strings-model") << "(code: " << cvalue << ") "; + std::vector vec; + vec.push_back(cvalue); + assignedValue = nm->mkConst(String(vec)); + Trace("strings-model") + << "-> assign via str.code: " << assignedValue << std::endl; + } + } + else if (options().strings.seqArray != options::SeqArrayMode::NONE) { - NormalForm& nfe = d_csolver.getNormalForm(eqc); - if (nfe.d_nf.size() == 1) + // determine skeleton based on the write model, if it exists + const std::map& writeModel = d_asolver.getWriteModel(eqc); + Trace("strings-model") + << "write model size " << writeModel.size() << std::endl; + if (!writeModel.empty()) { - // is it an equivalence class with a seq.unit term? - if (nfe.d_nf[0].getKind() == SEQ_UNIT) + Trace("strings-model") + << "Write model for " << tn << " is:" << std::endl; + std::vector> writes; + std::unordered_set usedWrites; + for (const std::pair& w : writeModel) { - Node argVal; - if (nfe.d_nf[0][0].getType().isStringLike()) + Trace("strings-model") + << " " << w.first << " -> " << w.second << std::endl; + Node ivalue = d_valuation.getModelValue(w.first); + Assert(ivalue.isConst() && ivalue.getType().isInteger()); + // ignore if out of bounds + Rational irat = ivalue.getConst(); + if (irat.sgn() == -1 || irat >= lenValue.getConst()) { - // By this point, we should have assigned model values for the - // elements of this sequence type because of the check in the - // beginning of this method - argVal = m->getRepresentative(nfe.d_nf[0][0]); + continue; } - else + if (usedWrites.find(ivalue) != usedWrites.end()) { - // Otherwise, we use the term itself. We cannot get the model - // value of this term, since it might not be available yet, as - // it may belong to a theory that has not built its model yet. - // Hence, we assign a (non-constant) skeleton (seq.unit argVal). - argVal = nfe.d_nf[0][0]; + continue; } - Assert(!argVal.isNull()) << "No value for " << nfe.d_nf[0][0]; - Node c = rewrite(nm->mkNode(SEQ_UNIT, argVal)); - pure_eq_assign[eqc] = c; - Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") "; - m->getEqualityEngine()->addTerm(c); + usedWrites.insert(ivalue); + Node wsunit = nm->mkNode(SEQ_UNIT, w.second); + writes.emplace_back(ivalue, wsunit); } - // does it have a code and the length of these equivalence classes are - // one? - else if (d_termReg.hasStringCode() && lts_values[i] == d_one) + // sort based on index value + SortSeqIndex ssi; + std::sort(writes.begin(), writes.end(), ssi); + std::vector cc; + uint32_t currIndex = 0; + for (size_t w = 0, wsize = writes.size(); w <= wsize; w++) { - EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false); - if (eip && !eip->d_codeTerm.get().isNull()) + uint32_t nextIndex; + if (w == writes.size()) + { + nextIndex = + lenValue.getConst().getNumerator().toUnsignedInt(); + } + else + { + Node windex = writes[w].first; + Assert(windex.getConst() + <= Rational(String::maxSize())); + nextIndex = + windex.getConst().getNumerator().toUnsignedInt(); + Assert(nextIndex >= currIndex); + } + if (nextIndex > currIndex) + { + // allocate arbitrary value to fill gap + Assert(conSeq != nullptr); + Node base = eqc; + itcs = conSeq->find(eqc); + if (itcs != conSeq->end()) + { + base = itcs->second; + } + // use a skeleton for the gap and not a concrete value, as we + // do not know how which values from the element type are + // allowable (i.e. unconstrained) to assign to the gap + Node cgap = mkSkeletonFromBase(base, currIndex, nextIndex); + cc.push_back(cgap); + } + // then take read + if (w < wsize) { - // its value must be equal to its code - Node ct = nm->mkNode(kind::STRING_TO_CODE, eip->d_codeTerm.get()); - Node ctv = d_valuation.getModelValue(ct); - unsigned cvalue = - ctv.getConst().getNumerator().toUnsignedInt(); - Trace("strings-model") << "(code: " << cvalue << ") "; - std::vector vec; - vec.push_back(cvalue); - Node mv = nm->mkConst(String(vec)); - pure_eq_assign[eqc] = mv; - m->getEqualityEngine()->addTerm(mv); + cc.push_back(writes[w].second); } + currIndex = nextIndex + 1; } - pure_eq.push_back(eqc); + assignedValue = utils::mkConcat(cc, tn); + Trace("strings-model") + << "-> assign via seq.update/nth eqc: " << assignedValue + << std::endl; } } + if (!assignedValue.isNull()) + { + pure_eq_assign[eqc] = assignedValue; + m->getEqualityEngine()->addTerm(assignedValue); + } else { - processed[eqc] = eqc; - // Make sure that constants are asserted to the theory model that we - // are building. It is possible that new constants were introduced by - // the eager evaluation in the equality engine. These terms are missing - // in the term set and, as a result, are skipped when the equality - // engine is asserted to the theory model. - m->getEqualityEngine()->addTerm(eqc); + Trace("strings-model") << "-> no assignment" << std::endl; } + pure_eq.push_back(eqc); } - Trace("strings-model") << "have length " << lts_values[i] << std::endl; //assign a new length if necessary if( !pure_eq.empty() ){ - if( lts_values[i].isNull() ){ - // start with length two (other lengths have special precendence) - std::size_t lvalue = 2; - while( values_used.find( lvalue )!=values_used.end() ){ - lvalue++; - } - Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl; - lts_values[i] = nm->mkConstInt(Rational(lvalue)); - values_used[lvalue] = Node::null(); - } - Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes "; + Trace("strings-model") << "Need to assign values of length " << lenValue + << " to equivalence classes "; for( unsigned j=0; j() <= Rational(String::maxSize())) + Assert(lenValue.getConst() <= Rational(String::maxSize())) << "Exceeded UINT32_MAX in string model"; uint32_t currLen = - lts_values[i].getConst().getNumerator().toUnsignedInt(); - std::unique_ptr sel; + lenValue.getConst().getNumerator().toUnsignedInt(); Trace("strings-model") << "Cardinality of alphabet is " << d_termReg.getAlphabetCardinality() << std::endl; - if (tn.isString()) // string-only - { - sel.reset(new StringEnumLen( - currLen, currLen, d_termReg.getAlphabetCardinality())); - } - else - { - sel.reset(new SeqEnumLen(tn, nullptr, currLen, currLen)); - } + SEnumLen* sel = sels.getEnumerator(currLen, tn); for (const Node& eqc : pure_eq) { Node c; @@ -505,33 +620,8 @@ bool TheoryStrings::collectModelInfoType( if (tn.isSequence() && !d_env.isFiniteType(tn.getSequenceElementType())) { - // Make a skeleton instead. In particular, this means that - // a value: - // (seq.++ (seq.unit 0) (seq.unit 1) (seq.unit 2)) - // becomes: - // (seq.++ (seq.unit k_0) (seq.unit k_1) (seq.unit k_2)) - // where k_0, k_1, k_2 are fresh integer variables. These - // variables will be assigned values in the standard way by the - // model. This construction is necessary since the strings solver - // must constrain the length of the model of an equivalence class - // (e.g. in this case to length 3); moreover we cannot assign a - // concrete value since it may conflict with other skeletons we - // have assigned, e.g. for the case of (seq.unit argVal) above. - SkolemManager* sm = nm->getSkolemManager(); - BoundVarManager* bvm = nm->getBoundVarManager(); - Assert(c.getKind() == CONST_SEQUENCE); - const Sequence& sn = c.getConst(); - const std::vector& snvec = sn.getVec(); - std::vector skChildren; - for (const Node& snv : snvec) - { - TypeNode etn = snv.getType(); - Node v = bvm->mkBoundVar(snv, etn); - // use a skolem, not a bound variable - Node kv = sm->mkPurifySkolem(v, "smv"); - skChildren.push_back(nm->mkNode(SEQ_UNIT, kv)); - } - c = utils::mkConcat(skChildren, tn); + // Make a skeleton instead. + c = mkSkeletonFor(c); } // increment sel->increment(); @@ -616,6 +706,49 @@ bool TheoryStrings::collectModelInfoType( return true; } +Node TheoryStrings::mkSkeletonFor(Node c) +{ + NodeManager* nm = NodeManager::currentNM(); + SkolemManager* sm = nm->getSkolemManager(); + BoundVarManager* bvm = nm->getBoundVarManager(); + Assert(c.getKind() == CONST_SEQUENCE); + const Sequence& sn = c.getConst(); + const std::vector& snvec = sn.getVec(); + std::vector skChildren; + for (const Node& snv : snvec) + { + TypeNode etn = snv.getType(); + Node v = bvm->mkBoundVar(snv, etn); + // use a skolem, not a bound variable + Node kv = sm->mkPurifySkolem(v, "smv"); + skChildren.push_back(nm->mkNode(SEQ_UNIT, kv)); + } + return utils::mkConcat(skChildren, c.getType()); +} + +Node TheoryStrings::mkSkeletonFromBase(Node r, + size_t currIndex, + size_t nextIndex) +{ + Assert(!r.isNull()); + Assert(r.getType().isSequence()); + NodeManager* nm = NodeManager::currentNM(); + SkolemManager* sm = nm->getSkolemManager(); + std::vector cacheVals; + cacheVals.push_back(r); + std::vector skChildren; + TypeNode etn = r.getType().getSequenceElementType(); + for (size_t i = currIndex; i < nextIndex; i++) + { + cacheVals.push_back(nm->mkConst(CONST_RATIONAL, Rational(currIndex))); + Node kv = sm->mkSkolemFunction( + SkolemFunId::SEQ_MODEL_BASE_ELEMENT, etn, cacheVals); + skChildren.push_back(nm->mkNode(SEQ_UNIT, kv)); + cacheVals.pop_back(); + } + return utils::mkConcat(skChildren, r.getType()); +} + ///////////////////////////////////////////////////////////////////////////// // MAIN SOLVER ///////////////////////////////////////////////////////////////////////////// diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index dd15e08ec..fa9262e53 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -25,6 +25,7 @@ #include "context/cdlist.h" #include "expr/node_trie.h" #include "theory/ext_theory.h" +#include "theory/strings/array_solver.h" #include "theory/strings/base_solver.h" #include "theory/strings/core_solver.h" #include "theory/strings/eager_solver.h" @@ -189,8 +190,8 @@ class TheoryStrings : public Theory { * * @param tn The type to compute model values for * @param toProcess Remaining types to compute model values for - * @param repSet A map of types to the representatives of the equivalence - * classes of the given type + * @param repSet A map of types to representatives of + * the equivalence classes of the given type * @return false if a conflict is discovered while doing this assignment. */ bool collectModelInfoType( @@ -232,6 +233,32 @@ class TheoryStrings : public Theory { * there does not exist a term of the form str.len(si) in the current context. */ void checkRegisterTermsNormalForms(); + /** + * Turn a sequence constant into a skeleton specifying how to construct + * its value. + * In particular, this means that value: + * (seq.++ (seq.unit 0) (seq.unit 1) (seq.unit 2)) + * becomes: + * (seq.++ (seq.unit k_0) (seq.unit k_1) (seq.unit k_2)) + * where k_0, k_1, k_2 are fresh integer variables. These + * variables will be assigned values in the standard way by the + * model. This construction is necessary during model construction since the + * strings solver must constrain the length of the model of an equivalence + * class (e.g. in this case to length 3); moreover we cannot assign a concrete + * value since it may conflict with other skeletons we have assigned. + */ + Node mkSkeletonFor(Node value); + /** + * Make the skeleton for the basis of constructing sequence r between + * indices currIndex (inclusive) and nextIndex (exclusive). For example, if + * currIndex = 2 and nextIndex = 5, then this returns: + * (seq.++ (seq.unit k_{r,2}) (seq.unit k_{r,3}) (seq.unit k_{r,4})) + * where k_{r,2}, k_{r,3}, k_{r,4} are Skolem variables of the element type + * of r that are unique to the pairs (r,2), (r,3), (r,4). In other words, + * these Skolems abstractly represent the element at positions 2, 3, 4 in the + * model for r. + */ + Node mkSkeletonFromBase(Node r, size_t currIndex, size_t nextIndex); //-----------------------end inference steps /** run the given inference step */ void runInferStep(InferStep s, int effort); @@ -283,6 +310,11 @@ class TheoryStrings : public Theory { * involving extended string functions. */ ExtfSolver d_esolver; + /** + * The array solver, which implements specialized approaches for + * seq.nth/seq.update. + */ + ArraySolver d_asolver; /** regular expression solver module */ RegExpSolver d_rsolver; /** regular expression elimination module */ diff --git a/src/theory/theory_model.cpp b/src/theory/theory_model.cpp index 33ec5b23b..96e86e482 100644 --- a/src/theory/theory_model.cpp +++ b/src/theory/theory_model.cpp @@ -60,6 +60,7 @@ void TheoryModel::finishInit(eq::EqualityEngine* ee) d_equalityEngine->addFunctionKind(kind::APPLY_CONSTRUCTOR); d_equalityEngine->addFunctionKind(kind::APPLY_SELECTOR_TOTAL); d_equalityEngine->addFunctionKind(kind::APPLY_TESTER); + d_equalityEngine->addFunctionKind(kind::SEQ_NTH_TOTAL); // do not interpret APPLY_UF if we are not assigning function values if (!d_enableFuncModels) { diff --git a/src/theory/theory_model_builder.cpp b/src/theory/theory_model_builder.cpp index a83527004..f80cf5159 100644 --- a/src/theory/theory_model_builder.cpp +++ b/src/theory/theory_model_builder.cpp @@ -132,7 +132,8 @@ bool TheoryEngineModelBuilder::isValue(TNode n) bool TheoryEngineModelBuilder::isAssignable(TNode n) { - if (n.getKind() == kind::SELECT || n.getKind() == kind::APPLY_SELECTOR_TOTAL) + if (n.getKind() == kind::SELECT || n.getKind() == kind::APPLY_SELECTOR_TOTAL + || n.getKind() == kind::SEQ_NTH_TOTAL) { // selectors are always assignable (where we guarantee that they are not // evaluatable here)