From 108b8aa42f15b021d351790b36e2ece958871db8 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 14 Jan 2022 20:16:20 -0600 Subject: [PATCH] Add inverse inference for update-over-concat (#7954) This adds an inference that applies when an atomic update term becomes equal to a concatenation term. This fixes bogus models discovered when writing the model soundness proof for the sequences array solver. --- src/theory/inference_id.cpp | 2 + src/theory/inference_id.h | 2 + src/theory/strings/array_solver.cpp | 302 ++++++++++++------ src/theory/strings/array_solver.h | 6 + test/regress/CMakeLists.txt | 2 + .../seq/update-concat-non-atomic.smt2 | 17 + .../seq/update-concat-non-atomic2.smt2 | 24 ++ 7 files changed, 251 insertions(+), 104 deletions(-) create mode 100644 test/regress/regress0/seq/update-concat-non-atomic.smt2 create mode 100644 test/regress/regress0/seq/update-concat-non-atomic2.smt2 diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp index a2dcdec8c..0d116b1eb 100644 --- a/src/theory/inference_id.cpp +++ b/src/theory/inference_id.cpp @@ -406,6 +406,8 @@ const char* toString(InferenceId i) return "STRINGS_ARRAY_UPDATE_UNIT"; case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT: return "STRINGS_ARRAY_UPDATE_CONCAT"; + case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT_INVERSE: + return "STRINGS_ARRAY_UPDATE_CONCAT_INVERSE"; case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT"; case InferenceId::STRINGS_ARRAY_NTH_CONCAT: return "STRINGS_ARRAY_NTH_CONCAT"; diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h index 8d9078503..a82093bab 100644 --- a/src/theory/inference_id.h +++ b/src/theory/inference_id.h @@ -686,6 +686,8 @@ enum class InferenceId STRINGS_ARRAY_UPDATE_UNIT, // update over conatenation STRINGS_ARRAY_UPDATE_CONCAT, + // update over conatenation, inverse + STRINGS_ARRAY_UPDATE_CONCAT_INVERSE, // nth over unit STRINGS_ARRAY_NTH_UNIT, // nth over conatenation diff --git a/src/theory/strings/array_solver.cpp b/src/theory/strings/array_solver.cpp index 672ca8b76..3d364bf3c 100644 --- a/src/theory/strings/array_solver.cpp +++ b/src/theory/strings/array_solver.cpp @@ -93,7 +93,6 @@ void ArraySolver::checkArrayEager() void ArraySolver::checkTerms(Kind k) { Assert(k == STRING_UPDATE || k == SEQ_NTH); - NodeManager* nm = NodeManager::currentNM(); // get all the active update terms that have not been reduced in the // current context by context-dependent simplification std::vector terms = d_esolver.getActive(k); @@ -101,22 +100,60 @@ void ArraySolver::checkTerms(Kind k) { Trace("seq-array-debug") << "check term " << t << "..." << std::endl; Assert(t.getKind() == k); - if (k == STRING_UPDATE && !d_termReg.isHandledUpdate(t)) + if (k == STRING_UPDATE) { - // not handled by procedure - Trace("seq-array-debug") << "...unhandled" << std::endl; - continue; + if (!d_termReg.isHandledUpdate(t)) + { + // not handled by procedure + Trace("seq-array-debug") << "...unhandled" << std::endl; + continue; + } + // for update terms, also check the inverse inference + checkTerm(t, true); } - Node r = d_state.getRepresentative(t[0]); - NormalForm& nf = d_csolver.getNormalForm(r); - Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl; - std::vector nfChildren; + // check the normal inference + checkTerm(t, false); + } +} + +void ArraySolver::checkTerm(Node t, bool checkInv) +{ + NodeManager* nm = NodeManager::currentNM(); + Kind k = t.getKind(); + Node r = d_state.getRepresentative(t[0]); + Node rself; + NormalForm& nf = d_csolver.getNormalForm(r); + Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl; + std::vector nfChildren; + if (checkInv) + { + if (k != STRING_UPDATE) + { + return; + } + // If the term we are updating is atomic, but the update itself + // not atomic, then we will apply the inverse version of the update + // concat rule, based on the normal form of the term itself. + rself = d_state.getRepresentative(t); + NormalForm& nfSelf = d_csolver.getNormalForm(rself); + if (nfSelf.d_nf.size() > 1) + { + nfChildren.insert( + nfChildren.end(), nfSelf.d_nf.begin(), nfSelf.d_nf.end()); + } + else + { + return; + } + } + else + { if (nf.d_nf.empty()) { // updates should have been reduced (UPD_EMPTYSTR) Assert(k != STRING_UPDATE); Trace("seq-array-debug") << "...empty" << std::endl; - continue; + return; } else if (nf.d_nf.size() == 1) { @@ -172,129 +209,186 @@ void ArraySolver::checkTerms(Kind k) d_eqProc.insert(eq); d_im.sendInference(exp, eq, iid); } - continue; + return; } else if (ck != CONST_SEQUENCE) { - // otherwise, if the normal form is not a constant sequence, the - // equivalence class is pure wrt concatenation. - d_currTerms[k].push_back(t); - continue; + bool isAtomic = true; + if (k == STRING_UPDATE) + { + // If the term we are updating is atomic, but the update itself + // not atomic, then we will apply the inverse version of the update + // concat rule, based on the normal form of the term itself. + rself = d_state.getRepresentative(t); + NormalForm& nfSelf = d_csolver.getNormalForm(rself); + if (nfSelf.d_nf.size() > 1) + { + isAtomic = false; + } + } + if (isAtomic) + { + // otherwise, if the normal form is not a constant sequence, and we + // are not a non-atomic update term, then this term will be given to + // the core array solver. + d_currTerms[k].push_back(t); + } + return; + } + else + { + // if the normal form is a constant sequence, it is treated as a + // concatenation. We split per character and case split on whether the + // nth/update falls on each character below, which must have a size + // greater than one. + std::vector chars = Word::getChars(nf.d_nf[0]); + Assert(chars.size() > 1); + nfChildren.insert(nfChildren.end(), chars.begin(), chars.end()); } - // if the normal form is a constant sequence, it is treated as a - // concatenation. We split per character and case split on whether the - // nth/update falls on each character below, which must have a size - // greater than one. - std::vector chars = Word::getChars(nf.d_nf[0]); - Assert (chars.size()>1); - nfChildren.insert(nfChildren.end(), chars.begin(), chars.end()); } else { nfChildren.insert(nfChildren.end(), nf.d_nf.begin(), nf.d_nf.end()); } - // otherwise, we are the concatenation of the components - // NOTE: for nth, split on index vs component lengths, do not introduce ITE - std::vector cond; - std::vector cchildren; - std::vector lacc; - for (const Node& c : nfChildren) + } + // otherwise, we are the concatenation of the components + // NOTE: for nth, split on index vs component lengths, do not introduce ITE + std::vector cond; + std::vector cchildren; + std::vector lacc; + SkolemCache* skc = d_termReg.getSkolemCache(); + for (const Node& c : nfChildren) + { + Trace("seq-array-debug") << "...process " << c << std::endl; + Node clen = nm->mkNode(STRING_LENGTH, c); + Node currIndex = t[1]; + Node currSum = d_zero; + if (!lacc.empty()) { - Trace("seq-array-debug") << "...process " << c << std::endl; - Node clen = nm->mkNode(STRING_LENGTH, c); - Node currIndex = t[1]; - if (!lacc.empty()) - { - Node currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc); - currIndex = nm->mkNode(MINUS, currIndex, currSum); - } - Node cc; - // If it is a constant of length one, then the update/nth is determined - // in this interval. Notice this is done here as - // an optimization to short cut introducing terms like - // (seq.nth (seq.unit c) i), which by construction is only relevant in - // the context where i = 0, hence we replace by c here. - if (c.getKind() == CONST_SEQUENCE) - { - const Sequence& seq = c.getConst(); - if (seq.size() == 1) - { - if (k == STRING_UPDATE) - { - cc = nm->mkNode(ITE, t[1].eqNode(d_zero), t[2], c); - } - else - { - cc = seq.getVec()[0]; - } - } - } - // if we did not process as a constant of length one - if (cc.isNull()) + currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc); + currIndex = nm->mkNode(MINUS, currIndex, currSum); + } + Node cc; + if (k == STRING_UPDATE && checkInv) + { + // component for the reverse form of the update inference is a fresh + // variable, in particular, the purification variable for the substring + // of the term we are updating. + Node sstr = nm->mkNode(STRING_SUBSTR, t[0], currSum, clen); + cc = skc->mkSkolemCached(sstr, SkolemCache::SkolemId::SK_PURIFY, "z"); + } + // If it is a constant of length one, then the update/nth is determined + // in this interval. Notice this is done here as + // an optimization to short cut introducing terms like + // (seq.nth (seq.unit c) i), which by construction is only relevant in + // the context where i = 0, hence we replace by c here. + else if (c.getKind() == CONST_SEQUENCE) + { + const Sequence& seq = c.getConst(); + if (seq.size() == 1) { if (k == STRING_UPDATE) { - cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]); + cc = nm->mkNode(ITE, t[1].eqNode(d_zero), t[2], c); } else { - Assert(k == SEQ_NTH); - cc = nm->mkNode(SEQ_NTH, c, currIndex); + cc = seq.getVec()[0]; } } - Trace("seq-array-debug") << "......component " << cc << std::endl; - cchildren.push_back(cc); - lacc.push_back(clen); - if (k == SEQ_NTH) + } + // if we did not process as a constant of length one + if (cc.isNull()) + { + if (k == STRING_UPDATE) + { + cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]); + } + else { - Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc); - Node cf = nm->mkNode(LT, t[1], currSumPost); - Trace("seq-array-debug") << "......condition " << cf << std::endl; - cond.push_back(cf); + Assert(k == SEQ_NTH); + cc = nm->mkNode(SEQ_NTH, c, currIndex); } } - // z = (seq.++ x y) => - // (seq.update z n l) = - // (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1)) - // z = (seq.++ x y) => - // (seq.nth z n) = - // (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n) - // (ite (< n (str.len x)) (seq.nth x n) - // (seq.nth y (- n (str.len x))))) - InferenceId iid; - Node eq; - if (k == STRING_UPDATE) + Trace("seq-array-debug") << "......component " << cc << std::endl; + cchildren.push_back(cc); + lacc.push_back(clen); + if (k == SEQ_NTH) { - Node finalc = utils::mkConcat(cchildren, t.getType()); - eq = t.eqNode(finalc); - iid = InferenceId::STRINGS_ARRAY_UPDATE_CONCAT; + Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc); + Node cf = nm->mkNode(LT, t[1], currSumPost); + Trace("seq-array-debug") << "......condition " << cf << std::endl; + cond.push_back(cf); + } + else if (k == STRING_UPDATE && checkInv) + { + Node ccu = nm->mkNode(STRING_UPDATE, cc, currIndex, t[2]); + Node eq = c.eqNode(ccu); + Trace("seq-array-debug") << "......condition " << eq << std::endl; + cond.push_back(eq); + } + } + // z = (seq.++ x y) => + // (seq.update z n l) = + // (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1)) + // z = (seq.++ x y) => + // (seq.nth z n) = + // (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n) + // (ite (< n (str.len x)) (seq.nth x n) + // (seq.nth y (- n (str.len x))))) + InferenceId iid; + Node eq; + if (k == STRING_UPDATE) + { + Node finalc = utils::mkConcat(cchildren, t.getType()); + if (checkInv) + { + eq = t[0].eqNode(finalc); + cond.push_back(eq); + eq = nm->mkAnd(cond); } else { - std::reverse(cchildren.begin(), cchildren.end()); - std::reverse(cond.begin(), cond.end()); - Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf"); - eq = t.eqNode(cchildren[0]); - for (size_t i = 1, ncond = cond.size(); i < ncond; i++) - { - eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq); - } - Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]); - Node oobCond = - nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode()); - eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq); - iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT; + eq = t.eqNode(finalc); } - std::vector exp; + iid = checkInv ? InferenceId::STRINGS_ARRAY_UPDATE_CONCAT_INVERSE + : InferenceId::STRINGS_ARRAY_UPDATE_CONCAT; + } + else + { + std::reverse(cchildren.begin(), cchildren.end()); + std::reverse(cond.begin(), cond.end()); + Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf"); + eq = t.eqNode(cchildren[0]); + for (size_t i = 1, ncond = cond.size(); i < ncond; i++) + { + eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq); + } + Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]); + Node oobCond = + nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode()); + eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq); + iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT; + } + std::vector exp; + if (checkInv) + { + d_im.addToExplanation(rself, t, exp); + NormalForm& nfSelf = d_csolver.getNormalForm(rself); + exp.insert(exp.end(), nfSelf.d_exp.begin(), nfSelf.d_exp.end()); + exp.push_back(t.eqNode(nfSelf.d_base)); + } + else + { d_im.addToExplanation(r, t[0], exp); exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end()); exp.push_back(t[0].eqNode(nf.d_base)); - if (d_eqProc.find(eq) == d_eqProc.end()) - { - d_eqProc.insert(eq); - Trace("seq-array") << "- send lemma - " << eq << std::endl; - d_im.sendInference(exp, eq, iid); - } + } + if (d_eqProc.find(eq) == d_eqProc.end()) + { + d_eqProc.insert(eq); + Trace("seq-array") << "- send lemma - " << eq << std::endl; + d_im.sendInference(exp, eq, iid); } } diff --git a/src/theory/strings/array_solver.h b/src/theory/strings/array_solver.h index 23bacd118..c129f4837 100644 --- a/src/theory/strings/array_solver.h +++ b/src/theory/strings/array_solver.h @@ -85,6 +85,12 @@ class ArraySolver : protected EnvObj private: /** check terms of given kind */ void checkTerms(Kind k); + /** check inferences for the given term + * + * @param t the term to check + * @param checkInv Whether we are checking the inverse of the rule + */ + void checkTerm(Node t, bool checkInv); /** The solver state object */ SolverState& d_state; /** The (custom) output channel of the theory of strings */ diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 6a09e04cf..7c536454a 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1136,6 +1136,8 @@ set(regress_0_tests regress0/seq/seq-nth.smt2 regress0/seq/seq-rewrites.smt2 regress0/seq/seq-types.smt2 + regress0/seq/update-concat-non-atomic.smt2 + regress0/seq/update-concat-non-atomic2.smt2 regress0/seq/update-eq-unsat.smt2 regress0/sets/abt-min.smt2 regress0/sets/abt-te-exh.smt2 diff --git a/test/regress/regress0/seq/update-concat-non-atomic.smt2 b/test/regress/regress0/seq/update-concat-non-atomic.smt2 new file mode 100644 index 000000000..346f9c45f --- /dev/null +++ b/test/regress/regress0/seq/update-concat-non-atomic.smt2 @@ -0,0 +1,17 @@ +; COMMAND-LINE: --strings-exp --seq-array=lazy -q +; EXPECT: sat +(set-logic ALL) +(declare-sort S 0) +(declare-const x (Seq S)) +(declare-const y (Seq S)) +(declare-const y1 (Seq S)) +(declare-const y2 (Seq S)) +(declare-const i Int) +(declare-const a S) + +(assert (= y (seq.update x i (seq.unit a)))) +(assert (= y (seq.++ y1 y2))) +(assert (> (seq.len y1) 0)) +(assert (> (seq.len y2) 0)) + +(check-sat) diff --git a/test/regress/regress0/seq/update-concat-non-atomic2.smt2 b/test/regress/regress0/seq/update-concat-non-atomic2.smt2 new file mode 100644 index 000000000..5c14a10cf --- /dev/null +++ b/test/regress/regress0/seq/update-concat-non-atomic2.smt2 @@ -0,0 +1,24 @@ +; COMMAND-LINE: --strings-exp --seq-array=lazy -q +; EXPECT: sat +(set-logic ALL) +(declare-sort S 0) +(declare-const x (Seq S)) +(declare-const y (Seq S)) +(declare-const y1 (Seq S)) +(declare-const y2 (Seq S)) +(declare-const x1 (Seq S)) +(declare-const x2 (Seq S)) +(declare-const i Int) +(declare-const a S) + +(assert (= y (seq.update x i (seq.unit a)))) +(assert (= y (seq.++ y1 y2))) +(assert (= x (seq.++ x1 x2))) +(assert (> (seq.len y1) 0)) +(assert (> (seq.len y2) 0)) +(assert (> (seq.len x1) 0)) +(assert (> (seq.len x2) 0)) +(assert (<= 0 i)) +(assert (< i (seq.len x))) + +(check-sat) -- 2.30.2