From 43170db86742dd5fbe33b4bdc9a938f6c1ceae5d Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Fri, 10 Dec 2021 12:59:47 -0800 Subject: [PATCH] Array-inspired Sequence Solver - Adding the ArrayCoreSolver class and options (#7723) This pull request adding the ArrayCoreSolver class and the options to turn on the array-inspired sequence solver. --- src/CMakeLists.txt | 2 + src/options/strings_options.toml | 18 ++ src/theory/arith/arith_rewriter.cpp | 2 +- src/theory/ext_theory.h | 2 + src/theory/inference_id.cpp | 5 + src/theory/inference_id.h | 6 + src/theory/strings/array_core_solver.cpp | 300 +++++++++++++++++++++++ src/theory/strings/array_core_solver.h | 127 ++++++++++ 8 files changed, 461 insertions(+), 1 deletion(-) create mode 100644 src/theory/strings/array_core_solver.cpp create mode 100644 src/theory/strings/array_core_solver.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index de5ef27af..07f1495fe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1017,6 +1017,8 @@ libcvc5_add_sources( theory/sort_inference.h theory/strings/array_solver.cpp theory/strings/array_solver.h + theory/strings/array_core_solver.cpp + theory/strings/array_core_solver.h theory/strings/arith_entail.cpp theory/strings/arith_entail.h theory/strings/base_solver.cpp diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 01b92c1b7..05025708b 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -207,3 +207,21 @@ name = "Strings Theory" type = "bool" default = "false" help = "use regular expressions for eager length conflicts" + +[[option]] + name = "seqArray" + category = "expert" + long = "seq-array=MODE" + type = "SeqArrayMode" + default = "NONE" + help = "use array-inspired solver for sequence updates in eager or lazy mode" + help_mode = "use array-inspired solver for sequence updates in eager or lazy mode" +[[option.mode.LAZY]] + name = "lazy" + help = "use array-inspired solver for sequence updates in lazy mode" +[[option.mode.EAGER]] + name = "eager" + help = "use array-inspired solver for sequence updates in eager mode" +[[option.mode.NONE]] + name = "none" + help = "do not use array-inspired solver for sequence updates" diff --git a/src/theory/arith/arith_rewriter.cpp b/src/theory/arith/arith_rewriter.cpp index 8408f15bb..f5e7b427e 100644 --- a/src/theory/arith/arith_rewriter.cpp +++ b/src/theory/arith/arith_rewriter.cpp @@ -214,7 +214,7 @@ RewriteResponse ArithRewriter::postRewriteTerm(TNode t){ return RewriteResponse(REWRITE_DONE, t); case kind::TO_REAL: case kind::CAST_TO_REAL: return RewriteResponse(REWRITE_DONE, t[0]); - case kind::TO_INTEGER:return rewriteExtIntegerOp(t); + case kind::TO_INTEGER: return rewriteExtIntegerOp(t); case kind::POW: { if(t[1].getKind() == kind::CONST_RATIONAL){ diff --git a/src/theory/ext_theory.h b/src/theory/ext_theory.h index 998dac146..ecd061e7c 100644 --- a/src/theory/ext_theory.h +++ b/src/theory/ext_theory.h @@ -77,6 +77,8 @@ enum class ExtReducedId STRINGS_REGEXP_INCLUDE, // subsumed due to RE inclusion reasoning for negative memberships STRINGS_REGEXP_INCLUDE_NEG, + // reduction for seq.nth over seq.rev + STRINGS_NTH_REV, }; /** * Converts an ext reduced identifier to a string. diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp index b525d1c53..5a4e8e7bc 100644 --- a/src/theory/inference_id.cpp +++ b/src/theory/inference_id.cpp @@ -406,6 +406,11 @@ const char* toString(InferenceId i) case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT"; case InferenceId::STRINGS_ARRAY_NTH_CONCAT: return "STRINGS_ARRAY_NTH_CONCAT"; + case InferenceId::STRINGS_ARRAY_NTH_EXTRACT: + return "STRINGS_ARRAY_NTH_EXTRACT"; + case InferenceId::STRINGS_ARRAY_NTH_UPDATE: + return "STRINGS_ARRAY_NTH_UPDATE"; + case InferenceId::STRINGS_ARRAY_NTH_REV: return "STRINGS_ARRAY_NTH_REV"; case InferenceId::STRINGS_RE_NF_CONFLICT: return "STRINGS_RE_NF_CONFLICT"; case InferenceId::STRINGS_RE_UNFOLD_POS: return "STRINGS_RE_UNFOLD_POS"; case InferenceId::STRINGS_RE_UNFOLD_NEG: return "STRINGS_RE_UNFOLD_NEG"; diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h index f6e826ac3..d5a3e1b2c 100644 --- a/src/theory/inference_id.h +++ b/src/theory/inference_id.h @@ -687,6 +687,12 @@ enum class InferenceId STRINGS_ARRAY_NTH_UNIT, // nth over conatenation STRINGS_ARRAY_NTH_CONCAT, + // nth over extract + STRINGS_ARRAY_NTH_EXTRACT, + // nth over update + STRINGS_ARRAY_NTH_UPDATE, + // nth over reverse + STRINGS_ARRAY_NTH_REV, //-------------------- regexp solver // regular expression normal form conflict // ( x in R ^ x = y ^ rewrite((str.in_re y R)) = false ) => false diff --git a/src/theory/strings/array_core_solver.cpp b/src/theory/strings/array_core_solver.cpp new file mode 100644 index 000000000..ed3690068 --- /dev/null +++ b/src/theory/strings/array_core_solver.cpp @@ -0,0 +1,300 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andres Noetzli + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Sequences solver for seq.nth/seq.update. + */ + +#include "theory/strings/array_core_solver.h" + +#include "theory/strings/array_solver.h" +#include "theory/strings/theory_strings_utils.h" +#include "theory/strings/word.h" +#include "util/rational.h" + +using namespace cvc5::context; +using namespace cvc5::kind; + +namespace cvc5 { +namespace theory { +namespace strings { + +ArrayCoreSolver::ArrayCoreSolver(Env& env, + SolverState& s, + InferenceManager& im, + TermRegistry& tr, + CoreSolver& cs, + ExtfSolver& es, + ExtTheory& extt) + : EnvObj(env), + d_state(s), + d_im(im), + d_termReg(tr), + d_csolver(cs), + d_esolver(es), + d_extt(extt), + d_lem(context()) +{ +} + +ArrayCoreSolver::~ArrayCoreSolver() {} + +void ArrayCoreSolver::sendInference(const std::vector& exp, + const Node& lem, + const InferenceId iid) +{ + if (d_lem.find(lem) == d_lem.end()) + { + d_lem.insert(lem); + Trace("seq-update") << "- send lemma - " << lem << std::endl; + d_im.sendInference(exp, lem, iid); + } +} + +void ArrayCoreSolver::checkNth(const std::vector& nthTerms) +{ + NodeManager* nm = NodeManager::currentNM(); + std::vector extractTerms = d_esolver.getActive(STRING_SUBSTR); + for (const Node& n : extractTerms) + { + if (d_termReg.isHandledUpdate(n)) + { + // (seq.extract A i l) ^ (<= 0 i) ^ (< i (str.len A)) --> (seq.unit + // (seq.nth A i)) + std::vector exp; + Node cond1 = nm->mkNode(LEQ, nm->mkConst(Rational(0)), n[1]); + Node cond2 = nm->mkNode(LT, n[1], nm->mkNode(STRING_LENGTH, n[0])); + Node cond = nm->mkNode(AND, cond1, cond2); + Node body1 = nm->mkNode( + EQUAL, n, nm->mkNode(SEQ_UNIT, nm->mkNode(SEQ_NTH, n[0], n[1]))); + Node body2 = nm->mkNode(EQUAL, n, Word::mkEmptyWord(n.getType())); + Node lem = nm->mkNode(ITE, cond, body1, body2); + sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_EXTRACT); + } + } +} + +void ArrayCoreSolver::checkUpdate(const std::vector& updateTerms) +{ + NodeManager* nm = NodeManager::currentNM(); + + Trace("seq-array-debug") << "updateTerms number: " << updateTerms.size() + << std::endl; + for (const Node& n : updateTerms) + { + // current term (seq.update x i a) + + // inference rule is: + // (seq.update x i a) in TERMS + // (seq.nth t j) in TERMS + // t == (seq.update x i a) + // ---------------------------------------------------------------------- + // (seq.nth (seq.update x i a) j) = + // (ITE, j in range(i, i+len(a)), (seq.nth a (j - i)), (seq.nth x j)) + + // t == (seq.update x i a) => + // (seq.nth t j) = (ITE, j in range(i, i+len(a)), (seq.nth a (j - i)), + // (seq.nth x j)) + + // note that the term could rewrites to a skolem + // get proxy variable for the update term as t + Node termProxy = d_termReg.getProxyVariableFor(n); + Trace("seq-update") << "- " << termProxy << " = " << n << std::endl; + std::vector exp; + d_im.addToExplanation(termProxy, n, exp); + + // optimization: add a short cut t == (seq.update n[0] n[1] n[2]) => t[i] == + // n[2][0] + Node left = nm->mkNode(SEQ_NTH, termProxy, n[1]); + Node right = + nm->mkNode(SEQ_NTH, n[2], nm->mkConst(Rational(0))); // n[2][0] + right = Rewriter::rewrite(right); + Node lem = nm->mkNode(EQUAL, left, right); + Trace("seq-array-debug") << "enter" << std::endl; + sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE); + + // enumerate possible index + for (auto nth : d_index_map) + { + Node seq = nth.first; + if (d_state.areEqual(seq, n) || d_state.areEqual(seq, n[0])) + { + std::set indexes = nth.second; + for (Node j : indexes) + { + // optimization: add a short cut for special case (seq.update n[0] + // n[1] (seq.unit e)) + if (n[2].getKind() == SEQ_UNIT) + { + left = nm->mkNode(DISTINCT, n[1], j); + Node nth1 = nm->mkNode(SEQ_NTH, termProxy, j); + Node nth2 = nm->mkNode(SEQ_NTH, n[0], j); + right = nm->mkNode(EQUAL, nth1, nth2); + lem = nm->mkNode(IMPLIES, left, right); + sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE); + } + + // normal cases + left = nm->mkNode(SEQ_NTH, termProxy, j); + Node cond = nm->mkNode( + AND, + nm->mkNode(LEQ, n[1], j), + nm->mkNode( + LT, + j, + nm->mkNode(PLUS, n[1], nm->mkNode(STRING_LENGTH, n[2])))); + Node body1 = nm->mkNode(SEQ_NTH, n[2], nm->mkNode(MINUS, j, n[1])); + Node body2 = nm->mkNode(SEQ_NTH, n[0], j); + right = nm->mkNode(ITE, cond, body1, body2); + lem = nm->mkNode(EQUAL, left, right); + sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE); + } + } + } + } +} + +void ArrayCoreSolver::check(const std::vector& nthTerms, + const std::vector& updateTerms) +{ + NodeManager* nm = NodeManager::currentNM(); + + Trace("seq-array-debug") << "NTH SIZE: " << nthTerms.size() << std::endl; + if (Trace.isOn("seq-array-terms")) + { + for (const Node& n : nthTerms) + { + Trace("seq-array-terms") << n << std::endl; + } + } + Trace("seq-array-debug") << "UPDATE SIZE: " << updateTerms.size() + << std::endl; + if (Trace.isOn("seq-array-terms")) + { + for (const Node& n : updateTerms) + { + Trace("seq-array-terms") << n << std::endl; + } + } + Trace("seq-update") << "SequencesArraySolver::check..." << std::endl; + d_writeModel.clear(); + d_index_map.clear(); + for (const Node& n : nthTerms) + { + // (seq.nth n[0] n[1]) + Node r = d_state.getRepresentative(n[0]); + Trace("seq-update") << "- " << r << ": " << n[1] << " -> " << n + << std::endl; + d_writeModel[r][n[1]] = n; + if (d_index_map.find(r) == d_index_map.end()) + { + std::set indexes; + indexes.insert(n[1]); + d_index_map[r] = indexes; + } + else + { + d_index_map[r].insert(n[1]); + } + + if (n[0].getKind() == STRING_REV) + { + Node s = n[0][0]; + Node i = n[1]; + Node sLen = nm->mkNode(STRING_LENGTH, s); + Node iRev = nm->mkNode( + MINUS, sLen, nm->mkNode(PLUS, i, nm->mkConst(Rational(1)))); + + std::vector nexp; + nexp.push_back(nm->mkNode(LEQ, nm->mkConst(Rational(0)), i)); + nexp.push_back(nm->mkNode(LT, i, sLen)); + + // 0 <= i ^ i < len(s) => seq.nth(seq.rev(s), i) = seq.nth(s, len(s) - i - + // 1) + Node ret = nm->mkNode(SEQ_NTH, s, iRev); + d_im.sendInference( + {}, nexp, n.eqNode(ret), InferenceId::STRINGS_ARRAY_NTH_REV); + d_extt.markReduced(n, ExtReducedId::STRINGS_NTH_REV); + } + } + checkNth(nthTerms); + checkUpdate(updateTerms); + // compute connected sequences + if (!d_im.hasSent()) + { + computeConnected(updateTerms); + } +} + +void ArrayCoreSolver::computeConnected(const std::vector& updateTerms) +{ + d_connectedSeq.clear(); + std::map conTmp; + std::map::iterator it; + for (const Node& n : updateTerms) + { + Node newRep; + for (size_t i = 0; i < 2; i++) + { + Node s = i == 0 ? n[0] : n; + TNode r = d_state.getRepresentative(s); + // get the find + it = conTmp.find(r); + while (it != conTmp.end()) + { + r = it->second; + it = conTmp.find(r); + } + if (i == 0) + { + newRep = r; + } + else if (newRep != r) + { + conTmp[newRep] = r; + } + } + } + // go back and normalize the find to representatives + for (std::pair& c : conTmp) + { + TNode r = c.first; + it = conTmp.find(r); + while (it != conTmp.end()) + { + r = it->second; + it = conTmp.find(r); + } + d_connectedSeq[c.first] = r; + } +} + +const std::map& ArrayCoreSolver::getWriteModel(Node eqc) +{ + if (Trace.isOn("seq-write-model")) + { + Trace("seq-write-model") << "write model of " << eqc << ":" << std::endl; + for (auto& x : d_writeModel[eqc]) + { + Trace("seq-write-model") << x.first << ": " << x.second << std::endl; + } + } + return d_writeModel[eqc]; +} + +const std::map& ArrayCoreSolver::getConnectedSequences() +{ + return d_connectedSeq; +} + +} // namespace strings +} // namespace theory +} // namespace cvc5 diff --git a/src/theory/strings/array_core_solver.h b/src/theory/strings/array_core_solver.h new file mode 100644 index 000000000..7101da625 --- /dev/null +++ b/src/theory/strings/array_core_solver.h @@ -0,0 +1,127 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andres Noetzli + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Sequences solver for seq.nth/seq.update. + */ + +#include "cvc5_private.h" + +#ifndef CVC5__THEORY__STRINGS__ARRAY_CORE_SOLVER_H +#define CVC5__THEORY__STRINGS__ARRAY_CORE_SOLVER_H + +#include "theory/strings/core_solver.h" +#include "theory/strings/extf_solver.h" +#include "theory/strings/inference_manager.h" +#include "theory/strings/solver_state.h" +#include "theory/strings/term_registry.h" + +namespace cvc5 { +namespace theory { +namespace strings { + +class ArrayCoreSolver : protected EnvObj +{ + public: + ArrayCoreSolver(Env& env, + SolverState& s, + InferenceManager& im, + TermRegistry& tr, + CoreSolver& cs, + ExtfSolver& es, + ExtTheory& extt); + ~ArrayCoreSolver(); + + /** + * Perform reasoning about seq.nth and seq.update operations. + * + * Can assume that seq.update / seq.nth terms only apply to concatenation-free + * equivalence classes. + */ + void check(const std::vector& nthTerms, + const std::vector& updateTerms); + + /** + * + * @param eqc The sequence equivalence class representative. We can assume + * the equivalence class of eqc contains no concatenation terms. + * @return the map corresponding to the model for eqc. The domain of + * the returned map should be in distinct integer equivalence classes of the + * equality engine of strings theory. The model assigned to eqc will be + * a skeleton constructed via seq.++ where the components take values from + * this map. + */ + const std::map& getWriteModel(Node eqc); + + /** + * Get connected sequences + * @return a map M such that sequence equivalence class representatives x and + * y are connected if an only if M[x] = M[y]. + */ + const std::map& getConnectedSequences(); + + private: + void sendInference(const std::vector& exp, + const Node& lem, + const InferenceId iid); + + /** + * Perform reasoning about seq.nth operation. + * It handled the reduction from seq.extract to seq.nth, following the rule + * below: (t = (seq.extract A i 1)) ^ (0 <= i) ^ (i < (str.len A)) + * ---------------------------------------------------------------------- + * t = (seq.unit (seq.nth A i)) + */ + void checkNth(const std::vector& nthTerms); + + /** + * Perform reasoning about seq.update operation. + * It handled the reduction from seq.update to seq.nth, following the rule + * below: (seq.update x i a) in TERMS (seq.nth t j) in TERMS t == (seq.update + * x i a) + * ---------------------------------------------------------------------- + * (seq.nth (seq.update x i a) j) = (ITE, j in range(i, i+len(a)), (seq.nth a + * (j - i)), (seq.nth x j)) + */ + void checkUpdate(const std::vector& updateTerms); + + // TODO: document + void computeConnected(const std::vector& updateTerms); + + /** The solver state object */ + SolverState& d_state; + /** The (custom) output channel of the theory of strings */ + InferenceManager& d_im; + /** Reference to the term registry of theory of strings */ + TermRegistry& d_termReg; + /** reference to the core solver, used for certain queries */ + CoreSolver& d_csolver; + /** reference to the extended solver, used for certain queries */ + ExtfSolver& d_esolver; + /** the extended theory object for the theory of strings */ + ExtTheory& d_extt; + /** The write model */ + std::map> d_writeModel; + /** Connected */ + std::map d_connectedSeq; + /** The set of lemmas been sent */ + context::CDHashSet d_lem; + + // ========= data structure ========= + /** Map sequence variable to indices that occurred in nth terms */ + std::map> d_index_map; +}; + +} // namespace strings +} // namespace theory +} // namespace cvc5 + +#endif -- 2.30.2