From 43170db86742dd5fbe33b4bdc9a938f6c1ceae5d Mon Sep 17 00:00:00 2001
From: Ying Sheng <sqy1415@gmail.com>
Date: Fri, 10 Dec 2021 12:59:47 -0800
Subject: [PATCH] Array-inspired Sequence Solver - Adding the ArrayCoreSolver
 class and options (#7723)

This pull request adding the ArrayCoreSolver class and the options to turn on the array-inspired sequence solver.
---
 src/CMakeLists.txt                       |   2 +
 src/options/strings_options.toml         |  18 ++
 src/theory/arith/arith_rewriter.cpp      |   2 +-
 src/theory/ext_theory.h                  |   2 +
 src/theory/inference_id.cpp              |   5 +
 src/theory/inference_id.h                |   6 +
 src/theory/strings/array_core_solver.cpp | 300 +++++++++++++++++++++++
 src/theory/strings/array_core_solver.h   | 127 ++++++++++
 8 files changed, 461 insertions(+), 1 deletion(-)
 create mode 100644 src/theory/strings/array_core_solver.cpp
 create mode 100644 src/theory/strings/array_core_solver.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index de5ef27af..07f1495fe 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1017,6 +1017,8 @@ libcvc5_add_sources(
   theory/sort_inference.h
   theory/strings/array_solver.cpp
   theory/strings/array_solver.h
+  theory/strings/array_core_solver.cpp
+  theory/strings/array_core_solver.h
   theory/strings/arith_entail.cpp
   theory/strings/arith_entail.h
   theory/strings/base_solver.cpp
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml
index 01b92c1b7..05025708b 100644
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -207,3 +207,21 @@ name   = "Strings Theory"
   type       = "bool"
   default    = "false"
   help       = "use regular expressions for eager length conflicts"
+
+[[option]]
+  name       = "seqArray"
+  category   = "expert"
+  long       = "seq-array=MODE"
+  type       = "SeqArrayMode"
+  default    = "NONE"
+  help       = "use array-inspired solver for sequence updates in eager or lazy mode"
+  help_mode  = "use array-inspired solver for sequence updates in eager or lazy mode"
+[[option.mode.LAZY]]
+  name = "lazy"
+  help = "use array-inspired solver for sequence updates in lazy mode"
+[[option.mode.EAGER]]
+  name = "eager"
+  help = "use array-inspired solver for sequence updates in eager mode"
+[[option.mode.NONE]]
+  name = "none"
+  help = "do not use array-inspired solver for sequence updates"
diff --git a/src/theory/arith/arith_rewriter.cpp b/src/theory/arith/arith_rewriter.cpp
index 8408f15bb..f5e7b427e 100644
--- a/src/theory/arith/arith_rewriter.cpp
+++ b/src/theory/arith/arith_rewriter.cpp
@@ -214,7 +214,7 @@ RewriteResponse ArithRewriter::postRewriteTerm(TNode t){
       return RewriteResponse(REWRITE_DONE, t);
     case kind::TO_REAL:
     case kind::CAST_TO_REAL: return RewriteResponse(REWRITE_DONE, t[0]);
-    case kind::TO_INTEGER:return rewriteExtIntegerOp(t);
+    case kind::TO_INTEGER: return rewriteExtIntegerOp(t);
     case kind::POW:
       {
         if(t[1].getKind() == kind::CONST_RATIONAL){
diff --git a/src/theory/ext_theory.h b/src/theory/ext_theory.h
index 998dac146..ecd061e7c 100644
--- a/src/theory/ext_theory.h
+++ b/src/theory/ext_theory.h
@@ -77,6 +77,8 @@ enum class ExtReducedId
   STRINGS_REGEXP_INCLUDE,
   // subsumed due to RE inclusion reasoning for negative memberships
   STRINGS_REGEXP_INCLUDE_NEG,
+  // reduction for seq.nth over seq.rev
+  STRINGS_NTH_REV,
 };
 /**
  * Converts an ext reduced identifier to a string.
diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp
index b525d1c53..5a4e8e7bc 100644
--- a/src/theory/inference_id.cpp
+++ b/src/theory/inference_id.cpp
@@ -406,6 +406,11 @@ const char* toString(InferenceId i)
     case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT";
     case InferenceId::STRINGS_ARRAY_NTH_CONCAT:
       return "STRINGS_ARRAY_NTH_CONCAT";
+    case InferenceId::STRINGS_ARRAY_NTH_EXTRACT:
+      return "STRINGS_ARRAY_NTH_EXTRACT";
+    case InferenceId::STRINGS_ARRAY_NTH_UPDATE:
+      return "STRINGS_ARRAY_NTH_UPDATE";
+    case InferenceId::STRINGS_ARRAY_NTH_REV: return "STRINGS_ARRAY_NTH_REV";
     case InferenceId::STRINGS_RE_NF_CONFLICT: return "STRINGS_RE_NF_CONFLICT";
     case InferenceId::STRINGS_RE_UNFOLD_POS: return "STRINGS_RE_UNFOLD_POS";
     case InferenceId::STRINGS_RE_UNFOLD_NEG: return "STRINGS_RE_UNFOLD_NEG";
diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h
index f6e826ac3..d5a3e1b2c 100644
--- a/src/theory/inference_id.h
+++ b/src/theory/inference_id.h
@@ -687,6 +687,12 @@ enum class InferenceId
   STRINGS_ARRAY_NTH_UNIT,
   // nth over conatenation
   STRINGS_ARRAY_NTH_CONCAT,
+  // nth over extract
+  STRINGS_ARRAY_NTH_EXTRACT,
+  // nth over update
+  STRINGS_ARRAY_NTH_UPDATE,
+  // nth over reverse
+  STRINGS_ARRAY_NTH_REV,
   //-------------------- regexp solver
   // regular expression normal form conflict
   //   ( x in R ^ x = y ^ rewrite((str.in_re y R)) = false ) => false
diff --git a/src/theory/strings/array_core_solver.cpp b/src/theory/strings/array_core_solver.cpp
new file mode 100644
index 000000000..ed3690068
--- /dev/null
+++ b/src/theory/strings/array_core_solver.cpp
@@ -0,0 +1,300 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "theory/strings/array_core_solver.h"
+
+#include "theory/strings/array_solver.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
+#include "util/rational.h"
+
+using namespace cvc5::context;
+using namespace cvc5::kind;
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+ArrayCoreSolver::ArrayCoreSolver(Env& env,
+                                 SolverState& s,
+                                 InferenceManager& im,
+                                 TermRegistry& tr,
+                                 CoreSolver& cs,
+                                 ExtfSolver& es,
+                                 ExtTheory& extt)
+    : EnvObj(env),
+      d_state(s),
+      d_im(im),
+      d_termReg(tr),
+      d_csolver(cs),
+      d_esolver(es),
+      d_extt(extt),
+      d_lem(context())
+{
+}
+
+ArrayCoreSolver::~ArrayCoreSolver() {}
+
+void ArrayCoreSolver::sendInference(const std::vector<Node>& exp,
+                                    const Node& lem,
+                                    const InferenceId iid)
+{
+  if (d_lem.find(lem) == d_lem.end())
+  {
+    d_lem.insert(lem);
+    Trace("seq-update") << "- send lemma - " << lem << std::endl;
+    d_im.sendInference(exp, lem, iid);
+  }
+}
+
+void ArrayCoreSolver::checkNth(const std::vector<Node>& nthTerms)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  std::vector<Node> extractTerms = d_esolver.getActive(STRING_SUBSTR);
+  for (const Node& n : extractTerms)
+  {
+    if (d_termReg.isHandledUpdate(n))
+    {
+      // (seq.extract A i l) ^ (<= 0 i) ^ (< i (str.len A)) --> (seq.unit
+      // (seq.nth A i))
+      std::vector<Node> exp;
+      Node cond1 = nm->mkNode(LEQ, nm->mkConst(Rational(0)), n[1]);
+      Node cond2 = nm->mkNode(LT, n[1], nm->mkNode(STRING_LENGTH, n[0]));
+      Node cond = nm->mkNode(AND, cond1, cond2);
+      Node body1 = nm->mkNode(
+          EQUAL, n, nm->mkNode(SEQ_UNIT, nm->mkNode(SEQ_NTH, n[0], n[1])));
+      Node body2 = nm->mkNode(EQUAL, n, Word::mkEmptyWord(n.getType()));
+      Node lem = nm->mkNode(ITE, cond, body1, body2);
+      sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_EXTRACT);
+    }
+  }
+}
+
+void ArrayCoreSolver::checkUpdate(const std::vector<Node>& updateTerms)
+{
+  NodeManager* nm = NodeManager::currentNM();
+
+  Trace("seq-array-debug") << "updateTerms number: " << updateTerms.size()
+                           << std::endl;
+  for (const Node& n : updateTerms)
+  {
+    // current term (seq.update x i a)
+
+    // inference rule is:
+    // (seq.update x i a) in TERMS
+    // (seq.nth t j) in TERMS
+    // t == (seq.update x i a)
+    // ----------------------------------------------------------------------
+    // (seq.nth (seq.update x i a) j) =
+    //   (ITE, j in range(i, i+len(a)), (seq.nth a (j - i)),  (seq.nth x j))
+
+    // t == (seq.update x i a) =>
+    // (seq.nth t j) = (ITE, j in range(i, i+len(a)), (seq.nth a (j - i)),
+    // (seq.nth x j))
+
+    // note that the term could rewrites to a skolem
+    // get proxy variable for the update term as t
+    Node termProxy = d_termReg.getProxyVariableFor(n);
+    Trace("seq-update") << "- " << termProxy << " = " << n << std::endl;
+    std::vector<Node> exp;
+    d_im.addToExplanation(termProxy, n, exp);
+
+    // optimization: add a short cut t == (seq.update n[0] n[1] n[2]) => t[i] ==
+    // n[2][0]
+    Node left = nm->mkNode(SEQ_NTH, termProxy, n[1]);
+    Node right =
+        nm->mkNode(SEQ_NTH, n[2], nm->mkConst(Rational(0)));  // n[2][0]
+    right = Rewriter::rewrite(right);
+    Node lem = nm->mkNode(EQUAL, left, right);
+    Trace("seq-array-debug") << "enter" << std::endl;
+    sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE);
+
+    // enumerate possible index
+    for (auto nth : d_index_map)
+    {
+      Node seq = nth.first;
+      if (d_state.areEqual(seq, n) || d_state.areEqual(seq, n[0]))
+      {
+        std::set<Node> indexes = nth.second;
+        for (Node j : indexes)
+        {
+          // optimization: add a short cut for special case (seq.update n[0]
+          // n[1] (seq.unit e))
+          if (n[2].getKind() == SEQ_UNIT)
+          {
+            left = nm->mkNode(DISTINCT, n[1], j);
+            Node nth1 = nm->mkNode(SEQ_NTH, termProxy, j);
+            Node nth2 = nm->mkNode(SEQ_NTH, n[0], j);
+            right = nm->mkNode(EQUAL, nth1, nth2);
+            lem = nm->mkNode(IMPLIES, left, right);
+            sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE);
+          }
+
+          // normal cases
+          left = nm->mkNode(SEQ_NTH, termProxy, j);
+          Node cond = nm->mkNode(
+              AND,
+              nm->mkNode(LEQ, n[1], j),
+              nm->mkNode(
+                  LT,
+                  j,
+                  nm->mkNode(PLUS, n[1], nm->mkNode(STRING_LENGTH, n[2]))));
+          Node body1 = nm->mkNode(SEQ_NTH, n[2], nm->mkNode(MINUS, j, n[1]));
+          Node body2 = nm->mkNode(SEQ_NTH, n[0], j);
+          right = nm->mkNode(ITE, cond, body1, body2);
+          lem = nm->mkNode(EQUAL, left, right);
+          sendInference(exp, lem, InferenceId::STRINGS_ARRAY_NTH_UPDATE);
+        }
+      }
+    }
+  }
+}
+
+void ArrayCoreSolver::check(const std::vector<Node>& nthTerms,
+                            const std::vector<Node>& updateTerms)
+{
+  NodeManager* nm = NodeManager::currentNM();
+
+  Trace("seq-array-debug") << "NTH SIZE: " << nthTerms.size() << std::endl;
+  if (Trace.isOn("seq-array-terms"))
+  {
+    for (const Node& n : nthTerms)
+    {
+      Trace("seq-array-terms") << n << std::endl;
+    }
+  }
+  Trace("seq-array-debug") << "UPDATE SIZE: " << updateTerms.size()
+                           << std::endl;
+  if (Trace.isOn("seq-array-terms"))
+  {
+    for (const Node& n : updateTerms)
+    {
+      Trace("seq-array-terms") << n << std::endl;
+    }
+  }
+  Trace("seq-update") << "SequencesArraySolver::check..." << std::endl;
+  d_writeModel.clear();
+  d_index_map.clear();
+  for (const Node& n : nthTerms)
+  {
+    // (seq.nth n[0] n[1])
+    Node r = d_state.getRepresentative(n[0]);
+    Trace("seq-update") << "- " << r << ": " << n[1] << " -> " << n
+                        << std::endl;
+    d_writeModel[r][n[1]] = n;
+    if (d_index_map.find(r) == d_index_map.end())
+    {
+      std::set<Node> indexes;
+      indexes.insert(n[1]);
+      d_index_map[r] = indexes;
+    }
+    else
+    {
+      d_index_map[r].insert(n[1]);
+    }
+
+    if (n[0].getKind() == STRING_REV)
+    {
+      Node s = n[0][0];
+      Node i = n[1];
+      Node sLen = nm->mkNode(STRING_LENGTH, s);
+      Node iRev = nm->mkNode(
+          MINUS, sLen, nm->mkNode(PLUS, i, nm->mkConst(Rational(1))));
+
+      std::vector<Node> nexp;
+      nexp.push_back(nm->mkNode(LEQ, nm->mkConst(Rational(0)), i));
+      nexp.push_back(nm->mkNode(LT, i, sLen));
+
+      // 0 <= i ^ i < len(s) => seq.nth(seq.rev(s), i) = seq.nth(s, len(s) - i -
+      // 1)
+      Node ret = nm->mkNode(SEQ_NTH, s, iRev);
+      d_im.sendInference(
+          {}, nexp, n.eqNode(ret), InferenceId::STRINGS_ARRAY_NTH_REV);
+      d_extt.markReduced(n, ExtReducedId::STRINGS_NTH_REV);
+    }
+  }
+  checkNth(nthTerms);
+  checkUpdate(updateTerms);
+  // compute connected sequences
+  if (!d_im.hasSent())
+  {
+    computeConnected(updateTerms);
+  }
+}
+
+void ArrayCoreSolver::computeConnected(const std::vector<Node>& updateTerms)
+{
+  d_connectedSeq.clear();
+  std::map<Node, Node> conTmp;
+  std::map<Node, Node>::iterator it;
+  for (const Node& n : updateTerms)
+  {
+    Node newRep;
+    for (size_t i = 0; i < 2; i++)
+    {
+      Node s = i == 0 ? n[0] : n;
+      TNode r = d_state.getRepresentative(s);
+      // get the find
+      it = conTmp.find(r);
+      while (it != conTmp.end())
+      {
+        r = it->second;
+        it = conTmp.find(r);
+      }
+      if (i == 0)
+      {
+        newRep = r;
+      }
+      else if (newRep != r)
+      {
+        conTmp[newRep] = r;
+      }
+    }
+  }
+  // go back and normalize the find to representatives
+  for (std::pair<const Node, Node>& c : conTmp)
+  {
+    TNode r = c.first;
+    it = conTmp.find(r);
+    while (it != conTmp.end())
+    {
+      r = it->second;
+      it = conTmp.find(r);
+    }
+    d_connectedSeq[c.first] = r;
+  }
+}
+
+const std::map<Node, Node>& ArrayCoreSolver::getWriteModel(Node eqc)
+{
+  if (Trace.isOn("seq-write-model"))
+  {
+    Trace("seq-write-model") << "write model of " << eqc << ":" << std::endl;
+    for (auto& x : d_writeModel[eqc])
+    {
+      Trace("seq-write-model") << x.first << ": " << x.second << std::endl;
+    }
+  }
+  return d_writeModel[eqc];
+}
+
+const std::map<Node, Node>& ArrayCoreSolver::getConnectedSequences()
+{
+  return d_connectedSeq;
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
diff --git a/src/theory/strings/array_core_solver.h b/src/theory/strings/array_core_solver.h
new file mode 100644
index 000000000..7101da625
--- /dev/null
+++ b/src/theory/strings/array_core_solver.h
@@ -0,0 +1,127 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "cvc5_private.h"
+
+#ifndef CVC5__THEORY__STRINGS__ARRAY_CORE_SOLVER_H
+#define CVC5__THEORY__STRINGS__ARRAY_CORE_SOLVER_H
+
+#include "theory/strings/core_solver.h"
+#include "theory/strings/extf_solver.h"
+#include "theory/strings/inference_manager.h"
+#include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+class ArrayCoreSolver : protected EnvObj
+{
+ public:
+  ArrayCoreSolver(Env& env,
+                  SolverState& s,
+                  InferenceManager& im,
+                  TermRegistry& tr,
+                  CoreSolver& cs,
+                  ExtfSolver& es,
+                  ExtTheory& extt);
+  ~ArrayCoreSolver();
+
+  /**
+   * Perform reasoning about seq.nth and seq.update operations.
+   *
+   * Can assume that seq.update / seq.nth terms only apply to concatenation-free
+   * equivalence classes.
+   */
+  void check(const std::vector<Node>& nthTerms,
+             const std::vector<Node>& updateTerms);
+
+  /**
+   *
+   * @param eqc The sequence equivalence class representative. We can assume
+   * the equivalence class of eqc contains no concatenation terms.
+   * @return the map corresponding to the model for eqc. The domain of
+   * the returned map should be in distinct integer equivalence classes of the
+   * equality engine of strings theory. The model assigned to eqc will be
+   * a skeleton constructed via seq.++ where the components take values from
+   * this map.
+   */
+  const std::map<Node, Node>& getWriteModel(Node eqc);
+
+  /**
+   * Get connected sequences
+   * @return a map M such that sequence equivalence class representatives x and
+   * y are connected if an only if M[x] = M[y].
+   */
+  const std::map<Node, Node>& getConnectedSequences();
+
+ private:
+  void sendInference(const std::vector<Node>& exp,
+                     const Node& lem,
+                     const InferenceId iid);
+
+  /**
+   * Perform reasoning about seq.nth operation.
+   * It handled the reduction from seq.extract to seq.nth, following the rule
+   * below: (t = (seq.extract A i 1)) ^ (0 <= i) ^ (i < (str.len A))
+   * ----------------------------------------------------------------------
+   * t = (seq.unit (seq.nth A i))
+   */
+  void checkNth(const std::vector<Node>& nthTerms);
+
+  /**
+   * Perform reasoning about seq.update operation.
+   * It handled the reduction from seq.update to seq.nth, following the rule
+   * below: (seq.update x i a) in TERMS (seq.nth t j) in TERMS t == (seq.update
+   * x i a)
+   * ----------------------------------------------------------------------
+   * (seq.nth (seq.update x i a) j) = (ITE, j in range(i, i+len(a)), (seq.nth a
+   * (j - i)), (seq.nth x j))
+   */
+  void checkUpdate(const std::vector<Node>& updateTerms);
+
+  // TODO: document
+  void computeConnected(const std::vector<Node>& updateTerms);
+
+  /** The solver state object */
+  SolverState& d_state;
+  /** The (custom) output channel of the theory of strings */
+  InferenceManager& d_im;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
+  /** reference to the core solver, used for certain queries */
+  CoreSolver& d_csolver;
+  /** reference to the extended solver, used for certain queries */
+  ExtfSolver& d_esolver;
+  /** the extended theory object for the theory of strings */
+  ExtTheory& d_extt;
+  /** The write model */
+  std::map<Node, std::map<Node, Node>> d_writeModel;
+  /** Connected */
+  std::map<Node, Node> d_connectedSeq;
+  /** The set of lemmas been sent */
+  context::CDHashSet<Node> d_lem;
+
+  // ========= data structure =========
+  /** Map sequence variable to indices that occurred in nth terms */
+  std::map<Node, std::set<Node>> d_index_map;
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
+
+#endif
-- 
2.30.2