From a6e09da79c31d9f7cf783f17072239a44e538162 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 29 Sep 2021 10:39:50 -0500
Subject: [PATCH] Add the strings array solver (#7232)

This adds the arrays subsolver utility. It does not yet connect it down to the core array solver, or up to TheoryStrings.

It adds implementation of its lemma schemas for processing nth/update over concat.
---
 src/CMakeLists.txt                   |   2 +
 src/theory/inference_id.cpp          |   7 +
 src/theory/inference_id.h            |   9 ++
 src/theory/strings/array_solver.cpp  | 218 +++++++++++++++++++++++++++
 src/theory/strings/array_solver.h    |  83 ++++++++++
 src/theory/strings/term_registry.cpp |  29 ++++
 src/theory/strings/term_registry.h   |  13 ++
 7 files changed, 361 insertions(+)
 create mode 100644 src/theory/strings/array_solver.cpp
 create mode 100644 src/theory/strings/array_solver.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 13ab74d4b..6c03db7e7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1051,6 +1051,8 @@ libcvc5_add_sources(
   theory/smt_engine_subsolver.h
   theory/sort_inference.cpp
   theory/sort_inference.h
+  theory/strings/array_solver.cpp
+  theory/strings/array_solver.h
   theory/strings/arith_entail.cpp
   theory/strings/arith_entail.h
   theory/strings/base_solver.cpp
diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp
index fce867688..28557e472 100644
--- a/src/theory/inference_id.cpp
+++ b/src/theory/inference_id.cpp
@@ -401,6 +401,13 @@ const char* toString(InferenceId i)
       return "STRINGS_DEQ_EXTENSIONALITY";
     case InferenceId::STRINGS_CODE_PROXY: return "STRINGS_CODE_PROXY";
     case InferenceId::STRINGS_CODE_INJ: return "STRINGS_CODE_INJ";
+    case InferenceId::STRINGS_ARRAY_UPDATE_UNIT:
+      return "STRINGS_ARRAY_UPDATE_UNIT";
+    case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT:
+      return "STRINGS_ARRAY_UPDATE_CONCAT";
+    case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT";
+    case InferenceId::STRINGS_ARRAY_NTH_CONCAT:
+      return "STRINGS_ARRAY_NTH_CONCAT";
     case InferenceId::STRINGS_RE_NF_CONFLICT: return "STRINGS_RE_NF_CONFLICT";
     case InferenceId::STRINGS_RE_UNFOLD_POS: return "STRINGS_RE_UNFOLD_POS";
     case InferenceId::STRINGS_RE_UNFOLD_NEG: return "STRINGS_RE_UNFOLD_NEG";
diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h
index 3def963ea..50c063651 100644
--- a/src/theory/inference_id.h
+++ b/src/theory/inference_id.h
@@ -683,6 +683,15 @@ enum class InferenceId
   STRINGS_CODE_PROXY,
   // str.code(x) = -1 V str.code(x) != str.code(y) V x = y
   STRINGS_CODE_INJ,
+  //-------------------- sequence update solver
+  // update over unit
+  STRINGS_ARRAY_UPDATE_UNIT,
+  // update over conatenation
+  STRINGS_ARRAY_UPDATE_CONCAT,
+  // nth over unit
+  STRINGS_ARRAY_NTH_UNIT,
+  // nth over conatenation
+  STRINGS_ARRAY_NTH_CONCAT,
   //-------------------- regexp solver
   // regular expression normal form conflict
   //   ( x in R ^ x = y ^ rewrite((str.in_re y R)) = false ) => false
diff --git a/src/theory/strings/array_solver.cpp b/src/theory/strings/array_solver.cpp
new file mode 100644
index 000000000..09e3aefdd
--- /dev/null
+++ b/src/theory/strings/array_solver.cpp
@@ -0,0 +1,218 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andrew Reynolds, Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "theory/strings/array_solver.h"
+
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "util/rational.h"
+
+using namespace cvc5::context;
+using namespace cvc5::kind;
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+ArraySolver::ArraySolver(Env& env,
+                         SolverState& s,
+                         InferenceManager& im,
+                         TermRegistry& tr,
+                         CoreSolver& cs,
+                         ExtfSolver& es,
+                         ExtTheory& extt)
+    : EnvObj(env),
+      d_state(s),
+      d_im(im),
+      d_termReg(tr),
+      d_csolver(cs),
+      d_esolver(es),
+      d_eqProc(context())
+{
+  NodeManager* nm = NodeManager::currentNM();
+  d_zero = nm->mkConst(Rational(0));
+}
+
+ArraySolver::~ArraySolver() {}
+
+void ArraySolver::checkArrayConcat()
+{
+  if (!d_termReg.hasSeqUpdate())
+  {
+    Trace("seq-array") << "No seq.update/seq.nth terms, skipping check..."
+                       << std::endl;
+    return;
+  }
+  d_currTerms.clear();
+  Trace("seq-array") << "ArraySolver::checkArrayConcat..." << std::endl;
+  checkTerms(STRING_UPDATE);
+  checkTerms(SEQ_NTH);
+}
+
+void ArraySolver::checkTerms(Kind k)
+{
+  Assert(k == STRING_UPDATE || k == SEQ_NTH);
+  NodeManager* nm = NodeManager::currentNM();
+  // get all the active update terms that have not been reduced in the
+  // current context by context-dependent simplification
+  std::vector<Node> terms = d_esolver.getActive(k);
+  for (const Node& t : terms)
+  {
+    Trace("seq-array-debug") << "check term " << t << "..." << std::endl;
+    Assert(t.getKind() == k);
+    if (k == STRING_UPDATE && !d_termReg.isHandledUpdate(t))
+    {
+      // not handled by procedure
+      Trace("seq-array-debug") << "...unhandled" << std::endl;
+      continue;
+    }
+    Node r = d_state.getRepresentative(t[0]);
+    NormalForm& nf = d_csolver.getNormalForm(r);
+    Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl;
+    if (nf.d_nf.empty())
+    {
+      // updates should have been reduced (UPD_EMPTYSTR)
+      Assert(k != STRING_UPDATE);
+      Trace("seq-array-debug") << "...empty" << std::endl;
+      continue;
+    }
+    else if (nf.d_nf.size() == 1)
+    {
+      Trace("seq-array-debug") << "...norm form size 1" << std::endl;
+      // NOTE: could split on n=0 if needed, do not introduce ITE
+      if (nf.d_nf[0].getKind() == SEQ_UNIT)
+      {
+        // do we know whether n = 0 ?
+        // x = (seq.unit m) => (seq.update x n z) = ite(n=0, z, (seq.unit m))
+        // x = (seq.unit m) => (seq.nth x n) = ite(n=0, m, Uf(x, n))
+        Node thenBranch;
+        Node elseBranch;
+        InferenceId iid;
+        if (k == STRING_UPDATE)
+        {
+          thenBranch = t[2];
+          elseBranch = nf.d_nf[0];
+          iid = InferenceId::STRINGS_ARRAY_UPDATE_UNIT;
+        }
+        else
+        {
+          Assert(k == SEQ_NTH);
+          thenBranch = nf.d_nf[0][0];
+          Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
+          elseBranch = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
+          iid = InferenceId::STRINGS_ARRAY_NTH_UNIT;
+        }
+        std::vector<Node> exp;
+        d_im.addToExplanation(t[0], nf.d_nf[0], exp);
+        d_im.addToExplanation(r, t[0], exp);
+        Node eq = nm->mkNode(ITE,
+                             t[1].eqNode(d_zero),
+                             t.eqNode(thenBranch),
+                             t.eqNode(elseBranch));
+        if (d_eqProc.find(eq) == d_eqProc.end())
+        {
+          d_eqProc.insert(eq);
+          d_im.sendInference(exp, eq, iid);
+        }
+      }
+      // otherwise, the equivalence class is pure wrt concatenation
+      d_currTerms[k].push_back(t);
+      continue;
+    }
+    // otherwise, we are the concatenation of the components
+    // NOTE: for nth, split on index vs component lengths, do not introduce ITE
+    std::vector<Node> cond;
+    std::vector<Node> cchildren;
+    std::vector<Node> lacc;
+    for (const Node& c : nf.d_nf)
+    {
+      Trace("seq-array-debug") << "...process " << c << std::endl;
+      Node clen = nm->mkNode(STRING_LENGTH, c);
+      Node currIndex = t[1];
+      if (!lacc.empty())
+      {
+        Node currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+        currIndex = nm->mkNode(MINUS, currIndex, currSum);
+      }
+      if (k == STRING_UPDATE)
+      {
+        Node cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]);
+        Trace("seq-array-debug") << "......component " << cc << std::endl;
+        cchildren.push_back(cc);
+      }
+      else
+      {
+        Assert(k == SEQ_NTH);
+        Node cc = nm->mkNode(SEQ_NTH, c, currIndex);
+        Trace("seq-array-debug") << "......component " << cc << std::endl;
+        cchildren.push_back(cc);
+      }
+      lacc.push_back(clen);
+      if (k == SEQ_NTH)
+      {
+        Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+        Node cc = nm->mkNode(LT, t[1], currSumPost);
+        Trace("seq-array-debug") << "......condition " << cc << std::endl;
+        cond.push_back(cc);
+      }
+    }
+    // z = (seq.++ x y) =>
+    // (seq.update z n l) =
+    //   (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1))
+    // z = (seq.++ x y) =>
+    // (seq.nth z n) =
+    //    (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n)
+    //    (ite (< n (str.len x)) (seq.nth x n)
+    //      (seq.nth y (- n (str.len x)))))
+    InferenceId iid;
+    Node eq;
+    if (k == STRING_UPDATE)
+    {
+      Node finalc = utils::mkConcat(cchildren, t.getType());
+      eq = t.eqNode(finalc);
+      iid = InferenceId::STRINGS_ARRAY_UPDATE_CONCAT;
+    }
+    else
+    {
+      std::reverse(cchildren.begin(), cchildren.end());
+      std::reverse(cond.begin(), cond.end());
+      Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
+      eq = t.eqNode(cchildren[0]);
+      for (size_t i = 1, ncond = cond.size(); i < ncond; i++)
+      {
+        eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq);
+      }
+      Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
+      Node oobCond =
+          nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode());
+      eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq);
+      iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT;
+    }
+    std::vector<Node> exp;
+    d_im.addToExplanation(r, t[0], exp);
+    exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end());
+    exp.push_back(t[0].eqNode(nf.d_base));
+    if (d_eqProc.find(eq) == d_eqProc.end())
+    {
+      d_eqProc.insert(eq);
+      Trace("seq-array") << "- send lemma - " << eq << std::endl;
+      d_im.sendInference(exp, eq, iid);
+    }
+  }
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
diff --git a/src/theory/strings/array_solver.h b/src/theory/strings/array_solver.h
new file mode 100644
index 000000000..941061e9e
--- /dev/null
+++ b/src/theory/strings/array_solver.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andrew Reynolds, Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "cvc5_private.h"
+
+#ifndef CVC5__THEORY__STRINGS__ARRAY_SOLVER_H
+#define CVC5__THEORY__STRINGS__ARRAY_SOLVER_H
+
+#include "context/cdhashset.h"
+#include "theory/strings/core_solver.h"
+#include "theory/strings/extf_solver.h"
+#include "theory/strings/inference_manager.h"
+#include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+/**
+ * This is a solver for reasoning about seq.update and seq.nth
+ * natively. This class specifically addresses the combination of this
+ * operators with concatenation. It relies on a subsolver for doing array
+ * like reasoning (sequences_array_solver.h).
+ */
+class ArraySolver : protected EnvObj
+{
+  typedef context::CDHashSet<Node> NodeSet;
+
+ public:
+  ArraySolver(Env& env,
+              SolverState& s,
+              InferenceManager& im,
+              TermRegistry& tr,
+              CoreSolver& cs,
+              ExtfSolver& es,
+              ExtTheory& extt);
+  ~ArraySolver();
+
+  /**
+   * Perform reasoning about seq.nth and seq.update operations, in particular,
+   * their application to concatenation terms.
+   */
+  void checkArrayConcat();
+
+ private:
+  /** check terms of given kind */
+  void checkTerms(Kind k);
+  /** The solver state object */
+  SolverState& d_state;
+  /** The (custom) output channel of the theory of strings */
+  InferenceManager& d_im;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
+  /** reference to the core solver, used for certain queries */
+  CoreSolver& d_csolver;
+  /** reference to the extended solver, used for certain queries */
+  ExtfSolver& d_esolver;
+  /** Current terms */
+  std::map<Kind, std::vector<Node> > d_currTerms;
+  /** Common constants */
+  Node d_zero;
+  /** Equalities we have processed in the current context */
+  NodeSet d_eqProc;
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
+
+#endif
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
index dd53adecc..897d02366 100644
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -49,6 +49,7 @@ TermRegistry::TermRegistry(Env& env,
       d_im(nullptr),
       d_statistics(statistics),
       d_hasStrCode(false),
+      d_aent(env.getRewriter()),
       d_functionsTerms(context()),
       d_inputVars(userContext()),
       d_preregisteredTerms(context()),
@@ -185,6 +186,10 @@ void TermRegistry::preRegisterTerm(TNode n)
   {
     d_hasStrCode = true;
   }
+  else if (k == SEQ_NTH || k == STRING_UPDATE)
+  {
+    d_hasSeqUpdate = true;
+  }
   else if (k == REGEXP_RANGE)
   {
     for (const Node& nc : n)
@@ -470,6 +475,30 @@ const context::CDHashSet<Node>& TermRegistry::getInputVars() const
 
 bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
 
+bool TermRegistry::hasSeqUpdate() const { return d_hasSeqUpdate; }
+
+bool TermRegistry::isHandledUpdate(Node n)
+{
+  Assert(n.getKind() == STRING_UPDATE || n.getKind() == STRING_SUBSTR);
+  NodeManager* nm = NodeManager::currentNM();
+  Node lenN = n[2];
+  if (n.getKind() == STRING_UPDATE)
+  {
+    lenN = nm->mkNode(STRING_LENGTH, n[2]);
+  }
+  Node one = nm->mkConst(Rational(1));
+  return d_aent.checkEq(lenN, one);
+}
+
+Node TermRegistry::getUpdateBase(Node n)
+{
+  while (n.getKind() == STRING_UPDATE)
+  {
+    n = n[0];
+  }
+  return n;
+}
+
 TrustNode TermRegistry::getRegisterTermAtomicLemma(
     Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
 {
diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h
index ba973fac8..4dfe58aab 100644
--- a/src/theory/strings/term_registry.h
+++ b/src/theory/strings/term_registry.h
@@ -24,6 +24,7 @@
 #include "proof/proof_node_manager.h"
 #include "smt/env_obj.h"
 #include "theory/output_channel.h"
+#include "theory/strings/arith_entail.h"
 #include "theory/strings/infer_info.h"
 #include "theory/strings/sequences_stats.h"
 #include "theory/strings/skolem_cache.h"
@@ -152,6 +153,14 @@ class TermRegistry : protected EnvObj
   const context::CDHashSet<Node>& getInputVars() const;
   /** Returns true if any str.code terms have been preregistered */
   bool hasStringCode() const;
+  /**
+   * @return true if any seq.nth or seq.update terms have been preregistered
+   */
+  bool hasSeqUpdate() const;
+  /** is handled update */
+  bool isHandledUpdate(Node n);
+  /** get base */
+  Node getUpdateBase(Node n);
   //---------------------------- end queries
   //---------------------------- proxy variables
   /** Get symbolic definition
@@ -216,8 +225,12 @@ class TermRegistry : protected EnvObj
   SequencesStatistics& d_statistics;
   /** have we asserted any str.code terms? */
   bool d_hasStrCode;
+  /** have we asserted any seq.update/seq.nth terms? */
+  bool d_hasSeqUpdate;
   /** The cache of all skolems, which is owned by this class. */
   SkolemCache d_skCache;
+  /** arithmetic entailment */
+  ArithEntail d_aent;
   /** All function terms that the theory has seen in the current SAT context */
   context::CDList<TNode> d_functionsTerms;
   /**
-- 
2.30.2