Add the strings array solver (#7232)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Wed, 29 Sep 2021 15:39:50 +0000 (10:39 -0500)

committer GitHub <noreply@github.com>

Wed, 29 Sep 2021 15:39:50 +0000 (15:39 +0000)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Wed, 29 Sep 2021 15:39:50 +0000 (10:39 -0500)
committer GitHub <noreply@github.com>
Wed, 29 Sep 2021 15:39:50 +0000 (15:39 +0000)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index 13ab74d4b379d0dc4baf09cb7d291a0ffd5c12d5..6c03db7e748dda54ab8768fba01363699a6a5e1b 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1051,6 +1051,8 @@ libcvc5_add_sources(
    theory/smt_engine_subsolver.h
    theory/sort_inference.cpp
    theory/sort_inference.h
+  theory/strings/array_solver.cpp
+  theory/strings/array_solver.h
    theory/strings/arith_entail.cpp
    theory/strings/arith_entail.h
    theory/strings/base_solver.cpp
diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp

index fce8676889912eb3afd6dbf0b2836cce51309512..28557e4723b8656fc960248d073bb6abaff6b807 100644 (file)
--- a/src/theory/inference_id.cpp
+++ b/src/theory/inference_id.cpp
@@ -401,6 +401,13 @@ const char* toString(InferenceId i)
        return "STRINGS_DEQ_EXTENSIONALITY";
      case InferenceId::STRINGS_CODE_PROXY: return "STRINGS_CODE_PROXY";
      case InferenceId::STRINGS_CODE_INJ: return "STRINGS_CODE_INJ";
+    case InferenceId::STRINGS_ARRAY_UPDATE_UNIT:
+      return "STRINGS_ARRAY_UPDATE_UNIT";
+    case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT:
+      return "STRINGS_ARRAY_UPDATE_CONCAT";
+    case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT";
+    case InferenceId::STRINGS_ARRAY_NTH_CONCAT:
+      return "STRINGS_ARRAY_NTH_CONCAT";
      case InferenceId::STRINGS_RE_NF_CONFLICT: return "STRINGS_RE_NF_CONFLICT";
      case InferenceId::STRINGS_RE_UNFOLD_POS: return "STRINGS_RE_UNFOLD_POS";
      case InferenceId::STRINGS_RE_UNFOLD_NEG: return "STRINGS_RE_UNFOLD_NEG";
diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h

index 3def963ea79f5e74e1abe48a191e00ea6d806835..50c063651f51ffb10a8dcc161e2b0d59e6789baf 100644 (file)
--- a/src/theory/inference_id.h
+++ b/src/theory/inference_id.h
@@ -683,6 +683,15 @@ enum class InferenceId
    STRINGS_CODE_PROXY,
    // str.code(x) = -1 V str.code(x) != str.code(y) V x = y
    STRINGS_CODE_INJ,
+  //-------------------- sequence update solver
+  // update over unit
+  STRINGS_ARRAY_UPDATE_UNIT,
+  // update over conatenation
+  STRINGS_ARRAY_UPDATE_CONCAT,
+  // nth over unit
+  STRINGS_ARRAY_NTH_UNIT,
+  // nth over conatenation
+  STRINGS_ARRAY_NTH_CONCAT,
    //-------------------- regexp solver
    // regular expression normal form conflict
    //   ( x in R ^ x = y ^ rewrite((str.in_re y R)) = false ) => false
diff --git a/src/theory/strings/array_solver.cpp b/src/theory/strings/array_solver.cpp

new file mode 100644 (file)

index 0000000..09e3aef
--- /dev/null
+++ b/src/theory/strings/array_solver.cpp
@@ -0,0 +1,218 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andrew Reynolds, Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "theory/strings/array_solver.h"
+
+#include "theory/strings/arith_entail.h"
+#include "theory/strings/theory_strings_utils.h"
+#include "util/rational.h"
+
+using namespace cvc5::context;
+using namespace cvc5::kind;
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+ArraySolver::ArraySolver(Env& env,
+                         SolverState& s,
+                         InferenceManager& im,
+                         TermRegistry& tr,
+                         CoreSolver& cs,
+                         ExtfSolver& es,
+                         ExtTheory& extt)
+    : EnvObj(env),
+      d_state(s),
+      d_im(im),
+      d_termReg(tr),
+      d_csolver(cs),
+      d_esolver(es),
+      d_eqProc(context())
+{
+  NodeManager* nm = NodeManager::currentNM();
+  d_zero = nm->mkConst(Rational(0));
+}
+
+ArraySolver::~ArraySolver() {}
+
+void ArraySolver::checkArrayConcat()
+{
+  if (!d_termReg.hasSeqUpdate())
+  {
+    Trace("seq-array") << "No seq.update/seq.nth terms, skipping check..."
+                       << std::endl;
+    return;
+  }
+  d_currTerms.clear();
+  Trace("seq-array") << "ArraySolver::checkArrayConcat..." << std::endl;
+  checkTerms(STRING_UPDATE);
+  checkTerms(SEQ_NTH);
+}
+
+void ArraySolver::checkTerms(Kind k)
+{
+  Assert(k == STRING_UPDATE || k == SEQ_NTH);
+  NodeManager* nm = NodeManager::currentNM();
+  // get all the active update terms that have not been reduced in the
+  // current context by context-dependent simplification
+  std::vector<Node> terms = d_esolver.getActive(k);
+  for (const Node& t : terms)
+  {
+    Trace("seq-array-debug") << "check term " << t << "..." << std::endl;
+    Assert(t.getKind() == k);
+    if (k == STRING_UPDATE && !d_termReg.isHandledUpdate(t))
+    {
+      // not handled by procedure
+      Trace("seq-array-debug") << "...unhandled" << std::endl;
+      continue;
+    }
+    Node r = d_state.getRepresentative(t[0]);
+    NormalForm& nf = d_csolver.getNormalForm(r);
+    Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl;
+    if (nf.d_nf.empty())
+    {
+      // updates should have been reduced (UPD_EMPTYSTR)
+      Assert(k != STRING_UPDATE);
+      Trace("seq-array-debug") << "...empty" << std::endl;
+      continue;
+    }
+    else if (nf.d_nf.size() == 1)
+    {
+      Trace("seq-array-debug") << "...norm form size 1" << std::endl;
+      // NOTE: could split on n=0 if needed, do not introduce ITE
+      if (nf.d_nf[0].getKind() == SEQ_UNIT)
+      {
+        // do we know whether n = 0 ?
+        // x = (seq.unit m) => (seq.update x n z) = ite(n=0, z, (seq.unit m))
+        // x = (seq.unit m) => (seq.nth x n) = ite(n=0, m, Uf(x, n))
+        Node thenBranch;
+        Node elseBranch;
+        InferenceId iid;
+        if (k == STRING_UPDATE)
+        {
+          thenBranch = t[2];
+          elseBranch = nf.d_nf[0];
+          iid = InferenceId::STRINGS_ARRAY_UPDATE_UNIT;
+        }
+        else
+        {
+          Assert(k == SEQ_NTH);
+          thenBranch = nf.d_nf[0][0];
+          Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
+          elseBranch = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
+          iid = InferenceId::STRINGS_ARRAY_NTH_UNIT;
+        }
+        std::vector<Node> exp;
+        d_im.addToExplanation(t[0], nf.d_nf[0], exp);
+        d_im.addToExplanation(r, t[0], exp);
+        Node eq = nm->mkNode(ITE,
+                             t[1].eqNode(d_zero),
+                             t.eqNode(thenBranch),
+                             t.eqNode(elseBranch));
+        if (d_eqProc.find(eq) == d_eqProc.end())
+        {
+          d_eqProc.insert(eq);
+          d_im.sendInference(exp, eq, iid);
+        }
+      }
+      // otherwise, the equivalence class is pure wrt concatenation
+      d_currTerms[k].push_back(t);
+      continue;
+    }
+    // otherwise, we are the concatenation of the components
+    // NOTE: for nth, split on index vs component lengths, do not introduce ITE
+    std::vector<Node> cond;
+    std::vector<Node> cchildren;
+    std::vector<Node> lacc;
+    for (const Node& c : nf.d_nf)
+    {
+      Trace("seq-array-debug") << "...process " << c << std::endl;
+      Node clen = nm->mkNode(STRING_LENGTH, c);
+      Node currIndex = t[1];
+      if (!lacc.empty())
+      {
+        Node currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+        currIndex = nm->mkNode(MINUS, currIndex, currSum);
+      }
+      if (k == STRING_UPDATE)
+      {
+        Node cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]);
+        Trace("seq-array-debug") << "......component " << cc << std::endl;
+        cchildren.push_back(cc);
+      }
+      else
+      {
+        Assert(k == SEQ_NTH);
+        Node cc = nm->mkNode(SEQ_NTH, c, currIndex);
+        Trace("seq-array-debug") << "......component " << cc << std::endl;
+        cchildren.push_back(cc);
+      }
+      lacc.push_back(clen);
+      if (k == SEQ_NTH)
+      {
+        Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+        Node cc = nm->mkNode(LT, t[1], currSumPost);
+        Trace("seq-array-debug") << "......condition " << cc << std::endl;
+        cond.push_back(cc);
+      }
+    }
+    // z = (seq.++ x y) =>
+    // (seq.update z n l) =
+    //   (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1))
+    // z = (seq.++ x y) =>
+    // (seq.nth z n) =
+    //    (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n)
+    //    (ite (< n (str.len x)) (seq.nth x n)
+    //      (seq.nth y (- n (str.len x)))))
+    InferenceId iid;
+    Node eq;
+    if (k == STRING_UPDATE)
+    {
+      Node finalc = utils::mkConcat(cchildren, t.getType());
+      eq = t.eqNode(finalc);
+      iid = InferenceId::STRINGS_ARRAY_UPDATE_CONCAT;
+    }
+    else
+    {
+      std::reverse(cchildren.begin(), cchildren.end());
+      std::reverse(cond.begin(), cond.end());
+      Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
+      eq = t.eqNode(cchildren[0]);
+      for (size_t i = 1, ncond = cond.size(); i < ncond; i++)
+      {
+        eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq);
+      }
+      Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
+      Node oobCond =
+          nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode());
+      eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq);
+      iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT;
+    }
+    std::vector<Node> exp;
+    d_im.addToExplanation(r, t[0], exp);
+    exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end());
+    exp.push_back(t[0].eqNode(nf.d_base));
+    if (d_eqProc.find(eq) == d_eqProc.end())
+    {
+      d_eqProc.insert(eq);
+      Trace("seq-array") << "- send lemma - " << eq << std::endl;
+      d_im.sendInference(exp, eq, iid);
+    }
+  }
+}
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
diff --git a/src/theory/strings/array_solver.h b/src/theory/strings/array_solver.h

new file mode 100644 (file)

index 0000000..941061e
--- /dev/null
+++ b/src/theory/strings/array_solver.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * Top contributors (to current version):
+ *   Andrew Reynolds, Andres Noetzli
+ *
+ * This file is part of the cvc5 project.
+ *
+ * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
+ * in the top-level source directory and their institutional affiliations.
+ * All rights reserved.  See the file COPYING in the top-level source
+ * directory for licensing information.
+ * ****************************************************************************
+ *
+ * Sequences solver for seq.nth/seq.update.
+ */
+
+#include "cvc5_private.h"
+
+#ifndef CVC5__THEORY__STRINGS__ARRAY_SOLVER_H
+#define CVC5__THEORY__STRINGS__ARRAY_SOLVER_H
+
+#include "context/cdhashset.h"
+#include "theory/strings/core_solver.h"
+#include "theory/strings/extf_solver.h"
+#include "theory/strings/inference_manager.h"
+#include "theory/strings/solver_state.h"
+#include "theory/strings/term_registry.h"
+
+namespace cvc5 {
+namespace theory {
+namespace strings {
+
+/**
+ * This is a solver for reasoning about seq.update and seq.nth
+ * natively. This class specifically addresses the combination of this
+ * operators with concatenation. It relies on a subsolver for doing array
+ * like reasoning (sequences_array_solver.h).
+ */
+class ArraySolver : protected EnvObj
+{
+  typedef context::CDHashSet<Node> NodeSet;
+
+ public:
+  ArraySolver(Env& env,
+              SolverState& s,
+              InferenceManager& im,
+              TermRegistry& tr,
+              CoreSolver& cs,
+              ExtfSolver& es,
+              ExtTheory& extt);
+  ~ArraySolver();
+
+  /**
+   * Perform reasoning about seq.nth and seq.update operations, in particular,
+   * their application to concatenation terms.
+   */
+  void checkArrayConcat();
+
+ private:
+  /** check terms of given kind */
+  void checkTerms(Kind k);
+  /** The solver state object */
+  SolverState& d_state;
+  /** The (custom) output channel of the theory of strings */
+  InferenceManager& d_im;
+  /** Reference to the term registry of theory of strings */
+  TermRegistry& d_termReg;
+  /** reference to the core solver, used for certain queries */
+  CoreSolver& d_csolver;
+  /** reference to the extended solver, used for certain queries */
+  ExtfSolver& d_esolver;
+  /** Current terms */
+  std::map<Kind, std::vector<Node> > d_currTerms;
+  /** Common constants */
+  Node d_zero;
+  /** Equalities we have processed in the current context */
+  NodeSet d_eqProc;
+};
+
+}  // namespace strings
+}  // namespace theory
+}  // namespace cvc5
+
+#endif
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp

index dd53adecc9dae51920fcb7793865b5bd17825844..897d02366332df5622956ba2bcf9c58eac13f693 100644 (file)
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -49,6 +49,7 @@ TermRegistry::TermRegistry(Env& env,
        d_im(nullptr),
        d_statistics(statistics),
        d_hasStrCode(false),
+      d_aent(env.getRewriter()),
        d_functionsTerms(context()),
        d_inputVars(userContext()),
        d_preregisteredTerms(context()),
@@ -185,6 +186,10 @@ void TermRegistry::preRegisterTerm(TNode n)
    {
      d_hasStrCode = true;
    }
+  else if (k == SEQ_NTH || k == STRING_UPDATE)
+  {
+    d_hasSeqUpdate = true;
+  }
    else if (k == REGEXP_RANGE)
    {
      for (const Node& nc : n)
@@ -470,6 +475,30 @@ const context::CDHashSet<Node>& TermRegistry::getInputVars() const
  
  bool TermRegistry::hasStringCode() const { return d_hasStrCode; }
  
+bool TermRegistry::hasSeqUpdate() const { return d_hasSeqUpdate; }
+
+bool TermRegistry::isHandledUpdate(Node n)
+{
+  Assert(n.getKind() == STRING_UPDATE || n.getKind() == STRING_SUBSTR);
+  NodeManager* nm = NodeManager::currentNM();
+  Node lenN = n[2];
+  if (n.getKind() == STRING_UPDATE)
+  {
+    lenN = nm->mkNode(STRING_LENGTH, n[2]);
+  }
+  Node one = nm->mkConst(Rational(1));
+  return d_aent.checkEq(lenN, one);
+}
+
+Node TermRegistry::getUpdateBase(Node n)
+{
+  while (n.getKind() == STRING_UPDATE)
+  {
+    n = n[0];
+  }
+  return n;
+}
+
  TrustNode TermRegistry::getRegisterTermAtomicLemma(
      Node n, LengthStatus s, std::map<Node, bool>& reqPhase)
  {
diff --git a/src/theory/strings/term_registry.h b/src/theory/strings/term_registry.h

index ba973fac8e0f4ff931f823e783d5be64609794c7..4dfe58aabf38c12ceb8a4bab8e40063a1bfc4bc5 100644 (file)
--- a/src/theory/strings/term_registry.h
+++ b/src/theory/strings/term_registry.h
@@ -24,6 +24,7 @@
  #include "proof/proof_node_manager.h"
  #include "smt/env_obj.h"
  #include "theory/output_channel.h"
+#include "theory/strings/arith_entail.h"
  #include "theory/strings/infer_info.h"
  #include "theory/strings/sequences_stats.h"
  #include "theory/strings/skolem_cache.h"
@@ -152,6 +153,14 @@ class TermRegistry : protected EnvObj
    const context::CDHashSet<Node>& getInputVars() const;
    /** Returns true if any str.code terms have been preregistered */
    bool hasStringCode() const;
+  /**
+   * @return true if any seq.nth or seq.update terms have been preregistered
+   */
+  bool hasSeqUpdate() const;
+  /** is handled update */
+  bool isHandledUpdate(Node n);
+  /** get base */
+  Node getUpdateBase(Node n);
    //---------------------------- end queries
    //---------------------------- proxy variables
    /** Get symbolic definition
@@ -216,8 +225,12 @@ class TermRegistry : protected EnvObj
    SequencesStatistics& d_statistics;
    /** have we asserted any str.code terms? */
    bool d_hasStrCode;
+  /** have we asserted any seq.update/seq.nth terms? */
+  bool d_hasSeqUpdate;
    /** The cache of all skolems, which is owned by this class. */
    SkolemCache d_skCache;
+  /** arithmetic entailment */
+  ArithEntail d_aent;
    /** All function terms that the theory has seen in the current SAT context */
    context::CDList<TNode> d_functionsTerms;
    /**
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Wed, 29 Sep 2021 15:39:50 +0000 (10:39 -0500)
committer	GitHub <noreply@github.com>
	Wed, 29 Sep 2021 15:39:50 +0000 (15:39 +0000)
src/CMakeLists.txt		patch \| blob \| history
src/theory/inference_id.cpp		patch \| blob \| history
src/theory/inference_id.h		patch \| blob \| history
src/theory/strings/array_solver.cpp	[new file with mode: 0644]	patch \| blob
src/theory/strings/array_solver.h	[new file with mode: 0644]	patch \| blob
src/theory/strings/term_registry.cpp		patch \| blob \| history
src/theory/strings/term_registry.h		patch \| blob \| history