From 108b8aa42f15b021d351790b36e2ece958871db8 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Fri, 14 Jan 2022 20:16:20 -0600
Subject: [PATCH] Add inverse inference for update-over-concat (#7954)

This adds an inference that applies when an atomic update term becomes equal to a concatenation term.

This fixes bogus models discovered when writing the model soundness proof for the sequences array solver.
---
 src/theory/inference_id.cpp                   |   2 +
 src/theory/inference_id.h                     |   2 +
 src/theory/strings/array_solver.cpp           | 302 ++++++++++++------
 src/theory/strings/array_solver.h             |   6 +
 test/regress/CMakeLists.txt                   |   2 +
 .../seq/update-concat-non-atomic.smt2         |  17 +
 .../seq/update-concat-non-atomic2.smt2        |  24 ++
 7 files changed, 251 insertions(+), 104 deletions(-)
 create mode 100644 test/regress/regress0/seq/update-concat-non-atomic.smt2
 create mode 100644 test/regress/regress0/seq/update-concat-non-atomic2.smt2
diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp
index a2dcdec8c..0d116b1eb 100644
--- a/src/theory/inference_id.cpp
+++ b/src/theory/inference_id.cpp
@@ -406,6 +406,8 @@ const char* toString(InferenceId i)
       return "STRINGS_ARRAY_UPDATE_UNIT";
     case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT:
       return "STRINGS_ARRAY_UPDATE_CONCAT";
+    case InferenceId::STRINGS_ARRAY_UPDATE_CONCAT_INVERSE:
+      return "STRINGS_ARRAY_UPDATE_CONCAT_INVERSE";
     case InferenceId::STRINGS_ARRAY_NTH_UNIT: return "STRINGS_ARRAY_NTH_UNIT";
     case InferenceId::STRINGS_ARRAY_NTH_CONCAT:
       return "STRINGS_ARRAY_NTH_CONCAT";
diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h
index 8d9078503..a82093bab 100644
--- a/src/theory/inference_id.h
+++ b/src/theory/inference_id.h
@@ -686,6 +686,8 @@ enum class InferenceId
   STRINGS_ARRAY_UPDATE_UNIT,
   // update over conatenation
   STRINGS_ARRAY_UPDATE_CONCAT,
+  // update over conatenation, inverse
+  STRINGS_ARRAY_UPDATE_CONCAT_INVERSE,
   // nth over unit
   STRINGS_ARRAY_NTH_UNIT,
   // nth over conatenation
diff --git a/src/theory/strings/array_solver.cpp b/src/theory/strings/array_solver.cpp
index 672ca8b76..3d364bf3c 100644
--- a/src/theory/strings/array_solver.cpp
+++ b/src/theory/strings/array_solver.cpp
@@ -93,7 +93,6 @@ void ArraySolver::checkArrayEager()
 void ArraySolver::checkTerms(Kind k)
 {
   Assert(k == STRING_UPDATE || k == SEQ_NTH);
-  NodeManager* nm = NodeManager::currentNM();
   // get all the active update terms that have not been reduced in the
   // current context by context-dependent simplification
   std::vector<Node> terms = d_esolver.getActive(k);
@@ -101,22 +100,60 @@ void ArraySolver::checkTerms(Kind k)
   {
     Trace("seq-array-debug") << "check term " << t << "..." << std::endl;
     Assert(t.getKind() == k);
-    if (k == STRING_UPDATE && !d_termReg.isHandledUpdate(t))
+    if (k == STRING_UPDATE)
     {
-      // not handled by procedure
-      Trace("seq-array-debug") << "...unhandled" << std::endl;
-      continue;
+      if (!d_termReg.isHandledUpdate(t))
+      {
+        // not handled by procedure
+        Trace("seq-array-debug") << "...unhandled" << std::endl;
+        continue;
+      }
+      // for update terms, also check the inverse inference
+      checkTerm(t, true);
     }
-    Node r = d_state.getRepresentative(t[0]);
-    NormalForm& nf = d_csolver.getNormalForm(r);
-    Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl;
-    std::vector<Node> nfChildren;
+    // check the normal inference
+    checkTerm(t, false);
+  }
+}
+
+void ArraySolver::checkTerm(Node t, bool checkInv)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  Kind k = t.getKind();
+  Node r = d_state.getRepresentative(t[0]);
+  Node rself;
+  NormalForm& nf = d_csolver.getNormalForm(r);
+  Trace("seq-array-debug") << "...normal form " << nf.d_nf << std::endl;
+  std::vector<Node> nfChildren;
+  if (checkInv)
+  {
+    if (k != STRING_UPDATE)
+    {
+      return;
+    }
+    // If the term we are updating is atomic, but the update itself
+    // not atomic, then we will apply the inverse version of the update
+    // concat rule, based on the normal form of the term itself.
+    rself = d_state.getRepresentative(t);
+    NormalForm& nfSelf = d_csolver.getNormalForm(rself);
+    if (nfSelf.d_nf.size() > 1)
+    {
+      nfChildren.insert(
+          nfChildren.end(), nfSelf.d_nf.begin(), nfSelf.d_nf.end());
+    }
+    else
+    {
+      return;
+    }
+  }
+  else
+  {
     if (nf.d_nf.empty())
     {
       // updates should have been reduced (UPD_EMPTYSTR)
       Assert(k != STRING_UPDATE);
       Trace("seq-array-debug") << "...empty" << std::endl;
-      continue;
+      return;
     }
     else if (nf.d_nf.size() == 1)
     {
@@ -172,129 +209,186 @@ void ArraySolver::checkTerms(Kind k)
           d_eqProc.insert(eq);
           d_im.sendInference(exp, eq, iid);
         }
-        continue;
+        return;
       }
       else if (ck != CONST_SEQUENCE)
       {
-        // otherwise, if the normal form is not a constant sequence, the
-        // equivalence class is pure wrt concatenation.
-        d_currTerms[k].push_back(t);
-        continue;
+        bool isAtomic = true;
+        if (k == STRING_UPDATE)
+        {
+          // If the term we are updating is atomic, but the update itself
+          // not atomic, then we will apply the inverse version of the update
+          // concat rule, based on the normal form of the term itself.
+          rself = d_state.getRepresentative(t);
+          NormalForm& nfSelf = d_csolver.getNormalForm(rself);
+          if (nfSelf.d_nf.size() > 1)
+          {
+            isAtomic = false;
+          }
+        }
+        if (isAtomic)
+        {
+          // otherwise, if the normal form is not a constant sequence, and we
+          // are not a non-atomic update term, then this term will be given to
+          // the core array solver.
+          d_currTerms[k].push_back(t);
+        }
+        return;
+      }
+      else
+      {
+        // if the normal form is a constant sequence, it is treated as a
+        // concatenation. We split per character and case split on whether the
+        // nth/update falls on each character below, which must have a size
+        // greater than one.
+        std::vector<Node> chars = Word::getChars(nf.d_nf[0]);
+        Assert(chars.size() > 1);
+        nfChildren.insert(nfChildren.end(), chars.begin(), chars.end());
       }
-      // if the normal form is a constant sequence, it is treated as a
-      // concatenation. We split per character and case split on whether the
-      // nth/update falls on each character below, which must have a size
-      // greater than one.
-      std::vector<Node> chars = Word::getChars(nf.d_nf[0]);
-      Assert (chars.size()>1);
-      nfChildren.insert(nfChildren.end(), chars.begin(), chars.end());
     }
     else
     {
       nfChildren.insert(nfChildren.end(), nf.d_nf.begin(), nf.d_nf.end());
     }
-    // otherwise, we are the concatenation of the components
-    // NOTE: for nth, split on index vs component lengths, do not introduce ITE
-    std::vector<Node> cond;
-    std::vector<Node> cchildren;
-    std::vector<Node> lacc;
-    for (const Node& c : nfChildren)
+  }
+  // otherwise, we are the concatenation of the components
+  // NOTE: for nth, split on index vs component lengths, do not introduce ITE
+  std::vector<Node> cond;
+  std::vector<Node> cchildren;
+  std::vector<Node> lacc;
+  SkolemCache* skc = d_termReg.getSkolemCache();
+  for (const Node& c : nfChildren)
+  {
+    Trace("seq-array-debug") << "...process " << c << std::endl;
+    Node clen = nm->mkNode(STRING_LENGTH, c);
+    Node currIndex = t[1];
+    Node currSum = d_zero;
+    if (!lacc.empty())
     {
-      Trace("seq-array-debug") << "...process " << c << std::endl;
-      Node clen = nm->mkNode(STRING_LENGTH, c);
-      Node currIndex = t[1];
-      if (!lacc.empty())
-      {
-        Node currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
-        currIndex = nm->mkNode(MINUS, currIndex, currSum);
-      }
-      Node cc;
-      // If it is a constant of length one, then the update/nth is determined
-      // in this interval. Notice this is done here as
-      // an optimization to short cut introducing terms like
-      // (seq.nth (seq.unit c) i), which by construction is only relevant in
-      // the context where i = 0, hence we replace by c here.
-      if (c.getKind() == CONST_SEQUENCE)
-      {
-        const Sequence& seq = c.getConst<Sequence>();
-        if (seq.size() == 1)
-        {
-          if (k == STRING_UPDATE)
-          {
-            cc = nm->mkNode(ITE, t[1].eqNode(d_zero), t[2], c);
-          }
-          else
-          {
-            cc = seq.getVec()[0];
-          }
-        }
-      }
-      // if we did not process as a constant of length one
-      if (cc.isNull())
+      currSum = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+      currIndex = nm->mkNode(MINUS, currIndex, currSum);
+    }
+    Node cc;
+    if (k == STRING_UPDATE && checkInv)
+    {
+      // component for the reverse form of the update inference is a fresh
+      // variable, in particular, the purification variable for the substring
+      // of the term we are updating.
+      Node sstr = nm->mkNode(STRING_SUBSTR, t[0], currSum, clen);
+      cc = skc->mkSkolemCached(sstr, SkolemCache::SkolemId::SK_PURIFY, "z");
+    }
+    // If it is a constant of length one, then the update/nth is determined
+    // in this interval. Notice this is done here as
+    // an optimization to short cut introducing terms like
+    // (seq.nth (seq.unit c) i), which by construction is only relevant in
+    // the context where i = 0, hence we replace by c here.
+    else if (c.getKind() == CONST_SEQUENCE)
+    {
+      const Sequence& seq = c.getConst<Sequence>();
+      if (seq.size() == 1)
       {
         if (k == STRING_UPDATE)
         {
-          cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]);
+          cc = nm->mkNode(ITE, t[1].eqNode(d_zero), t[2], c);
         }
         else
         {
-          Assert(k == SEQ_NTH);
-          cc = nm->mkNode(SEQ_NTH, c, currIndex);
+          cc = seq.getVec()[0];
         }
       }
-      Trace("seq-array-debug") << "......component " << cc << std::endl;
-      cchildren.push_back(cc);
-      lacc.push_back(clen);
-      if (k == SEQ_NTH)
+    }
+    // if we did not process as a constant of length one
+    if (cc.isNull())
+    {
+      if (k == STRING_UPDATE)
+      {
+        cc = nm->mkNode(STRING_UPDATE, c, currIndex, t[2]);
+      }
+      else
       {
-        Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
-        Node cf = nm->mkNode(LT, t[1], currSumPost);
-        Trace("seq-array-debug") << "......condition " << cf << std::endl;
-        cond.push_back(cf);
+        Assert(k == SEQ_NTH);
+        cc = nm->mkNode(SEQ_NTH, c, currIndex);
       }
     }
-    // z = (seq.++ x y) =>
-    // (seq.update z n l) =
-    //   (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1))
-    // z = (seq.++ x y) =>
-    // (seq.nth z n) =
-    //    (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n)
-    //    (ite (< n (str.len x)) (seq.nth x n)
-    //      (seq.nth y (- n (str.len x)))))
-    InferenceId iid;
-    Node eq;
-    if (k == STRING_UPDATE)
+    Trace("seq-array-debug") << "......component " << cc << std::endl;
+    cchildren.push_back(cc);
+    lacc.push_back(clen);
+    if (k == SEQ_NTH)
     {
-      Node finalc = utils::mkConcat(cchildren, t.getType());
-      eq = t.eqNode(finalc);
-      iid = InferenceId::STRINGS_ARRAY_UPDATE_CONCAT;
+      Node currSumPost = lacc.size() == 1 ? lacc[0] : nm->mkNode(PLUS, lacc);
+      Node cf = nm->mkNode(LT, t[1], currSumPost);
+      Trace("seq-array-debug") << "......condition " << cf << std::endl;
+      cond.push_back(cf);
+    }
+    else if (k == STRING_UPDATE && checkInv)
+    {
+      Node ccu = nm->mkNode(STRING_UPDATE, cc, currIndex, t[2]);
+      Node eq = c.eqNode(ccu);
+      Trace("seq-array-debug") << "......condition " << eq << std::endl;
+      cond.push_back(eq);
+    }
+  }
+  // z = (seq.++ x y) =>
+  // (seq.update z n l) =
+  //   (seq.++ (seq.update x n 1) (seq.update y (- n len(x)) 1))
+  // z = (seq.++ x y) =>
+  // (seq.nth z n) =
+  //    (ite (or (< n 0) (>= n (+ (str.len x) (str.len y)))) (Uf z n)
+  //    (ite (< n (str.len x)) (seq.nth x n)
+  //      (seq.nth y (- n (str.len x)))))
+  InferenceId iid;
+  Node eq;
+  if (k == STRING_UPDATE)
+  {
+    Node finalc = utils::mkConcat(cchildren, t.getType());
+    if (checkInv)
+    {
+      eq = t[0].eqNode(finalc);
+      cond.push_back(eq);
+      eq = nm->mkAnd(cond);
     }
     else
     {
-      std::reverse(cchildren.begin(), cchildren.end());
-      std::reverse(cond.begin(), cond.end());
-      Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
-      eq = t.eqNode(cchildren[0]);
-      for (size_t i = 1, ncond = cond.size(); i < ncond; i++)
-      {
-        eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq);
-      }
-      Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
-      Node oobCond =
-          nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode());
-      eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq);
-      iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT;
+      eq = t.eqNode(finalc);
     }
-    std::vector<Node> exp;
+    iid = checkInv ? InferenceId::STRINGS_ARRAY_UPDATE_CONCAT_INVERSE
+                   : InferenceId::STRINGS_ARRAY_UPDATE_CONCAT;
+  }
+  else
+  {
+    std::reverse(cchildren.begin(), cchildren.end());
+    std::reverse(cond.begin(), cond.end());
+    Node uf = SkolemCache::mkSkolemSeqNth(t[0].getType(), "Uf");
+    eq = t.eqNode(cchildren[0]);
+    for (size_t i = 1, ncond = cond.size(); i < ncond; i++)
+    {
+      eq = nm->mkNode(ITE, cond[i], t.eqNode(cchildren[i]), eq);
+    }
+    Node ufa = nm->mkNode(APPLY_UF, uf, t[0], t[1]);
+    Node oobCond =
+        nm->mkNode(OR, nm->mkNode(LT, t[1], d_zero), cond[0].notNode());
+    eq = nm->mkNode(ITE, oobCond, t.eqNode(ufa), eq);
+    iid = InferenceId::STRINGS_ARRAY_NTH_CONCAT;
+  }
+  std::vector<Node> exp;
+  if (checkInv)
+  {
+    d_im.addToExplanation(rself, t, exp);
+    NormalForm& nfSelf = d_csolver.getNormalForm(rself);
+    exp.insert(exp.end(), nfSelf.d_exp.begin(), nfSelf.d_exp.end());
+    exp.push_back(t.eqNode(nfSelf.d_base));
+  }
+  else
+  {
     d_im.addToExplanation(r, t[0], exp);
     exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end());
     exp.push_back(t[0].eqNode(nf.d_base));
-    if (d_eqProc.find(eq) == d_eqProc.end())
-    {
-      d_eqProc.insert(eq);
-      Trace("seq-array") << "- send lemma - " << eq << std::endl;
-      d_im.sendInference(exp, eq, iid);
-    }
+  }
+  if (d_eqProc.find(eq) == d_eqProc.end())
+  {
+    d_eqProc.insert(eq);
+    Trace("seq-array") << "- send lemma - " << eq << std::endl;
+    d_im.sendInference(exp, eq, iid);
   }
 }
 
diff --git a/src/theory/strings/array_solver.h b/src/theory/strings/array_solver.h
index 23bacd118..c129f4837 100644
--- a/src/theory/strings/array_solver.h
+++ b/src/theory/strings/array_solver.h
@@ -85,6 +85,12 @@ class ArraySolver : protected EnvObj
  private:
   /** check terms of given kind */
   void checkTerms(Kind k);
+  /** check inferences for the given term
+   *
+   * @param t the term to check
+   * @param checkInv Whether we are checking the inverse of the rule
+   */
+  void checkTerm(Node t, bool checkInv);
   /** The solver state object */
   SolverState& d_state;
   /** The (custom) output channel of the theory of strings */
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index 6a09e04cf..7c536454a 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -1136,6 +1136,8 @@ set(regress_0_tests
   regress0/seq/seq-nth.smt2
   regress0/seq/seq-rewrites.smt2
   regress0/seq/seq-types.smt2
+  regress0/seq/update-concat-non-atomic.smt2
+  regress0/seq/update-concat-non-atomic2.smt2
   regress0/seq/update-eq-unsat.smt2
   regress0/sets/abt-min.smt2
   regress0/sets/abt-te-exh.smt2
diff --git a/test/regress/regress0/seq/update-concat-non-atomic.smt2 b/test/regress/regress0/seq/update-concat-non-atomic.smt2
new file mode 100644
index 000000000..346f9c45f
--- /dev/null
+++ b/test/regress/regress0/seq/update-concat-non-atomic.smt2
@@ -0,0 +1,17 @@
+; COMMAND-LINE: --strings-exp --seq-array=lazy -q
+; EXPECT: sat
+(set-logic ALL)
+(declare-sort S 0)
+(declare-const x (Seq S))
+(declare-const y (Seq S))
+(declare-const y1 (Seq S))
+(declare-const y2 (Seq S))
+(declare-const i Int)
+(declare-const a S)
+
+(assert (= y (seq.update x i (seq.unit a))))
+(assert (= y (seq.++ y1 y2)))
+(assert (> (seq.len y1) 0))
+(assert (> (seq.len y2) 0))
+
+(check-sat)
diff --git a/test/regress/regress0/seq/update-concat-non-atomic2.smt2 b/test/regress/regress0/seq/update-concat-non-atomic2.smt2
new file mode 100644
index 000000000..5c14a10cf
--- /dev/null
+++ b/test/regress/regress0/seq/update-concat-non-atomic2.smt2
@@ -0,0 +1,24 @@
+; COMMAND-LINE: --strings-exp --seq-array=lazy -q
+; EXPECT: sat
+(set-logic ALL)
+(declare-sort S 0)
+(declare-const x (Seq S))
+(declare-const y (Seq S))
+(declare-const y1 (Seq S))
+(declare-const y2 (Seq S))
+(declare-const x1 (Seq S))
+(declare-const x2 (Seq S))
+(declare-const i Int)
+(declare-const a S)
+
+(assert (= y (seq.update x i (seq.unit a))))
+(assert (= y (seq.++ y1 y2)))
+(assert (= x (seq.++ x1 x2)))
+(assert (> (seq.len y1) 0))
+(assert (> (seq.len y2) 0))
+(assert (> (seq.len x1) 0))
+(assert (> (seq.len x2) 0))
+(assert (<= 0 i))
+(assert (< i (seq.len x)))
+
+(check-sat)
-- 
2.30.2