From fa43b8842bd9082401cbf85c8de213a8ae152603 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 22 Jun 2022 19:45:52 -0500
Subject: [PATCH] Fix handling of injectivity for str.unit (#8899)

Fixes #8890.
---
 src/theory/inference_id.cpp                   |  2 +
 src/theory/inference_id.h                     |  5 +++
 src/theory/strings/base_solver.cpp            | 43 +++++++++++++++++--
 src/theory/strings/base_solver.h              |  5 +++
 src/theory/strings/core_solver.cpp            |  6 ++-
 src/theory/strings/infer_proof_cons.cpp       |  1 +
 src/theory/strings/term_registry.cpp          | 12 +-----
 src/theory/strings/theory_strings_utils.cpp   |  8 ++++
 src/theory/strings/theory_strings_utils.h     |  5 +++
 test/regress/cli/CMakeLists.txt               |  1 +
 .../regress1/strings/issue8890-inj-oob.smt2   |  9 ++++
 11 files changed, 81 insertions(+), 16 deletions(-)
 create mode 100644 test/regress/cli/regress1/strings/issue8890-inj-oob.smt2

diff --git a/src/theory/inference_id.cpp b/src/theory/inference_id.cpp
index 55cb72451..495971dc9 100644
--- a/src/theory/inference_id.cpp
+++ b/src/theory/inference_id.cpp
@@ -387,6 +387,8 @@ const char* toString(InferenceId i)
     case InferenceId::STRINGS_I_CONST_CONFLICT:
       return "STRINGS_I_CONST_CONFLICT";
     case InferenceId::STRINGS_I_NORM: return "STRINGS_I_NORM";
+    case InferenceId::STRINGS_UNIT_SPLIT: return "STRINGS_UNIT_SPLIT";
+    case InferenceId::STRINGS_UNIT_INJ_OOB: return "STRINGS_UNIT_INJ_OOB";
     case InferenceId::STRINGS_UNIT_INJ: return "STRINGS_UNIT_INJ";
     case InferenceId::STRINGS_UNIT_CONST_CONFLICT:
       return "STRINGS_UNIT_CONST_CONFLICT";
diff --git a/src/theory/inference_id.h b/src/theory/inference_id.h
index 3a6452e45..4166560e3 100644
--- a/src/theory/inference_id.h
+++ b/src/theory/inference_id.h
@@ -561,6 +561,11 @@ enum class InferenceId
   // equal after e.g. removing strings that are currently empty. For example:
   //   y = "" ^ z = "" => x ++ y = z ++ x
   STRINGS_I_NORM,
+  // split between the argument of two equated str.unit terms
+  STRINGS_UNIT_SPLIT,
+  // a code point must be out of bounds due to (str.unit x) = (str.unit y) and
+  // x != y.
+  STRINGS_UNIT_INJ_OOB,
   // injectivity of seq.unit
   // (seq.unit x) = (seq.unit y) => x=y, or
   // (seq.unit x) = (seq.unit c) => x=c
diff --git a/src/theory/strings/base_solver.cpp b/src/theory/strings/base_solver.cpp
index 9193551cf..998926802 100644
--- a/src/theory/strings/base_solver.cpp
+++ b/src/theory/strings/base_solver.cpp
@@ -37,7 +37,12 @@ BaseSolver::BaseSolver(Env& env,
                        SolverState& s,
                        InferenceManager& im,
                        TermRegistry& tr)
-    : EnvObj(env), d_state(s), d_im(im), d_termReg(tr), d_congruent(context())
+    : EnvObj(env),
+      d_state(s),
+      d_im(im),
+      d_termReg(tr),
+      d_congruent(context()),
+      d_strUnitOobEq(userContext())
 {
   d_false = NodeManager::currentNM()->mkConst(false);
   d_cardSize = options().strings.stringsAlphaCard;
@@ -56,6 +61,7 @@ void BaseSolver::checkInit()
   // count of congruent, non-congruent per operator (independent of type),
   // for debugging.
   std::map<Kind, std::pair<uint32_t, uint32_t>> congruentCount;
+  NodeManager* nm = NodeManager::currentNM();
   eq::EqualityEngine* ee = d_state.getEqualityEngine();
   eq::EqClassesIterator eqcs_i = eq::EqClassesIterator(ee);
   while (!eqcs_i.isFinished())
@@ -122,10 +128,39 @@ void BaseSolver::checkInit()
             }
             if (!d_state.areEqual(s, t))
             {
-              // (seq.unit x) = (seq.unit y) => x=y, or
-              // (seq.unit x) = (seq.unit c) => x=c
               Assert(s.getType() == t.getType());
-              d_im.sendInference(exp, s.eqNode(t), InferenceId::STRINGS_UNIT_INJ);
+              Node eq = s.eqNode(t);
+              if (n.getType().isString())
+              {
+                // String unit is not injective, due to invalid code points.
+                // We do an inference scheme in two parts.
+                // for (str.unit x), (str.unit y): x = y or x != y
+                if (!d_state.areDisequal(s, t))
+                {
+                  d_im.sendSplit(s, t, InferenceId::STRINGS_UNIT_SPLIT);
+                }
+                else if (d_strUnitOobEq.find(eq) == d_strUnitOobEq.end())
+                {
+                  // cache that we have performed this inference
+                  Node eqSym = t.eqNode(s);
+                  d_strUnitOobEq.insert(eq);
+                  d_strUnitOobEq.insert(eqSym);
+                  exp.push_back(eq.notNode());
+                  // (str.unit x) = (str.unit y) ^ x != y =>
+                  // x or y is not a valid code point
+                  Node scr = utils::mkCodeRange(s, d_cardSize);
+                  Node tcr = utils::mkCodeRange(t, d_cardSize);
+                  Node conc = nm->mkNode(OR, scr.notNode(), tcr.notNode());
+                  d_im.sendInference(
+                      exp, conc, InferenceId::STRINGS_UNIT_INJ_OOB);
+                }
+              }
+              else
+              {
+                // (seq.unit x) = (seq.unit y) => x=y, or
+                // (seq.unit x) = (seq.unit c) => x=c
+                d_im.sendInference(exp, eq, InferenceId::STRINGS_UNIT_INJ);
+              }
             }
           }
           // update best content
diff --git a/src/theory/strings/base_solver.h b/src/theory/strings/base_solver.h
index 69c14d347..f687ed0eb 100644
--- a/src/theory/strings/base_solver.h
+++ b/src/theory/strings/base_solver.h
@@ -233,6 +233,11 @@ class BaseSolver : protected EnvObj
    * various inference schemas implemented by this class.
    */
   NodeSet d_congruent;
+  /**
+   * Set of equalities that we have applied STRINGS_UNIT_INJ_OOB to
+   * in the current user context
+   */
+  NodeSet d_strUnitOobEq;
   /**
    * Maps equivalence classes to their info, see description of `BaseEqcInfo`
    * for more information.
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 8c74af50f..99aac3815 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -1276,7 +1276,10 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     Trace("strings-solve-debug")
         << "Process " << x << " ... " << y << std::endl;
 
-    if (x == y)
+    // require checking equal here, usually x == y, but this also holds the
+    // case of (str.unit a) and (str.unit b) for distinct a, b, where we do
+    // not want to unify these terms here.
+    if (d_state.areEqual(x, y))
     {
       // The normal forms have the same term at the current position. We just
       // continue with the next index. By construction of the normal forms, we
@@ -1290,7 +1293,6 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       index++;
       continue;
     }
-    Assert(!d_state.areEqual(x, y));
 
     std::vector<Node> lenExp;
     Node xLenTerm = d_state.getLength(x, lenExp);
diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp
index 212bc56d3..567f61f26 100644
--- a/src/theory/strings/infer_proof_cons.cpp
+++ b/src/theory/strings/infer_proof_cons.cpp
@@ -664,6 +664,7 @@ void InferProofCons::convert(InferenceId infer,
     case InferenceId::STRINGS_DEQ_STRINGS_EQ:
     case InferenceId::STRINGS_DEQ_LENS_EQ:
     case InferenceId::STRINGS_DEQ_LENGTH_SP:
+    case InferenceId::STRINGS_UNIT_SPLIT:
     {
       if (conc.getKind() != OR)
       {
diff --git a/src/theory/strings/term_registry.cpp b/src/theory/strings/term_registry.cpp
index b85e3460b..4ca2ac5f7 100644
--- a/src/theory/strings/term_registry.cpp
+++ b/src/theory/strings/term_registry.cpp
@@ -77,14 +77,6 @@ uint32_t TermRegistry::getAlphabetCardinality() const { return d_alphaCard; }
 
 void TermRegistry::finishInit(InferenceManager* im) { d_im = im; }
 
-Node mkCodeRange(Node t, uint32_t alphaCard)
-{
-  NodeManager* nm = NodeManager::currentNM();
-  return nm->mkNode(AND,
-                    nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))),
-                    nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard))));
-}
-
 Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard)
 {
   NodeManager* nm = NodeManager::currentNM();
@@ -96,7 +88,7 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard)
     Node len = nm->mkNode(STRING_LENGTH, t[0]);
     Node code_len = len.eqNode(nm->mkConstInt(Rational(1)));
     Node code_eq_neg1 = t.eqNode(nm->mkConstInt(Rational(-1)));
-    Node code_range = mkCodeRange(t, alphaCard);
+    Node code_range = utils::mkCodeRange(t, alphaCard);
     lemma = nm->mkNode(ITE, code_len, code_range, code_eq_neg1);
   }
   else if (tk == SEQ_NTH)
@@ -111,7 +103,7 @@ Node TermRegistry::eagerReduce(Node t, SkolemCache* sc, uint32_t alphaCard)
       Node c2 = nm->mkNode(GT, nm->mkNode(STRING_LENGTH, s), n);
       // check whether this application of seq.nth is defined.
       Node cond = nm->mkNode(AND, c1, c2);
-      Node code_range = mkCodeRange(t, alphaCard);
+      Node code_range = utils::mkCodeRange(t, alphaCard);
       // the lemma for `seq.nth`
       lemma = nm->mkNode(ITE, cond, code_range, t.eqNode(nm->mkConstInt(Rational(-1))));
       // IF: n >=0 AND n < len( s )
diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp
index e692e93c4..a3ab50919 100644
--- a/src/theory/strings/theory_strings_utils.cpp
+++ b/src/theory/strings/theory_strings_utils.cpp
@@ -449,6 +449,14 @@ Node mkAbstractStringValueForLength(Node n, Node len, size_t id)
   return quantifiers::mkNamedQuant(WITNESS, bvl, pred, ss.str());
 }
 
+Node mkCodeRange(Node t, uint32_t alphaCard)
+{
+  NodeManager* nm = NodeManager::currentNM();
+  return nm->mkNode(AND,
+                    nm->mkNode(GEQ, t, nm->mkConstInt(Rational(0))),
+                    nm->mkNode(LT, t, nm->mkConstInt(Rational(alphaCard))));
+}
+
 }  // namespace utils
 }  // namespace strings
 }  // namespace theory
diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h
index e6671d87e..14172a3df 100644
--- a/src/theory/strings/theory_strings_utils.h
+++ b/src/theory/strings/theory_strings_utils.h
@@ -217,6 +217,11 @@ Node mkForallInternal(Node bvl, Node body);
  */
 Node mkAbstractStringValueForLength(Node n, Node len, size_t id);
 
+/**
+ * Make the formula (and (>= t 0) (< t alphaCard)).
+ */
+Node mkCodeRange(Node t, uint32_t alphaCard);
+
 }  // namespace utils
 }  // namespace strings
 }  // namespace theory
diff --git a/test/regress/cli/CMakeLists.txt b/test/regress/cli/CMakeLists.txt
index 45eb213b8..30f1d7177 100644
--- a/test/regress/cli/CMakeLists.txt
+++ b/test/regress/cli/CMakeLists.txt
@@ -2622,6 +2622,7 @@ set(regress_1_tests
   regress1/strings/issue8094-witness-model.smt2
   regress1/strings/issue8347-has-skolem.smt2
   regress1/strings/issue8434-nterm-str-rw.smt2
+  regress1/strings/issue8890-inj-oob.smt2
   regress1/strings/kaluza-fl.smt2
   regress1/strings/loop002.smt2
   regress1/strings/loop003.smt2
diff --git a/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2 b/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2
new file mode 100644
index 000000000..4705a91e5
--- /dev/null
+++ b/test/regress/cli/regress1/strings/issue8890-inj-oob.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp --mbqi --strings-fmf
+; EXPECT: sat
+(set-logic ALL)
+(declare-fun b (Int) Bool)
+(declare-fun v () String)
+(declare-fun a () String)
+(assert (or (b 0) (= 0 (ite (= 1 (str.len v)) 1 0))))
+(assert (forall ((e String) (va Int)) (or (= va 0) (distinct 0 (ite (str.prefixof "-" a) (str.to_int (str.substr v 1 (str.len e))) (str.to_int e))))))
+(check-sat)
-- 
2.30.2