From 475985ccc80b1ddb38d912a3c6658912f1fc6207 Mon Sep 17 00:00:00 2001
From: Andres Noetzli <andres.noetzli@gmail.com>
Date: Sat, 1 Aug 2020 23:56:27 -0700
Subject: [PATCH] Ensure strict length constraint for decompose rule (#4821)

Fixes #4820. The performance issue was caused by
0988217562006d3f59e01dc261f39121df6d75f5. That commit introduced an
option (`--strings-len-conc`) that optionally moves the length
constraint for skolems to the conclusion of an inference instead of
making it part of the term registration. However, for
`DEQ_DISL_STRINGS_SPLIT`, we were only asserting that `LENGTH_GEQ_ONE`
in the case where this option was not enabled, instead of asserting that
the length of the skolem is equal to the component on the other side of
the disequality. This lead to an infinite loop of inferences because we
effectively were just splitting a component into two skolems and the
only restriction was that the first one was non-empty. We guessed the
second skolem to be empty, so the first skolem was equal to the
component, the skolem was marked congruent, and we ended up with the
same normal form as before.

The commit fixes the issue by adding an argument to
`getDecomposeConclusion()` that specifies whether to add the length
constraint or not. This option is used to always add the length
constraint in the case of `DEQ_DISL_STRINGS_SPLIT`.
---
 src/theory/strings/core_solver.cpp           | 43 ++++++++++++--------
 src/theory/strings/core_solver.h             |  5 ++-
 test/regress/CMakeLists.txt                  |  1 +
 test/regress/regress0/strings/issue4820.smt2 | 15 +++++++
 4 files changed, 46 insertions(+), 18 deletions(-)
 create mode 100644 test/regress/regress0/strings/issue4820.smt2
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 062bfe12f..438a559b8 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -839,8 +839,12 @@ size_t CoreSolver::getSufficientNonEmptyOverlap(Node c, Node d, bool isRev)
   return p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
 }
 
-Node CoreSolver::getDecomposeConclusion(
-    Node x, Node l, bool isRev, SkolemCache* skc, std::vector<Node>& newSkolems)
+Node CoreSolver::getDecomposeConclusion(Node x,
+                                        Node l,
+                                        bool isRev,
+                                        bool addLenConc,
+                                        SkolemCache* skc,
+                                        std::vector<Node>& newSkolems)
 {
   Assert(l.getType().isInteger());
   NodeManager* nm = NodeManager::currentNM();
@@ -850,7 +854,7 @@ Node CoreSolver::getDecomposeConclusion(
   Node sk2 = skc->mkSkolemCached(x, n, SkolemCache::SK_SUFFIX_REM, "dc_spt2");
   newSkolems.push_back(sk2);
   Node conc = x.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk2));
-  if (options::stringLenConc())
+  if (addLenConc)
   {
     Node lc = nm->mkNode(STRING_LENGTH, isRev ? sk2 : sk1).eqNode(l);
     conc = nm->mkNode(AND, conc, lc);
@@ -2081,8 +2085,8 @@ void CoreSolver::processDeq(Node ni, Node nj)
           // len(x)>=1 => x = k1 ++ k2 ^ len(k1) = 1
           SkolemCache* skc = d_termReg.getSkolemCache();
           std::vector<Node> newSkolems;
-          Node conc =
-              getDecomposeConclusion(nck, d_one, false, skc, newSkolems);
+          Node conc = getDecomposeConclusion(
+              nck, d_one, false, options::stringLenConc(), skc, newSkolems);
           Assert(newSkolems.size() == 2);
           if (options::stringLenConc())
           {
@@ -2108,8 +2112,8 @@ void CoreSolver::processDeq(Node ni, Node nj)
         // are both non-constants. We split them into parts that have the same
         // lengths.
         //
-        // len(x) >= len(y) => x = k1 ++ k2 ^ len(k1) = len(y)
-        // len(y) >= len(x) => y = k3 ++ k4 ^ len(k3) = len(x)
+        // len(x) > len(y) => x = k1 ++ k2 ^ len(k1) = len(y)
+        // len(y) > len(x) => y = k3 ++ k4 ^ len(k3) = len(x)
         Trace("strings-solve")
             << "Non-Simple Case 1 : add lemmas " << std::endl;
         SkolemCache* skc = d_termReg.getSkolemCache();
@@ -2120,19 +2124,24 @@ void CoreSolver::processDeq(Node ni, Node nj)
           Node uy = r == 0 ? y : x;
           Node uxLen = nm->mkNode(STRING_LENGTH, ux);
           Node uyLen = nm->mkNode(STRING_LENGTH, uy);
+          // We always request the length constraint in the conclusion here
+          // because the skolem needs to have length `uyLen`. If we only assert
+          // that the skolem's length is greater or equal to one, we can end up
+          // in a loop:
+          //
+          // 1. Split: x = k1 ++ k2 ^ len(k1) >= 1
+          // 2. Assume: k2 = ""
+          // 3. Deduce: x = k1
+          //
+          // After step 3, `k1` is marked congruent because `x` is the older
+          // variable. So we get `x` in the normal form again.
           std::vector<Node> newSkolems;
-          Node conc = getDecomposeConclusion(ux, uyLen, false, skc, newSkolems);
+          Node conc =
+              getDecomposeConclusion(ux, uyLen, false, true, skc, newSkolems);
           Assert(newSkolems.size() == 2);
-          if (options::stringLenConc())
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE);
-          }
-          else
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_GEQ_ONE);
-          }
+          d_termReg.registerTermAtomic(newSkolems[1], LENGTH_GEQ_ONE);
           std::vector<Node> antecLen;
-          antecLen.push_back(nm->mkNode(GEQ, uxLen, uyLen));
+          antecLen.push_back(nm->mkNode(GT, uxLen, uyLen));
           d_im.sendInference(
               {}, antecLen, conc, Inference::DEQ_DISL_STRINGS_SPLIT, true);
         }
diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h
index 2ee61e759..da364e243 100644
--- a/src/theory/strings/core_solver.h
+++ b/src/theory/strings/core_solver.h
@@ -260,11 +260,13 @@ class CoreSolver
    * a conjunction of splitting string x into pieces based on length l, e.g.:
    *   x = k_1 ++ k_2
    * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
-   * length l if isRev is false (resp. true).
+   * length l if isRev is false (resp. true). The function optionally adds a
+   * length constraint len(k_1) = l (resp. len(k_2) = l).
    *
    * @param x The string term
    * @param l The length term
    * @param isRev Whether the equation is in a reverse direction
+   * @param addLenConc Whether to add the length constraint
    * @param skc The skolem cache (to allocate fresh variables if necessary)
    * @param newSkolems The vector to add new variables to
    * @return The conclusion of the inference.
@@ -272,6 +274,7 @@ class CoreSolver
   static Node getDecomposeConclusion(Node x,
                                      Node l,
                                      bool isRev,
+                                     bool addLenConc,
                                      SkolemCache* skc,
                                      std::vector<Node>& newSkolems);
 
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index a0fee2185..f9840e552 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -983,6 +983,7 @@ set(regress_0_tests
   regress0/strings/issue4376.smt2
   regress0/strings/issue4662-consume-nterm.smt2
   regress0/strings/issue4674-recomp-nf.smt2
+  regress0/strings/issue4820.smt2
   regress0/strings/itos-entail.smt2
   regress0/strings/large-model.smt2
   regress0/strings/leadingzero001.smt2
diff --git a/test/regress/regress0/strings/issue4820.smt2 b/test/regress/regress0/strings/issue4820.smt2
new file mode 100644
index 000000000..0bf0fac65
--- /dev/null
+++ b/test/regress/regress0/strings/issue4820.smt2
@@ -0,0 +1,15 @@
+; EXPECT: sat
+(set-logic QF_S)
+(declare-fun _substvar_270_ () String)
+(declare-fun _substvar_273_ () Bool)
+(declare-fun _substvar_275_ () Bool)
+(declare-fun _substvar_293_ () Bool)
+(declare-const v0 Bool)
+(declare-const v1 Bool)
+(declare-const v2 Bool)
+(declare-const v3 Bool)
+(declare-const Str3 String)
+(declare-const Str7 String)
+(declare-const Str15 String)
+(assert (or (xor true true (xor v0 v3 false v1 false v2 v2 v1 false v2 v1) true true (=> (or v1 (distinct Str15 _substvar_270_ Str3 (str.++ Str7 Str7)) v2) false) false true) _substvar_275_))
+(check-sat)
-- 
2.30.2