From ddf647904de838e8e6ee266ad13de8a6a90250c8 Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Mon, 22 Feb 2021 14:15:43 -0600
Subject: [PATCH] Require length-in-conclusion form for strings inferences
 (#5953)

Previously, it was optional whether to put length constraints in conclusion for deq string inferences. However, due to optimizations in skolem caching, it is now required to guard these length constraints.

It furthermore changes the policy to not ignore the lengths for the registered skolem, since it may be shared elsewhere.

Fixes #5940.

Notice that proof-new already requires this option to be enabled when proofs are enabled. Hence, this will simplify proof-new when merged.
---
 src/options/strings_options.toml              |  8 -----
 src/theory/strings/core_solver.cpp            | 31 +++++--------------
 src/theory/strings/core_solver.h              |  9 +++---
 test/regress/CMakeLists.txt                   |  2 ++
 .../strings/issue5940-2-skc-len-conc.smt2     |  8 +++++
 .../strings/issue5940-skc-len-conc.smt2       |  9 ++++++
 6 files changed, 31 insertions(+), 36 deletions(-)
 create mode 100644 test/regress/regress1/strings/issue5940-2-skc-len-conc.smt2
 create mode 100644 test/regress/regress1/strings/issue5940-skc-len-conc.smt2
diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml
index f9893ef3a..75da2a35b 100644
--- a/src/options/strings_options.toml
+++ b/src/options/strings_options.toml
@@ -215,14 +215,6 @@ header = "options/strings_options.h"
   read_only  = true
   help       = "use a single skolem for the variable splitting rule"
 
-[[option]]
-  name       = "stringLenConc"
-  category   = "regular"
-  long       = "strings-len-conc"
-  type       = "bool"
-  default    = "false"
-  help       = "add skolem length constraints in conclusions of inferences"
-
 [[option]]
   name       = "stringEagerEval"
   category   = "regular"
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 7268a35e2..5446f67be 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -764,7 +764,7 @@ Node CoreSolver::getConclusion(Node x,
       // make agnostic to x/y
       conc = x < y ? nm->mkNode(OR, eq1, eq2) : nm->mkNode(OR, eq2, eq1);
     }
-    if (options::stringUnifiedVSpt() && options::stringLenConc())
+    if (options::stringUnifiedVSpt())
     {
       // we can assume its length is greater than zero
       Node emp = Word::mkEmptyWord(sk1.getType());
@@ -842,7 +842,6 @@ size_t CoreSolver::getSufficientNonEmptyOverlap(Node c, Node d, bool isRev)
 Node CoreSolver::getDecomposeConclusion(Node x,
                                         Node l,
                                         bool isRev,
-                                        bool addLenConc,
                                         SkolemCache* skc,
                                         std::vector<Node>& newSkolems)
 {
@@ -854,12 +853,9 @@ Node CoreSolver::getDecomposeConclusion(Node x,
   Node sk2 = skc->mkSkolemCached(x, n, SkolemCache::SK_SUFFIX_REM, "dc_spt2");
   newSkolems.push_back(sk2);
   Node conc = x.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk2));
-  if (addLenConc)
-  {
-    Node lc = nm->mkNode(STRING_LENGTH, isRev ? sk2 : sk1).eqNode(l);
-    conc = nm->mkNode(AND, conc, lc);
-  }
-  return conc;
+  // add the length constraint to the conclusion
+  Node lc = nm->mkNode(STRING_LENGTH, isRev ? sk2 : sk1).eqNode(l);
+  return nm->mkNode(AND, conc, lc);
 }
 
 void CoreSolver::getNormalForms(Node eqc,
@@ -1706,11 +1702,6 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       iinfo.setId(InferenceId::STRINGS_SSPLIT_VAR);
       iinfo.d_conc =
           getConclusion(x, y, PfRule::CONCAT_SPLIT, isRev, skc, newSkolems);
-      if (options::stringUnifiedVSpt() && !options::stringLenConc())
-      {
-        Assert(newSkolems.size() == 1);
-        iinfo.d_skolems[LENGTH_GEQ_ONE].push_back(newSkolems[0]);
-      }
     }
     else if (lentTestSuccess == 0)
     {
@@ -2180,16 +2171,8 @@ void CoreSolver::processDeq(Node ni, Node nj)
           SkolemCache* skc = d_termReg.getSkolemCache();
           std::vector<Node> newSkolems;
           Node conc = getDecomposeConclusion(
-              nck, d_one, false, options::stringLenConc(), skc, newSkolems);
+              nck, d_one, false, skc, newSkolems);
           Assert(newSkolems.size() == 2);
-          if (options::stringLenConc())
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_IGNORE);
-          }
-          else
-          {
-            d_termReg.registerTermAtomic(newSkolems[0], LENGTH_ONE);
-          }
           std::vector<Node> antecLen;
           antecLen.push_back(nm->mkNode(GEQ, nckLenTerm, d_one));
           d_im.sendInference(antecLen,
@@ -2219,7 +2202,7 @@ void CoreSolver::processDeq(Node ni, Node nj)
           Node uy = r == 0 ? y : x;
           Node uxLen = nm->mkNode(STRING_LENGTH, ux);
           Node uyLen = nm->mkNode(STRING_LENGTH, uy);
-          // We always request the length constraint in the conclusion here
+          // We always add the length constraint in the conclusion here
           // because the skolem needs to have length `uyLen`. If we only assert
           // that the skolem's length is greater or equal to one, we can end up
           // in a loop:
@@ -2232,7 +2215,7 @@ void CoreSolver::processDeq(Node ni, Node nj)
           // variable. So we get `x` in the normal form again.
           std::vector<Node> newSkolems;
           Node conc =
-              getDecomposeConclusion(ux, uyLen, false, true, skc, newSkolems);
+              getDecomposeConclusion(ux, uyLen, false, skc, newSkolems);
           Assert(newSkolems.size() == 2);
           d_termReg.registerTermAtomic(newSkolems[1], LENGTH_GEQ_ONE);
           std::vector<Node> antecLen;
diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h
index 8c4c28a96..a3909d11d 100644
--- a/src/theory/strings/core_solver.h
+++ b/src/theory/strings/core_solver.h
@@ -260,13 +260,15 @@ class CoreSolver
    * a conjunction of splitting string x into pieces based on length l, e.g.:
    *   x = k_1 ++ k_2
    * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
-   * length l if isRev is false (resp. true). The function optionally adds a
-   * length constraint len(k_1) = l (resp. len(k_2) = l).
+   * length l if isRev is false (resp. true). The function also adds a
+   * length constraint len(k_1) = l (resp. len(k_2) = l). Note that adding this
+   * constraint to the conclusion is *not* optional, since the skolems k_1 and
+   * k_2 may be shared, hence their length constraint must be guarded by the
+   * premises of this inference.
    *
    * @param x The string term
    * @param l The length term
    * @param isRev Whether the equation is in a reverse direction
-   * @param addLenConc Whether to add the length constraint
    * @param skc The skolem cache (to allocate fresh variables if necessary)
    * @param newSkolems The vector to add new variables to
    * @return The conclusion of the inference.
@@ -274,7 +276,6 @@ class CoreSolver
   static Node getDecomposeConclusion(Node x,
                                      Node l,
                                      bool isRev,
-                                     bool addLenConc,
                                      SkolemCache* skc,
                                      std::vector<Node>& newSkolems);
 
diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt
index fa4628de7..e4a1251b5 100644
--- a/test/regress/CMakeLists.txt
+++ b/test/regress/CMakeLists.txt
@@ -2029,6 +2029,8 @@ set(regress_1_tests
   regress1/strings/issue5610-2-infer-proxy.smt2
   regress1/strings/issue5611-deq-norm-emp.smt2
   regress1/strings/issue5692-infer-proxy.smt2
+  regress1/strings/issue5940-skc-len-conc.smt2
+  regress1/strings/issue5940-2-skc-len-conc.smt2
   regress1/strings/kaluza-fl.smt2
   regress1/strings/loop002.smt2
   regress1/strings/loop003.smt2
diff --git a/test/regress/regress1/strings/issue5940-2-skc-len-conc.smt2 b/test/regress/regress1/strings/issue5940-2-skc-len-conc.smt2
new file mode 100644
index 000000000..793381503
--- /dev/null
+++ b/test/regress/regress1/strings/issue5940-2-skc-len-conc.smt2
@@ -0,0 +1,8 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () Int)
+(assert (not (= (str.prefixof (str.replace x x (str.replace "A" x "")) (str.replace "" x "A")) (= (not (= (not (not (= (= (str.prefixof (str.replace "A" x "") x) (str.prefixof "A" x)) (str.prefixof x x)))) (str.prefixof (str.replace "A" x "") (str.replace "A" "A" "")))) (str.prefixof x "A")))))
+(check-sat)
diff --git a/test/regress/regress1/strings/issue5940-skc-len-conc.smt2 b/test/regress/regress1/strings/issue5940-skc-len-conc.smt2
new file mode 100644
index 000000000..f78a12deb
--- /dev/null
+++ b/test/regress/regress1/strings/issue5940-skc-len-conc.smt2
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(define-fun z () Int 0)                                                            
+(define-fun y () String "")                                                        
+(define-fun x () String "")                                                        
+(assert (= "B" (str.replace (str.substr "A" 0 z) "" 
+             (str.replace "B" (str.substr "B" 0 0) (str.substr "A" 0 z)))))
+(check-sat)
-- 
2.30.2