From e3231523b7bd8da0871b1efb63f23f1b3c4adbe7 Mon Sep 17 00:00:00 2001
From: Andres Noetzli <andres.noetzli@gmail.com>
Date: Sun, 16 Feb 2020 10:09:31 -0800
Subject: [PATCH] Activate reverse variant of F-Split inference (#3745)

This commit activates the reverse variant of the F-Split inference.
---
 src/theory/strings/core_solver.cpp            | 194 ++++++++----------
 src/theory/strings/normal_form.cpp            |  25 +++
 src/theory/strings/normal_form.h              |  14 ++
 .../strings/theory_strings_rewriter.cpp       |  21 --
 src/theory/strings/theory_strings_rewriter.h  |   3 -
 5 files changed, 129 insertions(+), 128 deletions(-)
diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index c27304dc8..723520b67 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -1293,115 +1293,101 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
         break;
       }
 
-      // FIXME
-      if (!isRev)
+      // At this point, we know that `nc` is non-empty, so we add that to our
+      // explanation.
+      Node ncnz = nc.eqNode(d_emptyString).negate();
+      info.d_ant.push_back(ncnz);
+
+      size_t ncIndex = index + 1;
+      Node nextConstStr = nfnc.collectConstantStringAt(ncIndex);
+      if (!nextConstStr.isNull())
       {
-        // At this point, we know that `nc` is non-empty, so we add that to our
-        // explanation.
-        Node ncnz = nc.eqNode(d_emptyString).negate();
-        info.d_ant.push_back(ncnz);
-
-        size_t ncIndex = index + 1;
-        Node nextConstStr = TheoryStringsRewriter::collectConstantStringAt(
-            nfncv, ncIndex, false);
-        if (!nextConstStr.isNull())
+        // We are in the case where we have a constant after `nc`, so we
+        // split `nc`.
+        //
+        // E.g. "abc" ++ ... = nc ++ "b" ++ ... ---> nc = "a" ++ k
+        size_t cIndex = index;
+        Node constStr = nfc.collectConstantStringAt(cIndex);
+        Assert(!constStr.isNull());
+        CVC4::String stra = constStr.getConst<String>();
+        CVC4::String strb = nextConstStr.getConst<String>();
+        // Since `nc` is non-empty, we start with character 1
+        size_t p;
+        if (isRev)
         {
-          // We are in the case where we have a constant after `nc`, so we
-          // split `nc`.
-          //
-          // E.g. "abc" ++ ... = nc ++ "b" ++ ... ---> nc = "a" ++ k
-          size_t cIndex = index;
-          Node constStr = TheoryStringsRewriter::collectConstantStringAt(
-              nfcv, cIndex, false);
-          Assert(!constStr.isNull());
-          CVC4::String stra = constStr.getConst<String>();
-          CVC4::String strb = nextConstStr.getConst<String>();
-          // Since `nc` is non-empty, we start with character 1
-          size_t p;
-          if (isRev)
-          {
-            CVC4::String stra1 = stra.prefix(stra.size() - 1);
-            p = stra.size() - stra1.roverlap(strb);
-            Trace("strings-csp-debug") << "Compute roverlap : " << constStr
-                                       << " " << nextConstStr << std::endl;
-            size_t p2 = stra1.rfind(strb);
-            p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
-            Trace("strings-csp-debug")
-                << "overlap : " << stra1 << " " << strb << " returned " << p
-                << " " << p2 << " " << (p2 == std::string::npos) << std::endl;
-          }
-          else
-          {
-            CVC4::String stra1 = stra.substr(1);
-            p = stra.size() - stra1.overlap(strb);
-            Trace("strings-csp-debug") << "Compute overlap : " << constStr
-                                       << " " << nextConstStr << std::endl;
-            size_t p2 = stra1.find(strb);
-            p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
-            Trace("strings-csp-debug")
-                << "overlap : " << stra1 << " " << strb << " returned " << p
-                << " " << p2 << " " << (p2 == std::string::npos) << std::endl;
-          }
-
-          // If we can't split off more than a single character from the
-          // constant, we might as well do regular constant/non-constant
-          // inference (see below).
-          if (p > 1)
-          {
-            NormalForm::getExplanationForPrefixEq(
-                nfc, nfnc, cIndex, ncIndex, info.d_ant);
-            Node prea = p == stra.size() ? constStr
-                                         : nm->mkConst(isRev ? stra.suffix(p)
-                                                             : stra.prefix(p));
-            Node sk = d_skCache.mkSkolemCached(
-                nc,
-                prea,
-                isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT,
-                "c_spt");
-            Trace("strings-csp")
-                << "Const Split: " << prea << " is removed from " << stra
-                << " due to " << strb << ", p=" << p << std::endl;
-            info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, prea)
-                                          : utils::mkNConcat(prea, sk));
-            info.d_new_skolem[LENGTH_SPLIT].push_back(sk);
-            info.d_id = INFER_SSPLIT_CST_PROP;
-            pinfer.push_back(info);
-            break;
-          }
+          CVC4::String stra1 = stra.prefix(stra.size() - 1);
+          p = stra.size() - stra1.roverlap(strb);
+          Trace("strings-csp-debug") << "Compute roverlap : " << constStr << " "
+                                     << nextConstStr << std::endl;
+          size_t p2 = stra1.rfind(strb);
+          p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
+          Trace("strings-csp-debug")
+              << "roverlap : " << stra1 << " " << strb << " returned " << p
+              << " " << p2 << " " << (p2 == std::string::npos) << std::endl;
+        }
+        else
+        {
+          CVC4::String stra1 = stra.substr(1);
+          p = stra.size() - stra1.overlap(strb);
+          Trace("strings-csp-debug") << "Compute overlap : " << constStr << " "
+                                     << nextConstStr << std::endl;
+          size_t p2 = stra1.find(strb);
+          p = p2 == std::string::npos ? p : (p > p2 + 1 ? p2 + 1 : p);
+          Trace("strings-csp-debug")
+              << "overlap : " << stra1 << " " << strb << " returned " << p
+              << " " << p2 << " " << (p2 == std::string::npos) << std::endl;
         }
 
-        // Since non of the other inferences apply, we just infer that `nc` has
-        // to start with the first character of the constant.
-        //
-        // E.g. "abc" ++ ... = nc ++ ... ---> nc = "a" ++ k
-        Node constStr = nfcv[index];
-        CVC4::String stra = constStr.getConst<String>();
-        Node firstChar = stra.size() == 1 ? constStr
-                                          : nm->mkConst(isRev ? stra.suffix(1)
-                                                              : stra.prefix(1));
-        Node sk = d_skCache.mkSkolemCached(
-            nc,
-            isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT,
-            "c_spt");
-        Trace("strings-csp")
-            << "Const Split: " << firstChar << " is removed from " << constStr
-            << " (serial) " << std::endl;
-        NormalForm::getExplanationForPrefixEq(
-            nfi, nfj, index, index, info.d_ant);
-        info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, firstChar)
-                                      : utils::mkNConcat(firstChar, sk));
-        info.d_new_skolem[LENGTH_SPLIT].push_back(sk);
-        info.d_id = INFER_SSPLIT_CST;
-        pinfer.push_back(info);
-        break;
-      }
-      else
-      {
-        // Either `x` or `y` is constant but we couldn't make an inference
-        // because our inferences do not work when in the reverse direction.
-        // To avoid doing a F-Split here, we break out of the loop.
-        break;
+        // If we can't split off more than a single character from the
+        // constant, we might as well do regular constant/non-constant
+        // inference (see below).
+        if (p > 1)
+        {
+          NormalForm::getExplanationForPrefixEq(
+              nfc, nfnc, cIndex, ncIndex, info.d_ant);
+          Node prea = p == stra.size() ? constStr
+                                       : nm->mkConst(isRev ? stra.suffix(p)
+                                                           : stra.prefix(p));
+          Node sk = d_skCache.mkSkolemCached(
+              nc,
+              prea,
+              isRev ? SkolemCache::SK_ID_C_SPT_REV : SkolemCache::SK_ID_C_SPT,
+              "c_spt");
+          Trace("strings-csp")
+              << "Const Split: " << prea << " is removed from " << stra
+              << " due to " << strb << ", p=" << p << std::endl;
+          info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, prea)
+                                        : utils::mkNConcat(prea, sk));
+          info.d_new_skolem[LENGTH_SPLIT].push_back(sk);
+          info.d_id = INFER_SSPLIT_CST_PROP;
+          pinfer.push_back(info);
+          break;
+        }
       }
+
+      // Since none of the other inferences apply, we just infer that `nc` has
+      // to start with the first character of the constant.
+      //
+      // E.g. "abc" ++ ... = nc ++ ... ---> nc = "a" ++ k
+      Node constStr = nfcv[index];
+      CVC4::String stra = constStr.getConst<String>();
+      Node firstChar = stra.size() == 1 ? constStr
+                                        : nm->mkConst(isRev ? stra.suffix(1)
+                                                            : stra.prefix(1));
+      Node sk = d_skCache.mkSkolemCached(
+          nc,
+          isRev ? SkolemCache::SK_ID_VC_SPT_REV : SkolemCache::SK_ID_VC_SPT,
+          "c_spt");
+      Trace("strings-csp") << "Const Split: " << firstChar
+                           << " is removed from " << constStr << " (serial) "
+                           << std::endl;
+      NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, info.d_ant);
+      info.d_conc = nc.eqNode(isRev ? utils::mkNConcat(sk, firstChar)
+                                    : utils::mkNConcat(firstChar, sk));
+      info.d_new_skolem[LENGTH_SPLIT].push_back(sk);
+      info.d_id = INFER_SSPLIT_CST;
+      pinfer.push_back(info);
+      break;
     }
 
     // Below, we deal with the case where `x` and `y` are two non-constant
diff --git a/src/theory/strings/normal_form.cpp b/src/theory/strings/normal_form.cpp
index c70845d64..ac28be245 100644
--- a/src/theory/strings/normal_form.cpp
+++ b/src/theory/strings/normal_form.cpp
@@ -17,6 +17,7 @@
 
 #include "options/strings_options.h"
 #include "theory/rewriter.h"
+#include "theory/strings/theory_strings_utils.h"
 
 using namespace std;
 using namespace CVC4::kind;
@@ -135,6 +136,30 @@ void NormalForm::getExplanation(int index, std::vector<Node>& curr_exp)
   }
 }
 
+Node NormalForm::collectConstantStringAt(size_t& index)
+{
+  std::vector<Node> c;
+  while (d_nf.size() > index && d_nf[index].isConst())
+  {
+    c.push_back(d_nf[index]);
+    index++;
+  }
+  if (!c.empty())
+  {
+    if (d_isRev)
+    {
+      std::reverse(c.begin(), c.end());
+    }
+    Node cc = Rewriter::rewrite(utils::mkConcat(STRING_CONCAT, c));
+    Assert(cc.isConst());
+    return cc;
+  }
+  else
+  {
+    return Node::null();
+  }
+}
+
 void NormalForm::getExplanationForPrefixEq(NormalForm& nfi,
                                            NormalForm& nfj,
                                            int index_i,
diff --git a/src/theory/strings/normal_form.h b/src/theory/strings/normal_form.h
index 3d1326570..35a7fadb3 100644
--- a/src/theory/strings/normal_form.h
+++ b/src/theory/strings/normal_form.h
@@ -138,6 +138,20 @@ class NormalForm
    * when isRev is false (resp. true).
    */
   void getExplanation(int index, std::vector<Node>& curr_exp);
+
+  /**
+   * Collects the constant string starting at a given index, i.e. concatenates
+   * all the consecutive constant strings. If the normal form is reverse order,
+   * this function searches backwards but the result will be in the original
+   * order.
+   *
+   * @param index The index to start at, updated to point to the first
+   *              non-constant component of the normal form or set equal to the
+   *              size of the normal form if the remainder is all constants
+   * @return The combined string constants
+   */
+  Node collectConstantStringAt(size_t& index);
+
   /** get explanation for prefix equality
    *
    * This adds to curr_exp the reason why the prefix of nfi up to index index_i
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 0016e5658..e9a4ebfd1 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -3496,27 +3496,6 @@ bool TheoryStringsRewriter::canConstantContainList( Node c, std::vector< Node >&
   return true;
 }
 
-Node TheoryStringsRewriter::collectConstantStringAt(
-    const std::vector<Node>& vec, size_t& end_index, bool isRev)
-{
-  std::vector< Node > c;
-  while( vec.size()>end_index && vec[ end_index ].isConst() ){
-    c.push_back( vec[ end_index ] );
-    end_index++;
-    //break;
-  }
-  if( !c.empty() ){
-    if( isRev ){
-      std::reverse( c.begin(), c.end() );
-    }
-    Node cc = Rewriter::rewrite(utils::mkConcat(STRING_CONCAT, c));
-    Assert(cc.isConst());
-    return cc;
-  }else{
-    return Node::null();
-  }
-}
-
 bool TheoryStringsRewriter::stripSymbolicLength(std::vector<Node>& n1,
                                                 std::vector<Node>& nr,
                                                 int dir,
diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h
index 8bef8c110..7d76234bc 100644
--- a/src/theory/strings/theory_strings_rewriter.h
+++ b/src/theory/strings/theory_strings_rewriter.h
@@ -283,9 +283,6 @@ class TheoryStringsRewriter : public TheoryRewriter
   * same as above but with n = str.++( l ) instead of l
   */
   static bool canConstantContainConcat(Node c, Node n, int& firstc, int& lastc);
-  static Node collectConstantStringAt(const std::vector<Node>& vec,
-                                      size_t& end_index,
-                                      bool isRev);
 
   /** strip symbolic length
    *
-- 
2.30.2