From 4b7de240edeee362a0b9ca440c22a8b0744cf34b Mon Sep 17 00:00:00 2001
From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Wed, 26 Feb 2020 18:04:16 -0600
Subject: [PATCH] Some initial work on using words (#3819)

Towards support for sequences (CVC4/cvc4-projects#23.)
---
 .../strings/theory_strings_rewriter.cpp       | 91 ++++++++++---------
 src/theory/strings/word.cpp                   | 10 ++
 src/theory/strings/word.h                     | 78 ++++++++--------
 3 files changed, 101 insertions(+), 78 deletions(-)
diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp
index 9cd4c1ecc..e58a51e63 100644
--- a/src/theory/strings/theory_strings_rewriter.cpp
+++ b/src/theory/strings/theory_strings_rewriter.cpp
@@ -25,6 +25,7 @@
 #include "theory/arith/arith_msum.h"
 #include "theory/strings/regexp_operation.h"
 #include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
 #include "theory/theory.h"
 #include "util/integer.h"
 #include "util/rational.h"
@@ -357,20 +358,23 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node)
     if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
         && c[1].back().isConst())
     {
-      String cs[2];
+      Node cs[2];
+      size_t csl[2];
       for (unsigned j = 0; j < 2; j++)
       {
-        cs[j] = c[j].back().getConst<String>();
+        cs[j] = c[j].back();
+        csl[j] = Word::getLength(cs[j]);
       }
-      unsigned larger = cs[0].size() > cs[1].size() ? 0 : 1;
-      unsigned smallerSize = cs[1 - larger].size();
+      size_t larger = csl[0] > csl[1] ? 0 : 1;
+      size_t smallerSize = csl[1 - larger];
       if (cs[1 - larger]
-          == (i == 0 ? cs[larger].suffix(smallerSize)
-                     : cs[larger].prefix(smallerSize)))
+          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
+                     : Word::prefix(cs[larger], smallerSize)))
       {
-        unsigned sizeDiff = cs[larger].size() - smallerSize;
-        c[larger][c[larger].size() - 1] = nm->mkConst(
-            i == 0 ? cs[larger].prefix(sizeDiff) : cs[larger].suffix(sizeDiff));
+        size_t sizeDiff = csl[larger] - smallerSize;
+        c[larger][c[larger].size() - 1] =
+            i == 0 ? Word::prefix(cs[larger], sizeDiff)
+                   : Word::suffix(cs[larger], sizeDiff);
         c[1 - larger].pop_back();
         changed = true;
       }
@@ -393,10 +397,10 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node)
   for (unsigned i = 0; i < 2; i++)
   {
     Node cn = checkEntailHomogeneousString(node[i]);
-    if (!cn.isNull() && cn.getConst<String>().size() > 0)
+    if (!cn.isNull() && !Word::isEmpty(cn))
     {
       Assert(cn.isConst());
-      Assert(cn.getConst<String>().size() == 1);
+      Assert(Word::getLength(cn) == 1);
       unsigned hchar = cn.getConst<String>().front();
 
       // The operands of the concat on each side of the equality without
@@ -758,7 +762,8 @@ Node TheoryStringsRewriter::rewriteConcat(Node node)
     }
     if(!tmpNode.isConst()) {
       if(!preNode.isNull()) {
-        if(preNode.getKind() == kind::CONST_STRING && !preNode.getConst<String>().isEmptyString() ) {
+        if (preNode.isConst() && !Word::isEmpty(preNode))
+        {
           node_vec.push_back( preNode );
         }
         preNode = Node::null();
@@ -768,11 +773,15 @@ Node TheoryStringsRewriter::rewriteConcat(Node node)
       if( preNode.isNull() ){
         preNode = tmpNode;
       }else{
-        preNode = NodeManager::currentNM()->mkConst( preNode.getConst<String>().concat( tmpNode.getConst<String>() ) );
+        std::vector<Node> vec;
+        vec.push_back(preNode);
+        vec.push_back(tmpNode);
+        preNode = Word::mkWord(vec);
       }
     }
   }
-  if( !preNode.isNull() && ( preNode.getKind()!=kind::CONST_STRING || !preNode.getConst<String>().isEmptyString() ) ){
+  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
+  {
     node_vec.push_back( preNode );
   }
 
@@ -828,7 +837,7 @@ Node TheoryStringsRewriter::rewriteConcatRegExp(TNode node)
       }
     }
     else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
-             && c[0].getConst<String>().isEmptyString())
+             && Word::isEmpty(c[0]))
     {
       changed = true;
       emptyRe = c;
@@ -969,9 +978,8 @@ Node TheoryStringsRewriter::rewriteStarRegExp(TNode node)
     // ((R)*)* ---> R*
     return returnRewrite(node, node[0], "re-star-nested-star");
   }
-  else if (node[0].getKind() == STRING_TO_REGEXP
-           && node[0][0].getKind() == CONST_STRING
-           && node[0][0].getConst<String>().isEmptyString())
+  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
+           && Word::isEmpty(node[0][0]))
   {
     // ("")* ---> ""
     return returnRewrite(node, node[0], "re-star-empty-string");
@@ -991,8 +999,8 @@ Node TheoryStringsRewriter::rewriteStarRegExp(TNode node)
       std::vector<Node> node_vec;
       for (const Node& nc : node[0])
       {
-        if (nc.getKind() == STRING_TO_REGEXP && nc[0].getKind() == CONST_STRING
-            && nc[0].getConst<String>().isEmptyString())
+        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
+            && Word::isEmpty(nc[0]))
         {
           // can be removed
           changed = true;
@@ -1571,18 +1579,19 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
   {
     Kind nk0 = node[0].getKind();
     if( node[0].isConst() ){
-      retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( node[0].getConst<String>().size() ) );
+      retNode = nm->mkConst(Rational(Word::getLength(node[0])));
     }
     else if (nk0 == kind::STRING_CONCAT)
     {
       Node tmpNode = node[0];
       if(tmpNode.isConst()) {
-        retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode.getConst<String>().size() ) );
+        retNode = nm->mkConst(Rational(Word::getLength(tmpNode)));
       }else if( tmpNode.getKind()==kind::STRING_CONCAT ){
         std::vector<Node> node_vec;
         for(unsigned int i=0; i<tmpNode.getNumChildren(); ++i) {
           if(tmpNode[i].isConst()) {
-            node_vec.push_back( NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode[i].getConst<String>().size() ) ) );
+            node_vec.push_back(
+                nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
           } else {
             node_vec.push_back( NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, tmpNode[i]) );
           }
@@ -1758,7 +1767,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
   NodeManager* nm = NodeManager::currentNM();
   if (node[0].isConst())
   {
-    if (node[0].getConst<String>().size() == 0)
+    if (Word::isEmpty(node[0]))
     {
       Node ret = node[0];
       return returnRewrite(node, ret, "ss-emptystr");
@@ -1773,13 +1782,13 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
       {
         // start beyond the maximum size of strings
         // thus, it must be beyond the end point of this string
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-const-start-max-oob");
       }
       else if (node[1].getConst<Rational>().sgn() < 0)
       {
         // start before the beginning of the string
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-const-start-neg");
       }
       else
@@ -1788,7 +1797,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
         if (start >= s.size())
         {
           // start beyond the end of the string
-          Node ret = nm->mkConst(::CVC4::String(""));
+          Node ret = Word::mkEmptyWord(node.getType());
           return returnRewrite(node, ret, "ss-const-start-oob");
         }
       }
@@ -1800,7 +1809,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
       }
       else if (node[2].getConst<Rational>().sgn() <= 0)
       {
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-const-len-non-pos");
       }
       else
@@ -1827,12 +1836,12 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
   // if entailed non-positive length or negative start point
   if (checkEntailArith(zero, node[1], true))
   {
-    Node ret = nm->mkConst(::CVC4::String(""));
+    Node ret = Word::mkEmptyWord(node.getType());
     return returnRewrite(node, ret, "ss-start-neg");
   }
   else if (checkEntailArith(zero, node[2]))
   {
-    Node ret = nm->mkConst(::CVC4::String(""));
+    Node ret = Word::mkEmptyWord(node.getType());
     return returnRewrite(node, ret, "ss-len-non-pos");
   }
 
@@ -1858,7 +1867,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
     // be empty.
     if (checkEntailArith(node[1], node[0][2]))
     {
-      Node ret = nm->mkConst(::CVC4::String(""));
+      Node ret = Word::mkEmptyWord(node.getType());
       return returnRewrite(node, ret, "ss-start-geq-len");
     }
   }
@@ -1942,7 +1951,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
           Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
       if (checkEntailArithWithAssumption(n1_lt_tot_len, zero, node[2], false))
       {
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-start-entails-zero-len");
       }
 
@@ -1951,7 +1960,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
           Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
       if (checkEntailArithWithAssumption(non_zero_len, node[1], tot_len, false))
       {
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-non-zero-len-entails-oob");
       }
 
@@ -1960,14 +1969,14 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node)
           Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
       if (checkEntailArithWithAssumption(geq_zero_start, zero, tot_len, false))
       {
-        Node ret = nm->mkConst(String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-geq-zero-start-entails-emp-s");
       }
 
       // (str.substr s x x) ---> "" if (str.len s) <= 1
       if (node[1] == node[2] && checkEntailLengthOne(node[0]))
       {
-        Node ret = nm->mkConst(::CVC4::String(""));
+        Node ret = Word::mkEmptyWord(node.getType());
         return returnRewrite(node, ret, "ss-len-one-z-z");
       }
     }
@@ -2053,13 +2062,11 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) {
     CVC4::String s = node[0].getConst<String>();
     if (node[1].isConst())
     {
-      CVC4::String t = node[1].getConst<String>();
-      Node ret =
-          NodeManager::currentNM()->mkConst(s.find(t) != std::string::npos);
+      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
       return returnRewrite(node, ret, "ctn-const");
     }else{
       Node t = node[1];
-      if (s.size() == 0)
+      if (Word::isEmpty(node[0]))
       {
         Node len1 =
             NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
@@ -2103,14 +2110,14 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) {
   }
   if (node[1].isConst())
   {
-    CVC4::String t = node[1].getConst<String>();
-    if (t.size() == 0)
+    size_t len = Word::getLength(node[1]);
+    if (len == 0)
     {
       // contains( x, "" ) ---> true
       Node ret = NodeManager::currentNM()->mkConst(true);
       return returnRewrite(node, ret, "ctn-rhs-emptystr");
     }
-    else if (t.size() == 1)
+    else if (len == 1)
     {
       // The following rewrites are specific to a single character second
       // argument of contains, where we can reason that this character is
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
index 655df7da3..a0ee0d224 100644
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -22,6 +22,16 @@ namespace CVC4 {
 namespace theory {
 namespace strings {
 
+Node Word::mkEmptyWord(TypeNode tn)
+{
+  if (tn.isString())
+  {
+    return mkEmptyWord(CONST_STRING);
+  }
+  Unimplemented();
+  return Node::null();
+}
+
 Node Word::mkEmptyWord(Kind k)
 {
   NodeManager* nm = NodeManager::currentNM();
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
index e4d10247a..74f50ee9a 100644
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "expr/node.h"
+#include "expr/type_node.h"
 
 namespace CVC4 {
 namespace theory {
@@ -28,7 +29,10 @@ namespace strings {
 // ------------------------------ for words (string or sequence constants)
 class Word
 {
-public:
+ public:
+  /** make empty constant of type tn */
+  static Node mkEmptyWord(TypeNode tn);
+
   /** make empty constant of kind k */
   static Node mkEmptyWord(Kind k);
 
@@ -45,9 +49,9 @@ public:
   static std::size_t find(TNode x, TNode y, std::size_t start = 0);
 
   /**
-  * Return the first position y occurs in x searching from the end of x, or
-  * std::string::npos otherwise
-  */
+   * Return the first position y occurs in x searching from the end of x, or
+   * std::string::npos otherwise
+   */
   static std::size_t rfind(TNode x, TNode y, std::size_t start = 0);
 
   /** Returns true if y is a prefix of x */
@@ -62,8 +66,9 @@ public:
   /** Return the substring/subsequence of x starting at index i */
   static Node substr(TNode x, std::size_t i);
 
-  /** Return the substring/subsequence of x starting at index i with size at most
-  * j */
+  /** Return the substring/subsequence of x starting at index i with size at
+   * most
+   * j */
   static Node substr(TNode x, std::size_t i, std::size_t j);
 
   /** Return the prefix of x of size at most i */
@@ -73,42 +78,43 @@ public:
   static Node suffix(TNode x, std::size_t i);
 
   /**
-  * Checks if there is any overlap between word x and another word y. This
-  * corresponds to checking whether one string contains the other and whether a
-  * substring/subsequence of one is a prefix of the other and vice-versa.
-  *
-  * @param x The first string
-  * @param y The second string
-  * @return True if there is an overlap, false otherwise
-  */
+   * Checks if there is any overlap between word x and another word y. This
+   * corresponds to checking whether one string contains the other and whether a
+   * substring/subsequence of one is a prefix of the other and vice-versa.
+   *
+   * @param x The first string
+   * @param y The second string
+   * @return True if there is an overlap, false otherwise
+   */
   static bool noOverlapWith(TNode x, TNode y);
 
   /** overlap
-  *
-  * if overlap returns m>0,
-  * then the maximal suffix of this string that is a prefix of y is of length m.
-  *
-  * For example, if x is "abcdef", then:
-  * x.overlap("defg") = 3
-  * x.overlap("ab") = 0
-  * x.overlap("d") = 0
-  * x.overlap("bcdefdef") = 5
-  */
+   *
+   * if overlap returns m>0,
+   * then the maximal suffix of this string that is a prefix of y is of length
+   * m.
+   *
+   * For example, if x is "abcdef", then:
+   * x.overlap("defg") = 3
+   * x.overlap("ab") = 0
+   * x.overlap("d") = 0
+   * x.overlap("bcdefdef") = 5
+   */
   static std::size_t overlap(TNode x, TNode y);
 
   /** reverse overlap
-  *
-  * if roverlap returns m>0,
-  * then the maximal prefix of this word that is a suffix of y is of length m.
-  *
-  * For example, if x is "abcdef", then:
-  * x.roverlap("aaabc") = 3
-  * x.roverlap("def") = 0
-  * x.roverlap("d") = 0
-  * x.roverlap("defabcde") = 5
-  *
-  * Notice that x.overlap(y) = y.roverlap(x)
-  */
+   *
+   * if roverlap returns m>0,
+   * then the maximal prefix of this word that is a suffix of y is of length m.
+   *
+   * For example, if x is "abcdef", then:
+   * x.roverlap("aaabc") = 3
+   * x.roverlap("def") = 0
+   * x.roverlap("d") = 0
+   * x.roverlap("defabcde") = 5
+   *
+   * Notice that x.overlap(y) = y.roverlap(x)
+   */
   static std::size_t roverlap(TNode x, TNode y);
 };
 
-- 
2.30.2