From: Andrew Reynolds <andrew.j.reynolds@gmail.com>
Date: Mon, 9 Mar 2020 18:35:59 +0000 (-0500)
Subject: Convert more uses of strings to words (#3921)
X-Git-Tag: cvc5-1.0.0~3547
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9f77ec44decf18fe23c738988281373795dcca0d;p=cvc5.git

Convert more uses of strings to words (#3921)

Towards theory of sequences.

Also adds documentation to strncmp/rstrncmp and adds them to the Word interface.
---

diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp
index 5414c9b98..e2cafa4d3 100644
--- a/src/theory/strings/core_solver.cpp
+++ b/src/theory/strings/core_solver.cpp
@@ -16,10 +16,10 @@
 
 #include "theory/strings/core_solver.h"
 
-#include "theory/strings/theory_strings_utils.h"
 #include "options/strings_options.h"
 #include "theory/strings/theory_strings_rewriter.h"
-
+#include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
 
 using namespace std;
 using namespace CVC4::context;
@@ -37,7 +37,7 @@ d_nf_pairs(c)
   d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
   d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
   d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1));
-  d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
+  d_emptyString = Word::mkEmptyWord(CONST_STRING);
   d_true = NodeManager::currentNM()->mkConst( true );
   d_false = NodeManager::currentNM()->mkConst( false );
 }
@@ -1071,10 +1071,10 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     Assert(!d_state.areEqual(x, y));
 
     std::vector<Node> lenExp;
-    Node xLen = d_state.getLength(x, lenExp);
-    Node yLen = d_state.getLength(y, lenExp);
+    Node xLenTerm = d_state.getLength(x, lenExp);
+    Node yLenTerm = d_state.getLength(y, lenExp);
 
-    if (d_state.areEqual(xLen, yLen))
+    if (d_state.areEqual(xLenTerm, yLenTerm))
     {
       // `x` and `y` have the same length. We infer that the two components
       // have to be the same.
@@ -1083,7 +1083,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       Trace("strings-solve-debug")
           << "Simple Case 2 : string lengths are equal" << std::endl;
       Node eq = x.eqNode(y);
-      Node leneq = xLen.eqNode(yLen);
+      Node leneq = xLenTerm.eqNode(yLenTerm);
       NormalForm::getExplanationForPrefixEq(nfi, nfj, index, index, lenExp);
       lenExp.push_back(leneq);
       d_im.sendInference(lenExp, eq, "N_Unify");
@@ -1137,13 +1137,14 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       // Constants in both normal forms.
       //
       // E.g. "abc" ++ ... = "ab" ++ ...
-      String c1 = x.getConst<String>();
-      String c2 = y.getConst<String>();
+      size_t lenX = Word::getLength(x);
+      size_t lenY = Word::getLength(y);
       Trace("strings-solve-debug")
           << "Simple Case 4 : Const Split : " << x << " vs " << y
           << " at index " << index << ", isRev = " << isRev << std::endl;
-      size_t minLen = std::min(c1.size(), c2.size());
-      bool isSameFix = isRev ? c1.rstrncmp(c2, minLen) : c1.strncmp(c2, minLen);
+      size_t minLen = std::min(lenX, lenY);
+      bool isSameFix =
+          isRev ? Word::rstrncmp(x, y, minLen) : Word::strncmp(x, y, minLen);
       if (isSameFix)
       {
         // The shorter constant is a prefix/suffix of the longer constant. We
@@ -1152,20 +1153,20 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
         //
         // E.g. "abc" ++ x' ++ ... = "ab" ++ y' ++ ... --->
         //      "c" ++ x' ++ ... = y' ++ ...
-        bool c1Shorter = c1.size() < c2.size();
-        NormalForm& nfl = c1Shorter ? nfj : nfi;
-        const String& cl = c1Shorter ? c2 : c1;
-        Node ns = c1Shorter ? x : y;
+        bool xShorter = lenX < lenY;
+        NormalForm& nfl = xShorter ? nfj : nfi;
+        Node cl = xShorter ? y : x;
+        Node ns = xShorter ? x : y;
 
         Node remainderStr;
         if (isRev)
         {
-          int newlen = cl.size() - minLen;
-          remainderStr = nm->mkConst(cl.substr(0, newlen));
+          size_t newlen = std::max(lenX, lenY) - minLen;
+          remainderStr = Word::substr(cl, 0, newlen);
         }
         else
         {
-          remainderStr = nm->mkConst(cl.substr(minLen));
+          remainderStr = Word::substr(cl, minLen);
         }
         Trace("strings-solve-debug-test")
             << "Break normal form of " << cl << " into " << ns << ", "
@@ -1195,7 +1196,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     info.d_j = nfj.d_base;
     info.d_rev = isRev;
     Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc);
-    if (!d_state.areDisequal(xLen, yLen) && !d_state.areEqual(xLen, yLen)
+    if (!d_state.areDisequal(xLenTerm, yLenTerm) && !d_state.areEqual(xLenTerm, yLenTerm)
         && !x.isConst()
         && !y.isConst())  // AJR: remove the latter 2 conditions?
     {
@@ -1207,7 +1208,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
       Trace("strings-solve-debug")
           << "Non-simple Case 1 : string lengths neither equal nor disequal"
           << std::endl;
-      Node lenEq = nm->mkNode(EQUAL, xLen, yLen);
+      Node lenEq = nm->mkNode(EQUAL, xLenTerm, yLenTerm);
       lenEq = Rewriter::rewrite(lenEq);
       info.d_conc = nm->mkNode(OR, lenEq, lenEq.negate());
       info.d_pending_phase[lenEq] = true;
@@ -1395,7 +1396,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     // is a prefix/suffix of the other.
     //
     // E.g. x ++ ... = y ++ ... ---> (x = y ++ k) v (y = x ++ k)
-    Assert(d_state.areDisequal(xLen, yLen));
+    Assert(d_state.areDisequal(xLenTerm, yLenTerm));
     Assert(x.getKind() != CONST_STRING);
     Assert(y.getKind() != CONST_STRING);
 
@@ -1411,8 +1412,8 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
         // do not infer constants are larger than variables
         if (t.getKind() != CONST_STRING)
         {
-          Node lt1 = e == 0 ? xLen : yLen;
-          Node lt2 = e == 0 ? yLen : xLen;
+          Node lt1 = e == 0 ? xLenTerm : yLenTerm;
+          Node lt2 = e == 0 ? yLenTerm : xLenTerm;
           Node entLit = Rewriter::rewrite(nm->mkNode(GT, lt1, lt2));
           std::pair<bool, Node> et = d_state.entailmentCheck(
               options::TheoryOfMode::THEORY_OF_TYPE_BASED, entLit);
@@ -1465,7 +1466,7 @@ void CoreSolver::processSimpleNEq(NormalForm& nfi,
     }
     else
     {
-      Node ldeq = nm->mkNode(EQUAL, xLen, yLen).negate();
+      Node ldeq = nm->mkNode(EQUAL, xLenTerm, yLenTerm).negate();
       info.d_ant.push_back(ldeq);
       info.d_conc = nm->mkNode(OR, eq1, eq2);
       info.d_id = INFER_SSPLIT_VAR;
@@ -1625,13 +1626,12 @@ CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi,
   {
     Trace("strings-loop") << "Strings::Loop: Const Normal Breaking."
                           << std::endl;
-    CVC4::String s = t_yz.getConst<CVC4::String>();
-    unsigned size = s.size();
+    unsigned size = Word::getLength(t_yz);
     std::vector<Node> vconc;
     for (unsigned len = 1; len <= size; len++)
     {
-      Node y = nm->mkConst(s.substr(0, len));
-      Node z = nm->mkConst(s.substr(len, size - len));
+      Node y = Word::substr(t_yz, 0, len);
+      Node z = Word::substr(t_yz, len, size - len);
       Node restr = s_zy;
       Node cc;
       if (r != d_emptyString)
@@ -1770,8 +1770,9 @@ void CoreSolver::processDeq( Node ni, Node nj ) {
                 return;
               }else{
                 //split on first character
-                CVC4::String str = const_k.getConst<String>();
-                Node firstChar = str.size() == 1 ? const_k : NodeManager::currentNM()->mkConst( str.prefix( 1 ) );
+                Node firstChar = Word::getLength(const_k) == 1
+                                     ? const_k
+                                     : Word::prefix(const_k, 1);
                 if (d_state.areEqual(lnck, d_one))
                 {
                   if (d_state.areDisequal(firstChar, nconst_k))
@@ -1947,26 +1948,31 @@ int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >&
       if (!d_state.areEqual(i, j))
       {
         if( i.getKind()==kind::CONST_STRING && j.getKind()==kind::CONST_STRING ) {
-          unsigned int len_short = i.getConst<String>().size() < j.getConst<String>().size() ? i.getConst<String>().size() : j.getConst<String>().size();
+          size_t lenI = Word::getLength(i);
+          size_t lenJ = Word::getLength(j);
+          unsigned int len_short = lenI < lenJ ? lenI : lenJ;
           bool isSameFix = isRev ? i.getConst<String>().rstrncmp(j.getConst<String>(), len_short): i.getConst<String>().strncmp(j.getConst<String>(), len_short);
           if( isSameFix ) {
             //same prefix/suffix
             //k is the index of the string that is shorter
-            Node nk = i.getConst<String>().size() < j.getConst<String>().size() ? i : j;
-            Node nl = i.getConst<String>().size() < j.getConst<String>().size() ? j : i;
+            Node nk = lenI < lenJ ? i : j;
+            Node nl = lenI < lenJ ? j : i;
             Node remainderStr;
             if( isRev ){
-              int new_len = nl.getConst<String>().size() - len_short;
-              remainderStr = NodeManager::currentNM()->mkConst( nl.getConst<String>().substr(0, new_len) );
+              int new_len = Word::getLength(nl) - len_short;
+              remainderStr = Word::substr(nl, 0, new_len);
               Trace("strings-solve-debug-test") << "Rev. Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl;
             } else {
-              remainderStr = NodeManager::currentNM()->mkConst( nl.getConst<String>().substr( len_short ) );
+              remainderStr = Word::substr(nl, len_short);
               Trace("strings-solve-debug-test") << "Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl;
             }
-            if( i.getConst<String>().size() < j.getConst<String>().size() ) {
+            if (lenI < lenJ)
+            {
               nfj.insert( nfj.begin() + index + 1, remainderStr );
               nfj[index] = nfi[index];
-            } else {
+            }
+            else
+            {
               nfi.insert( nfi.begin() + index + 1, remainderStr );
               nfi[index] = nfj[index];
             }
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp
index 530564a35..f91b59834 100644
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -20,6 +20,7 @@
 #include "options/strings_options.h"
 #include "theory/strings/theory_strings_rewriter.h"
 #include "theory/strings/theory_strings_utils.h"
+#include "theory/strings/word.h"
 
 using namespace CVC4;
 using namespace CVC4::kind;
@@ -29,11 +30,8 @@ namespace theory {
 namespace strings {
 
 RegExpOpr::RegExpOpr()
-    : d_emptyString(NodeManager::currentNM()->mkConst(::CVC4::String(""))),
-      d_true(NodeManager::currentNM()->mkConst(true)),
+    : d_true(NodeManager::currentNM()->mkConst(true)),
       d_false(NodeManager::currentNM()->mkConst(false)),
-      d_emptySingleton(NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP,
-                                                        d_emptyString)),
       d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
                                                      std::vector<Node>{})),
       d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))),
@@ -42,6 +40,10 @@ RegExpOpr::RegExpOpr()
                                                std::vector<Node>{})),
       d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
 {
+  d_emptyString = Word::mkEmptyWord(CONST_STRING);
+
+  d_emptySingleton =
+      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
   d_lastchar = utils::getAlphabetCardinality() - 1;
 }
 
@@ -295,6 +297,7 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
 
   int ret = 1;
   retNode = d_emptyRegexp;
+  NodeManager* nm = NodeManager::currentNM();
 
   PairNodeStr dv = std::make_pair( r, c );
   if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
@@ -332,10 +335,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
           if(tmp == d_emptyString) {
             ret = 2;
           } else {
-            if (tmp.getConst<CVC4::String>().front() == c.front())
+            if (tmp.getConst<String>().front() == c.front())
             {
-              retNode =  NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
-                tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
+              retNode =
+                  nm->mkNode(STRING_TO_REGEXP,
+                             Word::getLength(tmp) == 1 ? d_emptyString
+                                                       : Word::substr(tmp, 1));
             } else {
               ret = 2;
             }
@@ -346,10 +351,12 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
           if(tmp.getKind() == kind::STRING_CONCAT) {
             Node t2 = tmp[0];
             if(t2.isConst()) {
-              if (t2.getConst<CVC4::String>().front() == c.front())
+              if (t2.getConst<String>().front() == c.front())
               {
-                Node n =  NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
-                  tmp.getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( tmp.getConst< CVC4::String >().substr(1) ) );
+                Node n = nm->mkNode(STRING_TO_REGEXP,
+                                    Word::getLength(tmp) == 1
+                                        ? d_emptyString
+                                        : Word::substr(tmp, 1));
                 std::vector< Node > vec_nodes;
                 vec_nodes.push_back(n);
                 for(unsigned i=1; i<tmp.getNumChildren(); i++) {
@@ -541,6 +548,7 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
   Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
   Node retNode = d_emptyRegexp;
   PairNodeStr dv = std::make_pair( r, c );
+  NodeManager* nm = NodeManager::currentNM();
   if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
     retNode = d_dv_cache[dv];
   } else if( c.isEmptyString() ){
@@ -576,10 +584,12 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
           if(r[0] == d_emptyString) {
             retNode = d_emptyRegexp;
           } else {
-            if (r[0].getConst<CVC4::String>().front() == c.front())
+            if (r[0].getConst<String>().front() == c.front())
             {
-              retNode =  NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP,
-                r[0].getConst< CVC4::String >().size() == 1 ? d_emptyString : NodeManager::currentNM()->mkConst( r[0].getConst< CVC4::String >().substr(1) ) );
+              retNode = nm->mkNode(STRING_TO_REGEXP,
+                                   Word::getLength(r[0]) == 1
+                                       ? d_emptyString
+                                       : Word::substr(r[0], 1));
             } else {
               retNode = d_emptyRegexp;
             }
@@ -743,7 +753,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
       case kind::STRING_TO_REGEXP: {
         Node st = Rewriter::rewrite(r[0]);
         if(st.isConst()) {
-          CVC4::String s = st.getConst< CVC4::String >();
+          String s = st.getConst<String>();
           if(s.size() != 0) {
             unsigned sc = s.front();
             sc = String::convertUnsignedIntToCode(sc);
@@ -753,7 +763,7 @@ void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
         else if (st.getKind() == kind::STRING_CONCAT)
         {
           if(st[0].isConst()) {
-            CVC4::String s = st[0].getConst<CVC4::String>();
+            String s = st[0].getConst<String>();
             unsigned sc = s.front();
             sc = String::convertUnsignedIntToCode(sc);
             cset.insert(sc);
@@ -1638,7 +1648,7 @@ Node RegExpOpr::intersect(Node r1, Node r2, bool &spflag) {
 //printing
 std::string RegExpOpr::niceChar(Node r) {
   if(r.isConst()) {
-    std::string s = r.getConst<CVC4::String>().toString() ;
+    std::string s = r.getConst<String>().toString();
     return s == "." ? "\\." : s;
   } else {
     std::string ss = "$" + r.toString();
diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp
index a5604925c..5015db3f1 100644
--- a/src/theory/strings/theory_strings.cpp
+++ b/src/theory/strings/theory_strings.cpp
@@ -29,6 +29,7 @@
 #include "theory/strings/theory_strings_rewriter.h"
 #include "theory/strings/theory_strings_utils.h"
 #include "theory/strings/type_enumerator.h"
+#include "theory/strings/word.h"
 #include "theory/theory_model.h"
 #include "theory/valuation.h"
 
@@ -111,7 +112,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
   d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
   d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
   d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1));
-  d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") );
+  d_emptyString = Word::mkEmptyWord(CONST_STRING);
   d_true = NodeManager::currentNM()->mkConst( true );
   d_false = NodeManager::currentNM()->mkConst( false );
 
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp
index a0ee0d224..dd573b68c 100644
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -78,6 +78,34 @@ size_t Word::getLength(TNode x)
 
 bool Word::isEmpty(TNode x) { return getLength(x) == 0; }
 
+bool Word::strncmp(TNode x, TNode y, std::size_t n)
+{
+  Kind k = x.getKind();
+  if (k == CONST_STRING)
+  {
+    Assert(y.getKind() == CONST_STRING);
+    String sx = x.getConst<String>();
+    String sy = y.getConst<String>();
+    return sx.strncmp(sy, n);
+  }
+  Unimplemented();
+  return false;
+}
+
+bool Word::rstrncmp(TNode x, TNode y, std::size_t n)
+{
+  Kind k = x.getKind();
+  if (k == CONST_STRING)
+  {
+    Assert(y.getKind() == CONST_STRING);
+    String sx = x.getConst<String>();
+    String sy = y.getConst<String>();
+    return sx.rstrncmp(sy, n);
+  }
+  Unimplemented();
+  return false;
+}
+
 std::size_t Word::find(TNode x, TNode y, std::size_t start)
 {
   Kind k = x.getKind();
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h
index 74f50ee9a..7b813a0b2 100644
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -45,6 +45,22 @@ class Word
   /** Return true if x is empty */
   static bool isEmpty(TNode x);
 
+  /** string compare
+   *
+   * Returns true if x is equal to y for their first n characters.
+   * If n is larger than the length of x or y, this method returns
+   * true if and only if x is equal to y.
+   */
+  static bool strncmp(TNode x, TNode y, std::size_t n);
+
+  /** reverse string compare
+   *
+   * Returns true if x is equal to y for their last n characters.
+   * If n is larger than the length of tx or y, this method returns
+   * true if and only if x is equal to y.
+   */
+  static bool rstrncmp(TNode x, TNode y, std::size_t n);
+
   /** Return the first position y occurs in x, or std::string::npos otherwise */
   static std::size_t find(TNode x, TNode y, std::size_t start = 0);
 
diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp
index 4dfe68b51..00066edb6 100644
--- a/src/util/regexp.cpp
+++ b/src/util/regexp.cpp
@@ -88,8 +88,8 @@ String String::concat(const String &other) const {
   return String(ret_vec);
 }
 
-bool String::strncmp(const String &y, const std::size_t np) const {
-  std::size_t n = np;
+bool String::strncmp(const String& y, std::size_t n) const
+{
   std::size_t b = (size() >= y.size()) ? size() : y.size();
   std::size_t s = (size() <= y.size()) ? size() : y.size();
   if (n > s) {
@@ -105,8 +105,8 @@ bool String::strncmp(const String &y, const std::size_t np) const {
   return true;
 }
 
-bool String::rstrncmp(const String &y, const std::size_t np) const {
-  std::size_t n = np;
+bool String::rstrncmp(const String& y, std::size_t n) const
+{
   std::size_t b = (size() >= y.size()) ? size() : y.size();
   std::size_t s = (size() <= y.size()) ? size() : y.size();
   if (n > s) {
diff --git a/src/util/regexp.h b/src/util/regexp.h
index 1cde53687..731736f72 100644
--- a/src/util/regexp.h
+++ b/src/util/regexp.h
@@ -109,8 +109,18 @@ class CVC4_PUBLIC String {
   bool operator<=(const String& y) const { return cmp(y) <= 0; }
   bool operator>=(const String& y) const { return cmp(y) >= 0; }
 
-  bool strncmp(const String& y, const std::size_t np) const;
-  bool rstrncmp(const String& y, const std::size_t np) const;
+  /**
+   * Returns true if this string is equal to y for their first n characters.
+   * If n is larger than the length of this string or y, this method returns
+   * true if and only if this string is equal to y.
+   */
+  bool strncmp(const String& y, std::size_t n) const;
+  /**
+   * Returns true if this string is equal to y for their last n characters.
+   * Similar to strncmp, if n is larger than the length of this string or y,
+   * this method returns true if and only if this string is equal to y.
+   */
+  bool rstrncmp(const String& y, std::size_t n) const;
 
   /* toString
   * Converts this string to a std::string.