Convert more uses of strings to words (#4584)

author Andrew Reynolds <andrew.j.reynolds@gmail.com>

Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)

committer GitHub <noreply@github.com>

Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)
author Andrew Reynolds <andrew.j.reynolds@gmail.com>
Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)
committer GitHub <noreply@github.com>
Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)
diff --git a/src/expr/type_node.cpp b/src/expr/type_node.cpp

index edf88846fdd71899214b238332835ca162cfbace..ff4de9dc88ddaf150e6d9f27eb24dc6e27c6b4d2 100644 (file)
--- a/src/expr/type_node.cpp
+++ b/src/expr/type_node.cpp
@@ -304,11 +304,7 @@ Node TypeNode::mkGroundValue() const
    return *te;
  }
  
-bool TypeNode::isStringLike() const
-{
-  // TODO (cvc4-projects #23): sequence here
-  return isString();
-}
+bool TypeNode::isStringLike() const { return isString() || isSequence(); }
  
  bool TypeNode::isSubtypeOf(TypeNode t) const {
    if(*this == t) {
diff --git a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp

index 8df43087f5471b8eb4c9a870e84f5cefbc459298..074b023a2f37eedfefb069a128a3e8e9c6c7c53e 100644 (file)
--- a/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
+++ b/src/theory/quantifiers/sygus/sygus_grammar_cons.cpp
@@ -459,6 +459,10 @@ void CegGrammarConstructor::collectSygusGrammarTypesFor(
          // theory of strings shares the integer type
          TypeNode intType = NodeManager::currentNM()->integerType();
          collectSygusGrammarTypesFor(intType,types);
+        if (range.isSequence())
+        {
+          collectSygusGrammarTypesFor(range.getSequenceElementType(), types);
+        }
        }
        else if (range.isFunction())
        {
@@ -805,7 +809,7 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
          sdts[i].addConstructor(kind, cargsBinary);
        }
      }
-    else if (types[i].isString())
+    else if (types[i].isStringLike())
      {
        // concatenation
        std::vector<TypeNode> cargsBinary;
@@ -823,6 +827,18 @@ void CegGrammarConstructor::mkSygusDefaultGrammar(
        std::vector<TypeNode> cargsLen;
        cargsLen.push_back(unres_t);
        sdts[i_intType].addConstructor(STRING_LENGTH, cargsLen);
+      if (types[i].isSequence())
+      {
+        TypeNode etype = types[i].getSequenceElementType();
+        // retrieve element unresolved type
+        Assert(type_to_unres.find(etype) != type_to_unres.end());
+        TypeNode unresElemType = type_to_unres[etype];
+
+        Trace("sygus-grammar-def") << "...add for seq.unit" << std::endl;
+        std::vector<TypeNode> cargsSeqUnit;
+        cargsSeqUnit.push_back(unresElemType);
+        sdts[i].addConstructor(SEQ_UNIT, cargsSeqUnit);
+      }
      }
      else if (types[i].isArray())
      {
diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp

index f43babad213565ddca51286b4199c60e4d268f6e..a91210a7b9f9543ade822d10fd7a91974c1a59b5 100644 (file)
--- a/src/theory/strings/regexp_operation.cpp
+++ b/src/theory/strings/regexp_operation.cpp
@@ -40,7 +40,7 @@ RegExpOpr::RegExpOpr()
                                                 std::vector<Node>{})),
        d_sigma_star(NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma))
  {
-  d_emptyString = Word::mkEmptyWord(CONST_STRING);
+  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
  
    d_emptySingleton =
        NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
diff --git a/src/theory/strings/strings_entail.cpp b/src/theory/strings/strings_entail.cpp

index e166845c7b096e50b01a7d4b14a5c31f5c80a642..9f502e1f63f3fbb890bf299b34bbeae44cb9a94d 100644 (file)
--- a/src/theory/strings/strings_entail.cpp
+++ b/src/theory/strings/strings_entail.cpp
@@ -37,8 +37,6 @@ bool StringsEntail::canConstantContainConcat(Node c,
                                               int& lastc)
  {
    Assert(c.isConst());
-  CVC4::String t = c.getConst<String>();
-  const std::vector<unsigned>& tvec = t.getVec();
    Assert(n.getKind() == STRING_CONCAT);
    // must find constant components in order
    size_t pos = 0;
@@ -50,19 +48,20 @@ bool StringsEntail::canConstantContainConcat(Node c,
      {
        firstc = firstc == -1 ? i : firstc;
        lastc = i;
-      CVC4::String s = n[i].getConst<String>();
-      size_t new_pos = t.find(s, pos);
+      size_t new_pos = Word::find(c, n[i], pos);
        if (new_pos == std::string::npos)
        {
          return false;
        }
        else
        {
-        pos = new_pos + s.size();
+        pos = new_pos + Word::getLength(n[i]);
        }
      }
      else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
      {
+      Assert(c.getType().isString());
+      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
        // find the first occurrence of a digit starting at pos
        while (pos < tvec.size() && !String::isDigit(tvec[pos]))
        {
@@ -116,7 +115,8 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
  {
    Assert(dir == 1 || dir == -1);
    Assert(nr.empty());
-  Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0));
+  NodeManager* nm = NodeManager::currentNM();
+  Node zero = nm->mkConst(CVC4::Rational(0));
    bool ret = false;
    bool success;
    unsigned sindex = 0;
@@ -139,18 +139,16 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
            if (lbr.sgn() > 0)
            {
              Assert(ArithEntail::check(curr, true));
-            CVC4::String s = n1[sindex_use].getConst<String>();
-            Node ncl =
-                NodeManager::currentNM()->mkConst(CVC4::Rational(s.size()));
-            Node next_s =
-                NodeManager::currentNM()->mkNode(MINUS, lowerBound, ncl);
+            Node s = n1[sindex_use];
+            size_t slen = Word::getLength(s);
+            Node ncl = nm->mkConst(CVC4::Rational(slen));
+            Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
              next_s = Rewriter::rewrite(next_s);
              Assert(next_s.isConst());
              // we can remove the entire constant
              if (next_s.getConst<Rational>().sgn() >= 0)
              {
-              curr = Rewriter::rewrite(
-                  NodeManager::currentNM()->mkNode(MINUS, curr, ncl));
+              curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
                success = true;
                sindex++;
              }
@@ -160,25 +158,20 @@ bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
                // lower bound minus the length of a concrete string is negative,
                // hence lowerBound cannot be larger than long max
                Assert(lbr < Rational(String::maxSize()));
-              curr = Rewriter::rewrite(
-                  NodeManager::currentNM()->mkNode(MINUS, curr, lowerBound));
+              curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
                uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
-              Assert(lbsize < s.size());
+              Assert(lbsize < slen);
                if (dir == 1)
                {
                  // strip partially from the front
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.prefix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.suffix(s.size() - lbsize));
+                nr.push_back(Word::prefix(s, lbsize));
+                n1[sindex_use] = Word::suffix(s, slen - lbsize);
                }
                else
                {
                  // strip partially from the back
-                nr.push_back(
-                    NodeManager::currentNM()->mkConst(s.suffix(lbsize)));
-                n1[sindex_use] = NodeManager::currentNM()->mkConst(
-                    s.prefix(s.size() - lbsize));
+                nr.push_back(Word::suffix(s, lbsize));
+                n1[sindex_use] = Word::prefix(s, slen - lbsize);
                }
                ret = true;
              }
@@ -496,8 +489,6 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
  {
    Assert(nb.empty());
    Assert(ne.empty());
-
-  NodeManager* nm = NodeManager::currentNM();
    bool changed = false;
    // for ( forwards, backwards ) direction
    for (unsigned r = 0; r < 2; r++)
@@ -509,7 +500,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
        bool removeComponent = false;
        Node n1cmp = n1[index0];
  
-      if (n1cmp.isConst() && n1cmp.getConst<String>().size() == 0)
+      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
        {
          return false;
        }
@@ -522,14 +513,15 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
            << ", dir = " << dir << std::endl;
        if (n1cmp.isConst())
        {
-        CVC4::String s = n1cmp.getConst<String>();
+        Node s = n1cmp;
+        size_t slen = Word::getLength(s);
          // overlap is an overapproximation of the number of characters
          // n2[index1] can match in s
-        unsigned overlap = s.size();
+        unsigned overlap = Word::getLength(s);
          if (n2[index1].isConst())
          {
-          CVC4::String t = n2[index1].getConst<String>();
-          std::size_t ret = r == 0 ? s.find(t) : s.rfind(t);
+          Node t = n2[index1];
+          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
            if (ret == std::string::npos)
            {
              if (n1.size() == 1)
@@ -545,7 +537,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
                // This is used to partially strip off the endpoint
                // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
                // str.contains( str.++( "c", x ), str.++( "cd", y ) )
-              overlap = r == 0 ? s.overlap(t) : t.overlap(s);
+              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
              }
              else
              {
@@ -553,19 +545,20 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
                // if there is no overlap
                //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
                //        --> str.contains( x, "a" )
-              removeComponent = ((r == 0 ? s.overlap(t) : t.overlap(s)) == 0);
+              removeComponent =
+                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
              }
            }
            else if (sss.empty())  // only if not substr
            {
-            Assert(ret < s.size());
+            Assert(ret < slen);
              // can strip off up to the find position, e.g.
              // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
              // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
              // and
              // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
              // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
-            overlap = s.size() - ret;
+            overlap = slen - ret;
            }
          }
          else
@@ -573,7 +566,7 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
            // inconclusive
          }
          // process the overlap
-        if (overlap < s.size())
+        if (overlap < slen)
          {
            changed = true;
            if (overlap == 0)
@@ -586,13 +579,13 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
              // component
              if (r == 0)
              {
-              nb.push_back(nm->mkConst(s.prefix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.suffix(overlap));
+              nb.push_back(Word::prefix(s, slen - overlap));
+              n1[index0] = Word::suffix(s, overlap);
              }
              else
              {
-              ne.push_back(nm->mkConst(s.suffix(s.size() - overlap)));
-              n1[index0] = nm->mkConst(s.prefix(overlap));
+              ne.push_back(Word::suffix(s, slen - overlap));
+              n1[index0] = Word::prefix(s, overlap);
              }
            }
          }
@@ -601,8 +594,8 @@ bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
        {
          if (n2[index1].isConst())
          {
+          Assert(n2[index1].getType().isString());
            CVC4::String t = n2[index1].getConst<String>();
-
            if (n1.size() == 1)
            {
              // if n1.size()==1, then if n2[index1] is not a number, we can drop
diff --git a/src/theory/strings/word.cpp b/src/theory/strings/word.cpp

index 484b0df2d75d24209ea01249c3364852a424e1ea..35b315e357a970daa1dcbcbd07c98f06e39b6cf3 100644 (file)
--- a/src/theory/strings/word.cpp
+++ b/src/theory/strings/word.cpp
@@ -27,7 +27,8 @@ Node Word::mkEmptyWord(TypeNode tn)
  {
    if (tn.isString())
    {
-    return mkEmptyWord(CONST_STRING);
+    std::vector<unsigned> vec;
+    return NodeManager::currentNM()->mkConst(String(vec));
    }
    else if (tn.isSequence())
    {
@@ -39,17 +40,6 @@ Node Word::mkEmptyWord(TypeNode tn)
    return Node::null();
  }
  
-Node Word::mkEmptyWord(Kind k)
-{
-  if (k == CONST_STRING)
-  {
-    std::vector<unsigned> vec;
-    return NodeManager::currentNM()->mkConst(String(vec));
-  }
-  Unimplemented();
-  return Node::null();
-}
-
  Node Word::mkWordFlatten(const std::vector<Node>& xs)
  {
    Assert(!xs.empty());
@@ -81,7 +71,8 @@ Node Word::mkWordFlatten(const std::vector<Node>& xs)
          seq.push_back(c.toExpr());
        }
      }
-    return NodeManager::currentNM()->mkConst(ExprSequence(tn.toType(), seq));
+    return NodeManager::currentNM()->mkConst(
+        ExprSequence(tn.getSequenceElementType().toType(), seq));
    }
    Unimplemented();
    return Node::null();
@@ -98,17 +89,17 @@ size_t Word::getLength(TNode x)
    {
      return x.getConst<ExprSequence>().getSequence().size();
    }
-  Unimplemented();
+  Unimplemented() << "Word::getLength on " << x;
    return 0;
  }
  
  std::vector<Node> Word::getChars(TNode x)
  {
    Kind k = x.getKind();
+  std::vector<Node> ret;
+  NodeManager* nm = NodeManager::currentNM();
    if (k == CONST_STRING)
    {
-    std::vector<Node> ret;
-    NodeManager* nm = NodeManager::currentNM();
      std::vector<unsigned> ccVec;
      const std::vector<unsigned>& cvec = x.getConst<String>().getVec();
      for (unsigned chVal : cvec)
@@ -120,8 +111,18 @@ std::vector<Node> Word::getChars(TNode x)
      }
      return ret;
    }
+  else if (k == CONST_SEQUENCE)
+  {
+    Type t = x.getConst<ExprSequence>().getType();
+    const Sequence& sx = x.getConst<ExprSequence>().getSequence();
+    const std::vector<Node>& vec = sx.getVec();
+    for (const Node& v : vec)
+    {
+      ret.push_back(nm->mkConst(ExprSequence(t, {v.toExpr()})));
+    }
+    return ret;
+  }
    Unimplemented();
-  std::vector<Node> ret;
    return ret;
  }
  
diff --git a/src/theory/strings/word.h b/src/theory/strings/word.h

index 4ebe43e2f3ec420f44ab1ab0fdd7e4c2ffd1d03a..3b15b763ab679f0658187b7d426d43b4d31b2e87 100644 (file)
--- a/src/theory/strings/word.h
+++ b/src/theory/strings/word.h
@@ -33,9 +33,6 @@ class Word
    /** make empty constant of type tn */
    static Node mkEmptyWord(TypeNode tn);
  
-  /** make empty constant of kind k */
-  static Node mkEmptyWord(Kind k);
-
    /** make word from constants in (non-empty) vector vec */
    static Node mkWordFlatten(const std::vector<Node>& xs);
  
diff --git a/test/unit/theory/theory_strings_word_white.h b/test/unit/theory/theory_strings_word_white.h

index bc6279cb8ce9e5cf06a0dc49c17c6f8bb1173bf3..2e29d50b8ef7f2dd673f607b90c92510bcdf1c9e 100644 (file)
--- a/test/unit/theory/theory_strings_word_white.h
+++ b/test/unit/theory/theory_strings_word_white.h
@@ -63,7 +63,8 @@ class TheoryStringsWordWhite : public CxxTest::TestSuite
      Node cac = d_nm->mkConst(String("cac"));
      Node abca = d_nm->mkConst(String("abca"));
  
-    TS_ASSERT(Word::mkEmptyWord(kind::CONST_STRING) == empty);
+    TypeNode stringType = d_nm->stringType();
+    TS_ASSERT(Word::mkEmptyWord(stringType) == empty);
  
      std::vector<Node> vec;
      vec.push_back(abc);
author	Andrew Reynolds <andrew.j.reynolds@gmail.com>
	Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)
committer	GitHub <noreply@github.com>
	Fri, 19 Jun 2020 21:48:10 +0000 (16:48 -0500)
src/expr/type_node.cpp		patch \| blob \| history
src/theory/quantifiers/sygus/sygus_grammar_cons.cpp		patch \| blob \| history
src/theory/strings/regexp_operation.cpp		patch \| blob \| history
src/theory/strings/strings_entail.cpp		patch \| blob \| history
src/theory/strings/word.cpp		patch \| blob \| history
src/theory/strings/word.h		patch \| blob \| history
test/unit/theory/theory_strings_word_white.h		patch \| blob \| history