Initial support for string reverse (#3581)
authorAndrew Reynolds <andrew.j.reynolds@gmail.com>
Mon, 23 Dec 2019 23:18:19 +0000 (17:18 -0600)
committerAndres Noetzli <andres.noetzli@gmail.com>
Mon, 23 Dec 2019 23:18:19 +0000 (15:18 -0800)
Type rules, parsing and printing, basic rewriting including constant evaluation, reduction for string reverse (`str.rev`).

Also improves support in a few places for tolower/toupper.

19 files changed:
src/parser/cvc/Cvc.g
src/parser/smt2/smt2.cpp
src/printer/smt2/smt2_printer.cpp
src/theory/strings/kinds
src/theory/strings/theory_strings.cpp
src/theory/strings/theory_strings_preprocess.cpp
src/theory/strings/theory_strings_rewriter.cpp
src/theory/strings/theory_strings_rewriter.h
src/util/regexp.h
test/regress/CMakeLists.txt
test/regress/regress0/strings/parser-syms.cvc [new file with mode: 0644]
test/regress/regress0/strings/str-rev-simple.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-conv1.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-ex1.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-ex2.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-ex3.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-ex4.smt2 [new file with mode: 0644]
test/regress/regress1/strings/rev-ex5.smt2 [new file with mode: 0644]
test/regress/regress1/strings/str-rev-simple-s.smt2 [new file with mode: 0644]

index e4849aae64e82c180b9b539e32771356329c4f1b..2dceba768c7e015066e566c8b035abb3a176bdec 100644 (file)
@@ -227,6 +227,9 @@ tokens {
   STRING_STOI_TOK = 'STRING_TO_INTEGER';
   STRING_ITOS_TOK = 'INTEGER_TO_STRING';
   STRING_TO_REGEXP_TOK = 'STRING_TO_REGEXP';
+  STRING_TOLOWER_TOK = 'TOLOWER';
+  STRING_TOUPPER_TOK = 'TOUPPER';
+  STRING_REV_TOK = 'REVERSE';
   REGEXP_CONCAT_TOK = 'RE_CONCAT';
   REGEXP_UNION_TOK = 'RE_UNION';
   REGEXP_INTER_TOK = 'RE_INTER';
@@ -2038,6 +2041,12 @@ stringTerm[CVC4::Expr& f]
     { f = MK_EXPR(CVC4::kind::STRING_ITOS, f); }   
   | STRING_TO_REGEXP_TOK LPAREN formula[f] RPAREN
     { f = MK_EXPR(CVC4::kind::STRING_TO_REGEXP, f); }
+  | STRING_TOLOWER_TOK LPAREN formula[f] RPAREN
+    { f = MK_EXPR(CVC4::kind::STRING_TOLOWER, f); }
+  | STRING_TOUPPER_TOK LPAREN formula[f] RPAREN
+    { f = MK_EXPR(CVC4::kind::STRING_TOUPPER, f); }
+  | STRING_REV_TOK LPAREN formula[f] RPAREN
+    { f = MK_EXPR(CVC4::kind::STRING_REV, f); }
   | REGEXP_CONCAT_TOK LPAREN formula[f] { args.push_back(f); }
     ( COMMA formula[f2] { args.push_back(f2); } )+ RPAREN
     { f = MK_EXPR(CVC4::kind::REGEXP_CONCAT, args); }
index 3dd03977565d4a9fdee0969dbdb51f0bce8d4e00..291885278107d424b52d0999db8164b26d7fe12a 100644 (file)
@@ -168,6 +168,7 @@ void Smt2::addStringOperators() {
   {
     addOperator(kind::STRING_TOLOWER, "str.tolower");
     addOperator(kind::STRING_TOUPPER, "str.toupper");
+    addOperator(kind::STRING_REV, "str.rev");
   }
   addOperator(kind::STRING_PREFIX, "str.prefixof" );
   addOperator(kind::STRING_SUFFIX, "str.suffixof" );
index f0a1e740f4acfe4aa4bca02afdf64dd1fa55459c..223bd9af917e705d9bdc841a09cfe0f1a5498eb9 100644 (file)
@@ -666,6 +666,7 @@ void Smt2Printer::toStream(std::ostream& out,
   case kind::STRING_STRREPLALL:
   case kind::STRING_TOLOWER:
   case kind::STRING_TOUPPER:
+  case kind::STRING_REV:
   case kind::STRING_PREFIX:
   case kind::STRING_SUFFIX:
   case kind::STRING_LEQ:
@@ -1228,6 +1229,7 @@ static string smtKindString(Kind k, Variant v)
   case kind::STRING_STRREPLALL: return "str.replaceall";
   case kind::STRING_TOLOWER: return "str.tolower";
   case kind::STRING_TOUPPER: return "str.toupper";
+  case kind::STRING_REV: return "str.rev";
   case kind::STRING_PREFIX: return "str.prefixof" ;
   case kind::STRING_SUFFIX: return "str.suffixof" ;
   case kind::STRING_LEQ: return "str.<=";
index 4e90d1583e3a685bcb6acc20464b17716ff26dfb..aa1e2627ab061c52169ccd5bb027467974999034 100644 (file)
@@ -30,6 +30,7 @@ operator STRING_STOI 1 "string to integer (total function)"
 operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise"
 operator STRING_TOLOWER 1 "string to lowercase conversion"
 operator STRING_TOUPPER 1 "string to uppercase conversion"
+operator STRING_REV 1 "string reverse"
 
 sort STRING_TYPE \
     Cardinality::INTEGERS \
@@ -104,6 +105,7 @@ typerule STRING_STOI "SimpleTypeRule<RInteger, AString>"
 typerule STRING_CODE "SimpleTypeRule<RInteger, AString>"
 typerule STRING_TOUPPER "SimpleTypeRule<RString, AString>"
 typerule STRING_TOLOWER "SimpleTypeRule<RString, AString>"
+typerule STRING_REV "SimpleTypeRule<RString, AString>"
 
 typerule STRING_IN_REGEXP "SimpleTypeRule<RBool, AString, ARegExp>"
 
index 7b00ed2e2d8d11f1ec38f1fd2d6964f128a3f226..1bc1040960799d31785fd359995f3d43dcec185e 100644 (file)
@@ -129,6 +129,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
   getExtTheory()->addFunctionKind(kind::STRING_CODE);
   getExtTheory()->addFunctionKind(kind::STRING_TOLOWER);
   getExtTheory()->addFunctionKind(kind::STRING_TOUPPER);
+  getExtTheory()->addFunctionKind(kind::STRING_REV);
 
   // The kinds we are treating as function application in congruence
   d_equalityEngine.addFunctionKind(kind::STRING_LENGTH);
@@ -147,6 +148,7 @@ TheoryStrings::TheoryStrings(context::Context* c,
   d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL);
   d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER);
   d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER);
+  d_equalityEngine.addFunctionKind(kind::STRING_REV);
 
   d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
   d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
@@ -437,7 +439,7 @@ bool TheoryStrings::doReduction(int effort, Node n, bool& isCd)
     Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
            || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL
            || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER
-           || k == STRING_TOUPPER);
+           || k == STRING_TOUPPER || k == STRING_REV);
     std::vector<Node> new_nodes;
     Node res = d_preproc.simplify(n, new_nodes);
     Assert(res != n);
@@ -765,7 +767,8 @@ void TheoryStrings::preRegisterTerm(TNode n) {
       if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS
           || k == kind::STRING_STOI || k == kind::STRING_STRREPL
           || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN
-          || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER)
+          || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
+          || k == STRING_REV)
       {
         std::stringstream ss;
         ss << "Term of kind " << k
index 4d10675839d1b3bf93841fd105124ce2b9cca446..a3b864089511f8d0261da8f603bae32d1f399f74 100644 (file)
@@ -476,9 +476,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
     Node x = t[0];
     Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
 
-    Node lent = nm->mkNode(STRING_LENGTH, t);
+    Node lenx = nm->mkNode(STRING_LENGTH, x);
     Node lenr = nm->mkNode(STRING_LENGTH, r);
-    Node eqLenA = lent.eqNode(lenr);
+    Node eqLenA = lenx.eqNode(lenr);
 
     Node i = nm->mkBoundVar(nm->integerType());
     Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
@@ -498,7 +498,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
         ci);
 
     Node bound =
-        nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LEQ, i, lenr));
+        nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr));
     Node rangeA =
         nm->mkNode(FORALL, bvi, nm->mkNode(OR, bound.negate(), ri.eqNode(res)));
 
@@ -514,7 +514,40 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) {
 
     // Thus, toLower( x ) = r
     retNode = r;
-  } else if( t.getKind() == kind::STRING_STRCTN ){
+  }
+  else if (t.getKind() == STRING_REV)
+  {
+    Node x = t[0];
+    Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
+
+    Node lenx = nm->mkNode(STRING_LENGTH, x);
+    Node lenr = nm->mkNode(STRING_LENGTH, r);
+    Node eqLenA = lenx.eqNode(lenr);
+
+    Node i = nm->mkBoundVar(nm->integerType());
+    Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
+
+    Node revi = nm->mkNode(
+        MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, d_one));
+    Node ssr = nm->mkNode(STRING_SUBSTR, r, i, d_one);
+    Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, d_one);
+
+    Node bound =
+        nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr));
+    Node rangeA = nm->mkNode(
+        FORALL, bvi, nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx)));
+    // assert:
+    //   len(r) = len(x) ^
+    //   forall i. 0 <= i < len(r) =>
+    //     substr(r,i,1) = substr(x,len(x)-(i+1),1)
+    Node assert = nm->mkNode(AND, eqLenA, rangeA);
+    new_nodes.push_back(assert);
+
+    // Thus, (str.rev x) = r
+    retNode = r;
+  }
+  else if (t.getKind() == kind::STRING_STRCTN)
+  {
     Node x = t[0];
     Node s = t[1];
     //negative contains reduces to existential
index 5ae0d87b3cb621163a1b63517162375e4a35fbe3..f179440274771eeb8e3172fcdc61c70e174d4787 100644 (file)
@@ -1600,6 +1600,12 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
         retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
       }
     }
+    else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER
+             || nk0 == STRING_REV)
+    {
+      // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
+      retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
+    }
   }
   else if (nk == kind::STRING_CHARAT)
   {
@@ -1641,6 +1647,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) {
   {
     retNode = rewriteStrConvert(node);
   }
+  else if (nk == STRING_REV)
+  {
+    retNode = rewriteStrReverse(node);
+  }
   else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
   {
     retNode = rewritePrefixSuffix(node);
@@ -3217,6 +3227,39 @@ Node TheoryStringsRewriter::rewriteStrConvert(Node node)
   return node;
 }
 
+Node TheoryStringsRewriter::rewriteStrReverse(Node node)
+{
+  Assert(node.getKind() == STRING_REV);
+  NodeManager* nm = NodeManager::currentNM();
+  Node x = node[0];
+  if (x.isConst())
+  {
+    std::vector<unsigned> nvec = node[0].getConst<String>().getVec();
+    std::reverse(nvec.begin(), nvec.end());
+    Node retNode = nm->mkConst(String(nvec));
+    return returnRewrite(node, retNode, "str-conv-const");
+  }
+  else if (x.getKind() == STRING_CONCAT)
+  {
+    std::vector<Node> children;
+    for (const Node& nc : x)
+    {
+      children.push_back(nm->mkNode(STRING_REV, nc));
+    }
+    std::reverse(children.begin(), children.end());
+    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
+    Node retNode = nm->mkNode(STRING_CONCAT, children);
+    return returnRewrite(node, retNode, "str-rev-minscope-concat");
+  }
+  else if (x.getKind() == STRING_REV)
+  {
+    // rev( rev( x ) ) --> x
+    Node retNode = x[0];
+    return returnRewrite(node, retNode, "str-rev-idem");
+  }
+  return node;
+}
+
 Node TheoryStringsRewriter::rewriteStringLeq(Node n)
 {
   Assert(n.getKind() == kind::STRING_LEQ);
index 35805e1c22bc02a52af73379c21607173e352645..dd83df24f65e688c77f49a3f6a067cd55df56e17 100644 (file)
@@ -232,6 +232,13 @@ class TheoryStringsRewriter : public TheoryRewriter
    * Returns the rewritten form of node.
    */
   static Node rewriteStrConvert(Node node);
+  /** rewrite string reverse
+   *
+   * This is the entry point for post-rewriting terms n of the form
+   *   str.rev( s )
+   * Returns the rewritten form of node.
+   */
+  static Node rewriteStrReverse(Node node);
   /** rewrite string less than or equal
   * This is the entry point for post-rewriting terms n of the form
   *   str.<=( t, s )
index d1cb197fb60bab90744bc73c4a583fa997ed7a21..5b6dc2b625d1368e697ce1f9b00dbc471e99e118 100644 (file)
@@ -38,7 +38,7 @@ class CVC4_PUBLIC String {
  public:
   /**
    * The start ASCII code. In our string representation below, we represent
-   * characters using a vector d_vec of unsigned integers. We refer to this as
+   * characters using a vector d_str of unsigned integers. We refer to this as
    * the "internal representation" for the string.
    *
    * We make unsigned integer 0 correspond to the 65th character ("A") in the
@@ -57,7 +57,7 @@ class CVC4_PUBLIC String {
   static inline unsigned num_codes() { return 256; }
   /**
    * Convert unsigned char to the unsigned used in the internal representation
-   * in d_vec below.
+   * in d_str below.
    */
   static unsigned convertCharToUnsignedInt(unsigned char c);
   /** Convert the internal unsigned to a unsigned char. */
index 67cc44a42e9d15afe7e2727487e8ec7ae4fe5ca2..c85065ef9ad60b300c186680b120e17c4fb68717 100644 (file)
@@ -893,6 +893,7 @@ set(regress_0_tests
   regress0/strings/ncontrib-rewrites.smt2
   regress0/strings/norn-31.smt2
   regress0/strings/norn-simp-rew.smt2
+  regress0/strings/parser-syms.cvc
   regress0/strings/re.all.smt2
   regress0/strings/regexp-native-simple.cvc
   regress0/strings/regexp_inclusion.smt2
@@ -907,6 +908,7 @@ set(regress_0_tests
   regress0/strings/str004.smt2
   regress0/strings/str005.smt2
   regress0/strings/str_unsound_ext_rew_eq.smt2
+  regress0/strings/str-rev-simple.smt2
   regress0/strings/strings-charat.cvc
   regress0/strings/strings-native-simple.cvc
   regress0/strings/strip-endpoint-itos.smt2
@@ -1669,6 +1671,12 @@ set(regress_1_tests
   regress1/strings/repl-soundness-sem.smt2
   regress1/strings/replaceall-len.smt2
   regress1/strings/replaceall-replace.smt2
+  regress1/strings/rev-conv1.smt2
+  regress1/strings/rev-ex1.smt2
+  regress1/strings/rev-ex2.smt2
+  regress1/strings/rev-ex3.smt2
+  regress1/strings/rev-ex4.smt2
+  regress1/strings/rev-ex5.smt2
   regress1/strings/rew-020618.smt2
   regress1/strings/rew-check1.smt2
   regress1/strings/simple-re-consume.smt2
@@ -1678,6 +1686,7 @@ set(regress_1_tests
   regress1/strings/str-code-unsat-2.smt2
   regress1/strings/str-code-unsat-3.smt2
   regress1/strings/str-code-unsat.smt2
+  regress1/strings/str-rev-simple-s.smt2
   regress1/strings/str001.smt2
   regress1/strings/str002.smt2
   regress1/strings/str006.smt2
diff --git a/test/regress/regress0/strings/parser-syms.cvc b/test/regress/regress0/strings/parser-syms.cvc
new file mode 100644 (file)
index 0000000..20c37cf
--- /dev/null
@@ -0,0 +1,9 @@
+% EXPECT: sat
+
+x : STRING;
+y : STRING;
+
+ASSERT CONCAT( REVERSE("abc"), "d") = x;
+ASSERT CONCAT( TOLOWER("ABC"), TOUPPER("abc")) = y;
+
+CHECKSAT;
diff --git a/test/regress/regress0/strings/str-rev-simple.smt2 b/test/regress/regress0/strings/str-rev-simple.smt2
new file mode 100644 (file)
index 0000000..0001168
--- /dev/null
@@ -0,0 +1,11 @@
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(assert (= (str.rev "ABC") "CBA"))
+(assert (= (str.rev "") ""))
+(assert (= (str.rev "A") x))
+(assert (= (str.rev (str.++ x y)) "BCA"))
+(assert (= (str.rev "BC") y))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-conv1.smt2 b/test/regress/regress1/strings/rev-conv1.smt2
new file mode 100644 (file)
index 0000000..ca20076
--- /dev/null
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (= (str.rev (str.toupper x)) "CBA"))
+(assert (not (= x "ABC")))
+(assert (not (= x "abc")))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-ex1.smt2 b/test/regress/regress1/strings/rev-ex1.smt2
new file mode 100644 (file)
index 0000000..36d40fc
--- /dev/null
@@ -0,0 +1,9 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(assert (not (= (str.rev (str.++ x y)) (str.rev (str.++ x z)))))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-ex2.smt2 b/test/regress/regress1/strings/rev-ex2.smt2
new file mode 100644 (file)
index 0000000..163785b
--- /dev/null
@@ -0,0 +1,10 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: unsat
+(set-logic ALL)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(assert (= x (str.rev (str.++ y "A"))))
+(assert (> (str.len x) (+ (str.len y) 1)))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-ex3.smt2 b/test/regress/regress1/strings/rev-ex3.smt2
new file mode 100644 (file)
index 0000000..e317191
--- /dev/null
@@ -0,0 +1,12 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: unsat
+(set-logic ALL)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(declare-fun w () String)
+(assert (= x (str.rev (str.++ y "A"))))
+(assert (= x (str.rev (str.++ "B" z))))
+(assert (= z (str.++ w "C")))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-ex4.smt2 b/test/regress/regress1/strings/rev-ex4.smt2
new file mode 100644 (file)
index 0000000..a3eed8e
--- /dev/null
@@ -0,0 +1,13 @@
+; COMMAND-LINE: --strings-exp --strings-fmf
+; EXPECT: unsat
+(set-logic ALL)
+(set-info :status unsat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(declare-fun w () String)
+(assert (= x (str.rev y)))
+(assert (= y (str.rev z)))
+(assert (distinct x z w))
+(assert (< (str.len x) 2))
+(check-sat)
diff --git a/test/regress/regress1/strings/rev-ex5.smt2 b/test/regress/regress1/strings/rev-ex5.smt2
new file mode 100644 (file)
index 0000000..74a3749
--- /dev/null
@@ -0,0 +1,8 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(assert (= x (str.rev x)))
+(assert (> (str.len x) 1))
+(check-sat)
diff --git a/test/regress/regress1/strings/str-rev-simple-s.smt2 b/test/regress/regress1/strings/str-rev-simple-s.smt2
new file mode 100644 (file)
index 0000000..5445a19
--- /dev/null
@@ -0,0 +1,13 @@
+; COMMAND-LINE: --strings-exp
+; EXPECT: sat
+(set-logic ALL)
+(set-info :status sat)
+(declare-fun x () String)
+(declare-fun y () String)
+(declare-fun z () String)
+(assert (= (str.rev "ABC") "CBA"))
+(assert (= (str.rev "") ""))
+(assert (= (str.rev "A") x))
+(assert (= (str.rev (str.++ x y)) "BCA"))
+(assert (= (str.rev y) "BC"))
+(check-sat)