From b3471b719f1cd031d35e9a431027088b0dec156b Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 23 Dec 2019 17:18:19 -0600 Subject: [PATCH] Initial support for string reverse (#3581) Type rules, parsing and printing, basic rewriting including constant evaluation, reduction for string reverse (`str.rev`). Also improves support in a few places for tolower/toupper. --- src/parser/cvc/Cvc.g | 9 ++++ src/parser/smt2/smt2.cpp | 1 + src/printer/smt2/smt2_printer.cpp | 2 + src/theory/strings/kinds | 2 + src/theory/strings/theory_strings.cpp | 7 ++- .../strings/theory_strings_preprocess.cpp | 41 ++++++++++++++++-- .../strings/theory_strings_rewriter.cpp | 43 +++++++++++++++++++ src/theory/strings/theory_strings_rewriter.h | 7 +++ src/util/regexp.h | 4 +- test/regress/CMakeLists.txt | 9 ++++ test/regress/regress0/strings/parser-syms.cvc | 9 ++++ .../regress0/strings/str-rev-simple.smt2 | 11 +++++ test/regress/regress1/strings/rev-conv1.smt2 | 9 ++++ test/regress/regress1/strings/rev-ex1.smt2 | 9 ++++ test/regress/regress1/strings/rev-ex2.smt2 | 10 +++++ test/regress/regress1/strings/rev-ex3.smt2 | 12 ++++++ test/regress/regress1/strings/rev-ex4.smt2 | 13 ++++++ test/regress/regress1/strings/rev-ex5.smt2 | 8 ++++ .../regress1/strings/str-rev-simple-s.smt2 | 13 ++++++ 19 files changed, 211 insertions(+), 8 deletions(-) create mode 100644 test/regress/regress0/strings/parser-syms.cvc create mode 100644 test/regress/regress0/strings/str-rev-simple.smt2 create mode 100644 test/regress/regress1/strings/rev-conv1.smt2 create mode 100644 test/regress/regress1/strings/rev-ex1.smt2 create mode 100644 test/regress/regress1/strings/rev-ex2.smt2 create mode 100644 test/regress/regress1/strings/rev-ex3.smt2 create mode 100644 test/regress/regress1/strings/rev-ex4.smt2 create mode 100644 test/regress/regress1/strings/rev-ex5.smt2 create mode 100644 test/regress/regress1/strings/str-rev-simple-s.smt2 diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g index e4849aae6..2dceba768 100644 --- a/src/parser/cvc/Cvc.g +++ b/src/parser/cvc/Cvc.g @@ -227,6 +227,9 @@ tokens { STRING_STOI_TOK = 'STRING_TO_INTEGER'; STRING_ITOS_TOK = 'INTEGER_TO_STRING'; STRING_TO_REGEXP_TOK = 'STRING_TO_REGEXP'; + STRING_TOLOWER_TOK = 'TOLOWER'; + STRING_TOUPPER_TOK = 'TOUPPER'; + STRING_REV_TOK = 'REVERSE'; REGEXP_CONCAT_TOK = 'RE_CONCAT'; REGEXP_UNION_TOK = 'RE_UNION'; REGEXP_INTER_TOK = 'RE_INTER'; @@ -2038,6 +2041,12 @@ stringTerm[CVC4::Expr& f] { f = MK_EXPR(CVC4::kind::STRING_ITOS, f); } | STRING_TO_REGEXP_TOK LPAREN formula[f] RPAREN { f = MK_EXPR(CVC4::kind::STRING_TO_REGEXP, f); } + | STRING_TOLOWER_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_TOLOWER, f); } + | STRING_TOUPPER_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_TOUPPER, f); } + | STRING_REV_TOK LPAREN formula[f] RPAREN + { f = MK_EXPR(CVC4::kind::STRING_REV, f); } | REGEXP_CONCAT_TOK LPAREN formula[f] { args.push_back(f); } ( COMMA formula[f2] { args.push_back(f2); } )+ RPAREN { f = MK_EXPR(CVC4::kind::REGEXP_CONCAT, args); } diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 3dd039775..291885278 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -168,6 +168,7 @@ void Smt2::addStringOperators() { { addOperator(kind::STRING_TOLOWER, "str.tolower"); addOperator(kind::STRING_TOUPPER, "str.toupper"); + addOperator(kind::STRING_REV, "str.rev"); } addOperator(kind::STRING_PREFIX, "str.prefixof" ); addOperator(kind::STRING_SUFFIX, "str.suffixof" ); diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index f0a1e740f..223bd9af9 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -666,6 +666,7 @@ void Smt2Printer::toStream(std::ostream& out, case kind::STRING_STRREPLALL: case kind::STRING_TOLOWER: case kind::STRING_TOUPPER: + case kind::STRING_REV: case kind::STRING_PREFIX: case kind::STRING_SUFFIX: case kind::STRING_LEQ: @@ -1228,6 +1229,7 @@ static string smtKindString(Kind k, Variant v) case kind::STRING_STRREPLALL: return "str.replaceall"; case kind::STRING_TOLOWER: return "str.tolower"; case kind::STRING_TOUPPER: return "str.toupper"; + case kind::STRING_REV: return "str.rev"; case kind::STRING_PREFIX: return "str.prefixof" ; case kind::STRING_SUFFIX: return "str.suffixof" ; case kind::STRING_LEQ: return "str.<="; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 4e90d1583..aa1e2627a 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -30,6 +30,7 @@ operator STRING_STOI 1 "string to integer (total function)" operator STRING_CODE 1 "string to code, returns the code of the first character of the string if it has length one, -1 otherwise" operator STRING_TOLOWER 1 "string to lowercase conversion" operator STRING_TOUPPER 1 "string to uppercase conversion" +operator STRING_REV 1 "string reverse" sort STRING_TYPE \ Cardinality::INTEGERS \ @@ -104,6 +105,7 @@ typerule STRING_STOI "SimpleTypeRule" typerule STRING_CODE "SimpleTypeRule" typerule STRING_TOUPPER "SimpleTypeRule" typerule STRING_TOLOWER "SimpleTypeRule" +typerule STRING_REV "SimpleTypeRule" typerule STRING_IN_REGEXP "SimpleTypeRule" diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 7b00ed2e2..1bc104096 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -129,6 +129,7 @@ TheoryStrings::TheoryStrings(context::Context* c, getExtTheory()->addFunctionKind(kind::STRING_CODE); getExtTheory()->addFunctionKind(kind::STRING_TOLOWER); getExtTheory()->addFunctionKind(kind::STRING_TOUPPER); + getExtTheory()->addFunctionKind(kind::STRING_REV); // The kinds we are treating as function application in congruence d_equalityEngine.addFunctionKind(kind::STRING_LENGTH); @@ -147,6 +148,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL); d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER); d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER); + d_equalityEngine.addFunctionKind(kind::STRING_REV); d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); @@ -437,7 +439,7 @@ bool TheoryStrings::doReduction(int effort, Node n, bool& isCd) Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER - || k == STRING_TOUPPER); + || k == STRING_TOUPPER || k == STRING_REV); std::vector new_nodes; Node res = d_preproc.simplify(n, new_nodes); Assert(res != n); @@ -765,7 +767,8 @@ void TheoryStrings::preRegisterTerm(TNode n) { if (k == kind::STRING_STRIDOF || k == kind::STRING_ITOS || k == kind::STRING_STOI || k == kind::STRING_STRREPL || k == kind::STRING_STRREPLALL || k == kind::STRING_STRCTN - || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER) + || k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER + || k == STRING_REV) { std::stringstream ss; ss << "Term of kind " << k diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 4d1067583..a3b864089 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -476,9 +476,9 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { Node x = t[0]; Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); - Node lent = nm->mkNode(STRING_LENGTH, t); + Node lenx = nm->mkNode(STRING_LENGTH, x); Node lenr = nm->mkNode(STRING_LENGTH, r); - Node eqLenA = lent.eqNode(lenr); + Node eqLenA = lenx.eqNode(lenr); Node i = nm->mkBoundVar(nm->integerType()); Node bvi = nm->mkNode(BOUND_VAR_LIST, i); @@ -498,7 +498,7 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { ci); Node bound = - nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LEQ, i, lenr)); + nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr)); Node rangeA = nm->mkNode(FORALL, bvi, nm->mkNode(OR, bound.negate(), ri.eqNode(res))); @@ -514,7 +514,40 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes ) { // Thus, toLower( x ) = r retNode = r; - } else if( t.getKind() == kind::STRING_STRCTN ){ + } + else if (t.getKind() == STRING_REV) + { + Node x = t[0]; + Node r = d_sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); + + Node lenx = nm->mkNode(STRING_LENGTH, x); + Node lenr = nm->mkNode(STRING_LENGTH, r); + Node eqLenA = lenx.eqNode(lenr); + + Node i = nm->mkBoundVar(nm->integerType()); + Node bvi = nm->mkNode(BOUND_VAR_LIST, i); + + Node revi = nm->mkNode( + MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, d_one)); + Node ssr = nm->mkNode(STRING_SUBSTR, r, i, d_one); + Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, d_one); + + Node bound = + nm->mkNode(AND, nm->mkNode(LEQ, d_zero, i), nm->mkNode(LT, i, lenr)); + Node rangeA = nm->mkNode( + FORALL, bvi, nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx))); + // assert: + // len(r) = len(x) ^ + // forall i. 0 <= i < len(r) => + // substr(r,i,1) = substr(x,len(x)-(i+1),1) + Node assert = nm->mkNode(AND, eqLenA, rangeA); + new_nodes.push_back(assert); + + // Thus, (str.rev x) = r + retNode = r; + } + else if (t.getKind() == kind::STRING_STRCTN) + { Node x = t[0]; Node s = t[1]; //negative contains reduces to existential diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 5ae0d87b3..f17944027 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1600,6 +1600,12 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { retNode = nm->mkNode(STRING_LENGTH, node[0][0]); } } + else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER + || nk0 == STRING_REV) + { + // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. + retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + } } else if (nk == kind::STRING_CHARAT) { @@ -1641,6 +1647,10 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { { retNode = rewriteStrConvert(node); } + else if (nk == STRING_REV) + { + retNode = rewriteStrReverse(node); + } else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX) { retNode = rewritePrefixSuffix(node); @@ -3217,6 +3227,39 @@ Node TheoryStringsRewriter::rewriteStrConvert(Node node) return node; } +Node TheoryStringsRewriter::rewriteStrReverse(Node node) +{ + Assert(node.getKind() == STRING_REV); + NodeManager* nm = NodeManager::currentNM(); + Node x = node[0]; + if (x.isConst()) + { + std::vector nvec = node[0].getConst().getVec(); + std::reverse(nvec.begin(), nvec.end()); + Node retNode = nm->mkConst(String(nvec)); + return returnRewrite(node, retNode, "str-conv-const"); + } + else if (x.getKind() == STRING_CONCAT) + { + std::vector children; + for (const Node& nc : x) + { + children.push_back(nm->mkNode(STRING_REV, nc)); + } + std::reverse(children.begin(), children.end()); + // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 ) + Node retNode = nm->mkNode(STRING_CONCAT, children); + return returnRewrite(node, retNode, "str-rev-minscope-concat"); + } + else if (x.getKind() == STRING_REV) + { + // rev( rev( x ) ) --> x + Node retNode = x[0]; + return returnRewrite(node, retNode, "str-rev-idem"); + } + return node; +} + Node TheoryStringsRewriter::rewriteStringLeq(Node n) { Assert(n.getKind() == kind::STRING_LEQ); diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 35805e1c2..dd83df24f 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -232,6 +232,13 @@ class TheoryStringsRewriter : public TheoryRewriter * Returns the rewritten form of node. */ static Node rewriteStrConvert(Node node); + /** rewrite string reverse + * + * This is the entry point for post-rewriting terms n of the form + * str.rev( s ) + * Returns the rewritten form of node. + */ + static Node rewriteStrReverse(Node node); /** rewrite string less than or equal * This is the entry point for post-rewriting terms n of the form * str.<=( t, s ) diff --git a/src/util/regexp.h b/src/util/regexp.h index d1cb197fb..5b6dc2b62 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -38,7 +38,7 @@ class CVC4_PUBLIC String { public: /** * The start ASCII code. In our string representation below, we represent - * characters using a vector d_vec of unsigned integers. We refer to this as + * characters using a vector d_str of unsigned integers. We refer to this as * the "internal representation" for the string. * * We make unsigned integer 0 correspond to the 65th character ("A") in the @@ -57,7 +57,7 @@ class CVC4_PUBLIC String { static inline unsigned num_codes() { return 256; } /** * Convert unsigned char to the unsigned used in the internal representation - * in d_vec below. + * in d_str below. */ static unsigned convertCharToUnsignedInt(unsigned char c); /** Convert the internal unsigned to a unsigned char. */ diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 67cc44a42..c85065ef9 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -893,6 +893,7 @@ set(regress_0_tests regress0/strings/ncontrib-rewrites.smt2 regress0/strings/norn-31.smt2 regress0/strings/norn-simp-rew.smt2 + regress0/strings/parser-syms.cvc regress0/strings/re.all.smt2 regress0/strings/regexp-native-simple.cvc regress0/strings/regexp_inclusion.smt2 @@ -907,6 +908,7 @@ set(regress_0_tests regress0/strings/str004.smt2 regress0/strings/str005.smt2 regress0/strings/str_unsound_ext_rew_eq.smt2 + regress0/strings/str-rev-simple.smt2 regress0/strings/strings-charat.cvc regress0/strings/strings-native-simple.cvc regress0/strings/strip-endpoint-itos.smt2 @@ -1669,6 +1671,12 @@ set(regress_1_tests regress1/strings/repl-soundness-sem.smt2 regress1/strings/replaceall-len.smt2 regress1/strings/replaceall-replace.smt2 + regress1/strings/rev-conv1.smt2 + regress1/strings/rev-ex1.smt2 + regress1/strings/rev-ex2.smt2 + regress1/strings/rev-ex3.smt2 + regress1/strings/rev-ex4.smt2 + regress1/strings/rev-ex5.smt2 regress1/strings/rew-020618.smt2 regress1/strings/rew-check1.smt2 regress1/strings/simple-re-consume.smt2 @@ -1678,6 +1686,7 @@ set(regress_1_tests regress1/strings/str-code-unsat-2.smt2 regress1/strings/str-code-unsat-3.smt2 regress1/strings/str-code-unsat.smt2 + regress1/strings/str-rev-simple-s.smt2 regress1/strings/str001.smt2 regress1/strings/str002.smt2 regress1/strings/str006.smt2 diff --git a/test/regress/regress0/strings/parser-syms.cvc b/test/regress/regress0/strings/parser-syms.cvc new file mode 100644 index 000000000..20c37cf52 --- /dev/null +++ b/test/regress/regress0/strings/parser-syms.cvc @@ -0,0 +1,9 @@ +% EXPECT: sat + +x : STRING; +y : STRING; + +ASSERT CONCAT( REVERSE("abc"), "d") = x; +ASSERT CONCAT( TOLOWER("ABC"), TOUPPER("abc")) = y; + +CHECKSAT; diff --git a/test/regress/regress0/strings/str-rev-simple.smt2 b/test/regress/regress0/strings/str-rev-simple.smt2 new file mode 100644 index 000000000..00011681c --- /dev/null +++ b/test/regress/regress0/strings/str-rev-simple.smt2 @@ -0,0 +1,11 @@ +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(assert (= (str.rev "ABC") "CBA")) +(assert (= (str.rev "") "")) +(assert (= (str.rev "A") x)) +(assert (= (str.rev (str.++ x y)) "BCA")) +(assert (= (str.rev "BC") y)) +(check-sat) diff --git a/test/regress/regress1/strings/rev-conv1.smt2 b/test/regress/regress1/strings/rev-conv1.smt2 new file mode 100644 index 000000000..ca20076cb --- /dev/null +++ b/test/regress/regress1/strings/rev-conv1.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: sat +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) +(assert (= (str.rev (str.toupper x)) "CBA")) +(assert (not (= x "ABC"))) +(assert (not (= x "abc"))) +(check-sat) diff --git a/test/regress/regress1/strings/rev-ex1.smt2 b/test/regress/regress1/strings/rev-ex1.smt2 new file mode 100644 index 000000000..36d40fcdd --- /dev/null +++ b/test/regress/regress1/strings/rev-ex1.smt2 @@ -0,0 +1,9 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: sat +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(assert (not (= (str.rev (str.++ x y)) (str.rev (str.++ x z))))) +(check-sat) diff --git a/test/regress/regress1/strings/rev-ex2.smt2 b/test/regress/regress1/strings/rev-ex2.smt2 new file mode 100644 index 000000000..163785bdc --- /dev/null +++ b/test/regress/regress1/strings/rev-ex2.smt2 @@ -0,0 +1,10 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: unsat +(set-logic ALL) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(assert (= x (str.rev (str.++ y "A")))) +(assert (> (str.len x) (+ (str.len y) 1))) +(check-sat) diff --git a/test/regress/regress1/strings/rev-ex3.smt2 b/test/regress/regress1/strings/rev-ex3.smt2 new file mode 100644 index 000000000..e317191ad --- /dev/null +++ b/test/regress/regress1/strings/rev-ex3.smt2 @@ -0,0 +1,12 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: unsat +(set-logic ALL) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(declare-fun w () String) +(assert (= x (str.rev (str.++ y "A")))) +(assert (= x (str.rev (str.++ "B" z)))) +(assert (= z (str.++ w "C"))) +(check-sat) diff --git a/test/regress/regress1/strings/rev-ex4.smt2 b/test/regress/regress1/strings/rev-ex4.smt2 new file mode 100644 index 000000000..a3eed8e35 --- /dev/null +++ b/test/regress/regress1/strings/rev-ex4.smt2 @@ -0,0 +1,13 @@ +; COMMAND-LINE: --strings-exp --strings-fmf +; EXPECT: unsat +(set-logic ALL) +(set-info :status unsat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(declare-fun w () String) +(assert (= x (str.rev y))) +(assert (= y (str.rev z))) +(assert (distinct x z w)) +(assert (< (str.len x) 2)) +(check-sat) diff --git a/test/regress/regress1/strings/rev-ex5.smt2 b/test/regress/regress1/strings/rev-ex5.smt2 new file mode 100644 index 000000000..74a374962 --- /dev/null +++ b/test/regress/regress1/strings/rev-ex5.smt2 @@ -0,0 +1,8 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: sat +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) +(assert (= x (str.rev x))) +(assert (> (str.len x) 1)) +(check-sat) diff --git a/test/regress/regress1/strings/str-rev-simple-s.smt2 b/test/regress/regress1/strings/str-rev-simple-s.smt2 new file mode 100644 index 000000000..5445a19ea --- /dev/null +++ b/test/regress/regress1/strings/str-rev-simple-s.smt2 @@ -0,0 +1,13 @@ +; COMMAND-LINE: --strings-exp +; EXPECT: sat +(set-logic ALL) +(set-info :status sat) +(declare-fun x () String) +(declare-fun y () String) +(declare-fun z () String) +(assert (= (str.rev "ABC") "CBA")) +(assert (= (str.rev "") "")) +(assert (= (str.rev "A") x)) +(assert (= (str.rev (str.++ x y)) "BCA")) +(assert (= (str.rev y) "BC")) +(check-sat) -- 2.30.2