From 6a656809c353776c9de9580b19a6de79ef5a76d4 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Mon, 26 Mar 2018 19:27:31 -0700 Subject: [PATCH] Better normalization of string concatenation (#1719) --- .../strings/theory_strings_rewriter.cpp | 78 +++++++++++++++++++ src/theory/strings/theory_strings_rewriter.h | 17 ++++ .../theory/theory_strings_rewriter_white.h | 70 +++++++++++++++++ 3 files changed, 165 insertions(+) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 0777d696f..1c885434c 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -17,6 +17,7 @@ #include "theory/strings/theory_strings_rewriter.h" #include +#include #include "options/strings_options.h" #include "smt/logic_exception.h" @@ -348,6 +349,32 @@ Node TheoryStringsRewriter::rewriteConcat(Node node) if( !preNode.isNull() && ( preNode.getKind()!=kind::CONST_STRING || !preNode.getConst().isEmptyString() ) ){ node_vec.push_back( preNode ); } + + // Sort adjacent operands in str.++ that all result in the same string or the + // empty string. + // + // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) --> + // (str.++ ... [sort those 3 arguments] ... ) + size_t lastIdx = 0; + Node lastX; + for (size_t i = 0; i < node_vec.size(); i++) + { + Node s = getStringOrEmpty(node_vec[i]); + bool nextX = false; + if (s != lastX) + { + nextX = true; + } + + if (nextX) + { + std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i); + lastX = s; + lastIdx = i; + } + } + std::sort(node_vec.begin() + lastIdx, node_vec.end()); + retNode = mkConcat( kind::STRING_CONCAT, node_vec ); Trace("strings-prerewrite") << "Strings::rewriteConcat end " << retNode << std::endl; @@ -3280,6 +3307,57 @@ Node TheoryStringsRewriter::mkSubstrChain(Node base, return base; } +Node TheoryStringsRewriter::getStringOrEmpty(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Node res; + while (res.isNull()) + { + switch (n.getKind()) + { + case kind::STRING_STRREPL: + { + Node empty = nm->mkConst(::CVC4::String("")); + if (n[0] == empty) + { + // (str.replace "" x y) --> y + n = n[2]; + break; + } + + Node strlen = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n[0])); + if (strlen == nm->mkConst(Rational(1)) && n[2] == empty) + { + // (str.replace "A" x "") --> "A" + res = n[0]; + break; + } + + res = n; + break; + } + case kind::STRING_SUBSTR: + { + Node strlen = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n[0])); + if (strlen == nm->mkConst(Rational(1))) + { + // (str.substr "A" x y) --> "A" + res = n[0]; + break; + } + res = n; + break; + } + default: + { + res = n; + break; + } + } + } + return res; +} + Node TheoryStringsRewriter::returnRewrite(Node node, Node ret, const char* c) { Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << c diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 0e4cb594a..9671fa815 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -437,6 +437,23 @@ private: static Node mkSubstrChain(Node base, const std::vector& ss, const std::vector& ls); + + /** + * Overapproximates the possible values of node n. This overapproximation + * assumes that n can return a value x or the empty string and tries to find + * the simplest x such that this holds. In the general case, x is the same as + * the input n. This overapproximation can be used to sort terms with the + * same possible values in string concatenation for example. + * + * Example: + * + * getStringOrEmpty( (str.replace "" x y) ) --> y because (str.replace "" x y) + * either returns y or "" + * + * getStringOrEmpty( (str.substr "ABC" x y) ) --> (str.substr "ABC" x y) + * because the function could not compute a simpler + */ + static Node getStringOrEmpty(Node n); };/* class TheoryStringsRewriter */ }/* CVC4::theory::strings namespace */ diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h index a17299f80..acee9754b 100644 --- a/test/unit/theory/theory_strings_rewriter_white.h +++ b/test/unit/theory/theory_strings_rewriter_white.h @@ -175,4 +175,74 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite res = TheoryStringsRewriter::rewriteSubstr(n); TS_ASSERT_EQUALS(res, n); } + + void testRewriteConcat() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node empty = d_nm->mkConst(::CVC4::String("")); + Node a = d_nm->mkConst(::CVC4::String("A")); + Node zero = d_nm->mkConst(Rational(0)); + Node three = d_nm->mkConst(Rational(3)); + + Node i = d_nm->mkVar("i", intType); + Node s = d_nm->mkVar("s", strType); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + + // Same normal form for: + // + // (str.++ (str.replace "A" x "") "A") + // + // (str.++ "A" (str.replace "A" x "")) + Node repl_a_x_e = d_nm->mkNode(kind::STRING_STRREPL, a, x, empty); + Node repl_a = d_nm->mkNode(kind::STRING_CONCAT, repl_a_x_e, a); + Node a_repl = d_nm->mkNode(kind::STRING_CONCAT, a, repl_a_x_e); + Node res_repl_a = Rewriter::rewrite(repl_a); + Node res_a_repl = Rewriter::rewrite(a_repl); + TS_ASSERT_EQUALS(res_repl_a, res_a_repl); + + // Same normal form for: + // + // (str.++ y (str.replace "" x (str.substr y 0 3)) (str.substr y 0 3) "A" (str.substr y 0 3)) + // + // (str.++ y (str.substr y 0 3) (str.replace "" x (str.substr y 0 3)) "A" (str.substr y 0 3)) + Node z = d_nm->mkNode(kind::STRING_SUBSTR, y, zero, three); + Node repl_e_x_z = d_nm->mkNode(kind::STRING_STRREPL, empty, x, z); + repl_a = d_nm->mkNode(kind::STRING_CONCAT, y, repl_e_x_z, z, a, z); + a_repl = d_nm->mkNode(kind::STRING_CONCAT, y, z, repl_e_x_z, a, z); + res_repl_a = Rewriter::rewrite(repl_a); + res_a_repl = Rewriter::rewrite(a_repl); + TS_ASSERT_EQUALS(res_repl_a, res_a_repl); + + // Same normal form for: + // + // (str.++ "A" (str.replace "A" x "") (str.substr "A" 0 i)) + // + // (str.++ (str.substr "A" 0 i) (str.replace "A" x "") "A") + Node substr_a = d_nm->mkNode(kind::STRING_SUBSTR, a, zero, i); + Node a_substr_repl = + d_nm->mkNode(kind::STRING_CONCAT, a, substr_a, repl_a_x_e); + Node substr_repl_a = + d_nm->mkNode(kind::STRING_CONCAT, substr_a, repl_a_x_e, a); + Node res_a_substr_repl = Rewriter::rewrite(a_substr_repl); + Node res_substr_repl_a = Rewriter::rewrite(substr_repl_a); + TS_ASSERT_EQUALS(res_a_substr_repl, res_substr_repl_a); + + // Same normal form for: + // + // (str.++ (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i) (str.at "A" i)) + // + // (str.++ (str.at "A" i) (str.replace "" x (str.substr "A" 0 i)) (str.substr "A" 0 i)) + Node charat_a = d_nm->mkNode(kind::STRING_CHARAT, a, i); + Node repl_e_x_s = d_nm->mkNode(kind::STRING_STRREPL, empty, x, substr_a); + Node repl_substr_a = + d_nm->mkNode(kind::STRING_CONCAT, repl_e_x_s, substr_a, charat_a); + Node a_repl_substr = + d_nm->mkNode(kind::STRING_CONCAT, charat_a, repl_e_x_s, substr_a); + Node res_repl_substr_a = Rewriter::rewrite(repl_substr_a); + Node res_a_repl_substr = Rewriter::rewrite(a_repl_substr); + TS_ASSERT_EQUALS(res_repl_substr_a, res_a_repl_substr); + } }; -- 2.30.2