From c116c6c1ec757fe51f2b874e750ad8281baea103 Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Fri, 19 Oct 2018 08:14:22 -0700 Subject: [PATCH] Add helper to detect length one string terms (#2654) This commit introduces a helper function to detect string terms that have exactly/at most length one. This is useful for a lot of rewrites because strings of at most length one are guaranteed to not overlap multiple components in a concatenation, which allows for more aggressive rewriting. This commit has been tested with --sygus-rr-synth-check for >1h on the string term grammar. --- .../strings/theory_strings_rewriter.cpp | 36 +++++++++--------- src/theory/strings/theory_strings_rewriter.h | 13 +++++++ .../theory/theory_strings_rewriter_white.h | 37 +++++++++++++++++++ 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 7c008dc14..1dc894117 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -507,9 +507,7 @@ Node TheoryStringsRewriter::rewriteStrEqualityExt(Node node) } // (= "" (str.replace x "A" "")) ---> (str.prefix x "A") - Node one = nm->mkConst(Rational(1)); - Node ylen = nm->mkNode(STRING_LENGTH, ne[1]); - if (checkEntailArithEq(ylen, one) && ne[2] == empty) + if (checkEntailLengthOne(ne[1]) && ne[2] == empty) { Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]); return returnRewrite(node, ret, "str-emp-repl-emp"); @@ -1658,11 +1656,8 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) // if (str.len y) = 1 and (str.len z) = 1 if (node[1] == zero) { - Node one = nm->mkConst(Rational(1)); - Node n1len = nm->mkNode(kind::STRING_LENGTH, node[0][1]); - Node n2len = nm->mkNode(kind::STRING_LENGTH, node[0][2]); - if (checkEntailArith(one, n1len) && checkEntailArith(one, n2len) - && checkEntailNonEmpty(node[0][1]) && checkEntailNonEmpty(node[0][2])) + if (checkEntailLengthOne(node[0][1], true) + && checkEntailLengthOne(node[0][2], true)) { Node ret = nm->mkNode( kind::STRING_STRREPL, @@ -1739,8 +1734,7 @@ Node TheoryStringsRewriter::rewriteSubstr(Node node) } // (str.substr s x x) ---> "" if (str.len s) <= 1 - Node one = nm->mkConst(CVC4::Rational(1)); - if (node[1] == node[2] && checkEntailArith(one, tot_len)) + if (node[1] == node[2] && checkEntailLengthOne(node[0])) { Node ret = nm->mkConst(::CVC4::String("")); return returnRewrite(node, ret, "ss-len-one-z-z"); @@ -2155,8 +2149,7 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { // (str.contains (str.replace x y x) z) ---> (str.contains x z) // if (str.len z) <= 1 - Node one = nm->mkConst(Rational(1)); - if (checkEntailArith(one, len_n2)) + if (checkEntailLengthOne(node[1])) { Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]); return returnRewrite(node, ret, "ctn-repl-len-one-to-ctn"); @@ -2474,7 +2467,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x) // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "") - if (checkEntailArith(nm->mkConst(Rational(1)), l0)) + if (checkEntailLengthOne(node[0])) { Node empty = nm->mkConst(String("")); Node rn1 = Rewriter::rewrite( @@ -2850,7 +2843,7 @@ Node TheoryStringsRewriter::rewriteReplace( Node node ) { // str.replace( x ++ y ++ x ++ y, "A", z ) --> // str.replace( x ++ y, "A", z ) ++ x ++ y // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y. - if (node[1].isConst() && node[1].getConst().size() == 1) + if (checkEntailLengthOne(node[1])) { Node lastLhs; unsigned lastCheckIndex = 0; @@ -3826,6 +3819,15 @@ bool TheoryStringsRewriter::checkEntailNonEmpty(Node a) return checkEntailArith(len, true); } +bool TheoryStringsRewriter::checkEntailLengthOne(Node s, bool strict) +{ + NodeManager* nm = NodeManager::currentNM(); + Node one = nm->mkConst(Rational(1)); + Node len = nm->mkNode(STRING_LENGTH, s); + len = Rewriter::rewrite(len); + return checkEntailArith(one, len) && (!strict || checkEntailArith(len, true)); +} + bool TheoryStringsRewriter::checkEntailArithEq(Node a, Node b) { if (a == b) @@ -4714,8 +4716,7 @@ Node TheoryStringsRewriter::getStringOrEmpty(Node n) break; } - Node strlen = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n[0])); - if (strlen == nm->mkConst(Rational(1)) && n[2] == empty) + if (checkEntailLengthOne(n[0]) && n[2] == empty) { // (str.replace "A" x "") --> "A" res = n[0]; @@ -4727,8 +4728,7 @@ Node TheoryStringsRewriter::getStringOrEmpty(Node n) } case kind::STRING_SUBSTR: { - Node strlen = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n[0])); - if (strlen == nm->mkConst(Rational(1))) + if (checkEntailLengthOne(n[0])) { // (str.substr "A" x y) --> "A" res = n[0]; diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 3d71423ab..2e356f8f7 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -455,6 +455,19 @@ class TheoryStringsRewriter { * the call checkArithEntail( len( a ), true ). */ static bool checkEntailNonEmpty(Node a); + + /** + * Checks whether string has at most/exactly length one. Length one strings + * can be used for more aggressive rewriting because there is guaranteed that + * it cannot be overlap multiple components in a string concatenation. + * + * @param s The string to check + * @param strict If true, the string must have exactly length one, otherwise + * at most length one + * @return True if the string has at most/exactly length one, false otherwise + */ + static bool checkEntailLengthOne(Node s, bool strict = false); + /** check arithmetic entailment equal * Returns true if it is always the case that a = b. */ diff --git a/test/unit/theory/theory_strings_rewriter_white.h b/test/unit/theory/theory_strings_rewriter_white.h index c92597224..87817872b 100644 --- a/test/unit/theory/theory_strings_rewriter_white.h +++ b/test/unit/theory/theory_strings_rewriter_white.h @@ -80,6 +80,43 @@ class TheoryStringsRewriterWhite : public CxxTest::TestSuite TS_ASSERT_DIFFERS(res_t1, res_t2); } + void testCheckEntailLengthOne() + { + TypeNode intType = d_nm->integerType(); + TypeNode strType = d_nm->stringType(); + + Node a = d_nm->mkConst(::CVC4::String("A")); + Node abcd = d_nm->mkConst(::CVC4::String("ABCD")); + Node aaad = d_nm->mkConst(::CVC4::String("AAAD")); + Node b = d_nm->mkConst(::CVC4::String("B")); + Node x = d_nm->mkVar("x", strType); + Node y = d_nm->mkVar("y", strType); + Node negOne = d_nm->mkConst(Rational(-1)); + Node zero = d_nm->mkConst(Rational(0)); + Node one = d_nm->mkConst(Rational(1)); + Node two = d_nm->mkConst(Rational(2)); + Node three = d_nm->mkConst(Rational(3)); + Node i = d_nm->mkVar("i", intType); + + TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(a)); + TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(a, true)); + + Node substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, one); + TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr, true)); + + substr = d_nm->mkNode(kind::STRING_SUBSTR, + d_nm->mkNode(kind::STRING_CONCAT, a, x), + zero, + one); + TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(TheoryStringsRewriter::checkEntailLengthOne(substr, true)); + + substr = d_nm->mkNode(kind::STRING_SUBSTR, x, zero, two); + TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr)); + TS_ASSERT(!TheoryStringsRewriter::checkEntailLengthOne(substr, true)); + } + void testCheckEntailArith() { TypeNode intType = d_nm->integerType(); -- 2.30.2