From: Andrew Reynolds Date: Mon, 30 Mar 2020 23:15:07 +0000 (-0500) Subject: Rewrites for all remaining return statements in strings rewriter (#4178) X-Git-Tag: cvc5-1.0.0~3423 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc4055d4543f3b697ade38b810f7ac3cf02dc3c8;p=cvc5.git Rewrites for all remaining return statements in strings rewriter (#4178) Towards proofs for string rewrites. All return statements all now associated with an enum value. An indentation in a block of code changed in rewriteMembership. --- diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 5bc62b6e4..f1a818bf3 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -170,6 +170,35 @@ const char* toString(Rewrite r) case Rewrite::SUF_PREFIX_EQ: return "SUF_PREFIX_EQ"; case Rewrite::SUF_PREFIX_TO_EQS: return "SUF_PREFIX_TO_EQS"; case Rewrite::TO_CODE_EVAL: return "TO_CODE_EVAL"; + case Rewrite::EQ_REFL: return "EQ_REFL"; + case Rewrite::EQ_CONST_FALSE: return "EQ_CONST_FALSE"; + case Rewrite::EQ_SYM: return "EQ_SYM"; + case Rewrite::CONCAT_NORM: return "CONCAT_NORM"; + case Rewrite::IS_DIGIT_ELIM: return "IS_DIGIT_ELIM"; + case Rewrite::RE_CONCAT_EMPTY: return "RE_CONCAT_EMPTY"; + case Rewrite::RE_CONSUME_CCONF: return "RE_CONSUME_CCONF"; + case Rewrite::RE_CONSUME_S: return "RE_CONSUME_S"; + case Rewrite::RE_CONSUME_S_CCONF: return "RE_CONSUME_S_CCONF"; + case Rewrite::RE_CONSUME_S_FULL: return "RE_CONSUME_S_FULL"; + case Rewrite::RE_IN_EMPTY: return "RE_IN_EMPTY"; + case Rewrite::RE_IN_SIGMA: return "RE_IN_SIGMA"; + case Rewrite::RE_IN_EVAL: return "RE_IN_EVAL"; + case Rewrite::RE_IN_COMPLEMENT: return "RE_IN_COMPLEMENT"; + case Rewrite::RE_IN_RANGE: return "RE_IN_RANGE"; + case Rewrite::RE_IN_CSTRING: return "RE_IN_CSTRING"; + case Rewrite::RE_IN_ANDOR: return "RE_IN_ANDOR"; + case Rewrite::RE_REPEAT_ELIM: return "RE_REPEAT_ELIM"; + case Rewrite::SUF_PREFIX_ELIM: return "SUF_PREFIX_ELIM"; + case Rewrite::STR_LT_ELIM: return "STR_LT_ELIM"; + case Rewrite::RE_RANGE_SINGLE: return "RE_RANGE_SINGLE"; + case Rewrite::RE_OPT_ELIM: return "RE_OPT_ELIM"; + case Rewrite::RE_PLUS_ELIM: return "RE_PLUS_ELIM"; + case Rewrite::RE_DIFF_ELIM: return "RE_DIFF_ELIM"; + case Rewrite::LEN_EVAL: return "LEN_EVAL"; + case Rewrite::LEN_CONCAT: return "LEN_CONCAT"; + case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV"; + case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV"; + case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; default: return "?"; } } diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 91d52197c..cfa8c8448 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -172,7 +172,36 @@ enum class Rewrite : uint32_t SUF_PREFIX_EMPTY_CONST, SUF_PREFIX_EQ, SUF_PREFIX_TO_EQS, - TO_CODE_EVAL + TO_CODE_EVAL, + EQ_REFL, + EQ_CONST_FALSE, + EQ_SYM, + CONCAT_NORM, + IS_DIGIT_ELIM, + RE_CONCAT_EMPTY, + RE_CONSUME_CCONF, + RE_CONSUME_S, + RE_CONSUME_S_CCONF, + RE_CONSUME_S_FULL, + RE_IN_EMPTY, + RE_IN_SIGMA, + RE_IN_EVAL, + RE_IN_COMPLEMENT, + RE_IN_RANGE, + RE_IN_CSTRING, + RE_IN_ANDOR, + RE_REPEAT_ELIM, + SUF_PREFIX_ELIM, + STR_LT_ELIM, + RE_RANGE_SINGLE, + RE_OPT_ELIM, + RE_PLUS_ELIM, + RE_DIFF_ELIM, + LEN_EVAL, + LEN_CONCAT, + LEN_REPL_INV, + LEN_CONV_INV, + CHARAT_ELIM }; /** diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index a86c9599f..d7ee459c7 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -310,11 +310,13 @@ Node SequencesRewriter::rewriteEquality(Node node) Assert(node.getKind() == kind::EQUAL); if (node[0] == node[1]) { - return NodeManager::currentNM()->mkConst(true); + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, Rewrite::EQ_REFL); } else if (node[0].isConst() && node[1].isConst()) { - return NodeManager::currentNM()->mkConst(false); + Node ret = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE); } // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false ) @@ -388,7 +390,8 @@ Node SequencesRewriter::rewriteEquality(Node node) // standard ordering if (node[0] > node[1]) { - return NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]); + Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]); + return returnRewrite(node, ret, Rewrite::EQ_SYM); } return node; } @@ -790,6 +793,59 @@ Node SequencesRewriter::rewriteArithEqualityExt(Node node) return node; } +Node SequencesRewriter::rewriteLength(Node node) +{ + Assert(node.getKind() == STRING_LENGTH); + NodeManager* nm = NodeManager::currentNM(); + Kind nk0 = node[0].getKind(); + if (node[0].isConst()) + { + Node retNode = nm->mkConst(Rational(Word::getLength(node[0]))); + return returnRewrite(node, retNode, Rewrite::LEN_EVAL); + } + else if (nk0 == kind::STRING_CONCAT) + { + Node tmpNode = node[0]; + if (tmpNode.getKind() == kind::STRING_CONCAT) + { + std::vector node_vec; + for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i) + { + if (tmpNode[i].isConst()) + { + node_vec.push_back( + nm->mkConst(Rational(Word::getLength(tmpNode[i])))); + } + else + { + node_vec.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_LENGTH, tmpNode[i])); + } + } + Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); + return returnRewrite(node, retNode, Rewrite::LEN_CONCAT); + } + } + else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) + { + Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); + Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); + if (len1 == len2) + { + // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x ) + Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV); + } + } + else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV) + { + // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. + Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]); + return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV); + } + return node; +} + // TODO (#1180) add rewrite // str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) ---> // str.substr( x, n1, n2+n3 ) @@ -799,7 +855,6 @@ Node SequencesRewriter::rewriteConcat(Node node) Trace("strings-rewrite-debug") << "Strings::rewriteConcat start " << node << std::endl; NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; std::vector node_vec; Node preNode = Node::null(); for (Node tmpNode : node) @@ -890,10 +945,14 @@ Node SequencesRewriter::rewriteConcat(Node node) std::sort(node_vec.begin() + lastIdx, node_vec.end()); TypeNode tn = node.getType(); - retNode = utils::mkConcat(node_vec, tn); + Node retNode = utils::mkConcat(node_vec, tn); Trace("strings-rewrite-debug") << "Strings::rewriteConcat end " << retNode << std::endl; - return retNode; + if (retNode != node) + { + return returnRewrite(node, retNode, Rewrite::CONCAT_NORM); + } + return node; } Node SequencesRewriter::rewriteConcatRegExp(TNode node) @@ -940,7 +999,8 @@ Node SequencesRewriter::rewriteConcatRegExp(TNode node) { // re.++( ..., empty, ... ) ---> empty std::vector nvec; - return nm->mkNode(REGEXP_EMPTY, nvec); + Node ret = nm->mkNode(REGEXP_EMPTY, nvec); + return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY); } else { @@ -1236,6 +1296,59 @@ Node SequencesRewriter::rewriteLoopRegExp(TNode node) return node; } +Node SequencesRewriter::rewriteRepeatRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_REPEAT); + NodeManager* nm = NodeManager::currentNM(); + // ((_ re.^ n) R) --> ((_ re.loop n n) R) + unsigned r = utils::getRepeatAmount(node); + Node lop = nm->mkConst(RegExpLoop(r, r)); + Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]); + return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM); +} + +Node SequencesRewriter::rewriteOptionRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_OPT); + NodeManager* nm = NodeManager::currentNM(); + Node retNode = + nm->mkNode(REGEXP_UNION, + nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))), + node[0]); + return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM); +} + +Node SequencesRewriter::rewritePlusRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_PLUS); + NodeManager* nm = NodeManager::currentNM(); + Node retNode = + nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0])); + return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM); +} + +Node SequencesRewriter::rewriteDifferenceRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_DIFF); + NodeManager* nm = NodeManager::currentNM(); + Node retNode = + nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1])); + return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM); +} + +Node SequencesRewriter::rewriteRangeRegExp(TNode node) +{ + Assert(node.getKind() == REGEXP_RANGE); + if (node[0] == node[1]) + { + NodeManager* nm = NodeManager::currentNM(); + Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]); + // re.range( "A", "A" ) ---> str.to_re( "A" ) + return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE); + } + return node; +} + bool SequencesRewriter::isConstRegExp(TNode t) { if (t.getKind() == kind::STRING_TO_REGEXP) @@ -1503,7 +1616,6 @@ bool SequencesRewriter::testConstStringInRegExp(CVC4::String& s, Node SequencesRewriter::rewriteMembership(TNode node) { NodeManager* nm = NodeManager::currentNM(); - Node retNode = node; Node x = node[0]; Node r = node[1]; @@ -1512,19 +1624,22 @@ Node SequencesRewriter::rewriteMembership(TNode node) if(r.getKind() == kind::REGEXP_EMPTY) { - retNode = NodeManager::currentNM()->mkConst( false ); + Node retNode = NodeManager::currentNM()->mkConst(false); + return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY); } else if (x.isConst() && isConstRegExp(r)) { // test whether x in node[1] CVC4::String s = x.getConst(); - retNode = + Node retNode = NodeManager::currentNM()->mkConst(testConstStringInRegExp(s, 0, r)); + return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL); } else if (r.getKind() == kind::REGEXP_SIGMA) { Node one = nm->mkConst(Rational(1)); - retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x)); + Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x)); + return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA); } else if (r.getKind() == kind::REGEXP_STAR) { @@ -1533,7 +1648,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) String s = x.getConst(); if (s.size() == 0) { - retNode = nm->mkConst(true); + Node retNode = nm->mkConst(true); // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR); } @@ -1541,7 +1656,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) { if (r[0].getKind() == STRING_TO_REGEXP) { - retNode = r[0][0].eqNode(x); + Node retNode = r[0][0].eqNode(x); // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR); } @@ -1570,7 +1685,7 @@ Node SequencesRewriter::rewriteMembership(TNode node) } if (r[0].getKind() == kind::REGEXP_SIGMA) { - retNode = NodeManager::currentNM()->mkConst(true); + Node retNode = NodeManager::currentNM()->mkConst(true); return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR); } } @@ -1620,14 +1735,14 @@ Node SequencesRewriter::rewriteMembership(TNode node) // x in re.++(_*, _, _) ---> str.len(x) >= 2 Node num = nm->mkConst(Rational(allSigmaMinSize)); Node lenx = nm->mkNode(STRING_LENGTH, x); - retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); + Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num); return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR); } else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0 && constIdx != nchildren - 1) { // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc") - retNode = nm->mkNode(STRING_STRCTN, x, constStr); + Node retNode = nm->mkNode(STRING_STRCTN, x, constStr); return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS); } } @@ -1641,132 +1756,125 @@ Node SequencesRewriter::rewriteMembership(TNode node) mvec.push_back( NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i])); } - retNode = NodeManager::currentNM()->mkNode( + Node retNode = NodeManager::currentNM()->mkNode( r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec); + return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR); } else if (r.getKind() == kind::STRING_TO_REGEXP) { - retNode = x.eqNode(r[0]); + Node retNode = x.eqNode(r[0]); + return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING); } else if (r.getKind() == REGEXP_RANGE) { // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j Node xcode = nm->mkNode(STRING_TO_CODE, x); - retNode = + Node retNode = nm->mkNode(AND, nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); + return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE); } else if (r.getKind() == REGEXP_COMPLEMENT) { - retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); - } - else if (x != node[0] || r != node[1]) - { - retNode = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r); + Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate(); + return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT); } // do simple consumes - if (retNode == node) + Node retNode = node; + if (r.getKind() == kind::REGEXP_STAR) { - if (r.getKind() == kind::REGEXP_STAR) + for (unsigned dir = 0; dir <= 1; dir++) { - for (unsigned dir = 0; dir <= 1; dir++) + std::vector mchildren; + utils::getConcat(x, mchildren); + bool success = true; + while (success) { - std::vector mchildren; - utils::getConcat(x, mchildren); - bool success = true; - while (success) + success = false; + std::vector children; + utils::getConcat(r[0], children); + Node scn = simpleRegexpConsume(mchildren, children, dir); + if (!scn.isNull()) { - success = false; - std::vector children; - utils::getConcat(r[0], children); - Node scn = simpleRegexpConsume(mchildren, children, dir); - if (!scn.isNull()) + Trace("regexp-ext-rewrite") + << "Regexp star : const conflict : " << node << std::endl; + return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF); + } + else if (children.empty()) + { + // fully consumed one copy of the STAR + if (mchildren.empty()) { Trace("regexp-ext-rewrite") - << "Regexp star : const conflict : " << node << std::endl; - return scn; + << "Regexp star : full consume : " << node << std::endl; + Node ret = NodeManager::currentNM()->mkConst(true); + return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL); } - else if (children.empty()) + else { - // fully consumed one copy of the STAR - if (mchildren.empty()) - { - Trace("regexp-ext-rewrite") - << "Regexp star : full consume : " << node << std::endl; - return NodeManager::currentNM()->mkConst(true); - } - else - { - retNode = nm->mkNode(STRING_IN_REGEXP, - utils::mkConcat(mchildren, stype), - r); - success = true; - } + retNode = nm->mkNode( + STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r); + success = true; } } - if (retNode != node) - { - Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node - << " -> " << retNode << std::endl; - break; - } + } + if (retNode != node) + { + Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node + << " -> " << retNode << std::endl; + return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S); } } - else - { - std::vector children; - utils::getConcat(r, children); - std::vector mchildren; - utils::getConcat(x, mchildren); - unsigned prevSize = children.size() + mchildren.size(); - Node scn = simpleRegexpConsume(mchildren, children); - if (!scn.isNull()) + } + else + { + std::vector children; + utils::getConcat(r, children); + std::vector mchildren; + utils::getConcat(x, mchildren); + unsigned prevSize = children.size() + mchildren.size(); + Node scn = simpleRegexpConsume(mchildren, children); + if (!scn.isNull()) + { + Trace("regexp-ext-rewrite") + << "Regexp : const conflict : " << node << std::endl; + return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF); + } + else if ((children.size() + mchildren.size()) != prevSize) + { + // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm), + // above, we strip components to construct an equivalent membership: + // (str.++ xi .. xj) in (re.++ rk ... rl). + Node xn = utils::mkConcat(mchildren, stype); + Node emptyStr = nm->mkConst(String("")); + if (children.empty()) { - Trace("regexp-ext-rewrite") - << "Regexp : const conflict : " << node << std::endl; - return scn; + // If we stripped all components on the right, then the left is + // equal to the empty string. + // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "") + retNode = xn.eqNode(emptyStr); } else { - if ((children.size() + mchildren.size()) != prevSize) - { - // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm), - // above, we strip components to construct an equivalent membership: - // (str.++ xi .. xj) in (re.++ rk ... rl). - Node xn = utils::mkConcat(mchildren, stype); - Node emptyStr = nm->mkConst(String("")); - if (children.empty()) - { - // If we stripped all components on the right, then the left is - // equal to the empty string. - // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "") - retNode = xn.eqNode(emptyStr); - } - else - { - // otherwise, construct the updated regular expression - retNode = nm->mkNode( - STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype)); - } - Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " - << retNode << std::endl; - return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME); - } + // otherwise, construct the updated regular expression + retNode = + nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype)); } + Trace("regexp-ext-rewrite") + << "Regexp : rewrite : " << node << " -> " << retNode << std::endl; + return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME); } } - return retNode; + return node; } RewriteResponse SequencesRewriter::postRewrite(TNode node) { Trace("strings-postrewrite") << "Strings::postRewrite start " << node << std::endl; - NodeManager* nm = NodeManager::currentNM(); Node retNode = node; - Node orig = retNode; Kind nk = node.getKind(); if (nk == kind::STRING_CONCAT) { @@ -1778,59 +1886,11 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == kind::STRING_LENGTH) { - Kind nk0 = node[0].getKind(); - if (node[0].isConst()) - { - retNode = nm->mkConst(Rational(Word::getLength(node[0]))); - } - else if (nk0 == kind::STRING_CONCAT) - { - Node tmpNode = node[0]; - if (tmpNode.isConst()) - { - retNode = nm->mkConst(Rational(Word::getLength(tmpNode))); - } - else if (tmpNode.getKind() == kind::STRING_CONCAT) - { - std::vector node_vec; - for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i) - { - if (tmpNode[i].isConst()) - { - node_vec.push_back( - nm->mkConst(Rational(Word::getLength(tmpNode[i])))); - } - else - { - node_vec.push_back(NodeManager::currentNM()->mkNode( - kind::STRING_LENGTH, tmpNode[i])); - } - } - retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); - } - } - else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL) - { - Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1])); - Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2])); - if (len1 == len2) - { - // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x ) - retNode = nm->mkNode(STRING_LENGTH, node[0][0]); - } - } - else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER - || nk0 == STRING_REV) - { - // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev. - retNode = nm->mkNode(STRING_LENGTH, node[0][0]); - } + retNode = rewriteLength(node); } else if (nk == kind::STRING_CHARAT) { - Node one = NodeManager::currentNM()->mkConst(Rational(1)); - retNode = NodeManager::currentNM()->mkNode( - kind::STRING_SUBSTR, node[0], node[1], one); + retNode = rewriteCharAt(node); } else if (nk == kind::STRING_SUBSTR) { @@ -1842,10 +1902,7 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == kind::STRING_LT) { - // eliminate s < t ---> s != t AND s <= t - retNode = nm->mkNode(AND, - node[0].eqNode(node[1]).negate(), - nm->mkNode(STRING_LEQ, node[0], node[1])); + retNode = StringsRewriter::rewriteStringLt(node); } else if (nk == kind::STRING_LEQ) { @@ -1877,11 +1934,7 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == STRING_IS_DIGIT) { - // eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 - Node t = nm->mkNode(STRING_TO_CODE, node[0]); - retNode = nm->mkNode(AND, - nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), - nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); + retNode = StringsRewriter::rewriteStringIsDigit(node); } else if (nk == kind::STRING_ITOS) { @@ -1913,8 +1966,7 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == REGEXP_DIFF) { - retNode = nm->mkNode( - REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1])); + retNode = rewriteDifferenceRegExp(node); } else if (nk == REGEXP_STAR) { @@ -1922,21 +1974,15 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == REGEXP_PLUS) { - retNode = - nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0])); + retNode = rewritePlusRegExp(node); } else if (nk == REGEXP_OPT) { - retNode = nm->mkNode(REGEXP_UNION, - nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))), - node[0]); + retNode = rewriteOptionRegExp(node); } else if (nk == REGEXP_RANGE) { - if (node[0] == node[1]) - { - retNode = nm->mkNode(STRING_TO_REGEXP, node[0]); - } + retNode = rewriteRangeRegExp(node); } else if (nk == REGEXP_LOOP) { @@ -1944,21 +1990,18 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) } else if (nk == REGEXP_REPEAT) { - // ((_ re.^ n) R) --> ((_ re.loop n n) R) - unsigned r = utils::getRepeatAmount(node); - Node lop = nm->mkConst(RegExpLoop(r, r)); - retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]); + retNode = rewriteRepeatRegExp(node); } Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl; - if (orig != retNode) + if (node != retNode) { Trace("strings-rewrite-debug") - << "Strings: post-rewrite " << orig << " to " << retNode << std::endl; + << "Strings: post-rewrite " << node << " to " << retNode << std::endl; + return RewriteResponse(REWRITE_AGAIN_FULL, retNode); } - return RewriteResponse(orig == retNode ? REWRITE_DONE : REWRITE_AGAIN_FULL, - retNode); + return RewriteResponse(REWRITE_DONE, retNode); } bool SequencesRewriter::hasEpsilonNode(TNode node) @@ -1979,6 +2022,15 @@ RewriteResponse SequencesRewriter::preRewrite(TNode node) return RewriteResponse(REWRITE_DONE, node); } +Node SequencesRewriter::rewriteCharAt(Node node) +{ + Assert(node.getKind() == STRING_CHARAT); + NodeManager* nm = NodeManager::currentNM(); + Node one = nm->mkConst(Rational(1)); + Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one); + return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM); +} + Node SequencesRewriter::rewriteSubstr(Node node) { Assert(node.getKind() == kind::STRING_SUBSTR); @@ -3511,7 +3563,7 @@ Node SequencesRewriter::rewritePrefixSuffix(Node n) Node retNode = n[0].eqNode( NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens)); - return retNode; + return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM); } Node SequencesRewriter::splitConstant(Node a, Node b, int& index, bool isRev) diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index a98ad97ac..afdd2c0e1 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -121,6 +121,37 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ static Node rewriteLoopRegExp(TNode node); + /** rewrite regular expression repeat + * + * This is the entry point for post-rewriting applications of re.repeat. + * Returns the rewritten form of node. + */ + static Node rewriteRepeatRegExp(TNode node); + /** rewrite regular expression option + * + * This is the entry point for post-rewriting applications of re.opt. + * Returns the rewritten form of node. + */ + static Node rewriteOptionRegExp(TNode node); + /** rewrite regular expression plus + * + * This is the entry point for post-rewriting applications of re.+. + * Returns the rewritten form of node. + */ + static Node rewritePlusRegExp(TNode node); + /** rewrite regular expression difference + * + * This is the entry point for post-rewriting applications of re.diff. + * Returns the rewritten form of node. + */ + static Node rewriteDifferenceRegExp(TNode node); + /** rewrite regular expression range + * + * This is the entry point for post-rewriting applications of re.range. + * Returns the rewritten form of node. + */ + static Node rewriteRangeRegExp(TNode node); + /** rewrite regular expression membership * * This is the entry point for post-rewriting applications of str.in.re @@ -182,12 +213,24 @@ class SequencesRewriter : public TheoryRewriter * necessarily one of { s = t, t = s, true, false }. */ static Node rewriteEqualityExt(Node node); + /** rewrite string length + * This is the entry point for post-rewriting terms node of the form + * str.len( t ) + * Returns the rewritten form of node. + */ + static Node rewriteLength(Node node); /** rewrite concat * This is the entry point for post-rewriting terms node of the form * str.++( t1, .., tn ) * Returns the rewritten form of node. */ static Node rewriteConcat(Node node); + /** rewrite character at + * This is the entry point for post-rewriting terms node of the form + * str.charat( s, i1 ) + * Returns the rewritten form of node. + */ + static Node rewriteCharAt(Node node); /** rewrite substr * This is the entry point for post-rewriting terms node of the form * str.substr( s, i1, i2 ) diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp index 275c2e25e..28ed14095 100644 --- a/src/theory/strings/strings_rewriter.cpp +++ b/src/theory/strings/strings_rewriter.cpp @@ -143,6 +143,16 @@ Node StringsRewriter::rewriteStrConvert(Node node) return node; } +Node StringsRewriter::rewriteStringLt(Node n) +{ + Assert(n.getKind() == kind::STRING_LT); + NodeManager* nm = NodeManager::currentNM(); + // eliminate s < t ---> s != t AND s <= t + Node retNode = nm->mkNode( + AND, n[0].eqNode(n[1]).negate(), nm->mkNode(STRING_LEQ, n[0], n[1])); + return returnRewrite(n, retNode, Rewrite::STR_LT_ELIM); +} + Node StringsRewriter::rewriteStringLeq(Node n) { Assert(n.getKind() == kind::STRING_LEQ); @@ -241,6 +251,18 @@ Node StringsRewriter::rewriteStringToCode(Node n) return n; } +Node StringsRewriter::rewriteStringIsDigit(Node n) +{ + Assert(n.getKind() == kind::STRING_IS_DIGIT); + NodeManager* nm = NodeManager::currentNM(); + // eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 + Node t = nm->mkNode(STRING_TO_CODE, n[0]); + Node retNode = nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), + nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); + return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); +} + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h index e6a6b0693..0c5b0b2f8 100644 --- a/src/theory/strings/strings_rewriter.h +++ b/src/theory/strings/strings_rewriter.h @@ -56,6 +56,14 @@ class StringsRewriter : public SequencesRewriter */ static Node rewriteStrConvert(Node n); + /** rewrite string less than + * + * This is the entry point for post-rewriting terms n of the form + * str.<( t, s ) + * Returns the rewritten form of n. + */ + static Node rewriteStringLt(Node n); + /** rewrite string less than or equal * * This is the entry point for post-rewriting terms n of the form @@ -79,6 +87,14 @@ class StringsRewriter : public SequencesRewriter * Returns the rewritten form of n. */ static Node rewriteStringToCode(Node n); + + /** rewrite is digit + * + * This is the entry point for post-rewriting terms n of the form + * str.is_digit( t ) + * Returns the rewritten form of n. + */ + static Node rewriteStringIsDigit(Node n); }; } // namespace strings diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index ca6547134..7051b251f 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -18,21 +18,25 @@ namespace CVC4 { -RegExpRepeat::RegExpRepeat(uint32_t repeatAmount) : d_repeatAmount(repeatAmount) {} +RegExpRepeat::RegExpRepeat(uint32_t repeatAmount) : d_repeatAmount(repeatAmount) +{ +} bool RegExpRepeat::operator==(const RegExpRepeat& r) const { return d_repeatAmount == r.d_repeatAmount; } -RegExpLoop::RegExpLoop(uint32_t l, uint32_t h) : d_loopMinOcc(l), d_loopMaxOcc(h) {} +RegExpLoop::RegExpLoop(uint32_t l, uint32_t h) + : d_loopMinOcc(l), d_loopMaxOcc(h) +{ +} bool RegExpLoop::operator==(const RegExpLoop& r) const { - return d_loopMinOcc == r.d_loopMinOcc - && d_loopMaxOcc == r.d_loopMaxOcc; + return d_loopMinOcc == r.d_loopMinOcc && d_loopMaxOcc == r.d_loopMaxOcc; } - + size_t RegExpRepeatHashFunction::operator()(const RegExpRepeat& r) const { return r.d_repeatAmount; @@ -54,4 +58,3 @@ std::ostream& operator<<(std::ostream& os, const RegExpLoop& r) } } // namespace CVC4 -