From: Andrew Reynolds Date: Wed, 19 May 2021 00:59:09 +0000 (-0500) Subject: Fix handling of non-standard re.range terms (#6563) X-Git-Tag: cvc5-1.0.0~1745 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9ee8b184d9e97ae241ff079803b82859ed014dfa;p=cvc5.git Fix handling of non-standard re.range terms (#6563) Fixes #6561. That benchmark now gives an error: (error "expecting a single constant string term in regexp range"). This PR also makes isConstRegExp do a non-recursive dag traversal. --- diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp index 0902d1f9f..b789db6a4 100644 --- a/src/theory/strings/regexp_entail.cpp +++ b/src/theory/strings/regexp_entail.cpp @@ -116,6 +116,11 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, } else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA) { + if (!isConstRegExp(rc)) + { + // if a non-standard re.range term, abort + return Node::null(); + } std::vector ssVec; ssVec.push_back(t == 0 ? s.back() : s.front()); cvc5::String ss(ssVec); @@ -328,21 +333,48 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, bool RegExpEntail::isConstRegExp(TNode t) { - if (t.getKind() == STRING_TO_REGEXP) - { - return t[0].isConst(); - } - else if (t.isVar()) + std::unordered_set visited; + std::vector visit; + TNode cur; + visit.push_back(t); + do { - return false; - } - for (unsigned i = 0; i < t.getNumChildren(); ++i) - { - if (!isConstRegExp(t[i])) + cur = visit.back(); + visit.pop_back(); + if (visited.find(cur) == visited.end()) { - return false; + visited.insert(cur); + Kind ck = cur.getKind(); + if (ck == STRING_TO_REGEXP) + { + if (!cur[0].isConst()) + { + return false; + } + } + else if (ck == REGEXP_RANGE) + { + for (const Node& cn : cur) + { + if (!cn.isConst() || cn.getConst().size() != 1) + { + return false; + } + } + } + else if (cur.isVar()) + { + return false; + } + else + { + for (const Node& cn : cur) + { + visit.push_back(cn); + } + } } - } + } while (!visit.empty()); return true; } diff --git a/src/theory/strings/regexp_entail.h b/src/theory/strings/regexp_entail.h index 44f0815b7..d8bcda4d9 100644 --- a/src/theory/strings/regexp_entail.h +++ b/src/theory/strings/regexp_entail.h @@ -92,7 +92,12 @@ class RegExpEntail static Node simpleRegexpConsume(std::vector& mchildren, std::vector& children, int dir = -1); - /** Is t a constant regular expression? */ + /** + * Is t a constant regular expression? A constant regular expression + * is one with no non-constant (RE or string subterms) and does not contain + * any non-standard RE terms like re.range applied to non-character + * arguments. + */ static bool isConstRegExp(TNode t); /** * Does the substring of s starting at index_start occur in constant regular diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index a0e627423..ff718c124 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1312,12 +1312,16 @@ Node SequencesRewriter::rewriteMembership(TNode node) else if (r.getKind() == REGEXP_RANGE) { // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j - Node xcode = nm->mkNode(STRING_TO_CODE, x); - Node retNode = - nm->mkNode(AND, - nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), - nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); - return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE); + // we do not do this if the arguments are not constant + if (RegExpEntail::isConstRegExp(r)) + { + Node xcode = nm->mkNode(STRING_TO_CODE, x); + Node retNode = + nm->mkNode(AND, + nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode), + nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1]))); + return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE); + } } else if (r.getKind() == REGEXP_COMPLEMENT) {