From 46591b1c92fc9ecd4a0997242030a1a48166301b Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Sun, 28 Jun 2020 08:34:44 -0500 Subject: [PATCH] Fix non-termination issues in simpleRegExpConsume (#4667) There were two issues related to RE in bodies of re.* that accepted the empty string that led to non-termination in the rewriter for regular expressions. This also improves trace messages for simpleRegExpConsume. Fixes #4662. --- src/theory/strings/regexp_entail.cpp | 64 ++++++++++++++----- src/theory/strings/sequences_rewriter.cpp | 6 +- test/regress/CMakeLists.txt | 1 + .../strings/issue4662-consume-nterm.smt2 | 6 ++ 4 files changed, 61 insertions(+), 16 deletions(-) create mode 100644 test/regress/regress0/strings/issue4662-consume-nterm.smt2 diff --git a/src/theory/strings/regexp_entail.cpp b/src/theory/strings/regexp_entail.cpp index f005b2b42..7e1f42f37 100644 --- a/src/theory/strings/regexp_entail.cpp +++ b/src/theory/strings/regexp_entail.cpp @@ -48,6 +48,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, do_next = false; Node xc = mchildren[mchildren.size() - 1]; Node rc = children[children.size() - 1]; + Trace("regexp-ext-rewrite-debug") + << "* " << xc << " in " << rc << std::endl; Assert(rc.getKind() != REGEXP_CONCAT); Assert(xc.getKind() != STRING_CONCAT); if (rc.getKind() == STRING_TO_REGEXP) @@ -57,7 +59,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, children.pop_back(); mchildren.pop_back(); do_next = true; - Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl; + } + else if (rc[0].isConst() && Word::isEmpty(rc[0])) + { + Trace("regexp-ext-rewrite-debug") + << "- ignore empty RE" << std::endl; + // ignore and continue + children.pop_back(); + do_next = true; } else if (xc.isConst() && rc[0].isConst()) { @@ -65,8 +75,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, size_t index; Node s = Word::splitConstant(xc, rc[0], index, t == 0); Trace("regexp-ext-rewrite-debug") - << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " - << s << " " << index << " " << t << std::endl; + << "- CRE: Regexp const split : " << xc << " " << rc[0] + << " -> " << s << " " << index << " " << t << std::endl; if (s.isNull()) { Trace("regexp-ext-rewrite-debug") @@ -76,7 +86,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, else { Trace("regexp-ext-rewrite-debug") - << "...strip equal const" << std::endl; + << "- strip equal const" << std::endl; children.pop_back(); mchildren.pop_back(); if (index == 0) @@ -88,6 +98,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); } } + Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl; do_next = true; } } @@ -97,7 +108,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, CVC4::String s = xc.getConst(); if (Word::isEmpty(xc)) { - Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl; // ignore and continue mchildren.pop_back(); do_next = true; @@ -127,6 +138,8 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, } else { + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; return nm->mkConst(false); } } @@ -135,19 +148,23 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, // see if any/each child does not work bool result_valid = true; Node result; - Node emp_s = nm->mkConst(::CVC4::String("")); + Node emp_s = nm->mkConst(String("")); for (unsigned i = 0; i < rc.getNumChildren(); i++) { std::vector mchildren_s; std::vector children_s; mchildren_s.push_back(xc); utils::getConcat(rc[i], children_s); + Trace("regexp-ext-rewrite-debug") << push; Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + Trace("regexp-ext-rewrite-debug") << pop; if (!ret.isNull()) { // one conjunct cannot be satisfied, return false if (rc.getKind() == REGEXP_INTER) { + Trace("regexp-ext-rewrite-debug") + << "...return " << ret << std::endl; return ret; } } @@ -182,10 +199,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, { // all disjuncts cannot be satisfied, return false Assert(rc.getKind() == REGEXP_UNION); + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; return nm->mkConst(false); } else { + Trace("regexp-ext-rewrite-debug") + << "- same result, try again, children now " << children + << std::endl; // all branches led to the same result children.pop_back(); mchildren.pop_back(); @@ -210,17 +232,19 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, std::vector children_s; utils::getConcat(rc[0], children_s); Trace("regexp-ext-rewrite-debug") - << "...recursive call on body of star" << std::endl; + << "- recursive call on body of star" << std::endl; + Trace("regexp-ext-rewrite-debug") << push; Node ret = simpleRegexpConsume(mchildren_s, children_s, t); + Trace("regexp-ext-rewrite-debug") << pop; if (!ret.isNull()) { Trace("regexp-ext-rewrite-debug") - << "CRE : regexp star infeasable " << xc << " " << rc + << "- CRE : regexp star infeasable " << xc << " " << rc << std::endl; children.pop_back(); if (!children.empty()) { - Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; + Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl; do_next = true; } } @@ -244,16 +268,22 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, std::reverse(mchildren_ss.begin(), mchildren_ss.end()); std::reverse(children_ss.begin(), children_ss.end()); } - if (simpleRegexpConsume(mchildren_ss, children_ss, t) - .isNull()) + Trace("regexp-ext-rewrite-debug") + << "- recursive call required repeat star" << std::endl; + Trace("regexp-ext-rewrite-debug") << push; + Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t); + Trace("regexp-ext-rewrite-debug") << pop; + if (rets.isNull()) { can_skip = true; } } if (!can_skip) { + TypeNode stype = nm->stringType(); + Node prev = utils::mkConcat(mchildren, stype); Trace("regexp-ext-rewrite-debug") - << "...can't skip" << std::endl; + << "- can't skip" << std::endl; // take the result of fully consuming once if (t == 1) { @@ -262,12 +292,15 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, mchildren.clear(); mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end()); - do_next = true; + Node curr = utils::mkConcat(mchildren, stype); + do_next = (prev != curr); + Trace("regexp-ext-rewrite-debug") + << "- do_next = " << do_next << std::endl; } else { Trace("regexp-ext-rewrite-debug") - << "...can skip " << rc << " from " << xc << std::endl; + << "- can skip " << rc << " from " << xc << std::endl; } } } @@ -276,7 +309,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, if (!do_next) { Trace("regexp-ext-rewrite") - << "Cannot consume : " << xc << " " << rc << std::endl; + << "- cannot consume : " << xc << " " << rc << std::endl; } } } @@ -286,6 +319,7 @@ Node RegExpEntail::simpleRegexpConsume(std::vector& mchildren, std::reverse(mchildren.begin(), mchildren.end()); } } + Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl; return Node::null(); } diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 9df03f32c..20b892d0f 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1315,9 +1315,13 @@ Node SequencesRewriter::rewriteMembership(TNode node) } else { + Node prev = retNode; retNode = nm->mkNode( STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r); - success = true; + // Iterate again if the node changed. It may not have changed if + // nothing was consumed from mchildren (e.g. if the body of the + // re.* accepts the empty string. + success = (retNode != prev); } } } diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index 7863c5ec0..fc9cf37ac 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -966,6 +966,7 @@ set(regress_0_tests regress0/strings/issue3657-evalLeq.smt2 regress0/strings/issue4070.smt2 regress0/strings/issue4376.smt2 + regress0/strings/issue4662-consume-nterm.smt2 regress0/strings/itos-entail.smt2 regress0/strings/large-model.smt2 regress0/strings/leadingzero001.smt2 diff --git a/test/regress/regress0/strings/issue4662-consume-nterm.smt2 b/test/regress/regress0/strings/issue4662-consume-nterm.smt2 new file mode 100644 index 000000000..a87279b4c --- /dev/null +++ b/test/regress/regress0/strings/issue4662-consume-nterm.smt2 @@ -0,0 +1,6 @@ +(set-logic QF_S) +(set-info :status unsat) +(declare-fun a () String) +(define-fun b () RegLan (str.to_re "A")) +(assert (str.in_re (str.++ "B" a) (re.+ (re.++ (re.opt b) (re.opt b))))) +(check-sat) -- 2.30.2