From 9d93595783e350969ad428ab277614a3250e59a0 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Sat, 1 Jun 2019 09:04:17 +0200 Subject: [PATCH] Fix rewriter for regular expression consume (#3029) --- .../strings/theory_strings_rewriter.cpp | 24 +++++++++++--- src/theory/strings/theory_strings_rewriter.h | 31 +++++++++++++++++-- test/regress/CMakeLists.txt | 1 + .../regress1/strings/simple-re-consume.smt2 | 6 ++++ 4 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 test/regress/regress1/strings/simple-re-consume.smt2 diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 47f29e814..daae57659 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -34,6 +34,11 @@ using namespace CVC4::theory; using namespace CVC4::theory::strings; Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir ){ + Trace("regexp-ext-rewrite-debug") + << "Simple reg exp consume, dir=" << dir << ":" << std::endl; + Trace("regexp-ext-rewrite-debug") + << " mchildren : " << mchildren << std::endl; + Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; NodeManager* nm = NodeManager::currentNM(); unsigned tmin = dir<0 ? 0 : dir; unsigned tmax = dir<0 ? 1 : dir; @@ -52,14 +57,19 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, children.pop_back(); mchildren.pop_back(); do_next = true; + Trace("regexp-ext-rewrite-debug") << "...strip equal" << std::endl; }else if( xc.isConst() && rc[0].isConst() ){ //split the constant int index; Node s = splitConstant( xc, rc[0], index, t==0 ); Trace("regexp-ext-rewrite-debug") << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl; if( s.isNull() ){ + Trace("regexp-ext-rewrite-debug") + << "...return false" << std::endl; return NodeManager::currentNM()->mkConst( false ); }else{ + Trace("regexp-ext-rewrite-debug") + << "...strip equal const" << std::endl; children.pop_back(); mchildren.pop_back(); if( index==0 ){ @@ -75,6 +85,7 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, CVC4::String s = xc.getConst(); if (s.size() == 0) { + Trace("regexp-ext-rewrite-debug") << "...ignore empty" << std::endl; // ignore and continue mchildren.pop_back(); do_next = true; @@ -157,13 +168,15 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, } std::vector< Node > children_s; getConcat( rc[0], children_s ); + Trace("regexp-ext-rewrite-debug") + << "...recursive call on body of star" << std::endl; Node ret = simpleRegexpConsume( mchildren_s, children_s, t ); if( !ret.isNull() ){ Trace("regexp-ext-rewrite-debug") << "CRE : regexp star infeasable " << xc << " " << rc << std::endl; children.pop_back(); - if( children.empty() ){ - return NodeManager::currentNM()->mkConst( false ); - }else{ + if (!children.empty()) + { + Trace("regexp-ext-rewrite-debug") << "...continue" << std::endl; do_next = true; } }else{ @@ -185,6 +198,8 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, } } if( !can_skip ){ + Trace("regexp-ext-rewrite-debug") + << "...can't skip" << std::endl; //take the result of fully consuming once if( t==1 ){ std::reverse( mchildren_s.begin(), mchildren_s.end() ); @@ -193,7 +208,8 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end() ); do_next = true; }else{ - Trace("regexp-ext-rewrite-debug") << "CRE : can skip " << rc << " from " << xc << std::endl; + Trace("regexp-ext-rewrite-debug") + << "...can skip " << rc << " from " << xc << std::endl; } } } diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index e8886d43b..c273ef40e 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -45,8 +45,10 @@ class TheoryStringsRewriter { * membership. The argument dir indicates the direction to consider, where * 0 means strip off the front, 1 off the back, and < 0 off of both. * - * If this method returns the false node, then we have inferred that the input - * membership is equivalent to false. Otherwise, it returns the null node. + * If this method returns the false node, then we have inferred that no + * string in the language of r1 ++ ... ++ rm is a prefix (when dir!=1) or + * suffix (when dir!=0) of s1 ++ ... ++ sn. Otherwise, it returns the null + * node. * * For example, given input * mchildren = { "ab", x }, children = { [["a"]], ([["cd"]])* } and dir = 0, @@ -59,6 +61,31 @@ class TheoryStringsRewriter { * this method updates: * { "b" }, { [[y]] } * where [[.]] denotes str.to.re, and returns null. + * + * Notice that the above requirement for returning false is stronger than + * determining that s1 ++ ... ++ sn in r1 ++ ... ++ rm is equivalent to false. + * For example, for input "bb" in "b" ++ ( "a" )*, we do not return false + * since "b" is in the language of "b" ++ ( "a" )* and is a prefix of "bb". + * We do not return false even though the above membership is equivalent + * to false. We do this because the function is used e.g. to test whether a + * possible unrolling leads to a conflict. This is demonstrated by the + * following examples: + * + * For example, given input + * { "bb", x }, { "b", ("a")* } and dir=-1, + * this method updates: + * { "b" }, { ("a")* } + * and returns null. + * + * For example, given input + * { "cb", x }, { "b", ("a")* } and dir=-1, + * this method leaves children and mchildren unchanged and returns false. + * + * Notice that based on this, we can determine that: + * "cb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false, whereas we cannot determine that: + * "bb" ++ x in ( "b" ++ ("a")* )* + * is equivalent to false. */ static Node simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir = -1 ); static bool isConstRegExp( TNode t ); diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index c39d96351..e1b182f3d 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -1581,6 +1581,7 @@ set(regress_1_tests regress1/strings/replaceall-len.smt2 regress1/strings/replaceall-replace.smt2 regress1/strings/rew-020618.smt2 + regress1/strings/simple-re-consume.smt2 regress1/strings/stoi-400million.smt2 regress1/strings/stoi-solve.smt2 regress1/strings/str-code-sat.smt2 diff --git a/test/regress/regress1/strings/simple-re-consume.smt2 b/test/regress/regress1/strings/simple-re-consume.smt2 new file mode 100644 index 000000000..af34ef1c9 --- /dev/null +++ b/test/regress/regress1/strings/simple-re-consume.smt2 @@ -0,0 +1,6 @@ +(set-info :smt-lib-version 2.5) +(set-logic QF_SLIA) +(set-info :status sat) +(declare-fun x () String) +(assert (str.in.re (str.++ "bbbbbbbb" x) (re.* (re.++ (str.to.re "bbbb") (re.* (str.to.re "aaaa")))))) +(check-sat) -- 2.30.2