From 7f33294261869ab8f0caa8660222576a4ff7bcdc Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Tue, 27 Feb 2018 16:52:57 -0600 Subject: [PATCH] Improve rewriter for string indexof (#1592) --- .../strings/theory_strings_rewriter.cpp | 239 ++++++++++-------- src/util/regexp.cpp | 5 + src/util/regexp.h | 5 + test/regress/regress0/strings/Makefile.am | 3 +- test/regress/regress0/strings/idof-sem.smt2 | 6 + 5 files changed, 155 insertions(+), 103 deletions(-) create mode 100644 test/regress/regress0/strings/idof-sem.smt2 diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 54a5b4dcb..1555b0aa0 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -1818,107 +1818,143 @@ Node TheoryStringsRewriter::rewriteContains( Node node ) { Node TheoryStringsRewriter::rewriteIndexof( Node node ) { Assert(node.getKind() == kind::STRING_STRIDOF); - std::vector< Node > children; - getConcat( node[0], children ); - //std::vector< Node > children1; - //getConcat( node[1], children1 ); TODO - std::size_t start = 0; - std::size_t val2 = 0; - if( node[2].isConst() ){ - CVC4::Rational RMAXINT(LONG_MAX); - if( node[2].getConst()>RMAXINT ){ - Assert(node[2].getConst() <= RMAXINT, "Number exceeds LONG_MAX in string index_of"); - return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); - }else if( node[2].getConst().sgn()==-1 ){ - //constant negative - return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); - }else{ - val2 = node[2].getConst().getNumerator().toUnsignedInt(); - start = val2; - } - } - bool prefixNoOverlap = false; - CVC4::String t; - if( node[1].isConst() ){ - t = node[1].getConst(); - } - //unsigned ch1_index = 0; - for( unsigned i=0; i(); - if( node[1].isConst() ){ - if( i==0 ){ - std::size_t ret = s.find( t, start ); - if( ret!=std::string::npos ) { - //exact if start value was constant - if( node[2].isConst() ){ - return NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) ); - } - }else{ - //exact if we scanned the entire string - if( node[0].isConst() ){ - return NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); - }else{ - prefixNoOverlap = (s.overlap(t)==0); - Trace("strings-rewrite-debug") << "Prefix no overlap : " << s << " " << t << " " << prefixNoOverlap << std::endl; - } - } - }else if( !node[2].isConst() ){ - break; - }else{ - std::size_t ret = s.find(t, start); - //remove remaining children after finding the string - if( ret!=std::string::npos ){ - Assert( ret+t.size()<=s.size() ); - children[i] = NodeManager::currentNM()->mkConst( ::CVC4::String( s.substr(0,ret+t.size()) ) ); - do_splice = true; - }else{ - //if no overlap on last child, can remove - if( t.overlap( s )==0 && i==children.size()-1 ){ - std::vector< Node > spl; - spl.insert( spl.end(), children.begin(), children.begin()+i ); - return NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ); - } - } - } + NodeManager* nm = NodeManager::currentNM(); + + if (node[2].isConst() && node[2].getConst().sgn() < 0) + { + // z<0 implies str.indexof( x, y, z ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-neg"); + } + + if (node[1].isConst()) + { + if (node[1].getConst().size() == 0) + { + // str.indexof( x, "", z ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-empty"); + } + } + + // evaluation and simple cases + std::vector children0; + getConcat(node[0], children0); + if (children0[0].isConst() && node[1].isConst() && node[2].isConst()) + { + CVC4::Rational RMAXINT(CVC4::String::maxSize()); + if (node[2].getConst() > RMAXINT) + { + // We know that, due to limitations on the size of string constants + // in our implementation, that accessing a position greater than + // RMAXINT is guaranteed to be out of bounds. + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-max"); + } + Assert(node[2].getConst().sgn() >= 0); + unsigned start = + node[2].getConst().getNumerator().toUnsignedInt(); + CVC4::String s = children0[0].getConst(); + CVC4::String t = node[1].getConst(); + std::size_t ret = s.find(t, start); + if (ret != std::string::npos) + { + Node retv = nm->mkConst(Rational(static_cast(ret))); + return returnRewrite(node, retv, "idof-find"); + } + else if (children0.size() == 1) + { + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nfind"); + } + } + + Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]); + Node len1 = nm->mkNode(kind::STRING_LENGTH, node[1]); + Node len0m2 = nm->mkNode(kind::MINUS, len0, node[2]); + if (checkEntailArith(len1, len0m2, true)) + { + // len(x)-z < len(y) implies indexof( x, y, z ) ----> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-len"); + } + + Node fstr = node[0]; + if (!node[2].isConst() || node[2].getConst().sgn() != 0) + { + fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0); + fstr = Rewriter::rewrite(fstr); + if (fstr.isConst()) + { + CVC4::String fs = fstr.getConst(); + if (fs.size() == 0) + { + // substr( x, z, len(x) ) --> "" implies str.indexof( x, y, z ) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-base-len"); } - //decrement the start index - if( start>0 ){ - if( s.size()>start ){ - start = 0; - }else{ - start = start - s.size(); + } + } + + Node cmp_con = nm->mkNode(kind::STRING_STRCTN, fstr, node[1]); + Node cmp_conr = Rewriter::rewrite(cmp_con); + if (cmp_conr.isConst()) + { + if (cmp_conr.getConst()) + { + if (node[2].isConst() && node[2].getConst().sgn() == 0) + { + // past the first position in node[0] that contains node[1], we can drop + std::vector children1; + getConcat(node[1], children1); + std::vector nb; + std::vector ne; + int cc = componentContains(children0, children1, nb, ne, true, 1); + if (cc != -1 && !ne.empty()) + { + // For example: + // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0) + Node nn = mkConcat(kind::STRING_CONCAT, children0); + Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]); + return returnRewrite(node, ret, "idof-def-ctn"); } } - }else if( !node[2].isConst() ){ - break; - }else{ - if( children[i]==node[1] && start==0 ){ - //can remove beyond this - do_splice = true; - } - } - if( do_splice ){ - std::vector< Node > spl; - //since we definitely will find the string, we can safely add the length of the constant non-overlapping prefix - if( prefixNoOverlap ){ - Node pl = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, children[0] ) ); - Assert( pl.isConst() ); - Assert( node[2].isConst() ); - int new_start = val2 - pl.getConst().getNumerator().toUnsignedInt(); - if( new_start<0 ){ - new_start = 0; - } - spl.insert( spl.end(), children.begin()+1, children.begin()+i+1 ); - return NodeManager::currentNM()->mkNode( kind::PLUS, pl, - NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], NodeManager::currentNM()->mkConst( Rational(new_start) ) ) ); - }else{ - spl.insert( spl.end(), children.begin(), children.begin()+i+1 ); - return NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ); + + // these rewrites are only possible if we will not return -1 + Node l1 = nm->mkNode(kind::STRING_LENGTH, node[1]); + Node zero = NodeManager::currentNM()->mkConst(CVC4::Rational(0)); + bool is_non_empty = checkEntailArith(l1, zero, true); + + if (is_non_empty) + { + // strip symbolic length + Node new_len = node[2]; + std::vector nr; + if (stripSymbolicLength(children0, nr, 1, new_len)) + { + // For example: + // z>str.len( x1 ) and str.len( y )>0 and str.contains( x2, y )-->true + // implies + // str.indexof( str.++( x1, x2 ), y, z ) ---> + // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) ) + Node nn = mkConcat(kind::STRING_CONCAT, children0); + Node ret = nm->mkNode( + kind::PLUS, + nm->mkNode(kind::MINUS, node[2], new_len), + nm->mkNode(kind::STRING_STRIDOF, nn, node[1], new_len)); + return returnRewrite(node, ret, "idof-strip-sym-len"); + } } } + else + { + // str.contains( x, y ) --> false implies str.indexof(x,y,z) --> -1 + Node negone = nm->mkConst(Rational(-1)); + return returnRewrite(node, negone, "idof-nctn"); + } } + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; return node; } @@ -2123,13 +2159,7 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n) if (n[1].isConst()) { CVC4::String s = n[1].getConst(); - if (s.isEmptyString()) - { - Assert(!n[0].isConst()); - Node ret = n[0].eqNode(n[1]); - return returnRewrite(n, ret, "suf/prefix-empty"); - } - else if (n[0].isConst()) + if (n[0].isConst()) { Node ret = NodeManager::currentNM()->mkConst(false); CVC4::String t = n[0].getConst(); @@ -2143,6 +2173,11 @@ Node TheoryStringsRewriter::rewritePrefixSuffix(Node n) } return returnRewrite(n, ret, "suf/prefix-const"); } + else if (s.isEmptyString()) + { + Node ret = n[0].eqNode(n[1]); + return returnRewrite(n, ret, "suf/prefix-empty"); + } else if (s.size() == 1) { // (str.prefix x "A") and (str.suffix x "A") are equivalent to diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp index d93c5426e..a7e5131ec 100644 --- a/src/util/regexp.cpp +++ b/src/util/regexp.cpp @@ -371,6 +371,11 @@ bool String::isDigit(unsigned character) return c >= '0' && c <= '9'; } +size_t String::maxSize() +{ + return std::numeric_limits::max(); +} + int String::toNumber() const { if (isNumber()) { int ret = 0; diff --git a/src/util/regexp.h b/src/util/regexp.h index d51ef4372..efbb4579d 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -163,6 +163,11 @@ class CVC4_PUBLIC String { */ static bool isDigit(unsigned character); + /** + * Returns the maximum length of string representable by this class. + * Corresponds to the maximum size of d_str. + */ + static size_t maxSize(); private: // guarded static unsigned char hexToDec(unsigned char c); diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index 23b13aea2..df4e8b84b 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -43,7 +43,8 @@ TESTS = \ issue1189.smt2 \ rewrites-v2.smt2 \ substr-rewrites.smt2 \ - repl-rewrites2.smt2 + repl-rewrites2.smt2 \ + idof-sem.smt2 FAILING_TESTS = diff --git a/test/regress/regress0/strings/idof-sem.smt2 b/test/regress/regress0/strings/idof-sem.smt2 new file mode 100644 index 000000000..90dcc83a0 --- /dev/null +++ b/test/regress/regress0/strings/idof-sem.smt2 @@ -0,0 +1,6 @@ +(set-logic SLIA) +(set-option :strings-exp true) +(set-info :status unsat) +(declare-fun x () String) +(assert (not (= (str.indexof x "" 0) (- 1)))) +(check-sat) \ No newline at end of file -- 2.30.2