From b0feac10d73770819839624dd943eedb14bd4c86 Mon Sep 17 00:00:00 2001 From: ajreynol Date: Fri, 2 Oct 2015 15:10:10 +0200 Subject: [PATCH] Improvements to rewriter for regexp, contains, indexof. Improvements and fixes for reduction of indexof. Fixes bugs 612 and 615. Fix bug in find+offset in strings util. Add regressions. --- src/theory/strings/theory_strings.cpp | 57 ++- .../strings/theory_strings_preprocess.cpp | 29 +- .../strings/theory_strings_rewriter.cpp | 380 ++++++++++++------ src/theory/strings/theory_strings_rewriter.h | 16 +- src/util/regexp.h | 4 +- test/regress/regress0/strings/Makefile.am | 9 +- test/regress/regress0/strings/bug612.smt2 | 10 + test/regress/regress0/strings/bug615.smt2 | 26 ++ test/regress/regress0/strings/idof-handg.smt2 | 7 + .../regress0/strings/idof-nconst-index.smt2 | 9 + .../regress0/strings/idof-neg-index.smt2 | 8 + .../regress0/strings/norn-simp-rew-sat.smt2 | 25 ++ .../regress0/strings/norn-simp-rew.smt2 | 29 ++ 13 files changed, 441 insertions(+), 168 deletions(-) create mode 100644 test/regress/regress0/strings/bug612.smt2 create mode 100644 test/regress/regress0/strings/bug615.smt2 create mode 100644 test/regress/regress0/strings/idof-handg.smt2 create mode 100644 test/regress/regress0/strings/idof-nconst-index.smt2 create mode 100644 test/regress/regress0/strings/idof-neg-index.smt2 create mode 100644 test/regress/regress0/strings/norn-simp-rew-sat.smt2 create mode 100755 test/regress/regress0/strings/norn-simp-rew.smt2 diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index cebcc4da5..5cf7d8ee6 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -1968,6 +1968,12 @@ bool TheoryStrings::registerTerm( Node n ) { ++(d_statistics.d_splits); } } else { + if( n.getKind()==kind::STRING_CONCAT ){ + //normalize wrt proxy variables + + } + + Node sk = mkSkolemS("lsym", 2); StringsProxyVarAttribute spva; sk.setAttribute(spva,true); @@ -2269,27 +2275,38 @@ void TheoryStrings::getConcatVec( Node n, std::vector< Node >& c ) { void TheoryStrings::debugPrintFlatForms( const char * tc ){ for( unsigned k=0; k1 ){ + Trace( tc ) << "EQC [" << eqc << "]" << std::endl; + }else{ + Trace( tc ) << "eqc [" << eqc << "]"; + } std::map< Node, Node >::iterator itc = d_eqc_to_const.find( eqc ); if( itc!=d_eqc_to_const.end() ){ - Trace( tc ) << " C: " << itc->second << std::endl; + Trace( tc ) << " C: " << itc->second; + if( d_eqc[eqc].size()>1 ){ + Trace( tc ) << std::endl; + } } - for( unsigned i=0; isecond; - }else{ - Trace( tc ) << fc; + if( d_eqc[eqc].size()>1 ){ + for( unsigned i=0; isecond; + }else{ + Trace( tc ) << fc; + } } + if( n!=eqc ){ + Trace( tc ) << ", from " << n; + } + Trace( tc ) << std::endl; } - if( n!=eqc ){ - Trace( tc ) << ", from " << n; - } + }else{ Trace( tc ) << std::endl; } } @@ -2377,14 +2394,14 @@ void TheoryStrings::checkNormalForms() { for( unsigned j=0; jsecond.size(); i++ ){ std::reverse( d_flat_form[it->second].begin(), d_flat_form[it->second].end() ); @@ -2439,7 +2456,7 @@ void TheoryStrings::checkNormalForms() { } Debug("strings-nf") << std::endl; } - + if( !hasProcessed() ){ checkExtendedFuncsEval( 1 ); Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; @@ -3789,7 +3806,7 @@ void TheoryStrings::checkExtendedFuncsEval( int effort ) { } } }else{ - Trace("strings-extf-debug") << " could not rewrite : " << nr << std::endl; + Trace("strings-extf-debug") << " cannot rewrite extf : " << nrc << std::endl; } } } diff --git a/src/theory/strings/theory_strings_preprocess.cpp b/src/theory/strings/theory_strings_preprocess.cpp index 787979faa..bcea61937 100644 --- a/src/theory/strings/theory_strings_preprocess.cpp +++ b/src/theory/strings/theory_strings_preprocess.cpp @@ -218,37 +218,30 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes, bool d new_nodes.push_back( lemma ); d_cache[t] = t; } else if( t.getKind() == kind::STRING_STRIDOF ) { - Node sk1 = NodeManager::currentNM()->mkSkolem( "io1", NodeManager::currentNM()->stringType(), "created for indexof" ); Node sk2 = NodeManager::currentNM()->mkSkolem( "io2", NodeManager::currentNM()->stringType(), "created for indexof" ); Node sk3 = NodeManager::currentNM()->mkSkolem( "io3", NodeManager::currentNM()->stringType(), "created for indexof" ); Node sk4 = NodeManager::currentNM()->mkSkolem( "io4", NodeManager::currentNM()->stringType(), "created for indexof" ); Node skk = NodeManager::currentNM()->mkSkolem( "iok", NodeManager::currentNM()->integerType(), "created for indexof" ); - Node eq = t[0].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2, sk3, sk4 ) ); + Node st = NodeManager::currentNM()->mkNode( kind::STRING_SUBSTR_TOTAL, t[0], t[2], NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ) ); + Node eq = st.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk2, sk3, sk4 ) ); new_nodes.push_back( eq ); Node negone = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); Node krange = NodeManager::currentNM()->mkNode( kind::GEQ, skk, negone ); new_nodes.push_back( krange ); - krange = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, - NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ), skk) ); + krange = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ), skk) ); new_nodes.push_back( krange ); - krange = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, - NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[1] ), d_zero) ); - new_nodes.push_back( krange ); - krange = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GEQ, - t[2], d_zero) ); + krange = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[1] ), d_zero) ); new_nodes.push_back( krange ); + Node start_valid = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GEQ, t[2], d_zero) ); //str.len(s1) < y + str.len(s2) Node c1 = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::GT, NodeManager::currentNM()->mkNode( kind::PLUS, t[2], NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[1] )), NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t[0] ))); - //str.len(t1) = y - Node c2 = t[2].eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk1 ) ); //~contain(t234, s2) - Node c3 = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk2, sk3, sk4), t[1] ).negate()); + Node c3 = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, st, t[1] ).negate()); //left - Node left = NodeManager::currentNM()->mkNode( kind::OR, c1, NodeManager::currentNM()->mkNode( kind::AND, c2, c3 ) ); + Node left = NodeManager::currentNM()->mkNode( kind::OR, c1, c3, start_valid.negate() ); //t3 = s2 Node c4 = t[1].eqNode( sk3 ); //~contain(t2, s2) @@ -259,11 +252,11 @@ Node StringsPreprocess::simplify( Node t, std::vector< Node > &new_nodes, bool d NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, t[1]), NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) )))), t[1] ).negate(); - //k=str.len(s1, s2) - Node c6 = skk.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk1, sk2 ))); + //k=str.len(s2) + Node c6 = skk.eqNode( NodeManager::currentNM()->mkNode( kind::PLUS, t[2], + NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, sk2 )) ); //right - Node right = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::AND, c2, c4, c5, c6 )); + Node right = Rewriter::rewrite(NodeManager::currentNM()->mkNode( kind::AND, c4, c5, c6, start_valid )); Node cond = skk.eqNode( negone ); Node rr = NodeManager::currentNM()->mkNode( kind::ITE, cond, left, right ); new_nodes.push_back( rr ); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 28fff47d4..e326b9b93 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -2,8 +2,8 @@ /*! \file theory_strings_rewriter.cpp ** \verbatim ** Original author: Tianyi Liang - ** Major contributors: none - ** Minor contributors (to current version): Andrew Reynolds + ** Major contributors: Andrew Reynolds + ** Minor contributors (to current version): none ** This file is part of the CVC4 project. ** Copyright (c) 2009-2014 New York University and The University of Iowa ** See the file COPYING in the top-level source directory for licensing @@ -24,6 +24,88 @@ using namespace CVC4::kind; using namespace CVC4::theory; using namespace CVC4::theory::strings; +Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir ){ + unsigned tmin = dir<0 ? 0 : dir; + unsigned tmax = dir<0 ? 1 : dir; + //try to remove off front and back + for( unsigned t=0; t<2; t++ ){ + if( tmin<=t && t<=tmax ){ + int count = children.size()-1; + int mcount = mchildren.size()-1; + bool do_next = true; + while( count>=0 && mcount>=0 && do_next ){ + do_next = false; + Node rc = children[count]; + Node xc = mchildren[mcount]; + if( rc.getKind() == kind::STRING_TO_REGEXP ){ + if( xc==rc[0] ){ + children.pop_back(); + mchildren.pop_back(); + count--; + mcount--; + do_next = true; + }else if( xc.isConst() && rc[0].isConst() ){ + //split the constant + int index; + Node s = splitConstant( xc, rc[0], index, t==0 ); + //Trace("spl-const") << "Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl; + if( s.isNull() ){ + return NodeManager::currentNM()->mkConst( false ); + }else{ + children.pop_back(); + mchildren.pop_back(); + count--; + mcount--; + if( index==0 ){ + mchildren.push_back( s ); + mcount++; + }else{ + children.push_back( s ); + count++; + } + } + do_next = true; + } + }else if( xc.isConst() ){ + //check for constants + CVC4::String s = xc.getConst(); + Assert( s.size()>0 ); + if( rc.getKind() == kind::REGEXP_RANGE || rc.getKind()==kind::REGEXP_SIGMA ){ + CVC4::String ss( t==0 ? s.getLastChar() : s.getFirstChar() ); + if( testConstStringInRegExp( ss, 0, rc ) ){ + //strip off one character + mchildren.pop_back(); + if( s.size()>1 ){ + if( t==0 ){ + mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 0, s.size()-1 )) ); + }else{ + mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 1 )) ); + } + }else{ + mcount--; + } + children.pop_back(); + count--; + do_next = true; + }else{ + return NodeManager::currentNM()->mkConst( false ); + } + }else if( rc.getKind()==kind::REGEXP_INTER || rc.getKind()==kind::REGEXP_UNION ){ + //TODO + }else if( rc.getKind()==kind::REGEXP_STAR ){ + //TODO + } + } + } + } + if( dir!=0 ){ + std::reverse( children.begin(), children.end() ); + std::reverse( mchildren.begin(), mchildren.end() ); + } + } + return Node::null(); +} + Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { Trace("strings-prerewrite") << "Strings::rewriteConcatString start " << node << std::endl; Node retNode = node; @@ -83,70 +165,6 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { return retNode; } -Node TheoryStringsRewriter::concatTwoNodes(TNode n1, TNode n2) { - Assert(n1.getKind() != kind::REGEXP_CONCAT); - TNode tmp = n2.getKind() == kind::REGEXP_CONCAT ? n2[0] : n2; - if(n1.getKind() == kind::STRING_TO_REGEXP && tmp.getKind() == kind::STRING_TO_REGEXP) { - tmp = NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, n1[0], tmp[0] ); - tmp = rewriteConcatString( tmp ); - tmp = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp ); - } else { - tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, n1, tmp ); - } - Node retNode = tmp; - if(n2.getKind() == kind::REGEXP_CONCAT) { - std::vector vec; - if(tmp.getKind() != kind::REGEXP_CONCAT) { - vec.push_back(retNode); - } else { - vec.push_back(retNode[0]); - vec.push_back(retNode[1]); - } - for(unsigned j=1; jmkNode( kind::REGEXP_CONCAT, vec ); - } - Trace("regexp-ax-debug") << "Regexp::AX::concatTwoNodes: (" << n1 << ", " << n2 << ") -> " << retNode << std::endl; - return retNode; -} - -void TheoryStringsRewriter::unionAndConcat(std::vector &vec_nodes, Node node) { - if(vec_nodes.empty()) { - vec_nodes.push_back(node); - } else { - Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); - if(node != emptysingleton) { - for(unsigned i=0; i vec2; - for(unsigned j=0; j 1); - vec_nodes[i] = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, vec2); - } else if(vec_nodes[i].getKind() == kind::REGEXP_EMPTY) { - //do nothing - } else if(vec_nodes[i] == emptysingleton) { - vec_nodes[i] = node; - } else if(vec_nodes[i].getKind() == kind::STRING_TO_REGEXP) { - vec_nodes[i] = concatTwoNodes(vec_nodes[i], node); - } else { - vec_nodes[i] = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, vec_nodes[i], node); - } - } - } - } -} void TheoryStringsRewriter::mergeInto(std::vector &t, const std::vector &s) { for(std::vector::const_iterator itr=s.begin(); itr!=s.end(); itr++) { @@ -430,9 +448,6 @@ Node TheoryStringsRewriter::applyAX( TNode node ) { } /* case kind::REGEXP_UNION: { break; - } - case kind::REGEXP_UNION: { - break; }*/ case kind::REGEXP_SIGMA: { break; @@ -622,18 +637,16 @@ Node TheoryStringsRewriter::prerewriteAndRegExp(TNode node) { return retNode; } -bool TheoryStringsRewriter::checkConstRegExp( TNode t ) { - if( t.getKind() != kind::STRING_TO_REGEXP ) { +bool TheoryStringsRewriter::isConstRegExp( TNode t ) { + if( t.getKind()==kind::STRING_TO_REGEXP ) { + return t[0].isConst(); + }else{ for( unsigned i = 0; i().getNumerator().toUnsignedInt(); @@ -806,34 +819,89 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { if(r.getKind() == kind::REGEXP_EMPTY) { retNode = NodeManager::currentNM()->mkConst( false ); - } else if(x.getKind()==kind::CONST_STRING && checkConstRegExp(r)) { + } else if(x.getKind()==kind::CONST_STRING && isConstRegExp(r)) { //test whether x in node[1] CVC4::String s = x.getConst(); retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, r ) ); } else if(r.getKind() == kind::REGEXP_SIGMA) { Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); retNode = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); - } else if(r.getKind() == kind::REGEXP_STAR && r[0].getKind() == kind::REGEXP_SIGMA) { - retNode = NodeManager::currentNM()->mkConst( true ); - } else if(r.getKind() == kind::REGEXP_CONCAT) { - //opt + } else if( r.getKind() == kind::REGEXP_STAR ) { + if( r[0].getKind() == kind::REGEXP_SIGMA ){ + retNode = NodeManager::currentNM()->mkConst( true ); + } + }else if( r.getKind() == kind::REGEXP_CONCAT ){ bool flag = true; for(unsigned i=0; imkConst( ::CVC4::Rational( r.getNumChildren() ) ); retNode = num.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); } - // - } else if(r.getKind() == kind::STRING_TO_REGEXP) { + }else if(r.getKind() == kind::STRING_TO_REGEXP) { retNode = x.eqNode(r[0]); - } else if(x != node[0] || r != node[1]) { + }else if(x != node[0] || r != node[1]) { retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r ); } + + //do simple consumes + if( retNode==node ){ + if( r.getKind()==kind::REGEXP_STAR ){ + for( unsigned dir=0; dir<=1; dir++ ){ + std::vector< Node > mchildren; + getConcat( x, mchildren ); + bool success = true; + while( success ){ + success = false; + std::vector< Node > children; + getConcat( r[0], children ); + Node scn = simpleRegexpConsume( mchildren, children, dir ); + if( !scn.isNull() ){ + Trace("regexp-ext-rewrite") << "Regexp star : const conflict : " << node << std::endl; + return scn; + }else if( children.empty() ){ + //fully consumed one copy of the STAR + if( mchildren.empty() ){ + Trace("regexp-ext-rewrite") << "Regexp star : full consume : " << node << std::endl; + return NodeManager::currentNM()->mkConst( true ); + }else{ + retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, mkConcat( kind::STRING_CONCAT, mchildren ), r ); + success = true; + } + } + } + if( retNode!=node ){ + Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node << " -> " << retNode << std::endl; + break; + } + } + }else{ + std::vector< Node > children; + getConcat( r, children ); + std::vector< Node > mchildren; + getConcat( x, mchildren ); + unsigned prevSize = children.size() + mchildren.size(); + Node scn = simpleRegexpConsume( mchildren, children ); + if( !scn.isNull() ){ + Trace("regexp-ext-rewrite") << "Regexp : const conflict : " << node << std::endl; + return scn; + }else{ + if( (children.size() + mchildren.size())!=prevSize ){ + if( children.empty() ){ + retNode = NodeManager::currentNM()->mkConst( mchildren.empty() ); + }else{ + retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, mkConcat( kind::STRING_CONCAT, mchildren ), mkConcat( kind::REGEXP_CONCAT, children ) ); + } + Trace("regexp-ext-rewrite") << "Regexp : rewrite : " << node << " -> " << retNode << std::endl; + } + } + } + } return retNode; } @@ -921,7 +989,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else if(node[1] == zero && node[2].getKind() == kind::STRING_LENGTH && node[2][0] == node[0]) { retNode = node[0]; } - } else if(node.getKind() == kind::STRING_STRCTN) { + } else if( node.getKind() == kind::STRING_STRCTN ){ if( node[0] == node[1] ) { retNode = NodeManager::currentNM()->mkConst( true ); } else if( node[0].isConst() && node[1].isConst() ) { @@ -932,27 +1000,24 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else { retNode = NodeManager::currentNM()->mkConst( false ); } - } else if( node[0].getKind() == kind::STRING_CONCAT ) { - if( node[1].getKind() != kind::STRING_CONCAT ){ - bool flag = false; + } else if( node[0].getKind()==kind::STRING_CONCAT ) { + if( node[1].getKind()!=kind::STRING_CONCAT ){ for(unsigned i=0; imkConst( true ); break; //constant contains }else if( node[0][i].isConst() && node[1].isConst() ){ CVC4::String s = node[0][i].getConst(); CVC4::String t = node[1].getConst(); if( s.find(t) != std::string::npos ) { - flag = true; + retNode = NodeManager::currentNM()->mkConst( true ); break; } } } - if(flag) { - retNode = NodeManager::currentNM()->mkConst( true ); - } - } else { + }else{ + //component-wise containment bool flag = false; unsigned n1 = node[0].getNumChildren(); unsigned n2 = node[1].getNumChildren(); @@ -977,23 +1042,66 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } } } + }else if( node[0].isConst() ){ + if( node[1].getKind()==kind::STRING_CONCAT ){ + //must find constant components in order + size_t pos = 0; + CVC4::String t = node[0].getConst(); + for(unsigned i=0; i(); + size_t new_pos = t.find(s,pos); + if( new_pos==std::string::npos ) { + retNode = NodeManager::currentNM()->mkConst( false ); + break; + }else{ + pos = new_pos + s.size(); + } + } + } + } } - } else if(node.getKind() == kind::STRING_STRIDOF) { - if( node[0].isConst() && node[1].isConst() && node[2].isConst() ) { - CVC4::String s = node[0].getConst(); + }else if( node.getKind()==kind::STRING_STRIDOF ){ + std::vector< Node > children; + getConcat( node[0], children ); + if( children[0].isConst() && node[1].isConst() ) { + CVC4::String s = children[0].getConst(); CVC4::String t = node[1].getConst(); - CVC4::Rational RMAXINT(LONG_MAX); - Assert(node[2].getConst() <= RMAXINT, "Number exceeds LONG_MAX in string index_of"); - std::size_t i = node[2].getConst().getNumerator().toUnsignedInt(); - std::size_t ret = s.find(t, i); - if( ret != std::string::npos ) { - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) ); - } else { + std::size_t i = 0; + bool do_find = true; + if( node[2].isConst() ){ + CVC4::Rational RMAXINT(LONG_MAX); + if( node[2].getConst()>RMAXINT ){ + Assert(node[2].getConst() <= RMAXINT, "Number exceeds LONG_MAX in string index_of"); + retNode = NodeManager::currentNM()->mkConst( false ); + do_find = false; + }else{ + i = node[2].getConst().getNumerator().toUnsignedInt(); + } + } + if( do_find ){ + std::size_t ret = s.find(t, i); + if( ret!=std::string::npos ) { + //only exact if start value was constant + if( node[2].isConst() ){ + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) ); + } + }else{ + //only exact if we scanned the entire string + if( node[0].isConst() ){ + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + } + } + } + } + //constant negative + if( node[2].isConst() ){ + if( node[2].getConst().sgn()==-1 ){ retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); } } - } else if(node.getKind() == kind::STRING_STRREPL) { - if(node[1] != node[2]) { + }else if( node.getKind() == kind::STRING_STRREPL ){ + if( node[1]!=node[2] ){ if(node[0].isConst() && node[1].isConst()) { CVC4::String s = node[0].getConst(); CVC4::String t = node[1].getConst(); @@ -1017,8 +1125,8 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else { retNode = node[0]; } - } else if(node.getKind() == kind::STRING_PREFIX) { - if(node[0].isConst() && node[1].isConst()) { + }else if( node.getKind() == kind::STRING_PREFIX ){ + if( node[0].isConst() && node[1].isConst() ){ CVC4::String s = node[1].getConst(); CVC4::String t = node[0].getConst(); retNode = NodeManager::currentNM()->mkConst( false ); @@ -1035,7 +1143,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { node[0].eqNode(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, node[1], NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ), lens))); } - } else if(node.getKind() == kind::STRING_SUFFIX) { + }else if( node.getKind() == kind::STRING_SUFFIX ){ if(node[0].isConst() && node[1].isConst()) { CVC4::String s = node[1].getConst(); CVC4::String t = node[0].getConst(); @@ -1053,7 +1161,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { node[0].eqNode(NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR_TOTAL, node[1], NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens), lens))); } - } else if(node.getKind() == kind::STRING_ITOS || node.getKind() == kind::STRING_U16TOS || node.getKind() == kind::STRING_U32TOS) { + }else if(node.getKind() == kind::STRING_ITOS || node.getKind() == kind::STRING_U16TOS || node.getKind() == kind::STRING_U32TOS) { if(node[0].isConst()) { bool flag = false; std::string stmp = node[0].getConst().getNumerator().toString(); @@ -1077,7 +1185,7 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String(stmp) ); } } - } else if(node.getKind() == kind::STRING_STOI || node.getKind() == kind::STRING_STOU16 || node.getKind() == kind::STRING_STOU32) { + }else if(node.getKind() == kind::STRING_STOI || node.getKind() == kind::STRING_STOU16 || node.getKind() == kind::STRING_STOU32) { if(node[0].isConst()) { CVC4::String s = node[0].getConst(); if(s.isNumber()) { @@ -1146,8 +1254,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { if(node.getKind() == kind::STRING_CONCAT) { retNode = rewriteConcatString(node); - } - else if(node.getKind() == kind::REGEXP_CONCAT) { + }else if(node.getKind() == kind::REGEXP_CONCAT) { retNode = prerewriteConcatRegExp(node); } else if(node.getKind() == kind::REGEXP_UNION) { retNode = prerewriteOrRegExp(node); @@ -1160,8 +1267,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { } else if(node[0].getKind() == kind::STRING_TO_REGEXP && node[0][0].getKind() == kind::CONST_STRING && node[0][0].getConst().isEmptyString()) { retNode = node[0]; } else if(node[0].getKind() == kind::REGEXP_EMPTY) { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, - NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ); + retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ); } else if(node[0].getKind() == kind::REGEXP_UNION) { Node tmpNode = prerewriteOrRegExp(node[0]); if(tmpNode.getKind() == kind::REGEXP_UNION) { @@ -1308,3 +1414,37 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { } return RewriteResponse(orig==retNode ? REWRITE_DONE : REWRITE_AGAIN_FULL, retNode); } + +void TheoryStringsRewriter::getConcat( Node n, std::vector< Node >& c ) { + if( n.getKind()==kind::STRING_CONCAT || n.getKind()==kind::REGEXP_CONCAT ){ + for( unsigned i=0; i& c ){ + Assert( !c.empty() || k==kind::STRING_CONCAT ); + return c.size()>1 ? NodeManager::currentNM()->mkNode( k, c ) : ( c.size()==1 ? c[0] : NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ); +} + +Node TheoryStringsRewriter::splitConstant( Node a, Node b, int& index, bool isRev ) { + Assert( a.isConst() && b.isConst() ); + index = a.getConst().size() <= b.getConst().size() ? 1 : 0; + unsigned len_short = index==1 ? a.getConst().size() : b.getConst().size(); + bool cmp = isRev ? a.getConst().rstrncmp(b.getConst(), len_short): a.getConst().strncmp(b.getConst(), len_short); + if( cmp ) { + Node l = index==0 ? a : b; + if( isRev ){ + int new_len = l.getConst().size() - len_short; + return NodeManager::currentNM()->mkConst(l.getConst().substr( 0, new_len )); + }else{ + return NodeManager::currentNM()->mkConst(l.getConst().substr( len_short )); + } + }else{ + //not the same prefix/suffix + return Node::null(); + } +} diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 2f075febf..03ee6a0cf 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -30,14 +30,13 @@ namespace theory { namespace strings { class TheoryStringsRewriter { -public: - static bool checkConstRegExp( TNode t ); +private: + static Node simpleRegexpConsume( std::vector< Node >& mchildren, std::vector< Node >& children, int dir = -1 ); + static bool isConstRegExp( TNode t ); static bool testConstStringInRegExp( CVC4::String &s, unsigned int index_start, TNode r ); static Node rewriteConcatString(TNode node); - - static Node concatTwoNodes(TNode n1, TNode n2); - static void unionAndConcat(std::vector &vec_nodes, Node node); + static void mergeInto(std::vector &t, const std::vector &s); static void shrinkConVec(std::vector &vec); static Node applyAX( TNode node ); @@ -47,14 +46,17 @@ public: static Node prerewriteAndRegExp(TNode node); static Node rewriteMembership(TNode node); - static RewriteResponse postRewrite(TNode node); - static bool hasEpsilonNode(TNode node); +public: + static RewriteResponse postRewrite(TNode node); static RewriteResponse preRewrite(TNode node); static inline void init() {} static inline void shutdown() {} + static void getConcat( Node n, std::vector< Node >& c ); + static Node mkConcat( Kind k, std::vector< Node >& c ); + static Node splitConstant( Node a, Node b, int& index, bool isRev ); };/* class TheoryStringsRewriter */ }/* CVC4::theory::strings namespace */ diff --git a/src/util/regexp.h b/src/util/regexp.h index c5a094f8c..453fa7ba9 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -3,7 +3,7 @@ ** \verbatim ** Original author: Tianyi Liang ** Major contributors: none - ** Minor contributors (to current version): Morgan Deters + ** Minor contributors (to current version): Morgan Deters, Andrew Reynolds ** This file is part of the CVC4 project. ** Copyright (c) 2009-2014 New York University and The University of Iowa ** See the file COPYING in the top-level source directory for licensing @@ -252,7 +252,7 @@ public: } } }*/ - std::vector::const_iterator itr = std::search(d_str.begin(), d_str.end(), y.d_str.begin(), y.d_str.end()); + std::vector::const_iterator itr = std::search(d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end()); if(itr != d_str.end()) { ret = itr - d_str.begin(); } diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index 09dbdf08f..894e2546a 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -59,7 +59,14 @@ TESTS = \ idof-triv.smt2 \ chapman150408.smt2 \ pierre150331.smt2 \ - norn-360.smt2 + norn-360.smt2 \ + norn-simp-rew.smt2 \ + norn-simp-rew-sat.smt2 \ + idof-handg.smt2 \ + idof-nconst-index.smt2 \ + idof-neg-index.smt2 \ + bug612.smt2 \ + bug615.smt2 FAILING_TESTS = diff --git a/test/regress/regress0/strings/bug612.smt2 b/test/regress/regress0/strings/bug612.smt2 new file mode 100644 index 000000000..b627e2064 --- /dev/null +++ b/test/regress/regress0/strings/bug612.smt2 @@ -0,0 +1,10 @@ +(set-logic QF_S) +(set-option :strings-exp true) +(set-info :status unsat) + +(declare-fun s () String) + +(assert (not (str.contains s "x"))) +(assert (str.contains s "xy")) + +(check-sat) diff --git a/test/regress/regress0/strings/bug615.smt2 b/test/regress/regress0/strings/bug615.smt2 new file mode 100644 index 000000000..86cc592fb --- /dev/null +++ b/test/regress/regress0/strings/bug615.smt2 @@ -0,0 +1,26 @@ +(set-logic QF_S) +(set-option :strings-exp true) +(set-info :status sat) + +(declare-fun s () String) +;(assert (= s ""))) +(assert (str.contains joined "")) + +; ()+ +(assert (str.in.re joined + (re.+ (re.++ + (str.to.re "") + )) + )) + +(check-sat) diff --git a/test/regress/regress0/strings/idof-handg.smt2 b/test/regress/regress0/strings/idof-handg.smt2 new file mode 100644 index 000000000..40aff3168 --- /dev/null +++ b/test/regress/regress0/strings/idof-handg.smt2 @@ -0,0 +1,7 @@ +(set-logic ALL_SUPPORTED) +(set-option :strings-exp true) +(set-info :status sat) +(declare-fun s () String) +(assert (str.contains s "Hello and goodbye!")) +(assert (> (str.indexof s "goodbye" 0) (str.indexof s "Hello" 0))) +(check-sat) \ No newline at end of file diff --git a/test/regress/regress0/strings/idof-nconst-index.smt2 b/test/regress/regress0/strings/idof-nconst-index.smt2 new file mode 100644 index 000000000..eba492220 --- /dev/null +++ b/test/regress/regress0/strings/idof-nconst-index.smt2 @@ -0,0 +1,9 @@ +(set-logic ALL_SUPPORTED) +(set-option :strings-exp true) +(set-info :status sat) +(declare-fun s () String) +(assert (str.contains s "Hello and goodbye!")) +(declare-fun x () Int) +(assert (<= (str.len s) x)) +(assert (not (= (str.indexof s "goodbye" (- x 30)) (- 1)))) +(check-sat) diff --git a/test/regress/regress0/strings/idof-neg-index.smt2 b/test/regress/regress0/strings/idof-neg-index.smt2 new file mode 100644 index 000000000..c24fcc00a --- /dev/null +++ b/test/regress/regress0/strings/idof-neg-index.smt2 @@ -0,0 +1,8 @@ +(set-logic ALL_SUPPORTED) +(set-option :strings-exp true) +(set-info :status unsat) +(declare-fun s () String) +(declare-fun x () Int) +(assert (< x 0)) +(assert (>= (str.indexof s "goodbye" x) 0)) +(check-sat) diff --git a/test/regress/regress0/strings/norn-simp-rew-sat.smt2 b/test/regress/regress0/strings/norn-simp-rew-sat.smt2 new file mode 100644 index 000000000..01a102bf9 --- /dev/null +++ b/test/regress/regress0/strings/norn-simp-rew-sat.smt2 @@ -0,0 +1,25 @@ +(set-logic QF_SLIA) +(set-option :strings-exp true) +(set-info :status sat) + +(declare-fun var_0 () String) +(declare-fun var_1 () String) +(declare-fun var_2 () String) +(declare-fun var_3 () String) +(declare-fun var_4 () String) +(declare-fun var_5 () String) +(declare-fun var_6 () String) +(declare-fun var_7 () String) +(declare-fun var_8 () String) +(declare-fun var_9 () String) +(declare-fun var_10 () String) +(declare-fun var_11 () String) +(declare-fun var_12 () String) + +(assert (str.in.re (str.++ var_3 "z" var_4 ) (re.++ (re.* (re.union (re.union (str.to.re "z") (str.to.re "a")) (re.++ (str.to.re "b") (re.++ (re.* (str.to.re "b")) (re.union (str.to.re "z") (str.to.re "a")))))) (re.++ (str.to.re "b") (re.* (str.to.re "b")))))) +(assert (str.in.re var_4 (re.* (re.range "a" "u")))) +(assert (str.in.re var_4 (re.* (str.to.re "b")))) +(assert (str.in.re var_3 (re.* (re.range "a" "u")))) +(assert (str.in.re var_3 (re.* (str.to.re "a")))) +(assert (<= 0 (str.len var_4))) +(check-sat) \ No newline at end of file diff --git a/test/regress/regress0/strings/norn-simp-rew.smt2 b/test/regress/regress0/strings/norn-simp-rew.smt2 new file mode 100755 index 000000000..d0cd69cb0 --- /dev/null +++ b/test/regress/regress0/strings/norn-simp-rew.smt2 @@ -0,0 +1,29 @@ +(set-logic QF_SLIA) +(set-option :strings-exp true) +(set-info :status unsat) + +(declare-fun var_0 () String) +(declare-fun var_1 () String) +(declare-fun var_2 () String) +(declare-fun var_3 () String) +(declare-fun var_4 () String) +(declare-fun var_5 () String) +(declare-fun var_6 () String) +(declare-fun var_7 () String) +(declare-fun var_8 () String) +(declare-fun var_9 () String) +(declare-fun var_10 () String) +(declare-fun var_11 () String) +(declare-fun var_12 () String) + +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.++ (re.* (re.union (re.union (str.to.re "z") (str.to.re "a")) (re.++ (str.to.re "b") (re.++ (re.* (str.to.re "b")) (re.union (str.to.re "z") (str.to.re "a")))))) (re.++ (str.to.re "b") (re.* (str.to.re "b")))))) +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.++ (re.* (re.union (str.to.re "a") (re.++ (str.to.re "b") (re.++ (re.* (re.union (str.to.re "b") (str.to.re "a"))) (str.to.re "z"))))) (re.++ (str.to.re "b") (re.* (re.union (str.to.re "b") (str.to.re "a"))))))) +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.++ (re.* (re.++ (str.to.re "b") (re.++ (re.* (re.union (str.to.re "z") (str.to.re "b"))) (str.to.re "a")))) (re.++ (str.to.re "b") (re.* (re.union (str.to.re "z") (str.to.re "b"))))))) +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.++ (re.* (re.union (str.to.re "z") (re.++ (re.union (str.to.re "b") (str.to.re "a")) (re.union (str.to.re "z") (str.to.re "b"))))) (re.union (str.to.re "b") (str.to.re "a"))))) +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.* (re.++ (str.to.re "b") (re.++ (re.* (str.to.re "z")) (str.to.re "b")))))) +(assert (str.in.re (str.++ var_7 "z" var_8 ) (re.++ (re.* (re.++ (str.to.re "b") (str.to.re "z"))) (str.to.re "b")))) +(assert (str.in.re (str.++ "a" var_8 ) (re.* (re.range "a" "u")))) +(assert (str.in.re var_8 (re.* (re.range "a" "u")))) +(assert (str.in.re var_7 (re.* (re.range "a" "u")))) +(assert (not (str.in.re (str.++ "b" var_7 ) (re.* (re.range "a" "u"))))) +(check-sat) \ No newline at end of file -- 2.30.2