From: Andrew Reynolds Date: Sat, 2 Dec 2017 02:03:03 +0000 (-0600) Subject: Improve rewriter for string replace (#1416) X-Git-Tag: cvc5-1.0.0~5434 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1edc0786ca4cc2dd60dc66a3ff0ac701b50f4103;p=cvc5.git Improve rewriter for string replace (#1416) --- diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index 4745817c8..aab4196cc 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -196,20 +196,19 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, return Node::null(); } -// TODO (#1180) rename this to rewriteConcat // TODO (#1180) add rewrite // str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) ---> // str.substr( x, n1, n2+n3 ) -Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { - Trace("strings-prerewrite") << "Strings::rewriteConcatString start " << node << std::endl; +Node TheoryStringsRewriter::rewriteConcat(Node node) +{ + Trace("strings-prerewrite") << "Strings::rewriteConcat start " << node + << std::endl; Node retNode = node; std::vector node_vec; Node preNode = Node::null(); for(unsigned int i=0; i &vec) { vec.erase(vec.begin() + i); } else if(vec[i].getKind()==kind::STRING_TO_REGEXP && imkNode(kind::STRING_CONCAT, vec[i][0], vec[i+1][0]); - tmp = rewriteConcatString(tmp); + tmp = rewriteConcat(tmp); vec[i] = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, tmp); vec.erase(vec.begin() + i + 1); } else { @@ -568,8 +568,8 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { unsigned j=0; if(!preNode.isNull()) { if(tmpNode[0].getKind() == kind::STRING_TO_REGEXP) { - preNode = rewriteConcatString( - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, preNode, tmpNode[0][0] ) ); + preNode = rewriteConcat(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, preNode, tmpNode[0][0])); node_vec.push_back( NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, preNode ) ); preNode = Node::null(); } else { @@ -589,8 +589,8 @@ Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { if(preNode.isNull()) { preNode = tmpNode[0]; } else { - preNode = rewriteConcatString( - NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, preNode, tmpNode[0] ) ); + preNode = rewriteConcat(NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, preNode, tmpNode[0])); } } else if( tmpNode.getKind() == kind::REGEXP_EMPTY ) { emptyflag = true; @@ -899,10 +899,6 @@ Node TheoryStringsRewriter::rewriteMembership(TNode node) { Node x = node[0]; Node r = node[1];//applyAX(node[1]); - if(node[0].getKind() == kind::STRING_CONCAT) { - x = rewriteConcatString(node[0]); - } - if(r.getKind() == kind::REGEXP_EMPTY) { retNode = NodeManager::currentNM()->mkConst( false ); } else if(x.getKind()==kind::CONST_STRING && isConstRegExp(r)) { @@ -1011,32 +1007,22 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { Node orig = retNode; if(node.getKind() == kind::STRING_CONCAT) { - retNode = rewriteConcatString(node); + retNode = rewriteConcat(node); } else if(node.getKind() == kind::EQUAL) { - // TODO (#1180) are these necessary? Node leftNode = node[0]; - if(node[0].getKind() == kind::STRING_CONCAT) { - leftNode = rewriteConcatString(node[0]); - } Node rightNode = node[1]; - if(node[1].getKind() == kind::STRING_CONCAT) { - rightNode = rewriteConcatString(node[1]); - } - if(leftNode == rightNode) { retNode = NodeManager::currentNM()->mkConst(true); } else if(leftNode.isConst() && rightNode.isConst()) { retNode = NodeManager::currentNM()->mkConst(false); } else if(leftNode > rightNode) { retNode = NodeManager::currentNM()->mkNode(kind::EQUAL, rightNode, leftNode); - } else if( leftNode != node[0] || rightNode != node[1]) { - retNode = NodeManager::currentNM()->mkNode(kind::EQUAL, leftNode, rightNode); } } else if(node.getKind() == kind::STRING_LENGTH) { if( node[0].isConst() ){ retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( node[0].getConst().size() ) ); }else if( node[0].getKind() == kind::STRING_CONCAT ){ - Node tmpNode = rewriteConcatString(node[0]); + Node tmpNode = node[0]; if(tmpNode.isConst()) { retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational( tmpNode.getConst().size() ) ); //} else if(tmpNode.getKind() == kind::STRING_SUBSTR) { @@ -1171,9 +1157,8 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { Node orig = retNode; Trace("strings-prerewrite") << "Strings::preRewrite start " << node << std::endl; - if(node.getKind() == kind::STRING_CONCAT) { - retNode = rewriteConcatString(node); - }else if(node.getKind() == kind::REGEXP_CONCAT) { + if (node.getKind() == kind::REGEXP_CONCAT) + { retNode = prerewriteConcatRegExp(node); } else if(node.getKind() == kind::REGEXP_UNION) { retNode = prerewriteOrRegExp(node); @@ -1775,55 +1760,126 @@ Node TheoryStringsRewriter::rewriteIndexof( Node node ) { Node TheoryStringsRewriter::rewriteReplace( Node node ) { if( node[1]==node[2] ){ - return node[0]; - }else{ - // TODO (#1180) : try str.contains( node[0], node[1] ) ---> false - if( node[1].isConst() ){ - if( node[1].getConst().isEmptyString() ){ - return node[0]; + return returnRewrite(node, node[0], "rpl-id"); + } + else if (node[0] == node[1]) + { + return returnRewrite(node, node[2], "rpl-replace"); + } + else if (node[1].isConst()) + { + if (node[1].getConst().isEmptyString()) + { + return returnRewrite(node, node[0], "rpl-empty"); + } + else if (node[0].isConst()) + { + CVC4::String s = node[0].getConst(); + CVC4::String t = node[1].getConst(); + std::size_t p = s.find(t); + if (p == std::string::npos) + { + return returnRewrite(node, node[0], "rpl-const-nfind"); } - std::vector< Node > children; - getConcat( node[0], children ); - if( children[0].isConst() ){ - CVC4::String s = children[0].getConst(); - CVC4::String t = node[1].getConst(); - std::size_t p = s.find(t); - if( p != std::string::npos ) { - Node retNode; - if( node[2].isConst() ){ - CVC4::String r = node[2].getConst(); - CVC4::String ret = s.replace(t, r); - retNode = NodeManager::currentNM()->mkConst( ::CVC4::String(ret) ); - } else { - CVC4::String s1 = s.substr(0, (int)p); - CVC4::String s3 = s.substr((int)p + (int)t.size()); - Node ns1 = NodeManager::currentNM()->mkConst( ::CVC4::String(s1) ); - Node ns3 = NodeManager::currentNM()->mkConst( ::CVC4::String(s3) ); - retNode = NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, ns1, node[2], ns3 ); - } - if( children.size()>1 ){ - children[0] = retNode; - return mkConcat( kind::STRING_CONCAT, children ); - }else{ - return retNode; - } - }else{ - //could not find replacement string - if( node[0].isConst() ){ - return node[0]; - }else{ - //check for overlap, if none, we can remove the prefix - if( s.overlap(t)==0 ){ - std::vector< Node > spl; - spl.insert( spl.end(), children.begin()+1, children.end() ); - return NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, children[0], - NodeManager::currentNM()->mkNode( kind::STRING_STRREPL, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ) ); - } - } + else + { + CVC4::String s1 = s.substr(0, (int)p); + CVC4::String s3 = s.substr((int)p + (int)t.size()); + Node ns1 = NodeManager::currentNM()->mkConst(::CVC4::String(s1)); + Node ns3 = NodeManager::currentNM()->mkConst(::CVC4::String(s3)); + Node ret = NodeManager::currentNM()->mkNode( + kind::STRING_CONCAT, ns1, node[2], ns3); + return returnRewrite(node, ret, "rpl-const-find"); + } + } + } + + std::vector children0; + getConcat(node[0], children0); + std::vector children1; + getConcat(node[1], children1); + + // check if contains definitely does (or does not) hold + Node cmp_con = + NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, node[0], node[1]); + Node cmp_conr = Rewriter::rewrite(cmp_con); + if (cmp_conr.isConst()) + { + if (cmp_conr.getConst()) + { + // component-wise containment + std::vector cb; + std::vector ce; + int cc = componentContains(children0, children1, cb, ce, true, 1); + if (cc != -1) + { + if (cc == 0 && children0[0] == children1[0]) + { + // definitely a prefix, can do the replace + // for example, + // str.replace( str.++( x, "ab" ), str.++( x, "a" ), y ) ---> + // str.++( y, "b" ) + std::vector cres; + cres.push_back(node[2]); + cres.insert(cres.end(), ce.begin(), ce.end()); + Node ret = mkConcat(kind::STRING_CONCAT, cres); + return returnRewrite(node, ret, "rpl-cctn-rpl"); + } + else if (!ce.empty()) + { + // we can pull remainder past first definite containment + // for example, + // str.replace( str.++( x, "ab" ), "a", y ) ---> + // str.++( str.replace( str.++( x, "a" ), "a", y ), "b" ) + std::vector cc; + cc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + mkConcat(kind::STRING_CONCAT, children0), + node[1], + node[2])); + cc.insert(cc.end(), ce.begin(), ce.end()); + Node ret = mkConcat(kind::STRING_CONCAT, cc); + return returnRewrite(node, ret, "rpl-cctn"); } } } + else + { + // ~contains( t, s ) => ( replace( t, s, r ) ----> t ) + return returnRewrite(node, node[0], "rpl-nctn"); + } } + + if (cmp_conr != cmp_con) + { + // pull endpoints that can be stripped + // for example, + // str.replace( str.++( "b", x, "b" ), "a", y ) ---> + // str.++( "b", str.replace( x, "a", y ), "b" ) + std::vector cb; + std::vector ce; + if (stripConstantEndpoints(children0, children1, cb, ce)) + { + std::vector cc; + cc.insert(cc.end(), cb.begin(), cb.end()); + cc.push_back(NodeManager::currentNM()->mkNode( + kind::STRING_STRREPL, + mkConcat(kind::STRING_CONCAT, children0), + node[1], + node[2])); + cc.insert(cc.end(), ce.begin(), ce.end()); + Node ret = mkConcat(kind::STRING_CONCAT, cc); + return returnRewrite(node, ret, "rpl-pull-endpt"); + } + } + + // TODO (#1180) incorporate these? + // contains( t, s ) => + // replace( replace( x, t, s ), s, r ) ----> replace( x, t, r ) + // contains( t, s ) => + // contains( replace( t, s, r ), r ) ----> true + + Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl; return node; } diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index b7712bcef..64120eca0 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -33,8 +33,6 @@ private: static bool isConstRegExp( TNode t ); static bool testConstStringInRegExp( CVC4::String &s, unsigned int index_start, TNode r ); - static Node rewriteConcatString(TNode node); - static void mergeInto(std::vector &t, const std::vector &s); static void shrinkConVec(std::vector &vec); static Node applyAX( TNode node ); @@ -65,6 +63,12 @@ private: static inline void init() {} static inline void shutdown() {} + /** rewrite concat + * This is the entry point for post-rewriting terms node of the form + * str.++( t1, .., tn ) + * Returns the rewritten form of node. + */ + static Node rewriteConcat(Node node); /** rewrite substr * This is the entry point for post-rewriting terms node of the form * str.substr( s, i1, i2 ) @@ -82,7 +86,12 @@ private: */ static Node rewriteContains(Node node); static Node rewriteIndexof(Node node); - static Node rewriteReplace(Node node); + /** rewrite replace + * This is the entry point for post-rewriting terms n of the form + * str.replace( s, t, r ) + * Returns the rewritten form of n. + */ + static Node rewriteReplace(Node n); /** gets the "vector form" of term n, adds it to c. * For example: