From 4b212907cbc414905898edbae41eb654670c1759 Mon Sep 17 00:00:00 2001 From: Tianyi Liang Date: Wed, 7 Jan 2015 10:02:01 -0600 Subject: [PATCH] added initial AX rules; fixed a bug for empty string in regex --- .../strings/theory_strings_rewriter.cpp | 407 +++++++++++++++++- src/theory/strings/theory_strings_rewriter.h | 7 + 2 files changed, 398 insertions(+), 16 deletions(-) diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index f6c12a2e1..3e896c559 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -83,6 +83,372 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { return retNode; } +Node TheoryStringsRewriter::concatTwoNodes(TNode n1, TNode n2) { + Assert(n1.getKind() != kind::REGEXP_CONCAT); + TNode tmp = n2.getKind() == kind::REGEXP_CONCAT ? n2[0] : n2; + if(n1.getKind() == kind::STRING_TO_REGEXP && tmp.getKind() == kind::STRING_TO_REGEXP) { + tmp = NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, n1[0], tmp[0] ); + tmp = rewriteConcatString( tmp ); + tmp = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, tmp ); + } else { + tmp = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, n1, tmp ); + } + Node retNode = tmp; + if(n2.getKind() == kind::REGEXP_CONCAT) { + std::vector vec; + if(tmp.getKind() != kind::REGEXP_CONCAT) { + vec.push_back(retNode); + } else { + vec.push_back(retNode[0]); + vec.push_back(retNode[1]); + } + for(unsigned j=1; jmkNode( kind::REGEXP_CONCAT, vec ); + } + Trace("regexp-ax-debug") << "Regexp::AX::concatTwoNodes: (" << n1 << ", " << n2 << ") -> " << retNode << std::endl; + return retNode; +} + +void TheoryStringsRewriter::unionAndConcat(std::vector &vec_nodes, Node node) { + if(vec_nodes.empty()) { + vec_nodes.push_back(node); + } else { + Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); + if(node != emptysingleton) { + for(unsigned i=0; i vec2; + for(unsigned j=0; j 1); + vec_nodes[i] = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, vec2); + } else if(vec_nodes[i].getKind() == kind::REGEXP_EMPTY) { + //do nothing + } else if(vec_nodes[i] == emptysingleton) { + vec_nodes[i] = node; + } else if(vec_nodes[i].getKind() == kind::STRING_TO_REGEXP) { + vec_nodes[i] = concatTwoNodes(vec_nodes[i], node); + } else { + vec_nodes[i] = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, vec_nodes[i], node); + } + } + } + } +} + +void TheoryStringsRewriter::mergeInto(std::vector &t, const std::vector &s) { + for(std::vector::const_iterator itr=s.begin(); itr!=s.end(); itr++) { + if(std::find(t.begin(), t.end(), (*itr)) == t.end()) { + t.push_back( *itr ); + } + } +} + +void TheoryStringsRewriter::shrinkConVec(std::vector &vec) { + unsigned i = 0; + Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); + while(i < vec.size()) { + if( vec[i] == emptysingleton ) { + vec.erase(vec.begin() + i); + } else if(vec[i].getKind()==kind::STRING_TO_REGEXP && imkNode(kind::STRING_CONCAT, vec[i][0], vec[i+1][0]); + tmp = rewriteConcatString(tmp); + vec[i] = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, tmp); + vec.erase(vec.begin() + i + 1); + } else { + i++; + } + } +} + +Node TheoryStringsRewriter::applyAX( TNode node ) { + Trace("regexp-ax") << "Regexp::AX start " << node << std::endl; + Node retNode = node; + + int k = node.getKind(); + switch( k ) { + case kind::REGEXP_UNION: { + std::vector vec_nodes; + for(unsigned i=0; i nvec; + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); + } else { + retNode = vec_nodes.size() == 1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ); + } + break; + } + case kind::REGEXP_CONCAT: { + std::vector< std::vector > vec_nodes; + bool emptyflag = false; + Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); + for(unsigned i=0; i vtmp; + if(tmp[j].getKind() == kind::REGEXP_CONCAT) { + for(unsigned j2=0; j2 vtmp; + for(unsigned j=0; j vtmp; + vtmp.push_back(tmp); + vec_nodes.push_back(vtmp); + } + } else { + //non-empty vec + if(tmp.getKind() == kind::REGEXP_UNION) { + unsigned cnt = vec_nodes.size(); + for(unsigned i2=0; i2 vleft( vec_nodes[i2] ); + for(unsigned j=0; j vt( vec_nodes[i2] ); + if(tmp[j].getKind() != kind::REGEXP_CONCAT) { + vt.push_back( tmp[j] ); + } else { + for(unsigned j2=0; j2 nvec; + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); + } else if(vec_nodes.empty()) { + retNode = emptysingleton; + } else if(vec_nodes.size() == 1) { + shrinkConVec(vec_nodes[0]); + retNode = vec_nodes[0].empty()? emptysingleton + : vec_nodes[0].size()==1? vec_nodes[0][0] + : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes[0]); + } else { + std::vector vtmp; + for(unsigned i=0; imkNode(kind::REGEXP_CONCAT, vec_nodes[i]); + vtmp.push_back(ntmp); + } + } + retNode = vtmp.empty()? emptysingleton + : vtmp.size()==1? vtmp[0] : NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vtmp); + } + break; + } + case kind::REGEXP_STAR: { + Node tmp = applyAX(node[0]); + Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); + if(tmp.getKind() == kind::REGEXP_EMPTY || tmp == emptysingleton) { + retNode = emptysingleton; + } else { + if(tmp.getKind() == kind::REGEXP_UNION) { + std::vector vec; + for(unsigned i=0; imkNode( kind::REGEXP_UNION, vec) ; + } + } else if(tmp.getKind() == kind::REGEXP_STAR) { + tmp = tmp[0]; + } + if(tmp != node[0]) { + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, tmp ); + } + } + break; + } + case kind::REGEXP_INTER: { + std::vector< std::vector > vec_nodes; + bool emptyflag = false; + bool epsflag = false; + Node emptysingleton = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( CVC4::String("") ) ); + for(unsigned i=0; i vtmp; + for(unsigned j=0; j vtmp; + if(tmp[j].getKind() == kind::REGEXP_INTER) { + for(unsigned j2=0; j2 vtmp; + vtmp.push_back(tmp); + vec_nodes.push_back(vtmp); + } + } else { + //non-empty vec + if(tmp.getKind() == kind::REGEXP_INTER) { + for(unsigned j=0; j vleft( vec_nodes[i2] ); + for(unsigned j=0; j vt(vec_nodes[i2]); + if(tmp[j].getKind() != kind::REGEXP_INTER) { + if(std::find(vt.begin(), vt.end(), tmp[j]) == vt.end()) { + vt.push_back(tmp[j]); + } + } else { + std::vector vtmp; + for(unsigned j2=0; j2 nvec; + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); + } else if(vec_nodes.empty()) { + //to check? + retNode = emptysingleton; + } else if(vec_nodes.size() == 1) { + retNode = vec_nodes[0].empty() ? emptysingleton : vec_nodes[0].size() == 1 ? vec_nodes[0][0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes[0] ); + } else { + std::vector vtmp; + for(unsigned i=0; imkNode( kind::REGEXP_INTER, vec_nodes[i] ); + vtmp.push_back(tmp); + } + retNode = vtmp.size() == 1? vtmp[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vtmp ); + } + break; + } +/* case kind::REGEXP_UNION: { + break; + } + case kind::REGEXP_UNION: { + break; + }*/ + case kind::REGEXP_SIGMA: { + break; + } + case kind::REGEXP_EMPTY: { + break; + } + //default: { + //to check? + //} + } + + Trace("regexp-ax") << "Regexp::AX end " << node << " to\n " << retNode << std::endl; + return retNode; +} + Node TheoryStringsRewriter::prerewriteConcatRegExp( TNode node ) { Assert( node.getKind() == kind::REGEXP_CONCAT ); Trace("strings-prerewrite") << "Strings::prerewriteConcatRegExp start " << node << std::endl; @@ -318,6 +684,9 @@ bool TheoryStringsRewriter::testConstStringInRegExp( CVC4::String &s, unsigned i } return false; } else { + for(unsigned i=0; imkConst( false ); - } else if(x.getKind()==kind::CONST_STRING && checkConstRegExp(node[1])) { + } else if(x.getKind()==kind::CONST_STRING && checkConstRegExp(r)) { //test whether x in node[1] CVC4::String s = x.getConst(); - retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, node[1] ) ); - } else if(node[1].getKind() == kind::REGEXP_SIGMA) { + retNode = NodeManager::currentNM()->mkConst( testConstStringInRegExp( s, 0, r ) ); + } else if(r.getKind() == kind::REGEXP_SIGMA) { Node one = NodeManager::currentNM()->mkConst( ::CVC4::Rational(1) ); retNode = one.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); - } else if(node[1].getKind() == kind::REGEXP_STAR && node[1][0].getKind() == kind::REGEXP_SIGMA) { + } else if(r.getKind() == kind::REGEXP_STAR && r[0].getKind() == kind::REGEXP_SIGMA) { retNode = NodeManager::currentNM()->mkConst( true ); - } else if(node[1].getKind() == kind::REGEXP_CONCAT) { + } else if(r.getKind() == kind::REGEXP_CONCAT) { //opt bool flag = true; - for(unsigned i=0; imkConst( ::CVC4::Rational( node[1].getNumChildren() ) ); + Node num = NodeManager::currentNM()->mkConst( ::CVC4::Rational( r.getNumChildren() ) ); retNode = num.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x)); } // - } else if(node[1].getKind() == kind::STRING_TO_REGEXP) { - retNode = x.eqNode(node[1][0]); - } else if(x != node[0]) { - retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, node[1] ); + } else if(r.getKind() == kind::STRING_TO_REGEXP) { + retNode = x.eqNode(r[0]); + } else if(x != node[0] || r != node[1]) { + retNode = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r ); } return retNode; } @@ -498,6 +869,8 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); } + } else if(node[1] == zero && node[2].getKind() == kind::STRING_LENGTH && node[2][0] == node[0]) { + retNode = node[0]; } } else if(node.getKind() == kind::STRING_STRCTN) { if( node[0] == node[1] ) { @@ -712,13 +1085,15 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { if(node.getKind() == kind::STRING_CONCAT) { retNode = rewriteConcatString(node); - } else if(node.getKind() == kind::REGEXP_CONCAT) { + } + else if(node.getKind() == kind::REGEXP_CONCAT) { retNode = prerewriteConcatRegExp(node); } else if(node.getKind() == kind::REGEXP_UNION) { retNode = prerewriteOrRegExp(node); } else if(node.getKind() == kind::REGEXP_INTER) { retNode = prerewriteAndRegExp(node); - } else if(node.getKind() == kind::REGEXP_STAR) { + } + else if(node.getKind() == kind::REGEXP_STAR) { if(node[0].getKind() == kind::REGEXP_STAR) { retNode = node[0]; } else if(node[0].getKind() == kind::STRING_TO_REGEXP && node[0][0].getKind() == kind::CONST_STRING && node[0][0].getConst().isEmptyString()) { diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index a1098f631..2f075febf 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -35,6 +35,13 @@ public: static bool testConstStringInRegExp( CVC4::String &s, unsigned int index_start, TNode r ); static Node rewriteConcatString(TNode node); + + static Node concatTwoNodes(TNode n1, TNode n2); + static void unionAndConcat(std::vector &vec_nodes, Node node); + static void mergeInto(std::vector &t, const std::vector &s); + static void shrinkConVec(std::vector &vec); + static Node applyAX( TNode node ); + static Node prerewriteConcatRegExp(TNode node); static Node prerewriteOrRegExp(TNode node); static Node prerewriteAndRegExp(TNode node); -- 2.30.2