From 6343fbb0c9b238aeb1addca6449f95a01071c1ac Mon Sep 17 00:00:00 2001 From: ajreynol Date: Tue, 6 Oct 2015 13:26:03 +0200 Subject: [PATCH] More improvements to strings rewriter for regexps, contains, indexof, replace and others. Enable non-recursive flat form inferences in strings theory solver. Refactor extf reductions. Use non-constant length terms when checking length equality. Add option --strings-eager-len. --- src/theory/strings/options | 3 + src/theory/strings/theory_strings.cpp | 575 ++++++++++-------- src/theory/strings/theory_strings.h | 19 +- .../strings/theory_strings_rewriter.cpp | 426 +++++++++---- src/theory/strings/theory_strings_rewriter.h | 2 +- src/util/regexp.h | 1 + test/regress/regress0/strings/Makefile.am | 9 +- .../regress0/strings/idof-rewrites.smt2 | 13 + test/regress/regress0/strings/kaluza-fl.smt2 | 97 +++ test/regress/regress0/strings/norn-ab.smt2 | 25 + 10 files changed, 794 insertions(+), 376 deletions(-) create mode 100644 test/regress/regress0/strings/idof-rewrites.smt2 create mode 100755 test/regress/regress0/strings/kaluza-fl.smt2 create mode 100755 test/regress/regress0/strings/norn-ab.smt2 diff --git a/src/theory/strings/options b/src/theory/strings/options index 858a9e21c..ff2b4de5a 100644 --- a/src/theory/strings/options +++ b/src/theory/strings/options @@ -44,5 +44,8 @@ option stringSplitEmp strings-sp-emp --strings-sp-emp bool :default true strings split on empty string option stringInferSym strings-infer-sym --strings-infer-sym bool :default true strings split on empty string +option stringEagerLen strings-eager-len --strings-eager-len bool :default true + strings eager length lemmas + endmodule diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 87cb220db..a19bab836 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -24,6 +24,7 @@ #include "smt/logic_exception.h" #include "theory/strings/options.h" #include "theory/strings/type_enumerator.h" +#include "theory/strings/theory_strings_rewriter.h" #include #define LAZY_ADD_MEMBERSHIP @@ -185,7 +186,7 @@ Node TheoryStrings::getLengthTerm( Node t ) { Node TheoryStrings::getLength( Node t, bool use_t ) { Node retNode; - if(t.isConst() || use_t) { + if( use_t ){ retNode = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, t ); } else { retNode = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, getLengthTerm( t ) ); @@ -574,53 +575,7 @@ void TheoryStrings::check(Effort e) { //run preprocess on memberships bool addFact = true; if( options::stringLazyPreproc() ){ - NodeBoolMap::const_iterator it = d_preproc_cache.find( atom ); - if( it==d_preproc_cache.end() ){ - d_preproc_cache[ atom ] = true; - std::vector< Node > new_nodes; - Node res = d_preproc.decompose( atom, new_nodes ); - if( atom!=res ){ - //check if reduction/deduction - std::vector< Node > subs_lhs; - std::vector< Node > subs_rhs; - subs_lhs.push_back( atom ); - subs_rhs.push_back( d_true ); - Node sres = res.substitute( subs_lhs.begin(), subs_lhs.end(), subs_rhs.begin(), subs_rhs.end() ); - sres = Rewriter::rewrite( sres ); - if( sres!=res ){ - Node plem = NodeManager::currentNM()->mkNode( kind::IMPLIES, atom, sres ); - plem = Rewriter::rewrite( plem ); - Trace("strings-assert") << "(assert " << plem << ")" << std::endl; - d_out->lemma( plem ); - Trace("strings-pp-lemma") << "Preprocess impl lemma : " << plem << std::endl; - Trace("strings-pp-lemma") << "...from " << fact << std::endl; - }else{ - Trace("strings-assertion-debug") << "...preprocessed to : " << res << std::endl; - Node plem = NodeManager::currentNM()->mkNode( kind::IFF, atom, res ); - plem = Rewriter::rewrite( plem ); - Trace("strings-assert") << "(assert " << plem << ")" << std::endl; - d_out->lemma( plem ); - Trace("strings-pp-lemma") << "Preprocess eq lemma : " << plem << std::endl; - Trace("strings-pp-lemma") << "...from " << fact << std::endl; - //reduced by preprocess - addFact = false; - d_preproc_cache[ atom ] = false; - } - }else{ - Trace("strings-assertion-debug") << "...preprocess ok." << std::endl; - } - if( !new_nodes.empty() ){ - Node nnlem = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes ); - nnlem = Rewriter::rewrite( nnlem ); - Trace("strings-assertion-debug") << "...new nodes : " << nnlem << std::endl; - Trace("strings-pp-lemma") << "Preprocess lemma : " << nnlem << std::endl; - Trace("strings-pp-lemma") << "...from " << fact << std::endl; - Trace("strings-assert") << "(assert " << nnlem << ")" << std::endl; - d_out->lemma( nnlem ); - } - }else{ - addFact = (*it).second; - } + addFact = doPreprocess( atom ); } //assert pending fact if( addFact ){ @@ -643,7 +598,7 @@ void TheoryStrings::check(Effort e) { eq::EqClassIterator eqc2_i = eq::EqClassIterator( eqc, &d_equalityEngine ); Trace("strings-eqc") << "Eqc( " << eqc << " ) : { "; while( !eqc2_i.isFinished() ) { - if( (*eqc2_i)!=eqc ){ + if( (*eqc2_i)!=eqc && (*eqc2_i).getKind()!=kind::EQUAL ){ Trace("strings-eqc") << (*eqc2_i) << " "; } ++eqc2_i; @@ -676,18 +631,16 @@ void TheoryStrings::check(Effort e) { checkNormalForms(); Trace("strings-process") << "Done check normal forms, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; if( !hasProcessed() ){ - checkLengthsEqc(); - Trace("strings-process") << "Done check lengths, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; + if( options::stringEagerLen() ){ + checkLengthsEqc(); + Trace("strings-process") << "Done check lengths, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; + } if( !hasProcessed() ){ checkExtendedFuncs(); Trace("strings-process") << "Done check extended functions, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; if( !hasProcessed() ){ checkCardinality(); Trace("strings-process") << "Done check cardinality, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - //if( !d_conflict && !addedFact ) { - // addedFact = checkExtendedFuncsReduction(); - // Trace("strings-process") << "Done check extended functions reductions, addedFact = " << addedFact << ", d_conflict = " << d_conflict << std::endl; - //} } } } @@ -707,16 +660,74 @@ void TheoryStrings::check(Effort e) { Assert( d_lemma_cache.empty() ); } +bool TheoryStrings::doPreprocess( Node atom ) { + bool addFact = true; + NodeBoolMap::const_iterator it = d_preproc_cache.find( atom ); + if( it==d_preproc_cache.end() ){ + d_preproc_cache[ atom ] = true; + std::vector< Node > new_nodes; + Node res = d_preproc.decompose( atom, new_nodes ); + if( atom!=res ){ + //check if reduction/deduction + std::vector< Node > subs_lhs; + std::vector< Node > subs_rhs; + subs_lhs.push_back( atom ); + subs_rhs.push_back( d_true ); + Node sres = res.substitute( subs_lhs.begin(), subs_lhs.end(), subs_rhs.begin(), subs_rhs.end() ); + sres = Rewriter::rewrite( sres ); + if( sres!=res ){ + Node plem = NodeManager::currentNM()->mkNode( kind::IMPLIES, atom, sres ); + plem = Rewriter::rewrite( plem ); + Trace("strings-assert") << "(assert " << plem << ")" << std::endl; + d_out->lemma( plem ); + Trace("strings-pp-lemma") << "Preprocess impl lemma : " << plem << std::endl; + Trace("strings-pp-lemma") << "...from " << atom << std::endl; + }else{ + Trace("strings-assertion-debug") << "...preprocessed to : " << res << std::endl; + Node plem = NodeManager::currentNM()->mkNode( kind::IFF, atom, res ); + plem = Rewriter::rewrite( plem ); + Trace("strings-assert") << "(assert " << plem << ")" << std::endl; + d_out->lemma( plem ); + Trace("strings-pp-lemma") << "Preprocess eq lemma : " << plem << std::endl; + Trace("strings-pp-lemma") << "...from " << atom << std::endl; + //reduced by preprocess + addFact = false; + d_preproc_cache[ atom ] = false; + } + }else{ + Trace("strings-assertion-debug") << "...preprocess ok." << std::endl; + } + if( !new_nodes.empty() ){ + Node nnlem = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes ); + nnlem = Rewriter::rewrite( nnlem ); + Trace("strings-assertion-debug") << "...new nodes : " << nnlem << std::endl; + Trace("strings-pp-lemma") << "Preprocess lemma : " << nnlem << std::endl; + Trace("strings-pp-lemma") << "...from " << atom << std::endl; + Trace("strings-assert") << "(assert " << nnlem << ")" << std::endl; + d_out->lemma( nnlem ); + } + }else{ + addFact = (*it).second; + } + return addFact; +} + bool TheoryStrings::hasProcessed() { return d_conflict || !d_lemma_cache.empty() || !d_pending.empty(); } void TheoryStrings::addToExplanation( Node a, Node b, std::vector< Node >& exp ) { if( a!=b ){ + Debug("strings-explain") << "Add to explanation : " << a << " == " << b << std::endl; Assert( areEqual( a, b ) ); exp.push_back( a.eqNode( b ) ); } } +void TheoryStrings::addToExplanation( Node lit, std::vector< Node >& exp ) { + if( !lit.isNull() ){ + exp.push_back( lit ); + } +} TheoryStrings::EqcInfo::EqcInfo( context::Context* c ) : d_const_term(c), d_length_term(c), d_cardinality_lem_k(c), d_normalized_length(c) { @@ -1827,6 +1838,7 @@ int TheoryStrings::processReverseDeq( std::vector< Node >& nfi, std::vector< Nod int TheoryStrings::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ) { while( index=nfi.size() || index>=nfj.size() ) { + Trace("strings-solve-debug") << "Disequality normalize empty" << std::endl; std::vector< Node > ant; //we have a conflict : because the lengths are equal, the remainder needs to be empty, which will lead to a conflict Node lni = getLength( ni ); @@ -1942,6 +1954,7 @@ bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) { bool TheoryStrings::registerTerm( Node n ) { if(d_registered_terms_cache.find(n) == d_registered_terms_cache.end()) { + d_registered_terms_cache.insert(n); Debug("strings-register") << "TheoryStrings::registerTerm() " << n << endl; if(n.getType().isString()) { if(n.getKind() == kind::STRING_SUBSTR_TOTAL) { @@ -1996,17 +2009,14 @@ bool TheoryStrings::registerTerm( Node n ) { } lsum = Rewriter::rewrite( lsum ); d_proxy_var_to_length[sk] = lsum; - Node ceq = Rewriter::rewrite( skl.eqNode( lsum ) ); - Trace("strings-lemma") << "Strings::Lemma LENGTH : " << ceq << std::endl; - Trace("strings-lemma-debug") << " prerewrite : " << skl.eqNode( lsum ) << std::endl; - Trace("strings-assert") << "(assert " << ceq << ")" << std::endl; - d_out->lemma(ceq); - //also add this to the equality engine - if( n.getKind() == kind::CONST_STRING ) { - d_equalityEngine.assertEquality( ceq, true, d_true ); + if( options::stringEagerLen() || n.getKind()==kind::CONST_STRING ){ + Node ceq = Rewriter::rewrite( skl.eqNode( lsum ) ); + Trace("strings-lemma") << "Strings::Lemma LENGTH : " << ceq << std::endl; + Trace("strings-lemma-debug") << " prerewrite : " << skl.eqNode( lsum ) << std::endl; + Trace("strings-assert") << "(assert " << ceq << ")" << std::endl; + d_out->lemma(ceq); } } - d_registered_terms_cache.insert(n); return true; } else { AlwaysAssert(false, "String Terms only in registerTerm."); @@ -2040,7 +2050,7 @@ void TheoryStrings::sendInfer( Node eq_exp, Node eq, const char * c ) { eq = Rewriter::rewrite( eq ); if( eq==d_false || eq.getKind()==kind::OR ) { sendLemma( eq_exp, eq, c ); - } else { + }else if( eq!=d_true ){ if( options::stringInferSym() ){ std::vector< Node > vars; std::vector< Node > subs; @@ -2321,6 +2331,7 @@ void TheoryStrings::checkNormalForms() { //first check for cycles, while building ordering of equivalence classes d_eqc.clear(); d_flat_form.clear(); + d_flat_form_index.clear(); Trace("strings-process") << "Check equivalence classes cycles...." << std::endl; d_eqcs.clear(); for( unsigned i=0; i::iterator itc = d_eqc_to_const.find( eqc ); if( itc!=d_eqc_to_const.end() ){ - c = itc->second; + c = itc->second; //use? } std::map< Node, std::vector< Node > >::iterator it = d_eqc.find( eqc ); if( it!=d_eqc.end() && it->second.size()>1 ){ - for( unsigned r=0; r<2; r++ ){ - bool success; - int count = 0; - do{ - success = true; - Node curr = d_flat_form[it->second[0]][count]; - Node lcurr = getLength( curr ); - for( unsigned i=1; isecond.size(); i++ ){ - Node cc = d_flat_form[it->second[i]][count]; - if( cc!=curr ){ - //constant conflict? TODO - //if lengths are the same, apply LengthEq - Node lcc = getLength( cc ); - if( areEqual( lcurr, lcc ) ){ - std::vector< Node > exp; - Node a = it->second[0]; - Node b = it->second[i]; - addToExplanation( a, b, exp ); - //explain why prefixes up to now were the same - for( unsigned j=0; jsecond.size()-1; start++ ){ + for( unsigned r=0; r<2; r++ ){ + unsigned count = 0; + std::vector< Node > inelig; + for( unsigned i=0; i<=start; i++ ){ + inelig.push_back( it->second[start] ); + } + Node a = it->second[start]; + Node b; + do{ + std::vector< Node > exp; + //std::vector< Node > exp_n; + Node conc; + int inf_type = -1; + if( count==d_flat_form[a].size() ){ + for( unsigned i=start+1; isecond.size(); i++ ){ + b = it->second[i]; + if( std::find( inelig.begin(), inelig.end(), b )==inelig.end() ){ + if( count conc_c; + for( unsigned j=count; j0 ); + //swap, will enforce is empty past current + a = it->second[i]; b = it->second[start]; + count--; + break; + } + inelig.push_back( it->second[i] ); + } + } + }else{ + Node curr = d_flat_form[a][count]; + Node curr_c = d_eqc_to_const[curr]; + Node lcurr = getLength( curr ); + for( unsigned i=1; isecond.size(); i++ ){ + b = it->second[i]; + if( std::find( inelig.begin(), inelig.end(), b )==inelig.end() ){ + if( count==d_flat_form[b].size() ){ + inelig.push_back( b ); + //endpoint + std::vector< Node > conc_c; + for( unsigned j=count; j0 ); + count--; + break; + }else{ + Node cc = d_flat_form[b][count]; + if( cc!=curr ){ + inelig.push_back( b ); + Assert( !areEqual( curr, cc ) ); + Node cc_c = d_eqc_to_const[cc]; + if( !curr_c.isNull() && !cc_c.isNull() ){ + //check for constant conflict + int index; + Node s = TheoryStringsRewriter::splitConstant( cc_c, curr_c, index, r==1 ); + if( s.isNull() ){ + addToExplanation( a[d_flat_form_index[a][count]], d_eqc_to_const_base[curr], exp ); + addToExplanation( d_eqc_to_const_exp[curr], exp ); + addToExplanation( b[d_flat_form_index[b][count]], d_eqc_to_const_base[cc], exp ); + addToExplanation( d_eqc_to_const_exp[cc], exp ); + conc = d_false; + inf_type = 0; + break; + } + }else{ + //if lengths are the same, apply LengthEq + Node lcc = getLength( cc ); + if( areEqual( lcurr, lcc ) ){ + Node ac = a[d_flat_form_index[a][count]]; + Node bc = b[d_flat_form_index[b][count]]; + //exp_n.push_back( getLength( curr, true ).eqNode( getLength( cc, true ) ) ); + addToExplanation( lcurr, lcc, exp ); + if( !lcc.isConst() ){ + addToExplanation( bc, lcc[0], exp ); + } + if( !lcurr.isConst() ){ + addToExplanation( ac, lcurr[0], exp ); + } + conc = ac.eqNode( bc ); + inf_type = 1; + break; + } + } + } + } } - //explain why components are empty - //TODO - addToExplanation( lcurr, lcc, exp ); - sendInfer( mkAnd( exp ), curr.eqNode( cc ), "F-LengthEq" ); - return; } - success = false; } - } - count++; - //check if we are at the endpoint of any string - for( unsigned i=0; isecond.size(); i++ ){ - if( count==d_flat_form[it->second[i]].size() ){ - Node a = it->second[i]; - for( unsigned j=0; jsecond.size(); j++ ){ - if( i!=j ){ - //remainder must be empty - if( countsecond[j]].size() ){ - Node b = it->second[j]; - std::vector< Node > exp; - addToExplanation( a, b, exp ); - for( unsigned j=0; jjj; --j ){ + if( areEqual( c[j], d_emptyString ) ){ + addToExplanation( c[j], d_emptyString, exp ); } - } } } + //if( exp_n.empty() ){ + sendInfer( mkAnd( exp ), conc, inf_type==0? "F_Const" : ( inf_type==1 ? "F_LengthEq" : "F_Endpoint" ) ); + //}else{ + //} + if( d_conflict ){ + return; + }else{ + break; + } } - } - }while( success && ); + count++; + }while( inelig.size()second.size() ); - if( r==1 ){ for( unsigned i=0; isecond.size(); i++ ){ - std::reverse( d_flat_form[it->second].begin(), d_flat_form[it->second].end() ); - std::reverse( d_flat_form_index[it->second].begin(), d_flat_form_index[it->second].end() ); + std::reverse( d_flat_form[it->second[i]].begin(), d_flat_form[it->second[i]].end() ); + std::reverse( d_flat_form_index[it->second[i]].begin(), d_flat_form_index[it->second[i]].end() ); } } } } } - */ + if( !hasProcessed() ){ //get equivalence classes //d_eqcs.clear(); @@ -2452,18 +2545,24 @@ void TheoryStrings::checkNormalForms() { Trace("strings-process-debug") << "Done verifying normal forms are the same for " << eqc << std::endl; } - if(Debug.isOn("strings-nf")) { - Debug("strings-nf") << "**** Normal forms are : " << std::endl; + if(Trace.isOn("strings-nf")) { + Trace("strings-nf") << "**** Normal forms are : " << std::endl; for( std::map< Node, Node >::iterator it = nf_to_eqc.begin(); it != nf_to_eqc.end(); ++it ){ - Debug("strings-nf") << " normal_form(" << it->second << ") = " << it->first << std::endl; + Trace("strings-nf") << " N[" << it->second << "] = " << it->first << std::endl; } - Debug("strings-nf") << std::endl; + Trace("strings-nf") << std::endl; } if( !hasProcessed() ){ checkExtendedFuncsEval( 1 ); Trace("strings-process-debug") << "Done check extended functions re-eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; if( !hasProcessed() ){ + if( !options::stringEagerLen() ){ + checkLengthsEqc(); + if( hasProcessed() ){ + return; + } + } //process disequalities between equivalence classes checkDeqNF(); Trace("strings-process-debug") << "Done check disequalities, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; @@ -2551,16 +2650,14 @@ Node TheoryStrings::checkCycles( Node eqc, std::vector< Node >& curr, std::vecto void TheoryStrings::checkDeqNF() { if( !d_conflict && d_lemma_cache.empty() ){ - std::vector< Node > eqcs; - getEquivalenceClasses( eqcs ); std::vector< std::vector< Node > > cols; std::vector< Node > lts; - separateByLength( eqcs, cols, lts ); + separateByLength( d_strings_eqc, cols, lts ); for( unsigned i=0; i1 && d_lemma_cache.empty() ){ - Trace("strings-solve") << "- Verify disequalities are processed for "; + Trace("strings-solve") << "- Verify disequalities are processed for " << cols[i][0]; printConcat( d_normal_forms[cols[i][0]], "strings-solve" ); - Trace("strings-solve") << "..." << std::endl; + Trace("strings-solve") << "... #eql = " << cols[i].size() << std::endl; //must ensure that normal forms are disequal for( unsigned j=0; j nodes; - getEquivalenceClasses( nodes ); - for( unsigned i=0; i1 ) { - Trace("strings-process-debug") << "Process length constraints for " << nodes[i] << std::endl; + for( unsigned i=0; i1 ) { + Trace("strings-process-debug") << "Process length constraints for " << d_strings_eqc[i] << std::endl; //check if there is a length term for this equivalence class - EqcInfo* ei = getOrMakeEqcInfo( nodes[i], false ); + EqcInfo* ei = getOrMakeEqcInfo( d_strings_eqc[i], false ); Node lt = ei ? ei->d_length_term : Node::null(); if( !lt.isNull() ) { Node llt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt ); @@ -2604,18 +2699,36 @@ void TheoryStrings::checkLengthsEqc() { if( ei->d_normalized_length.get().isNull() ) { //if not, add the lemma std::vector< Node > ant; - ant.insert( ant.end(), d_normal_forms_exp[nodes[i]].begin(), d_normal_forms_exp[nodes[i]].end() ); - ant.push_back( d_normal_forms_base[nodes[i]].eqNode( lt ) ); - Node lc = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, mkConcat( d_normal_forms[nodes[i]] ) ); + ant.insert( ant.end(), d_normal_forms_exp[d_strings_eqc[i]].begin(), d_normal_forms_exp[d_strings_eqc[i]].end() ); + ant.push_back( d_normal_forms_base[d_strings_eqc[i]].eqNode( lt ) ); + Node lc = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, mkConcat( d_normal_forms[d_strings_eqc[i]] ) ); lc = Rewriter::rewrite( lc ); Node eq = llt.eqNode( lc ); - ei->d_normalized_length.set( eq ); - sendLemma( mkExplain( ant ), eq, "LEN-NORM" ); + if( llt!=lc ){ + ei->d_normalized_length.set( eq ); + sendLemma( mkExplain( ant ), eq, "LEN-NORM" ); + } + } + }else{ + Trace("strings-process-debug") << "No length term for eqc " << d_strings_eqc[i] << " " << d_eqc_to_len_term[d_strings_eqc[i]] << std::endl; + if( !options::stringEagerLen() ){ + Node c = mkConcat( d_normal_forms[d_strings_eqc[i]] ); + registerTerm( c ); + if( !c.isConst() ){ + NodeNodeMap::const_iterator it = d_proxy_var.find( c ); + if( it!=d_proxy_var.end() ){ + Node pv = (*it).second; + Assert( d_proxy_var_to_length.find( pv )!=d_proxy_var_to_length.end() ); + Node pvl = d_proxy_var_to_length[pv]; + Node ceq = Rewriter::rewrite( getLength( pv, true ).eqNode( pvl ) ); + sendLemma( d_true, ceq, "LEN-NORM-I" ); + } + } } } - } else { - Trace("strings-process-debug") << "Do not process length constraints for " << nodes[i] << " " << d_normal_forms[nodes[i]].size() << std::endl; - } + //} else { + // Trace("strings-process-debug") << "Do not process length constraints for " << nodes[i] << " " << d_normal_forms[nodes[i]].size() << std::endl; + //} } } } @@ -2624,12 +2737,9 @@ void TheoryStrings::checkCardinality() { //int cardinality = options::stringCharCardinality(); //Trace("strings-solve-debug2") << "get cardinality: " << cardinality << endl; - std::vector< Node > eqcs; - getEquivalenceClasses( eqcs ); - std::vector< std::vector< Node > > cols; std::vector< Node > lts; - separateByLength( eqcs, cols, lts ); + separateByLength( d_strings_eqc, cols, lts ); for( unsigned i = 0; i& n, Assert( d_equalityEngine.getRepresentative(eqc)==eqc ); EqcInfo* ei = getOrMakeEqcInfo( eqc, false ); Node lt = ei ? ei->d_length_term : Node::null(); + Trace("ajr-temp") << "Length term for " << eqc << " is " << lt << std::endl; if( !lt.isNull() ){ lt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt ); Node r = d_equalityEngine.getRepresentative( lt ); + Trace("ajr-temp") << "Length term rep for " << eqc << " is " << lt << std::endl; if( eqc_to_leqc.find( r )==eqc_to_leqc.end() ){ eqc_to_leqc[r] = leqc_counter; leqc_to_eqc[leqc_counter] = r; @@ -2769,10 +2881,9 @@ void TheoryStrings::separateByLength(std::vector< Node >& n, } } for( std::map< unsigned, std::vector< Node > >::iterator it = eqc_to_strings.begin(); it != eqc_to_strings.end(); ++it ){ - std::vector< Node > vec; - vec.insert( vec.end(), it->second.begin(), it->second.end() ); + cols.push_back( std::vector< Node >() ); + cols.back().insert( cols.back().end(), it->second.begin(), it->second.end() ); lts.push_back( leqc_to_eqc[it->first] ); - cols.push_back( vec ); } } @@ -3514,6 +3625,7 @@ void TheoryStrings::checkInit() { d_eqc_to_const.clear(); d_eqc_to_const_base.clear(); d_eqc_to_const_exp.clear(); + d_eqc_to_len_term.clear(); d_term_index.clear(); d_strings_eqc.clear(); @@ -3524,14 +3636,19 @@ void TheoryStrings::checkInit() { while( !eqcs_i.isFinished() ){ Node eqc = (*eqcs_i); TypeNode tn = eqc.getType(); - if( !tn.isInteger() && !tn.isRegExp() ){ + if( !tn.isRegExp() ){ if( tn.isString() ){ d_strings_eqc.push_back( eqc ); } eq::EqClassIterator eqc_i = eq::EqClassIterator( eqc, &d_equalityEngine ); while( !eqc_i.isFinished() ) { Node n = *eqc_i; - if( n.isConst() ){ + if( tn.isInteger() ){ + if( n.getKind()==kind::STRING_LENGTH ){ + Node nr = getRepresentative( n[0] ); + d_eqc_to_len_term[nr] = n[0]; + } + }else if( n.isConst() ){ d_eqc_to_const[eqc] = n; d_eqc_to_const_base[eqc] = n; d_eqc_to_const_exp[eqc] = Node::null(); @@ -3725,39 +3842,59 @@ void TheoryStrings::checkConstantEquivalenceClasses( TermIndex* ti, std::vector< } void TheoryStrings::checkExtendedFuncsEval( int effort ) { + if( effort==0 ){ + d_extf_vars.clear(); + } Trace("strings-extf-debug") << "Checking " << d_ext_func_terms.size() << " extended functions." << std::endl; for( NodeBoolMap::iterator it = d_ext_func_terms.begin(); it != d_ext_func_terms.end(); ++it ){ if( (*it).second ){ Node n = (*it).first; Trace("strings-extf-debug") << "Check extf " << n << "..." << std::endl; - //check if all children are in eqc with a constant, if so, we can rewrite - std::vector< Node > children; + if( effort==0 ){ + std::map< Node, bool > visited; + collectVars( n, d_extf_vars[n], visited ); + } + //build up a best current substitution for the variables in the term std::vector< Node > exp; - std::map< Node, Node > visited; - for( unsigned i=0; i0 ){ - //get the normalized string - nc = getNormalString( n[i], exp ); - children.push_back( nc ); - }else{ - Trace("strings-extf-debug") << " unresolved due to " << n[i] << std::endl; - break; + std::vector< Node > var; + std::vector< Node > sub; + for( std::map< Node, std::vector< Node > >::iterator itv = d_extf_vars[n].begin(); itv != d_extf_vars[n].end(); ++itv ){ + Node nr = itv->first; + std::map< Node, Node >::iterator itc = d_eqc_to_const.find( nr ); + Node s; + Node b; + Node e; + if( itc!=d_eqc_to_const.end() ){ + b = d_eqc_to_const_base[nr]; + s = itc->second; + e = d_eqc_to_const_exp[nr]; + }else if( effort>0 ){ + b = d_normal_forms_base[nr]; + std::vector< Node > expt; + s = getNormalString( b, expt ); + e = mkAnd( expt ); + } + if( !s.isNull() ){ + bool added = false; + for( unsigned i=0; isecond.size(); i++ ){ + if( itv->second[i]!=s ){ + var.push_back( itv->second[i] ); + sub.push_back( s ); + addToExplanation( itv->second[i], b, exp ); + Trace("strings-extf-debug") << " " << itv->second[i] << " --> " << s << std::endl; + added = true; + } + } + if( added && !e.isNull() ){ + exp.push_back( e ); } } } - if( children.size()==n.getNumChildren() ){ - if (n.getMetaKind() == kind::metakind::PARAMETERIZED) { - children.insert(children.begin(),n.getOperator()); - } - Node nr = NodeManager::currentNM()->mkNode( n.getKind(), children ); + + if( !var.empty() ){ + Node nr = n.substitute( var.begin(), var.end(), sub.begin(), sub.end() ); Node nrc = Rewriter::rewrite( nr ); - if( nrc.isConst() ){ + if( nrc.isConst() || hasTerm( nrc ) ){ //mark as reduced d_ext_func_terms[n] = false; Trace("strings-extf-debug") << " resolvable by evaluation..." << std::endl; @@ -3805,7 +3942,7 @@ void TheoryStrings::checkExtendedFuncsEval( int effort ) { sendInfer( mkAnd( exp ), conc, effort==0 ? "EXTF" : "EXTF-N" ); } if( d_conflict ){ - Trace("strings-extf") << " conflict, return." << std::endl; + Trace("strings-extf-debug") << " conflict, return." << std::endl; return; } } @@ -3817,54 +3954,20 @@ void TheoryStrings::checkExtendedFuncsEval( int effort ) { } } -Node TheoryStrings::inferConstantDefinition( Node n, std::vector< Node >& exp, std::map< Node, Node >& visited ) { - if( n.isConst() ){ - return n; - }else{ - Node nr = getRepresentative( n ); - std::map< Node, Node >::iterator it = visited.find( nr ); - if( it==visited.end() ){ - visited[nr] = Node::null(); - if( nr.isConst() ){ - addToExplanation( n, nr, exp ); - visited[nr] = nr; - return nr; - }else{ - std::map< Node, Node >::iterator itc = d_eqc_to_const.find( nr ); - if( itc!=d_eqc_to_const.end() ){ - exp.push_back( n.eqNode( d_eqc_to_const_base[nr] ) ); - if( !d_eqc_to_const_exp[nr].isNull() ){ - exp.push_back( d_eqc_to_const_exp[nr] ); - } - visited[nr] = itc->second; - return itc->second; - }else{ - if( n.getNumChildren()>0 ){ - std::vector< Node > children; - for( unsigned i=0; imkNode( n.getKind(), children ); - visited[nr] = ret; - return ret; - } - } +void TheoryStrings::collectVars( Node n, std::map< Node, std::vector< Node > >& vars, std::map< Node, bool >& visited ) { + if( !n.isConst() ){ + if( visited.find( n )==visited.end() ){ + visited[n] = true; + if( n.getNumChildren()>0 ){ + for( unsigned i=0; isecond; } } - return Node::null(); } Node TheoryStrings::getSymbolicDefinition( Node n, std::vector< Node >& exp ) { @@ -4054,36 +4157,6 @@ void TheoryStrings::checkNegContains( std::vector< Node >& negContains ) { } } -void TheoryStrings::checkExtendedFuncsReduction() { - //very lazy reduction? - /* - if( options::stringLazyPreproc() ){ - for( NodeBoolMap::iterator it = d_ext_func_terms.begin(); it != d_ext_func_terms.end(); ++it ){ - if( (*it).second ){ - Node n = (*it).first; - if( n.getKind()!=kind::STRING_IN_REGEXP ){ - if( d_preproc_cache.find( n )==d_preproc_cache.end() ){ - d_preproc_cache[n] = true; - std::vector< Node > new_nodes; - Node res = d_preproc.decompose( n, new_nodes ); - Assert( res==n ); - if( !new_nodes.empty() ){ - Node nnlem = new_nodes.size()==1 ? new_nodes[0] : NodeManager::currentNM()->mkNode( kind::AND, new_nodes ); - nnlem = Rewriter::rewrite( nnlem ); - Trace("strings-pp-lemma") << "Reduction lemma : " << nnlem << std::endl; - Trace("strings-pp-lemma") << "...from term " << n << std::endl; - d_out->lemma( nnlem ); - addedLemmas = true; - } - } - } - } - } - } - */ -} - - CVC4::String TheoryStrings::getHeadConst( Node x ) { if( x.isConst() ) { return x.getConst< String >(); @@ -4382,7 +4455,7 @@ void TheoryStrings::collectExtendedFuncTerms( Node n, std::map< Node, bool >& vi n.getKind() == kind::STRING_STOI || n.getKind() == kind::STRING_STOU16 || n.getKind() == kind::STRING_STOU32 || n.getKind() == kind::STRING_STRREPL || n.getKind() == kind::STRING_STRCTN ){ if( d_ext_func_terms.find( n )==d_ext_func_terms.end() ){ - Trace("strings-extf-debug") << "Found extended function : " << n << std::endl; + Trace("strings-extf-debug2") << "Found extended function : " << n << std::endl; d_ext_func_terms[n] = true; } } diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 98f8d0eea..ce2422faf 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -178,13 +178,17 @@ private: StringsPreprocess d_preproc; NodeBoolMap d_preproc_cache; + bool doPreprocess( Node atom ); bool hasProcessed(); void addToExplanation( Node a, Node b, std::vector< Node >& exp ); + void addToExplanation( Node lit, std::vector< Node >& exp ); + private: std::vector< Node > d_congruent; std::map< Node, Node > d_eqc_to_const; std::map< Node, Node > d_eqc_to_const_base; std::map< Node, Node > d_eqc_to_const_exp; + std::map< Node, Node > d_eqc_to_len_term; std::vector< Node > d_strings_eqc; Node d_emptyString_r; class TermIndex { @@ -199,17 +203,6 @@ private: std::vector< Node > d_eqcs; //list of non-congruent concat terms in each eqc std::map< Node, std::vector< Node > > d_eqc; - //their flat forms - /* - class FlatForm { - public: - Node d_node; - std::deque< Node > d_vec; - std::deque< std::vector< Node > > d_exp; - }; - std::map< Node, FlatForm > d_flat_form; - std::map< Node, FlatForm > d_flat_form_proc; - */ std::map< Node, std::vector< Node > > d_flat_form; std::map< Node, std::vector< int > > d_flat_form_index; @@ -284,6 +277,7 @@ private: void checkInit(); void checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc ); void checkExtendedFuncsEval( int effort = 0 ); + void collectVars( Node n, std::map< Node, std::vector< Node > >& vars, std::map< Node, bool >& visited ); void checkNormalForms(); Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ); void checkDeqNF(); @@ -308,9 +302,7 @@ private: void checkExtendedFuncs(); void checkPosContains( std::vector< Node >& posContains ); void checkNegContains( std::vector< Node >& negContains ); - Node inferConstantDefinition( Node n, std::vector< Node >& exp, std::map< Node, Node >& visited ); Node getSymbolicDefinition( Node n, std::vector< Node >& exp ); - void checkExtendedFuncsReduction(); public: void preRegisterTerm(TNode n); @@ -380,6 +372,7 @@ private: NodeSet d_neg_ctn_cached; //extended string terms and whether they have been reduced NodeBoolMap d_ext_func_terms; + std::map< Node, std::map< Node, std::vector< Node > > > d_extf_vars; //collect extended operator terms void collectExtendedFuncTerms( Node n, std::map< Node, bool >& visited ); diff --git a/src/theory/strings/theory_strings_rewriter.cpp b/src/theory/strings/theory_strings_rewriter.cpp index ae0a7aeda..6efa048ca 100644 --- a/src/theory/strings/theory_strings_rewriter.cpp +++ b/src/theory/strings/theory_strings_rewriter.cpp @@ -30,38 +30,30 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, //try to remove off front and back for( unsigned t=0; t<2; t++ ){ if( tmin<=t && t<=tmax ){ - int count = children.size()-1; - int mcount = mchildren.size()-1; bool do_next = true; - while( count>=0 && mcount>=0 && do_next ){ + while( !children.empty() && !mchildren.empty() && do_next ){ do_next = false; - Node rc = children[count]; - Node xc = mchildren[mcount]; + Node xc = mchildren[mchildren.size()-1]; + Node rc = children[children.size()-1]; if( rc.getKind() == kind::STRING_TO_REGEXP ){ if( xc==rc[0] ){ children.pop_back(); mchildren.pop_back(); - count--; - mcount--; do_next = true; }else if( xc.isConst() && rc[0].isConst() ){ //split the constant int index; Node s = splitConstant( xc, rc[0], index, t==0 ); - //Trace("spl-const") << "Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl; + Trace("regexp-ext-rewrite-debug") << "CRE: Regexp const split : " << xc << " " << rc[0] << " -> " << s << " " << index << " " << t << std::endl; if( s.isNull() ){ return NodeManager::currentNM()->mkConst( false ); }else{ children.pop_back(); mchildren.pop_back(); - count--; - mcount--; if( index==0 ){ mchildren.push_back( s ); - mcount++; }else{ children.push_back( s ); - count++; } } do_next = true; @@ -81,21 +73,114 @@ Node TheoryStringsRewriter::simpleRegexpConsume( std::vector< Node >& mchildren, }else{ mchildren.push_back( NodeManager::currentNM()->mkConst(s.substr( 1 )) ); } - }else{ - mcount--; } children.pop_back(); - count--; do_next = true; }else{ return NodeManager::currentNM()->mkConst( false ); } }else if( rc.getKind()==kind::REGEXP_INTER || rc.getKind()==kind::REGEXP_UNION ){ - //TODO? note this is only propagation : cannot make choices + //see if any/each child does not work + bool result_valid = true; + Node result; + Node emp_s = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + for( unsigned i=0; i mchildren_s; + std::vector< Node > children_s; + mchildren_s.push_back( xc ); + children_s.push_back( rc[i] ); + Node ret = simpleRegexpConsume( mchildren_s, children_s, t ); + if( !ret.isNull() ){ + // one conjunct cannot be satisfied, return false + if( rc.getKind()==kind::REGEXP_INTER ){ + return ret; + } + }else{ + if( children_s.empty() ){ + //if we were able to fully consume, store the result + Assert( mchildren_s.size()<=1 ); + if( mchildren_s.empty() ){ + mchildren_s.push_back( emp_s ); + } + if( result.isNull() ){ + result = mchildren_s[0]; + }else if( result!=mchildren_s[0] ){ + result_valid = false; + } + }else{ + result_valid = false; + } + } + } + if( result_valid ){ + if( result.isNull() ){ + //all disjuncts cannot be satisfied, return false + Assert( rc.getKind()==kind::REGEXP_UNION ); + return NodeManager::currentNM()->mkConst( false ); + }else{ + //all branches led to the same result + children.pop_back(); + mchildren.pop_back(); + if( result!=emp_s ){ + mchildren.push_back( result ); + } + do_next = true; + } + } }else if( rc.getKind()==kind::REGEXP_STAR ){ - //TODO + //check if there is no way that this star can be unrolled even once + std::vector< Node > mchildren_s; + mchildren_s.insert( mchildren_s.end(), mchildren.begin(), mchildren.end() ); + if( t==1 ){ + std::reverse( mchildren_s.begin(), mchildren_s.end() ); + } + std::vector< Node > children_s; + getConcat( rc[0], children_s ); + Node ret = simpleRegexpConsume( mchildren_s, children_s, t ); + if( !ret.isNull() ){ + Trace("regexp-ext-rewrite-debug") << "CRE : regexp star infeasable " << xc << " " << rc << std::endl; + children.pop_back(); + if( children.empty() ){ + return NodeManager::currentNM()->mkConst( false ); + }else{ + do_next = true; + } + }else{ + if( children_s.empty() ){ + //check if beyond this, we can't do it or there is nothing left, if so, repeat + bool can_skip = false; + if( children.size()>1 ){ + std::vector< Node > mchildren_ss; + mchildren_ss.insert( mchildren_ss.end(), mchildren.begin(), mchildren.end() ); + std::vector< Node > children_ss; + children_ss.insert( children_ss.end(), children.begin(), children.end()-1 ); + if( t==1 ){ + std::reverse( mchildren_ss.begin(), mchildren_ss.end() ); + std::reverse( children_ss.begin(), children_ss.end() ); + } + Node ret = simpleRegexpConsume( mchildren_ss, children_ss, t ); + if( ret.isNull() ){ + can_skip = true; + } + } + if( !can_skip ){ + //take the result of fully consuming once + if( t==1 ){ + std::reverse( mchildren_s.begin(), mchildren_s.end() ); + } + mchildren.clear(); + mchildren.insert( mchildren.end(), mchildren_s.begin(), mchildren_s.end() ); + do_next = true; + }else{ + Trace("regexp-ext-rewrite-debug") << "CRE : can skip " << rc << " from " << xc << std::endl; + } + } + } } } + if( !do_next ){ + Trace("regexp-ext-rewrite") << "Cannot consume : " << xc << " " << rc << std::endl; + } } } if( dir!=0 ){ @@ -121,12 +206,11 @@ Node TheoryStringsRewriter::rewriteConcatString( TNode node ) { if(tmpNode[0].isConst()) { preNode = NodeManager::currentNM()->mkConst( preNode.getConst().concat( tmpNode[0].getConst() ) ); node_vec.push_back( preNode ); - preNode = Node::null(); } else { node_vec.push_back( preNode ); - preNode = Node::null(); node_vec.push_back( tmpNode[0] ); } + preNode = Node::null(); ++j; } for(; j().isEmptyString() ) { - preNode = Node::null(); - } else { + if(preNode.getKind() == kind::CONST_STRING && !preNode.getConst().isEmptyString() ) { node_vec.push_back( preNode ); - preNode = Node::null(); } + preNode = Node::null(); } node_vec.push_back( tmpNode ); - } else { - if(preNode.isNull()) { + }else{ + if( preNode.isNull() ){ preNode = tmpNode; - } else { + }else{ preNode = NodeManager::currentNM()->mkConst( preNode.getConst().concat( tmpNode.getConst() ) ); } } } - if(preNode != Node::null()) { + if( !preNode.isNull() && ( preNode.getKind()!=kind::CONST_STRING || !preNode.getConst().isEmptyString() ) ){ node_vec.push_back( preNode ); } - if(node_vec.size() > 1) { - retNode = NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, node_vec); - } else { - retNode = node_vec[0]; - } + retNode = mkConcat( kind::STRING_CONCAT, node_vec ); Trace("strings-prerewrite") << "Strings::rewriteConcatString end " << retNode << std::endl; return retNode; } @@ -956,6 +1034,12 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec); } + }else if( node[0].getKind()==kind::STRING_STRREPL ){ + if( node[0][1].isConst() && node[0][2].isConst() ){ + if( node[0][1].getConst().size()==node[0][2].getConst().size() ){ + retNode = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, node[0][0] ); + } + } } } else if( node.getKind() == kind::STRING_SUBSTR_TOTAL ){ Node zero = NodeManager::currentNM()->mkConst( ::CVC4::Rational(0) ); @@ -986,8 +1070,17 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else { retNode = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); } - } else if(node[1] == zero && node[2].getKind() == kind::STRING_LENGTH && node[2][0] == node[0]) { - retNode = node[0]; + } else if(node[1] == zero ) { + if( node[2].getKind() == kind::STRING_LENGTH && node[2][0]==node[0] ){ + retNode = node[0]; + }else{ + //check if lengths rewrite to the same thing + Node lt = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, node[0] ); + lt = Rewriter::rewrite( lt ); + if( lt==node[2] ){ + retNode = node[0]; + } + } } } else if( node.getKind() == kind::STRING_STRCTN ){ if( node[0] == node[1] ) { @@ -995,51 +1088,85 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { } else if( node[0].isConst() && node[1].isConst() ) { CVC4::String s = node[0].getConst(); CVC4::String t = node[1].getConst(); - if( s.find(t) != std::string::npos ) { + if( s.find(t) != std::string::npos ){ retNode = NodeManager::currentNM()->mkConst( true ); - } else { + }else{ retNode = NodeManager::currentNM()->mkConst( false ); } - } else if( node[0].getKind()==kind::STRING_CONCAT ) { - if( node[1].getKind()!=kind::STRING_CONCAT ){ - for(unsigned i=0; imkConst( true ); - break; - //constant contains - }else if( node[0][i].isConst() && node[1].isConst() ){ - CVC4::String s = node[0][i].getConst(); - CVC4::String t = node[1].getConst(); - if( s.find(t) != std::string::npos ) { - retNode = NodeManager::currentNM()->mkConst( true ); + }else if( node[0].getKind()==kind::STRING_CONCAT ) { + //component-wise containment + unsigned n1 = node[0].getNumChildren(); + std::vector< Node > nc1; + getConcat( node[1], nc1 ); + unsigned n2 = nc1.size(); + if( n1>n2 ){ + bool flag = false; + unsigned diff = n1-n2; + for(unsigned i=0; in2 ){ - unsigned diff = n1-n2; - for(unsigned i=0; imkConst( true ); + } + } + if( retNode==node ){ + if( node[1].isConst() ){ + CVC4::String t = node[1].getConst(); + for(unsigned i=0; i(); + if( s.find(t)!=std::string::npos ) { + retNode = NodeManager::currentNM()->mkConst( true ); break; + }else{ + //if no overlap, we can split into disjunction + // if first child, only require no left overlap + // if last child, only require no right overlap + if( i==0 || i==node[0].getNumChildren()-1 || t.find(s)==std::string::npos ){ + bool do_split = false; + int sot = s.overlap( t ); + Trace("strings-ext-rewrite") << "Overlap " << s << " " << t << " is " << sot << std::endl; + if( sot==0 ){ + do_split = i==0; + } + if( !do_split && ( i==(node[0].getNumChildren()-1) || sot==0 ) ){ + int tos = t.overlap( s ); + Trace("strings-ext-rewrite") << "Overlap " << t << " " << s << " is " << tos << std::endl; + if( tos==0 ){ + do_split = true; + } + } + if( do_split ){ + std::vector< Node > nc0; + getConcat( node[0], nc0 ); + std::vector< Node > spl[2]; + if( i>0 ){ + spl[0].insert( spl[0].end(), nc0.begin(), nc0.begin()+i ); + } + if( imkNode( kind::OR, + NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, mkConcat( kind::STRING_CONCAT, spl[0] ), node[1] ), + NodeManager::currentNM()->mkNode( kind::STRING_STRCTN, mkConcat( kind::STRING_CONCAT, spl[1] ), node[1] ) ); + break; + } + } } } } - if(flag) { - retNode = NodeManager::currentNM()->mkConst( true ); - } } } }else if( node[0].isConst() ){ @@ -1064,50 +1191,123 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { }else if( node.getKind()==kind::STRING_STRIDOF ){ std::vector< Node > children; getConcat( node[0], children ); - if( children[0].isConst() && node[1].isConst() ) { - CVC4::String s = children[0].getConst(); - CVC4::String t = node[1].getConst(); - std::size_t i = 0; - bool do_find = true; - if( node[2].isConst() ){ - CVC4::Rational RMAXINT(LONG_MAX); - if( node[2].getConst()>RMAXINT ){ - Assert(node[2].getConst() <= RMAXINT, "Number exceeds LONG_MAX in string index_of"); - retNode = NodeManager::currentNM()->mkConst( false ); - do_find = false; - }else{ - i = node[2].getConst().getNumerator().toUnsignedInt(); - } + //std::vector< Node > children1; + //getConcat( node[1], children1 ); TODO + std::size_t start = 0; + std::size_t val2 = 0; + if( node[2].isConst() ){ + CVC4::Rational RMAXINT(LONG_MAX); + if( node[2].getConst()>RMAXINT ){ + Assert(node[2].getConst() <= RMAXINT, "Number exceeds LONG_MAX in string index_of"); + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + }else if( node[2].getConst().sgn()==-1 ){ + //constant negative + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + }else{ + val2 = node[2].getConst().getNumerator().toUnsignedInt(); + start = val2; } - if( do_find ){ - std::size_t ret = s.find(t, i); - if( ret!=std::string::npos ) { - //only exact if start value was constant - if( node[2].isConst() ){ - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) ); + } + if( retNode==node ){ + bool prefixNoOverlap = false; + CVC4::String t; + if( node[1].isConst() ){ + t = node[1].getConst(); + } + //unsigned ch1_index = 0; + for( unsigned i=0; i(); + if( node[1].isConst() ){ + if( i==0 ){ + std::size_t ret = s.find( t, start ); + if( ret!=std::string::npos ) { + //exact if start value was constant + if( node[2].isConst() ){ + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational((unsigned) ret) ); + break; + } + }else{ + //exact if we scanned the entire string + if( node[0].isConst() ){ + retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + break; + }else{ + prefixNoOverlap = (s.overlap(t)==0); + Trace("strings-rewrite-debug") << "Prefix no overlap : " << s << " " << t << " " << prefixNoOverlap << std::endl; + } + } + }else if( !node[2].isConst() ){ + break; + }else{ + std::size_t ret = s.find(t, start); + //remove remaining children after finding the string + if( ret!=std::string::npos ){ + Assert( ret+t.size()<=s.size() ); + children[i] = NodeManager::currentNM()->mkConst( ::CVC4::String( s.substr(0,ret+t.size()) ) ); + do_splice = true; + }else{ + //if no overlap on last child, can remove + if( t.overlap( s )==0 && i==children.size()-1 ){ + std::vector< Node > spl; + spl.insert( spl.end(), children.begin(), children.begin()+i ); + retNode = NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ); + break; + } + } + } + } + //decrement the start index + if( start>0 ){ + if( s.size()>start ){ + start = 0; + }else{ + start = start - s.size(); + } } + }else if( !node[2].isConst() ){ + break; }else{ - //only exact if we scanned the entire string - if( node[0].isConst() ){ - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + if( children[i]==node[1] && start==0 ){ + //can remove beyond this + do_splice = true; } } - } - } - //constant negative - if( node[2].isConst() ){ - if( node[2].getConst().sgn()==-1 ){ - retNode = NodeManager::currentNM()->mkConst( ::CVC4::Rational(-1) ); + if( do_splice ){ + std::vector< Node > spl; + //since we definitely will find the string, we can safely add the length of the constant non-overlapping prefix + if( prefixNoOverlap ){ + Node pl = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, children[0] ) ); + Assert( pl.isConst() ); + Assert( node[2].isConst() ); + int new_start = val2 - pl.getConst().getNumerator().toUnsignedInt(); + if( new_start<0 ){ + new_start = 0; + } + spl.insert( spl.end(), children.begin()+1, children.begin()+i+1 ); + retNode = NodeManager::currentNM()->mkNode( kind::PLUS, pl, + NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], NodeManager::currentNM()->mkConst( Rational(new_start) ) ) ); + }else{ + spl.insert( spl.end(), children.begin(), children.begin()+i+1 ); + retNode = NodeManager::currentNM()->mkNode( kind::STRING_STRIDOF, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ); + } + break; + } } } }else if( node.getKind() == kind::STRING_STRREPL ){ - if( node[1]!=node[2] ){ - if(node[0].isConst() && node[1].isConst()) { - CVC4::String s = node[0].getConst(); + if( node[1]==node[2] ){ + retNode = node[0]; + }else{ + std::vector< Node > children; + getConcat( node[0], children ); + if( children[0].isConst() && node[1].isConst() ){ + CVC4::String s = children[0].getConst(); CVC4::String t = node[1].getConst(); std::size_t p = s.find(t); if( p != std::string::npos ) { - if(node[2].isConst()) { + if( node[2].isConst() ){ CVC4::String r = node[2].getConst(); CVC4::String ret = s.replace(t, r); retNode = NodeManager::currentNM()->mkConst( ::CVC4::String(ret) ); @@ -1118,12 +1318,25 @@ RewriteResponse TheoryStringsRewriter::postRewrite(TNode node) { Node ns3 = NodeManager::currentNM()->mkConst( ::CVC4::String(s3) ); retNode = NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, ns1, node[2], ns3 ); } - } else { - retNode = node[0]; + if( children.size()>1 ){ + children[0] = retNode; + retNode = mkConcat( kind::STRING_CONCAT, children ); + } + }else{ + //could not find replacement string + if( node[0].isConst() ){ + retNode = node[0]; + }else{ + //check for overlap, if none, we can remove the prefix + if( s.overlap(t)==0 ){ + std::vector< Node > spl; + spl.insert( spl.end(), children.begin()+1, children.end() ); + retNode = NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, children[0], + NodeManager::currentNM()->mkNode( kind::STRING_STRREPL, mkConcat( kind::STRING_CONCAT, spl ), node[1], node[2] ) ); + } + } } } - } else { - retNode = node[0]; } }else if( node.getKind() == kind::STRING_PREFIX ){ if( node[0].isConst() && node[1].isConst() ){ @@ -1290,8 +1503,7 @@ RewriteResponse TheoryStringsRewriter::preRewrite(TNode node) { } } } else if(node.getKind() == kind::REGEXP_PLUS) { - retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, node[0], - NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, node[0])); + retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, node[0], NodeManager::currentNM()->mkNode( kind::REGEXP_STAR, node[0])); } else if(node.getKind() == kind::REGEXP_OPT) { retNode = NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst( ::CVC4::String("") ) ), diff --git a/src/theory/strings/theory_strings_rewriter.h b/src/theory/strings/theory_strings_rewriter.h index 03ee6a0cf..2a8258a09 100644 --- a/src/theory/strings/theory_strings_rewriter.h +++ b/src/theory/strings/theory_strings_rewriter.h @@ -2,7 +2,7 @@ /*! \file theory_strings_rewriter.h ** \verbatim ** Original author: Tianyi Liang - ** Major contributors: none + ** Major contributors: Andrew Reynolds ** Minor contributors (to current version): none ** This file is part of the CVC4 project. ** Copyright (c) 2009-2014 New York University and The University of Iowa diff --git a/src/util/regexp.h b/src/util/regexp.h index 453fa7ba9..a91f825ec 100644 --- a/src/util/regexp.h +++ b/src/util/regexp.h @@ -293,6 +293,7 @@ public: String suffix(std::size_t i) const { return substr(d_str.size() - i, i); } + // if y=y1...yn and overlap returns m, then this is x1...y1...ym std::size_t overlap(String &y) const; bool isNumber() const { diff --git a/test/regress/regress0/strings/Makefile.am b/test/regress/regress0/strings/Makefile.am index 894e2546a..a8a8e968f 100644 --- a/test/regress/regress0/strings/Makefile.am +++ b/test/regress/regress0/strings/Makefile.am @@ -62,11 +62,13 @@ TESTS = \ norn-360.smt2 \ norn-simp-rew.smt2 \ norn-simp-rew-sat.smt2 \ - idof-handg.smt2 \ idof-nconst-index.smt2 \ idof-neg-index.smt2 \ bug612.smt2 \ - bug615.smt2 + bug615.smt2 \ + kaluza-fl.smt2 \ + norn-ab.smt2 \ + idof-rewrites.smt2 FAILING_TESTS = @@ -74,8 +76,7 @@ EXTRA_DIST = $(TESTS) \ fmf001.smt2 \ type002.smt2 -# slow after changes on Nov 20 : artemis-0512-nonterm.smt2 -# slow after decision engine respects requirePhase: type003.smt2 loop007.smt2 +# somewhat slow after changes on Oct 6: idof-handg.smt2 # and make sure to distribute it EXTRA_DIST += diff --git a/test/regress/regress0/strings/idof-rewrites.smt2 b/test/regress/regress0/strings/idof-rewrites.smt2 new file mode 100644 index 000000000..f52f0c9f3 --- /dev/null +++ b/test/regress/regress0/strings/idof-rewrites.smt2 @@ -0,0 +1,13 @@ +(set-logic ALL_SUPPORTED) +(set-info :status sat) +(set-option :strings-exp true) +(declare-fun s () String) +(declare-fun t () String) + +; all of these should rewrite to true +(assert (= (str.indexof (str.++ s "abc") "ab" 0) (str.indexof (str.++ s "ab") "ab" 0))) +(assert (= (str.indexof (str.++ s "abc" t "a") t 2) (str.indexof (str.++ s "abc" t "c") t 2))) +(assert (= (str.indexof (str.++ "ddd" s "abc") "ab" 2) (+ 1 (str.indexof (str.++ "ed" s "ab") "ab" 1)))) +(assert (= (str.indexof (str.++ "dd" s "dd") "ab" 0) (str.indexof (str.++ "dd" s "ee") "ab" 0))) +(assert (= (str.indexof (str.++ "dd" s "dabd") "ab" 1) (+ 2 (str.indexof (str.++ s "dab" t) "ab" 0)))) +(check-sat) \ No newline at end of file diff --git a/test/regress/regress0/strings/kaluza-fl.smt2 b/test/regress/regress0/strings/kaluza-fl.smt2 new file mode 100755 index 000000000..ce3af9e74 --- /dev/null +++ b/test/regress/regress0/strings/kaluza-fl.smt2 @@ -0,0 +1,97 @@ +(set-logic QF_S) +(set-info :status sat) + +(declare-fun I0_15 () Int) +(declare-fun I0_18 () Int) +(declare-fun I0_2 () Int) +(declare-fun I0_4 () Int) +(declare-fun I0_6 () Int) +(declare-fun PCTEMP_LHS_1 () Int) +(declare-fun PCTEMP_LHS_2 () Int) +(declare-fun PCTEMP_LHS_3 () Int) +(declare-fun PCTEMP_LHS_4 () Int) +(declare-fun PCTEMP_LHS_5 () Int) +(declare-fun T0_15 () String) +(declare-fun T0_18 () String) +(declare-fun T0_2 () String) +(declare-fun T0_4 () String) +(declare-fun T0_6 () String) +(declare-fun T1_15 () String) +(declare-fun T1_18 () String) +(declare-fun T1_2 () String) +(declare-fun T1_4 () String) +(declare-fun T1_6 () String) +(declare-fun T2_15 () String) +(declare-fun T2_18 () String) +(declare-fun T2_2 () String) +(declare-fun T2_4 () String) +(declare-fun T2_6 () String) +(declare-fun T3_15 () String) +(declare-fun T3_18 () String) +(declare-fun T3_2 () String) +(declare-fun T3_4 () String) +(declare-fun T3_6 () String) +(declare-fun T4_15 () String) +(declare-fun T4_18 () String) +(declare-fun T4_2 () String) +(declare-fun T4_4 () String) +(declare-fun T4_6 () String) +(declare-fun T5_15 () String) +(declare-fun T5_18 () String) +(declare-fun T5_2 () String) +(declare-fun T5_4 () String) +(declare-fun T5_6 () String) +(declare-fun T_4 () Bool) +(declare-fun T_5 () Bool) +(declare-fun T_6 () Bool) +(declare-fun T_7 () Bool) +(declare-fun T_8 () Bool) +(declare-fun T_9 () Bool) +(declare-fun T_SELECT_1 () Bool) +(declare-fun T_SELECT_2 () Bool) +(declare-fun T_SELECT_3 () Bool) +(declare-fun T_SELECT_4 () Bool) +(declare-fun T_SELECT_5 () Bool) +(declare-fun T_a () Bool) +(declare-fun T_c () Bool) +(declare-fun T_e () Bool) +(declare-fun var_0xINPUT_12454 () String) + +(assert (= T_SELECT_1 (not (= PCTEMP_LHS_1 (- 1))))) +(assert (ite T_SELECT_1 + (and (= PCTEMP_LHS_1 (+ I0_2 0))(= var_0xINPUT_12454 (str.++ T0_2 T1_2))(= I0_2 (str.len T4_2))(= 0 (str.len T0_2))(= T1_2 (str.++ T2_2 T3_2))(= T2_2 (str.++ T4_2 T5_2))(= T5_2 "__utma=169413169.")(not (str.in.re T4_2 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "a") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "."))))) + (and (= PCTEMP_LHS_1 (- 1))(= var_0xINPUT_12454 (str.++ T0_2 T1_2))(= 0 (str.len T0_2))(not (str.in.re T1_2 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "a") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "."))))))) +(assert (= T_SELECT_2 (not (= PCTEMP_LHS_2 (- 1))))) +(assert (ite T_SELECT_2 + (and (= PCTEMP_LHS_2 (+ I0_4 0))(= var_0xINPUT_12454 (str.++ T0_4 T1_4))(= I0_4 (str.len T4_4))(= 0 (str.len T0_4))(= T1_4 (str.++ T2_4 T3_4))(= T2_4 (str.++ T4_4 T5_4))(= T5_4 "__utmb=169413169")(not (str.in.re T4_4 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "b") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))) + (and (= PCTEMP_LHS_2 (- 1))(= var_0xINPUT_12454 (str.++ T0_4 T1_4))(= 0 (str.len T0_4))(not (str.in.re T1_4 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "b") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))))) +(assert (= T_SELECT_3 (not (= PCTEMP_LHS_3 (- 1))))) +(assert (ite T_SELECT_3 + (and (= PCTEMP_LHS_3 (+ I0_6 0))(= var_0xINPUT_12454 (str.++ T0_6 T1_6))(= I0_6 (str.len T4_6))(= 0 (str.len T0_6))(= T1_6 (str.++ T2_6 T3_6))(= T2_6 (str.++ T4_6 T5_6))(= T5_6 "__utmc=169413169")(not (str.in.re T4_6 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "c") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))) + (and (= PCTEMP_LHS_3 (- 1))(= var_0xINPUT_12454 (str.++ T0_6 T1_6))(= 0 (str.len T0_6))(not (str.in.re T1_6 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "c") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))))) +(assert (= T_4 (<= 0 PCTEMP_LHS_1))) +(assert T_4) +(assert (= T_5 (<= 0 PCTEMP_LHS_2))) +(assert T_5) +(assert (= T_6 (<= 0 PCTEMP_LHS_3))) +(assert T_6) +(assert (= T_7 (= "" var_0xINPUT_12454))) +(assert (= T_8 (not T_7))) +(assert T_8) +(assert (= T_9 (= var_0xINPUT_12454 ""))) +(assert (= T_a (not T_9))) +(assert T_a) +(assert (= T_SELECT_4 (not (= PCTEMP_LHS_4 (- 1))))) +(assert (ite T_SELECT_4 + (and (= PCTEMP_LHS_4 (+ I0_15 0))(= var_0xINPUT_12454 (str.++ T0_15 T1_15))(= I0_15 (str.len T4_15))(= 0 (str.len T0_15))(= T1_15 (str.++ T2_15 T3_15))(= T2_15 (str.++ T4_15 T5_15))(= T5_15 "__utmb=169413169")(not (str.in.re T4_15 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "b") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))) + (and (= PCTEMP_LHS_4 (- 1))(= var_0xINPUT_12454 (str.++ T0_15 T1_15))(= 0 (str.len T0_15))(not (str.in.re T1_15 (re.++ (str.to.re "_") (str.to.re "_") (str.to.re "u") (str.to.re "t") (str.to.re "m") (str.to.re "b") (str.to.re "=") (str.to.re "1") (str.to.re "6") (str.to.re "9") (str.to.re "4") (str.to.re "1") (str.to.re "3") (str.to.re "1") (str.to.re "6") (str.to.re "9"))))))) +(assert (= T_c (< (- 1) PCTEMP_LHS_4))) +(assert T_c) +(assert (= T_SELECT_5 (not (= PCTEMP_LHS_5 (- 1))))) +(assert (ite T_SELECT_5 + (and (= PCTEMP_LHS_5 (+ I0_18 PCTEMP_LHS_4))(= var_0xINPUT_12454 (str.++ T0_18 T1_18))(= I0_18 (str.len T4_18))(= PCTEMP_LHS_4 (str.len T0_18))(= T1_18 (str.++ T2_18 T3_18))(= T2_18 (str.++ T4_18 T5_18))(= T5_18 ";")(not (str.in.re T4_18 (str.to.re ";")))) + (and (= PCTEMP_LHS_5 (- 1))(= var_0xINPUT_12454 (str.++ T0_18 T1_18))(= PCTEMP_LHS_4 (str.len T0_18))(not (str.in.re T1_18 (str.to.re ";")))))) +(assert (= T_e (< PCTEMP_LHS_5 0))) +(assert T_e) + +(check-sat) diff --git a/test/regress/regress0/strings/norn-ab.smt2 b/test/regress/regress0/strings/norn-ab.smt2 new file mode 100755 index 000000000..db7aac732 --- /dev/null +++ b/test/regress/regress0/strings/norn-ab.smt2 @@ -0,0 +1,25 @@ +(set-logic QF_SLIA) +(set-info :status unsat) +(set-option :strings-exp true) + +(declare-fun var_0 () String) +(declare-fun var_1 () String) +(declare-fun var_2 () String) +(declare-fun var_3 () String) +(declare-fun var_4 () String) +(declare-fun var_5 () String) +(declare-fun var_6 () String) +(declare-fun var_7 () String) +(declare-fun var_8 () String) +(declare-fun var_9 () String) +(declare-fun var_10 () String) +(declare-fun var_11 () String) +(declare-fun var_12 () String) + +(assert (str.in.re var_4 (re.++ (str.to.re "a") (re.* (str.to.re "b"))))) +(assert (str.in.re var_4 (re.++ (re.* (str.to.re "a")) (str.to.re "b")))) +(assert (str.in.re var_4 (re.* (re.range "a" "u")))) +(assert (str.in.re var_4 (re.++ (re.* (str.to.re "a")) (re.++ (str.to.re "b") (re.* (str.to.re "b")))))) +(assert (not (str.in.re (str.++ "a" var_4 "b" ) (re.++ (re.* (str.to.re "a")) (re.++ (str.to.re "b") (re.* (str.to.re "b"))))))) +(assert (and (<= 0 (str.len var_4)) (not (not (exists ((v Int)) (= (* v 2 ) (+ (str.len var_4) 2 ))))))) +(check-sat) \ No newline at end of file -- 2.30.2