From d35a5a1d8072a662aa230319fbfc1611bb918ccf Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Mon, 21 May 2018 20:54:52 -0500 Subject: [PATCH] Infrastructure for strings strategies (#1883) --- src/theory/strings/theory_strings.cpp | 814 ++++++++++++++++---------- src/theory/strings/theory_strings.h | 306 +++++++++- 2 files changed, 779 insertions(+), 341 deletions(-) diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 3da652457..cd99a0d33 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -55,6 +55,28 @@ std::ostream& operator<<(std::ostream& out, Inference i) return out; } +std::ostream& operator<<(std::ostream& out, InferStep s) +{ + switch (s) + { + case BREAK: out << "break"; break; + case CHECK_INIT: out << "check_init"; break; + case CHECK_CONST_EQC: out << "check_const_eqc"; break; + case CHECK_EXTF_EVAL: out << "check_extf_eval"; break; + case CHECK_CYCLES: out << "check_cycles"; break; + case CHECK_FLAT_FORMS: out << "check_flat_forms"; break; + case CHECK_NORMAL_FORMS_EQ: out << "check_normal_forms_eq"; break; + case CHECK_NORMAL_FORMS_DEQ: out << "check_normal_forms_deq"; break; + case CHECK_CODES: out << "check_codes"; break; + case CHECK_LENGTH_EQC: out << "check_length_eqc"; break; + case CHECK_EXTF_REDUCTION: out << "check_extf_reduction"; break; + case CHECK_MEMBERSHIP: out << "check_membership"; break; + case CHECK_CARDINALITY: out << "check_cardinality"; break; + default: out << "?"; break; + } + return out; +} + Node TheoryStrings::TermIndex::add( TNode n, unsigned index, TheoryStrings* t, Node er, std::vector< Node >& c ) { if( index==n.getNumChildren() ){ if( d_data.isNull() ){ @@ -114,7 +136,8 @@ TheoryStrings::TheoryStrings(context::Context* c, d_input_vars(u), d_input_var_lsum(u), d_cardinality_lits(u), - d_curr_cardinality(c, 0) + d_curr_cardinality(c, 0), + d_strategy_init(false) { setupExtTheory(); getExtTheory()->addFunctionKind(kind::STRING_SUBSTR); @@ -469,6 +492,7 @@ int TheoryStrings::getReduction( int effort, Node n, Node& nr ) { void TheoryStrings::presolve() { Debug("strings-presolve") << "TheoryStrings::Presolving : get fmf options " << (options::stringFMF() ? "true" : "false") << std::endl; + initializeStrategy(); } @@ -778,9 +802,13 @@ void TheoryStrings::check(Effort e) { } doPendingFacts(); - if( !d_conflict && ( ( e == EFFORT_FULL && !d_valuation.needCheck() ) || ( e==EFFORT_STANDARD && options::stringEager() ) ) ) { - Trace("strings-check") << "Theory of strings full effort check " << std::endl; - + Assert(d_strategy_init); + std::map >::iterator itsr = + d_strat_steps.find(e); + if (!d_conflict && !d_valuation.needCheck() && itsr != d_strat_steps.end()) + { + Trace("strings-check") << "Theory of strings " << e << " effort check " + << std::endl; if(Trace.isOn("strings-eqc")) { for( unsigned t=0; t<2; t++ ) { eq::EqClassesIterator eqcs2_i = eq::EqClassesIterator( &d_equalityEngine ); @@ -811,61 +839,21 @@ void TheoryStrings::check(Effort e) { } Trace("strings-eqc") << std::endl; } - + unsigned sbegin = itsr->second.first; + unsigned send = itsr->second.second; bool addedLemma = false; bool addedFact; do{ - Trace("strings-process") << "----check, next round---" << std::endl; - checkInit(); - Trace("strings-process") << "Done check init, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() ){ - checkExtfEval(); - Trace("strings-process") << "Done check extended functions eval, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() ){ - checkFlatForms(); - Trace("strings-process") << "Done check flat forms, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() && e==EFFORT_FULL ){ - checkNormalForms(); - Trace("strings-process") << "Done check normal forms, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() ){ - if( options::stringEagerLen() ){ - checkLengthsEqc(); - Trace("strings-process") << "Done check lengths, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - } - if( !hasProcessed() ){ - if( options::stringExp() && !options::stringGuessModel() ){ - checkExtfReductions( 2 ); - Trace("strings-process") << "Done check extended functions reduction 2, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - } - if( !hasProcessed() ){ - checkMemberships(); - Trace("strings-process") << "Done check memberships, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - if( !hasProcessed() ){ - checkCardinality(); - Trace("strings-process") << "Done check cardinality, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - } - } - } - } - } - } - } - //flush the facts + runStrategy(sbegin, send); + // flush the facts addedFact = !d_pending.empty(); addedLemma = !d_lemma_cache.empty(); doPendingFacts(); doPendingLemmas(); + // repeat if we did not add a lemma or conflict }while( !d_conflict && !addedLemma && addedFact ); Trace("strings-check") << "Theory of strings done full effort check " << addedLemma << " " << d_conflict << std::endl; - }else if( e==EFFORT_LAST_CALL ){ - Assert( !hasProcessed() ); - Trace("strings-check") << "Theory of strings last call effort check " << std::endl; - checkExtfEval( 3 ); - checkExtfReductions( 2 ); - doPendingFacts(); - doPendingLemmas(); - Trace("strings-process") << "Done check extended functions reduction 2, addedFact = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; } Trace("strings-check") << "Theory of strings, done check : " << e << std::endl; Assert( d_pending.empty() ); @@ -886,10 +874,12 @@ void TheoryStrings::checkExtfReductions( int effort ) { //getExtTheory()->doReductions( effort, nred, false ); std::vector< Node > extf = getExtTheory()->getActive(); - Trace("strings-process") << "checking " << extf.size() << " active extf" << std::endl; + Trace("strings-process") << " checking " << extf.size() << " active extf" + << std::endl; for( unsigned i=0; ifirst << "] = " << ncongruent[it->first] << "/" << (congruent[it->first]+ncongruent[it->first]) << std::endl; } } - Trace("strings-process") << "Done check init, addedLemma = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - //now, infer constants for equivalence classes - if( !hasProcessed() ){ - //do fixed point - unsigned prevSize; - do{ - Trace("strings-process-debug") << "Check constant equivalence classes..." << std::endl; - prevSize = d_eqc_to_const.size(); - std::vector< Node > vecc; - checkConstantEquivalenceClasses( &d_term_index[kind::STRING_CONCAT], vecc ); - }while( !hasProcessed() && d_eqc_to_const.size()>prevSize ); - Trace("strings-process") << "Done check constant equivalence classes, addedLemma = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << ", d_conflict = " << d_conflict << std::endl; - } +} + +void TheoryStrings::checkConstantEquivalenceClasses() +{ + // do fixed point + unsigned prevSize; + std::vector vecc; + do + { + vecc.clear(); + Trace("strings-process-debug") << "Check constant equivalence classes..." + << std::endl; + prevSize = d_eqc_to_const.size(); + checkConstantEquivalenceClasses(&d_term_index[kind::STRING_CONCAT], vecc); + } while (!hasProcessed() && d_eqc_to_const.size() > prevSize); } void TheoryStrings::checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc ) { @@ -1629,23 +1621,6 @@ void TheoryStrings::checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int ef } } -void TheoryStrings::collectVars( Node n, std::vector< Node >& vars, std::map< Node, bool >& visited ) { - if( !n.isConst() ){ - if( visited.find( n )==visited.end() ){ - visited[n] = true; - if( n.getNumChildren()>0 ){ - for( unsigned i=0; i& exp ) { if( n.getNumChildren()==0 ){ NodeNodeMap::const_iterator it = d_proxy_var.find( n ); @@ -1754,13 +1729,12 @@ struct sortConstLength { } }; - -void TheoryStrings::checkFlatForms() { - //first check for cycles, while building ordering of equivalence classes - d_eqc.clear(); +void TheoryStrings::checkCycles() +{ + // first check for cycles, while building ordering of equivalence classes d_flat_form.clear(); d_flat_form_index.clear(); - Trace("strings-process") << "Check equivalence classes cycles...." << std::endl; + d_eqc.clear(); //rebuild strings eqc based on acyclic ordering std::vector< Node > eqc; eqc.insert( eqc.end(), d_strings_eqc.begin(), d_strings_eqc.end() ); @@ -1784,224 +1758,308 @@ void TheoryStrings::checkFlatForms() { return; } } - Trace("strings-process-debug") << "Done check cycles, lemmas = " << !d_pending.empty() << " " << !d_lemma_cache.empty() << std::endl; - if( !hasProcessed() ){ - //debug print flat forms - if( Trace.isOn("strings-ff") ){ - Trace("strings-ff") << "Flat forms : " << std::endl; - debugPrintFlatForms( "strings-ff" ); - } - - //inferences without recursively expanding flat forms - - //(1) approximate equality by containment, infer conflicts - for( unsigned k=0; k >::iterator it = d_eqc.find( eqc ); - if( it!=d_eqc.end() ){ - for( unsigned i=0; isecond.size(); i++ ){ - Node n = it->second[i]; - int firstc, lastc; - if( !TheoryStringsRewriter::canConstantContainList( c, d_flat_form[n], firstc, lastc ) ){ - Trace("strings-ff-debug") << "Flat form for " << n << " cannot be contained in constant " << c << std::endl; - Trace("strings-ff-debug") << " indices = " << firstc << "/" << lastc << std::endl; - //conflict, explanation is n = base ^ base = c ^ relevant porition of ( n = f[n] ) - std::vector< Node > exp; - Assert( d_eqc_to_const_base.find( eqc )!=d_eqc_to_const_base.end() ); - addToExplanation( n, d_eqc_to_const_base[eqc], exp ); - Assert( d_eqc_to_const_exp.find( eqc )!=d_eqc_to_const_exp.end() ); - if( !d_eqc_to_const_exp[eqc].isNull() ){ - exp.push_back( d_eqc_to_const_exp[eqc] ); - } - for( int e=firstc; e<=lastc; e++ ){ - if( d_flat_form[n][e].isConst() ){ - Assert( e>=0 && e<(int)d_flat_form_index[n].size() ); - Assert( d_flat_form_index[n][e]>=0 && d_flat_form_index[n][e]<(int)n.getNumChildren() ); - addToExplanation( d_flat_form[n][e], n[d_flat_form_index[n][e]], exp ); - } +} + +void TheoryStrings::checkFlatForms() +{ + // debug print flat forms + if (Trace.isOn("strings-ff")) + { + Trace("strings-ff") << "Flat forms : " << std::endl; + debugPrintFlatForms("strings-ff"); + } + + // inferences without recursively expanding flat forms + + //(1) approximate equality by containment, infer conflicts + for (const Node& eqc : d_strings_eqc) + { + Node c = getConstantEqc(eqc); + if (!c.isNull()) + { + // if equivalence class is constant, all component constants in flat forms + // must be contained in it, in order + std::map >::iterator it = d_eqc.find(eqc); + if (it != d_eqc.end()) + { + for (const Node& n : it->second) + { + int firstc, lastc; + if (!TheoryStringsRewriter::canConstantContainList( + c, d_flat_form[n], firstc, lastc)) + { + Trace("strings-ff-debug") << "Flat form for " << n + << " cannot be contained in constant " + << c << std::endl; + Trace("strings-ff-debug") << " indices = " << firstc << "/" + << lastc << std::endl; + // conflict, explanation is n = base ^ base = c ^ relevant portion + // of ( n = f[n] ) + std::vector exp; + Assert(d_eqc_to_const_base.find(eqc) != d_eqc_to_const_base.end()); + addToExplanation(n, d_eqc_to_const_base[eqc], exp); + Assert(d_eqc_to_const_exp.find(eqc) != d_eqc_to_const_exp.end()); + if (!d_eqc_to_const_exp[eqc].isNull()) + { + exp.push_back(d_eqc_to_const_exp[eqc]); + } + for (int e = firstc; e <= lastc; e++) + { + if (d_flat_form[n][e].isConst()) + { + Assert(e >= 0 && e < (int)d_flat_form_index[n].size()); + Assert(d_flat_form_index[n][e] >= 0 + && d_flat_form_index[n][e] < (int)n.getNumChildren()); + addToExplanation( + d_flat_form[n][e], n[d_flat_form_index[n][e]], exp); } - Node conc = d_false; - sendInference( exp, conc, "F_NCTN" ); - return; } + Node conc = d_false; + sendInference(exp, conc, "F_NCTN"); + return; } } } } - - //(2) scan lists, unification to infer conflicts and equalities - for( unsigned k=0; k >::iterator it = d_eqc.find( eqc ); - if( it!=d_eqc.end() && it->second.size()>1 ){ - //iterate over start index - for( unsigned start=0; startsecond.size()-1; start++ ){ - for( unsigned r=0; r<2; r++ ){ - unsigned count = 0; - std::vector< Node > inelig; - for( unsigned i=0; i<=start; i++ ){ - inelig.push_back( it->second[start] ); + } + + //(2) scan lists, unification to infer conflicts and equalities + for (const Node& eqc : d_strings_eqc) + { + std::map >::iterator it = d_eqc.find(eqc); + if (it == d_eqc.end() || it->second.size() <= 1) + { + continue; + } + // iterate over start index + for (unsigned start = 0; start < it->second.size() - 1; start++) + { + for (unsigned r = 0; r < 2; r++) + { + bool isRev = r == 1; + checkFlatForm(it->second, start, isRev); + if (d_conflict) + { + return; + } + } + } + } +} + +void TheoryStrings::checkFlatForm(std::vector& eqc, + unsigned start, + bool isRev) +{ + unsigned count = 0; + std::vector inelig; + for (unsigned i = 0; i <= start; i++) + { + inelig.push_back(eqc[start]); + } + Node a = eqc[start]; + Node b; + do + { + std::vector exp; + Node conc; + int inf_type = -1; + unsigned eqc_size = eqc.size(); + unsigned asize = d_flat_form[a].size(); + if (count == asize) + { + for (unsigned i = start + 1; i < eqc_size; i++) + { + b = eqc[i]; + if (std::find(inelig.begin(), inelig.end(), b) == inelig.end()) + { + unsigned bsize = d_flat_form[b].size(); + if (count < bsize) + { + // endpoint + std::vector conc_c; + for (unsigned j = count; j < bsize; j++) + { + conc_c.push_back( + b[d_flat_form_index[b][j]].eqNode(d_emptyString)); } - Node a = it->second[start]; - Node b; - do{ - std::vector< Node > exp; - //std::vector< Node > exp_n; - Node conc; - int inf_type = -1; - if( count==d_flat_form[a].size() ){ - for( unsigned i=start+1; isecond.size(); i++ ){ - b = it->second[i]; - if( std::find( inelig.begin(), inelig.end(), b )==inelig.end() ){ - if( count conc_c; - for( unsigned j=count; j0 ); - //swap, will enforce is empty past current - a = it->second[i]; b = it->second[start]; - count--; - break; - } - inelig.push_back( it->second[i] ); - } - } - }else{ - Node curr = d_flat_form[a][count]; - Node curr_c = getConstantEqc( curr ); - Node ac = a[d_flat_form_index[a][count]]; - std::vector< Node > lexp; - Node lcurr = getLength( ac, lexp ); - for( unsigned i=1; isecond.size(); i++ ){ - b = it->second[i]; - if( std::find( inelig.begin(), inelig.end(), b )==inelig.end() ){ - if( count==d_flat_form[b].size() ){ - inelig.push_back( b ); - //endpoint - std::vector< Node > conc_c; - for( unsigned j=count; j0 ); - count--; - break; - }else{ - Node cc = d_flat_form[b][count]; - if( cc!=curr ){ - Node bc = b[d_flat_form_index[b][count]]; - inelig.push_back( b ); - Assert( !areEqual( curr, cc ) ); - Node cc_c = getConstantEqc( cc ); - if( !curr_c.isNull() && !cc_c.isNull() ){ - //check for constant conflict - int index; - Node s = TheoryStringsRewriter::splitConstant( cc_c, curr_c, index, r==1 ); - if( s.isNull() ){ - addToExplanation( ac, d_eqc_to_const_base[curr], exp ); - addToExplanation( d_eqc_to_const_exp[curr], exp ); - addToExplanation( bc, d_eqc_to_const_base[cc], exp ); - addToExplanation( d_eqc_to_const_exp[cc], exp ); - conc = d_false; - inf_type = 0; - break; - } - }else if( (d_flat_form[a].size()-1)==count && (d_flat_form[b].size()-1)==count ){ - conc = ac.eqNode( bc ); - inf_type = 3; - break; - }else{ - //if lengths are the same, apply LengthEq - std::vector< Node > lexp2; - Node lcc = getLength( bc, lexp2 ); - if( areEqual( lcurr, lcc ) ){ - Trace("strings-ff-debug") << "Infer " << ac << " == " << bc << " since " << lcurr << " == " << lcc << std::endl; - //exp_n.push_back( getLength( curr, true ).eqNode( getLength( cc, true ) ) ); - Trace("strings-ff-debug") << "Explanation for " << lcurr << " is "; - for( unsigned j=0; j 0); + // swap, will enforce is empty past current + a = eqc[i]; + b = eqc[start]; + count--; + break; + } + inelig.push_back(eqc[i]); + } + } + } + else + { + Node curr = d_flat_form[a][count]; + Node curr_c = getConstantEqc(curr); + Node ac = a[d_flat_form_index[a][count]]; + std::vector lexp; + Node lcurr = getLength(ac, lexp); + for (unsigned i = 1; i < eqc_size; i++) + { + b = eqc[i]; + if (std::find(inelig.begin(), inelig.end(), b) == inelig.end()) + { + if (count == d_flat_form[b].size()) + { + inelig.push_back(b); + // endpoint + std::vector conc_c; + for (unsigned j = count; j < asize; j++) + { + conc_c.push_back( + a[d_flat_form_index[a][j]].eqNode(d_emptyString)); + } + Assert(!conc_c.empty()); + conc = mkAnd(conc_c); + inf_type = 2; + Assert(count > 0); + count--; + break; + } + else + { + Node cc = d_flat_form[b][count]; + if (cc != curr) + { + Node bc = b[d_flat_form_index[b][count]]; + inelig.push_back(b); + Assert(!areEqual(curr, cc)); + Node cc_c = getConstantEqc(cc); + if (!curr_c.isNull() && !cc_c.isNull()) + { + // check for constant conflict + int index; + Node s = TheoryStringsRewriter::splitConstant( + cc_c, curr_c, index, isRev); + if (s.isNull()) + { + addToExplanation(ac, d_eqc_to_const_base[curr], exp); + addToExplanation(d_eqc_to_const_exp[curr], exp); + addToExplanation(bc, d_eqc_to_const_base[cc], exp); + addToExplanation(d_eqc_to_const_exp[cc], exp); + conc = d_false; + inf_type = 0; + break; } } - if( !conc.isNull() ){ - Trace("strings-ff-debug") << "Found inference : " << conc << " based on equality " << a << " == " << b << " " << r << " " << inf_type << std::endl; - addToExplanation( a, b, exp ); - //explain why prefixes up to now were the same - for( unsigned j=0; j lexp2; + Node lcc = getLength(bc, lexp2); + if (areEqual(lcurr, lcc)) + { + Trace("strings-ff-debug") << "Infer " << ac << " == " << bc + << " since " << lcurr + << " == " << lcc << std::endl; + // exp_n.push_back( getLength( curr, true ).eqNode( + // getLength( cc, true ) ) ); + Trace("strings-ff-debug") << "Explanation for " << lcurr + << " is "; + for (unsigned j = 0; j < lexp.size(); j++) + { + Trace("strings-ff-debug") << lexp[j] << std::endl; } - if( r==0 ){ - for( int j=0; jjj; --j ){ - if( areEqual( c[j], d_emptyString ) ){ - addToExplanation( c[j], d_emptyString, exp ); - } - } + Trace("strings-ff-debug") << "Explanation for " << lcc + << " is "; + for (unsigned j = 0; j < lexp2.size(); j++) + { + Trace("strings-ff-debug") << lexp2[j] << std::endl; } - } - //notice that F_EndpointEmp is not typically applied, since strict prefix equality ( a.b = a ) where a,b non-empty - // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) when len(b)!=0. - sendInference( exp, conc, inf_type==0 ? "F_Const" : ( inf_type==1 ? "F_Unify" : ( inf_type==2 ? "F_EndpointEmp" : "F_EndpointEq" ) ) ); - if( d_conflict ){ - return; - }else{ + exp.insert(exp.end(), lexp.begin(), lexp.end()); + exp.insert(exp.end(), lexp2.begin(), lexp2.end()); + addToExplanation(lcurr, lcc, exp); + conc = ac.eqNode(bc); + inf_type = 1; break; } } - count++; - }while( inelig.size()second.size() ); - - for( unsigned i=0; isecond.size(); i++ ){ - std::reverse( d_flat_form[it->second[i]].begin(), d_flat_form[it->second[i]].end() ); - std::reverse( d_flat_form_index[it->second[i]].begin(), d_flat_form_index[it->second[i]].end() ); } } } } } - if( !hasProcessed() ){ - // simple extended func reduction - Trace("strings-process") << "Check extended function reduction effort=1..." << std::endl; - checkExtfReductions( 1 ); - Trace("strings-process") << "Done check extended function reduction" << std::endl; + if (!conc.isNull()) + { + Trace("strings-ff-debug") + << "Found inference : " << conc << " based on equality " << a + << " == " << b << ", " << isRev << " " << inf_type << std::endl; + addToExplanation(a, b, exp); + // explain why prefixes up to now were the same + for (unsigned j = 0; j < count; j++) + { + Trace("strings-ff-debug") << "Add at " << d_flat_form_index[a][j] << " " + << d_flat_form_index[b][j] << std::endl; + addToExplanation( + a[d_flat_form_index[a][j]], b[d_flat_form_index[b][j]], exp); + } + // explain why other components up to now are empty + for (unsigned t = 0; t < 2; t++) + { + Node c = t == 0 ? a : b; + int jj; + if (inf_type == 3 || (t == 1 && inf_type == 2)) + { + // explain all the empty components for F_EndpointEq, all for + // the short end for F_EndpointEmp + jj = isRev ? -1 : c.getNumChildren(); + } + else + { + jj = t == 0 ? d_flat_form_index[a][count] + : d_flat_form_index[b][count]; + } + int startj = isRev ? jj + 1 : 0; + int endj = isRev ? c.getNumChildren() : jj; + for (int j = startj; j < endj; j++) + { + if (areEqual(c[j], d_emptyString)) + { + addToExplanation(c[j], d_emptyString, exp); + } + } + } + // notice that F_EndpointEmp is not typically applied, since + // strict prefix equality ( a.b = a ) where a,b non-empty + // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) + // when len(b)!=0. + sendInference( + exp, + conc, + inf_type == 0 + ? "F_Const" + : (inf_type == 1 ? "F_Unify" : (inf_type == 2 ? "F_EndpointEmp" + : "F_EndpointEq"))); + if (d_conflict) + { + return; + } + break; } + count++; + } while (inelig.size() < eqc.size()); + + for (const Node& n : eqc) + { + std::reverse(d_flat_form[n].begin(), d_flat_form[n].end()); + std::reverse(d_flat_form_index[n].begin(), d_flat_form_index[n].end()); } } @@ -2077,8 +2135,8 @@ Node TheoryStrings::checkCycles( Node eqc, std::vector< Node >& curr, std::vecto return Node::null(); } - -void TheoryStrings::checkNormalForms(){ +void TheoryStrings::checkNormalFormsEq() +{ if( !options::stringEagerLen() ){ for( unsigned i=0; i 0) + { + Trace("strings-process") << ", effort = " << effort; + } + Trace("strings-process") << "..." << std::endl; + switch (s) + { + case CHECK_INIT: checkInit(); break; + case CHECK_CONST_EQC: checkConstantEquivalenceClasses(); break; + case CHECK_EXTF_EVAL: checkExtfEval(effort); break; + case CHECK_CYCLES: checkCycles(); break; + case CHECK_FLAT_FORMS: checkFlatForms(); break; + case CHECK_NORMAL_FORMS_EQ: checkNormalFormsEq(); break; + case CHECK_NORMAL_FORMS_DEQ: checkNormalFormsDeq(); break; + case CHECK_CODES: checkCodes(); break; + case CHECK_LENGTH_EQC: checkLengthsEqc(); break; + case CHECK_EXTF_REDUCTION: checkExtfReductions(effort); break; + case CHECK_MEMBERSHIP: checkMemberships(); break; + case CHECK_CARDINALITY: checkCardinality(); break; + default: Unreachable(); break; + } + Trace("strings-process") << "Done " << s + << ", addedFact = " << !d_pending.empty() << " " + << !d_lemma_cache.empty() + << ", d_conflict = " << d_conflict << std::endl; +} + +bool TheoryStrings::hasStrategyEffort(Effort e) const +{ + return d_strat_steps.find(e) != d_strat_steps.end(); +} + +void TheoryStrings::addStrategyStep(InferStep s, int effort, bool addBreak) +{ + // must run check init first + Assert((s == CHECK_INIT)==d_infer_steps.empty()); + // must use check cycles when using flat forms + Assert(s != CHECK_FLAT_FORMS + || std::find(d_infer_steps.begin(), d_infer_steps.end(), CHECK_CYCLES) + != d_infer_steps.end()); + d_infer_steps.push_back(s); + d_infer_step_effort.push_back(effort); + if (addBreak) + { + d_infer_steps.push_back(BREAK); + d_infer_step_effort.push_back(0); + } +} + +void TheoryStrings::initializeStrategy() +{ + // initialize the strategy if not already done so + if (!d_strategy_init) + { + std::map step_begin; + std::map step_end; + d_strategy_init = true; + // beginning indices + step_begin[EFFORT_FULL] = 0; + if (options::stringEager()) + { + step_begin[EFFORT_STANDARD] = 0; + } + // add the inference steps + addStrategyStep(CHECK_INIT); + addStrategyStep(CHECK_CONST_EQC); + addStrategyStep(CHECK_EXTF_EVAL, 0); + addStrategyStep(CHECK_CYCLES); + addStrategyStep(CHECK_FLAT_FORMS); + addStrategyStep(CHECK_EXTF_REDUCTION, 1); + if (options::stringEager()) + { + // do only the above inferences at standard effort, if applicable + step_end[EFFORT_STANDARD] = d_infer_steps.size() - 1; + } + addStrategyStep(CHECK_NORMAL_FORMS_EQ); + addStrategyStep(CHECK_EXTF_EVAL, 1); + if (!options::stringEagerLen()) + { + addStrategyStep(CHECK_LENGTH_EQC); + } + addStrategyStep(CHECK_NORMAL_FORMS_DEQ); + addStrategyStep(CHECK_CODES); + if (options::stringEagerLen()) + { + addStrategyStep(CHECK_LENGTH_EQC); + } + if (options::stringExp() && !options::stringGuessModel()) + { + addStrategyStep(CHECK_EXTF_REDUCTION, 2); + } + addStrategyStep(CHECK_MEMBERSHIP); + addStrategyStep(CHECK_CARDINALITY); + step_end[EFFORT_FULL] = d_infer_steps.size() - 1; + if (options::stringExp() && options::stringGuessModel()) + { + step_begin[EFFORT_LAST_CALL] = d_infer_steps.size(); + // these two steps are run in parallel + addStrategyStep(CHECK_EXTF_REDUCTION, 2, false); + addStrategyStep(CHECK_EXTF_EVAL, 3); + step_end[EFFORT_LAST_CALL] = d_infer_steps.size() - 1; + } + // set the beginning/ending ranges + for (const std::pair& it_begin : step_begin) + { + Effort e = it_begin.first; + std::map::iterator it_end = step_end.find(e); + Assert(it_end != step_end.end()); + d_strat_steps[e] = + std::pair(it_begin.second, it_end->second); + } + } +} + +void TheoryStrings::runStrategy(unsigned sbegin, unsigned send) +{ + Trace("strings-process") << "----check, next round---" << std::endl; + for (unsigned i = sbegin; i <= send; i++) + { + InferStep curr = d_infer_steps[i]; + if (curr == BREAK) + { + if (hasProcessed()) + { + break; + } + } + else + { + runInferStep(curr, d_infer_step_effort[i]); + if (d_conflict) + { + break; + } + } + } + Trace("strings-process") << "----finished round---" << std::endl; +} + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index bd5a797ae..bc60eecdf 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -93,6 +93,43 @@ enum Inference }; std::ostream& operator<<(std::ostream& out, Inference i); +/** inference steps + * + * Corresponds to a step in the overall strategy of the strings solver. For + * details on the individual steps, see documentation on the inference schemas + * within TheoryStrings. + */ +enum InferStep +{ + // indicates that the strategy should break if lemmas or facts are added + BREAK, + // check initial + CHECK_INIT, + // check constant equivalence classes + CHECK_CONST_EQC, + // check extended function evaluation + CHECK_EXTF_EVAL, + // check cycles + CHECK_CYCLES, + // check flat forms + CHECK_FLAT_FORMS, + // check normal forms equalities + CHECK_NORMAL_FORMS_EQ, + // check normal forms disequalities + CHECK_NORMAL_FORMS_DEQ, + // check codes + CHECK_CODES, + // check lengths for equivalence classes + CHECK_LENGTH_EQC, + // check extended function reductions + CHECK_EXTF_REDUCTION, + // check regular expression memberships + CHECK_MEMBERSHIP, + // check cardinality + CHECK_CARDINALITY, +}; +std::ostream& operator<<(std::ostream& out, Inference i); + struct StringsProxyVarAttributeId {}; typedef expr::Attribute< StringsProxyVarAttributeId, bool > StringsProxyVarAttribute; @@ -371,21 +408,25 @@ private: Node d_nf_pair[2]; bool sendAsLemma(); }; - //initial check - void checkInit(); void checkConstantEquivalenceClasses( TermIndex* ti, std::vector< Node >& vecc ); - //extended functions evaluation check - void checkExtfEval( int effort = 0 ); void checkExtfInference( Node n, Node nr, ExtfInfoTmp& in, int effort ); - void collectVars( Node n, std::vector< Node >& vars, std::map< Node, bool >& visited ); Node getSymbolicDefinition( Node n, std::vector< Node >& exp ); - //check extf reduction - void checkExtfReductions( int effort ); - //flat forms check - void checkFlatForms(); + + //--------------------------for checkFlatForm + /** + * This checks whether there are flat form inferences between eqc[start] and + * eqc[j] for some j>start. If the flag isRev is true, we check for flat form + * interferences in the reverse direction of the flat forms. For more details, + * see checkFlatForms below. + */ + void checkFlatForm(std::vector& eqc, unsigned start, bool isRev); + //--------------------------end for checkFlatForm + + //--------------------------for checkCycles Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ); - //normal forms check - void checkNormalForms(); + //--------------------------end for checkCycles + + //--------------------------for checkNormalFormsEq void normalizeEquivalenceClass( Node n ); void getNormalForms( Node &eqc, std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend ); @@ -400,32 +441,30 @@ private: void processSimpleNEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, unsigned i, unsigned j, unsigned& index, bool isRev, unsigned rproc, std::vector< InferInfo >& pinfer ); + //--------------------------end for checkNormalFormsEq + + //--------------------------for checkNormalFormsDeq void processDeq( Node n1, Node n2 ); int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ); int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ); - void checkDeqNF(); void getExplanationVectorForPrefix( std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, unsigned i, int index, bool isRev, std::vector< Node >& curr_exp ); void getExplanationVectorForPrefixEq( std::vector< std::vector< Node > > &normal_forms, std::vector< Node > &normal_form_src, std::vector< std::vector< Node > > &normal_forms_exp, std::vector< std::map< Node, std::map< bool, int > > >& normal_forms_exp_depend, unsigned i, unsigned j, int index_i, int index_j, bool isRev, std::vector< Node >& curr_exp ); + //--------------------------end for checkNormalFormsDeq - Node collectConstantStringAt( std::vector< Node >& vec, int& index, bool isRev ); - - //check membership constraints + //--------------------------------for checkMemberships + // check membership constraints Node mkRegExpAntec(Node atom, Node ant); bool applyRConsume( CVC4::String &s, Node &r ); Node applyRSplit( Node s1, Node s2, Node r ); bool applyRLen( std::map< Node, std::vector< Node > > &XinR_with_exps ); - void checkMemberships(); bool checkPDerivative( Node x, Node r, Node atom, bool &addedLemma, std::vector< Node > &nf_exp); //check contains void checkPosContains( std::vector< Node >& posContains ); void checkNegContains( std::vector< Node >& negContains ); - //lengths normalize check - void checkLengthsEqc(); - //cardinality check - void checkCardinality(); + //--------------------------------end for checkMemberships private: void addCarePairs( quantifiers::TermArgTrie * t1, quantifiers::TermArgTrie * t2, unsigned arity, unsigned depth ); @@ -612,7 +651,232 @@ private: ~Statistics(); };/* class TheoryStrings::Statistics */ Statistics d_statistics; - + + private: + //-----------------------inference steps + /** check initial + * + * This function initializes term indices for each strings function symbol. + * One key aspect of this construction is that concat terms are indexed by + * their list of non-empty components. For example, if x = "" is an equality + * asserted in this SAT context, then y ++ x ++ z may be indexed by (y,z). + * This method may infer various facts while building these term indices, for + * instance, based on congruence. An example would be inferring: + * y ++ x ++ z = y ++ z + * if both terms are registered in this SAT context. + * + * This function should be called as a first step of any strategy. + */ + void checkInit(); + /** check constant equivalence classes + * + * This function infers whether CONCAT terms can be simplified to constants. + * For example, if x = "a" and y = "b" are equalities in the current SAT + * context, then we may infer x ++ "c" ++ y is equivalent to "acb". In this + * case, we infer the fact x ++ "c" ++ y = "acb". + */ + void checkConstantEquivalenceClasses(); + /** check extended functions evaluation + * + * This applies "context-dependent simplification" for all active extended + * function terms in this SAT context. This infers facts of the form: + * x = c => f( t1 ... tn ) = c' + * where the rewritten form of f( t1...tn ) { x |-> c } is c', and x = c + * is a (tuple of) equalities that are asserted in this SAT context, and + * f( t1 ... tn ) is a term from this SAT context. + * + * For more details, this is steps 4 when effort=0 and step 6 when + * effort=1 from Strategy 1 in Reynolds et al, "Scaling up DPLL(T) String + * Solvers using Context-Dependent Simplification", CAV 2017. When called with + * effort=3, we apply context-dependent simplification based on model values. + */ + void checkExtfEval(int effort); + /** check cycles + * + * This inference schema ensures that a containment ordering < over the + * string equivalence classes is acyclic. We define this ordering < such that + * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2 + * if there exists a ti whose flat form (see below) is [w1...sj...wk] for + * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat + * form components that do not constitute this chain, e.g. (w1...wk) \ sj + * in the flat form above, must be empty. + * + * For more details, see the inference S-Cycle in Liang et al CAV 2014. + */ + void checkCycles(); + /** check flat forms + * + * This applies an inference schema based on "flat forms". The flat form of a + * string term t is a vector of representative terms [r1, ..., rn] such that + * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to + * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of + * the equivalence class containing t1. For example, if t is y ++ z ++ z, + * E is { y = "", w = z }, and w is the representative of the equivalence + * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms + * in the same equivalence classes with flat forms [r1...rn] and [s1...sm]. + * We may infer various facts based on this pair of terms. For example: + * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si), + * rn = sn, if n=m and rj == sj for each j < n, + * ri = empty, if n=m+1 and ri == rj for each i=1,...,m. + * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences + * respectively. + * + * Notice that this inference scheme is an optimization and not needed for + * model-soundness. The motivation for this schema is that it is simpler than + * checkNormalFormsEq, which can be seen as a recursive version of this + * schema (see difference of "normal form" vs "flat form" below), and + * checkNormalFormsEq is complete, in the sense that if it passes with no + * inferences, we are ensured that all string equalities in the current + * context are satisfied. + * + * Must call checkCycles before this function in a strategy. + */ + void checkFlatForms(); + /** check normal forms equalities + * + * This applies an inference schema based on "normal forms". The normal form + * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn}, + * where t1...tn are concatenation terms is a vector of representative terms + * [r1, ..., rm] such that: + * (1) if n=0, then m=1 and r1 is the representative of e, + * (2) if n>0, say + * t1 = t^1_1 ++ ... ++ t^1_m_1 + * ... + * tn = t^1_n ++ ... ++ t^_m_n + * for *each* i=1, ..., n, the result of concenating the normal forms of + * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class + * can be assigned a normal form, then all equalities between ti and tj are + * satisfied by all models that correspond to extensions of the current + * assignment. For further detail on this terminology, see Liang et al + * CAV 2014. + * + * Notice that all constant words are implicitly considered concatentation + * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c". + * + * At a high level, we build normal forms for equivalence classes bottom-up, + * starting with equivalence classes that are minimal with respect to the + * containment ordering < computed during checkCycles. While computing a + * normal form for an equivalence class, we may infer equalities between + * components of strings that must be equal (e.g. x=y when x++z == y++w when + * len(x)==len(y) is asserted), derive conflicts if two strings have disequal + * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split + * string terms into smaller components using fresh skolem variables (see + * Inference values with names "SPLIT"). We also may introduce regular + * expression constraints in this method for looping word equations (see + * the Inference INFER_FLOOP). + * + * If this inference schema returns no facts, lemmas, or conflicts, then + * we have successfully assigned normal forms for all equivalence classes, as + * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or + * conflict based on inferences in the Inference enumeration above. + */ + void checkNormalFormsEq(); + /** check normal forms disequalities + * + * This inference schema can be seen as the converse of the above schema. In + * particular, it ensures that each pair of distinct equivalence classes + * e1 and e2 have distinct normal forms. + * + * This method considers all pairs of distinct equivalence classes (e1,e2) + * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It + * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn] + * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are + * disequal and have the same length, then x1 and x2 have distinct normal + * forms. Otherwise, we may add splitting lemmas on the length of ri and si, + * or split on an equality between ri and si. + * + * If this inference schema returns no facts, lemmas, or conflicts, then all + * disequalities between string terms are satisfied by all models that are + * extensions of the current assignment. + */ + void checkNormalFormsDeq(); + /** check codes + * + * This inference schema ensures that constraints between str.code terms + * are satisfied by models that correspond to extensions of the current + * assignment. In particular, this method ensures that str.code can be + * given an interpretation that is injective for string arguments with length + * one. It may add lemmas of the form: + * str.code(x) == -1 V str.code(x) != str.code(y) V x == y + */ + void checkCodes(); + /** check lengths for equivalence classes + * + * This inference schema adds lemmas of the form: + * E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) ) + * where [r1, ..., rn] is the normal form of the equivalence class containing + * x. This schema is not required for correctness but experimentally has + * shown to be helpful. + */ + void checkLengthsEqc(); + /** check extended function reductions + * + * This adds "reduction" lemmas for each active extended function in this SAT + * context. These are generally lemmas of the form: + * F[t1...tn,k] ^ f( t1 ... tn ) = k + * where f( t1 ... tn ) is an active extended function, k is a fresh constant + * and F is a formula that constrains k based on the definition of f. + * + * For more details, this is step 7 from Strategy 1 in Reynolds et al, + * CAV 2017. We stratify this in practice, where calling this with effort=1 + * reduces some of the "easier" extended functions, and effort=2 reduces + * the rest. + */ + void checkExtfReductions(int effort); + /** check regular expression memberships + * + * This checks the satisfiability of all regular expression memberships + * of the form (not) s in R. We use various heuristic techniques based on + * unrolling, combined with techniques from Liang et al, "A Decision Procedure + * for Regular Membership and Length Constraints over Unbounded Strings", + * FroCoS 2015. + */ + void checkMemberships(); + /** check cardinality + * + * This function checks whether a cardinality inference needs to be applied + * to a set of equivalence classes. For details, see Step 5 of the proof + * procedure from Liang et al, CAV 2014. + */ + void checkCardinality(); + //-----------------------end inference steps + + //-----------------------representation of the strategy + /** is strategy initialized */ + bool d_strategy_init; + /** run the given inference step */ + void runInferStep(InferStep s, int effort); + /** the strategy */ + std::vector d_infer_steps; + /** the effort levels */ + std::vector d_infer_step_effort; + /** the range (begin, end) of steps to run at given efforts */ + std::map > d_strat_steps; + /** do we have a strategy for effort e? */ + bool hasStrategyEffort(Effort e) const; + /** initialize the strategy + * + * This adds (s,effort) as a strategy step to the vectors d_infer_steps and + * d_infer_step_effort. This indicates that a call to runInferStep should + * be run as the next step in the strategy. If addBreak is true, we add + * a BREAK to the strategy following this step. + */ + void addStrategyStep(InferStep s, int effort = 0, bool addBreak = true); + /** initialize the strategy + * + * This initializes the above information based on the options. This makes + * a series of calls to addStrategyStep above. + */ + void initializeStrategy(); + /** run strategy + * + * This executes the inference steps starting at index sbegin and ending at + * index send. We exit if any step in this sequence adds a lemma or infers a + * fact. + */ + void runStrategy(unsigned sbegin, unsigned send); + //-----------------------end representation of the strategy + };/* class TheoryStrings */ }/* CVC4::theory::strings namespace */ -- 2.30.2