From c9a7ca1f06080b7522ba582bdb99ba9077509209 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Fri, 7 Feb 2020 21:35:30 -0600 Subject: [PATCH] Split core solver from the theory of strings (#3713) This splits the main procedure from Liang et al CAV 2014 to its own file, the "core solver" of theory of strings. I have intentionally not updated or clang-formatted the code in core_solver.cpp since I would prefer this PR to involve as little change to behavior as possible (it is copied verbatim from theory_strings.cpp). Future PRs will clean this code up. --- src/CMakeLists.txt | 2 + src/theory/strings/core_solver.cpp | 2161 ++++++++++++++++++++++++ src/theory/strings/core_solver.h | 377 +++++ src/theory/strings/theory_strings.cpp | 2200 +------------------------ src/theory/strings/theory_strings.h | 241 +-- 5 files changed, 2593 insertions(+), 2388 deletions(-) create mode 100644 src/theory/strings/core_solver.cpp create mode 100644 src/theory/strings/core_solver.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f0aee982c..5bb239bd5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -672,6 +672,8 @@ libcvc4_add_sources( theory/sort_inference.h theory/strings/base_solver.cpp theory/strings/base_solver.h + theory/strings/core_solver.cpp + theory/strings/core_solver.h theory/strings/infer_info.cpp theory/strings/infer_info.h theory/strings/inference_manager.cpp diff --git a/src/theory/strings/core_solver.cpp b/src/theory/strings/core_solver.cpp new file mode 100644 index 000000000..5ad5e5bd6 --- /dev/null +++ b/src/theory/strings/core_solver.cpp @@ -0,0 +1,2161 @@ +/********************* */ +/*! \file theory_strings.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds, Tianyi Liang, Morgan Deters + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the theory of strings. + ** + ** Implementation of the theory of strings. + **/ + +#include "theory/strings/core_solver.h" + +#include "theory/strings/theory_strings_utils.h" +#include "options/strings_options.h" +#include "theory/strings/theory_strings_rewriter.h" + + +using namespace std; +using namespace CVC4::context; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +CoreSolver::CoreSolver(context::Context* c, context::UserContext* u, SolverState& s, InferenceManager& im, SkolemCache& skc, BaseSolver& bs) : +d_state(s), d_im(im), d_skCache(skc), +d_bsolver(bs), +d_nf_pairs(c) +{ + d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) ); + d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); + d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); + d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); + d_true = NodeManager::currentNM()->mkConst( true ); + d_false = NodeManager::currentNM()->mkConst( false ); +} + +CoreSolver::~CoreSolver() { + +} + +void CoreSolver::debugPrintFlatForms( const char * tc ){ + for( unsigned k=0; k1 ){ + Trace( tc ) << "EQC [" << eqc << "]" << std::endl; + }else{ + Trace( tc ) << "eqc [" << eqc << "]"; + } + Node c = d_bsolver.getConstantEqc(eqc); + if( !c.isNull() ){ + Trace( tc ) << " C: " << c; + if( d_eqc[eqc].size()>1 ){ + Trace( tc ) << std::endl; + } + } + if( d_eqc[eqc].size()>1 ){ + for( unsigned i=0; i d_const_length; + bool operator() (Node i, Node j) { + std::map< Node, unsigned >::iterator it_i = d_const_length.find( i ); + std::map< Node, unsigned >::iterator it_j = d_const_length.find( j ); + if( it_i==d_const_length.end() ){ + if( it_j==d_const_length.end() ){ + return isecondsecond; + } + } + } +}; + +void CoreSolver::checkCycles() +{ + // first check for cycles, while building ordering of equivalence classes + d_flat_form.clear(); + d_flat_form_index.clear(); + d_eqc.clear(); + // Rebuild strings eqc based on acyclic ordering, first copy the equivalence + // classes from the base solver. + std::vector< Node > eqc = d_bsolver.getStringEqc(); + d_strings_eqc.clear(); + if( options::stringBinaryCsp() ){ + //sort: process smallest constants first (necessary if doing binary splits) + sortConstLength scl; + for( unsigned i=0; i().size(); + } + } + std::sort( eqc.begin(), eqc.end(), scl ); + } + for( unsigned i=0; i curr; + std::vector< Node > exp; + checkCycles( eqc[i], curr, exp ); + if (d_im.hasProcessed()) + { + return; + } + } +} + +void CoreSolver::checkFlatForms() +{ + // debug print flat forms + if (Trace.isOn("strings-ff")) + { + Trace("strings-ff") << "Flat forms : " << std::endl; + debugPrintFlatForms("strings-ff"); + } + + // inferences without recursively expanding flat forms + + //(1) approximate equality by containment, infer conflicts + for (const Node& eqc : d_strings_eqc) + { + Node c = d_bsolver.getConstantEqc(eqc); + if (!c.isNull()) + { + // if equivalence class is constant, all component constants in flat forms + // must be contained in it, in order + std::map >::iterator it = d_eqc.find(eqc); + if (it != d_eqc.end()) + { + for (const Node& n : it->second) + { + int firstc, lastc; + if (!TheoryStringsRewriter::canConstantContainList( + c, d_flat_form[n], firstc, lastc)) + { + Trace("strings-ff-debug") << "Flat form for " << n + << " cannot be contained in constant " + << c << std::endl; + Trace("strings-ff-debug") << " indices = " << firstc << "/" + << lastc << std::endl; + // conflict, explanation is n = base ^ base = c ^ relevant portion + // of ( n = f[n] ) + std::vector exp; + d_bsolver.explainConstantEqc(n,eqc,exp); + for (int e = firstc; e <= lastc; e++) + { + if (d_flat_form[n][e].isConst()) + { + Assert(e >= 0 && e < (int)d_flat_form_index[n].size()); + Assert(d_flat_form_index[n][e] >= 0 + && d_flat_form_index[n][e] < (int)n.getNumChildren()); + d_im.addToExplanation( + d_flat_form[n][e], n[d_flat_form_index[n][e]], exp); + } + } + Node conc = d_false; + d_im.sendInference(exp, conc, "F_NCTN"); + return; + } + } + } + } + } + + //(2) scan lists, unification to infer conflicts and equalities + for (const Node& eqc : d_strings_eqc) + { + std::map >::iterator it = d_eqc.find(eqc); + if (it == d_eqc.end() || it->second.size() <= 1) + { + continue; + } + // iterate over start index + for (unsigned start = 0; start < it->second.size() - 1; start++) + { + for (unsigned r = 0; r < 2; r++) + { + bool isRev = r == 1; + checkFlatForm(it->second, start, isRev); + if (d_state.isInConflict()) + { + return; + } + + for (const Node& n : it->second) + { + std::reverse(d_flat_form[n].begin(), d_flat_form[n].end()); + std::reverse(d_flat_form_index[n].begin(), + d_flat_form_index[n].end()); + } + } + } + } +} + +namespace { + +enum class FlatFormInfer +{ + NONE, + CONST, + UNIFY, + ENDPOINT_EMP, + ENDPOINT_EQ, +}; + +std::ostream& operator<<(std::ostream& os, FlatFormInfer inf) +{ + switch (inf) + { + case FlatFormInfer::NONE: os << ""; break; + case FlatFormInfer::CONST: os << "F_Const"; break; + case FlatFormInfer::UNIFY: os << "F_Unify"; break; + case FlatFormInfer::ENDPOINT_EMP: os << "F_EndpointEmp"; break; + case FlatFormInfer::ENDPOINT_EQ: os << "F_EndpointEq"; break; + default: os << ""; break; + } + return os; +} + +} // namespace + +void CoreSolver::checkFlatForm(std::vector& eqc, + size_t start, + bool isRev) +{ + size_t count = 0; + // We check for flat form inferences involving `eqc[start]` and terms past + // `start`. If there was a flat form inference involving `eqc[start]` and a + // term at a smaller index `i`, we would have found it with when `start` was + // `i`. Thus, we mark the preceeding terms in the equivalence class as + // ineligible. + std::vector inelig(eqc.begin(), eqc.begin() + start + 1); + Node a = eqc[start]; + Trace("strings-ff-debug") + << "Check flat form for a = " << a << ", whose flat form is " + << d_flat_form[a] << ")" << std::endl; + Node b; + do + { + std::vector exp; + Node conc; + FlatFormInfer infType = FlatFormInfer::NONE; + size_t eqc_size = eqc.size(); + size_t asize = d_flat_form[a].size(); + if (count == asize) + { + for (size_t i = start + 1; i < eqc_size; i++) + { + b = eqc[i]; + if (std::find(inelig.begin(), inelig.end(), b) != inelig.end()) + { + continue; + } + + size_t bsize = d_flat_form[b].size(); + if (count < bsize) + { + Trace("strings-ff-debug") + << "Found endpoint (in a) with non-empty b = " << b + << ", whose flat form is " << d_flat_form[b] << std::endl; + // endpoint + std::vector conc_c; + for (unsigned j = count; j < bsize; j++) + { + conc_c.push_back(b[d_flat_form_index[b][j]].eqNode(d_emptyString)); + } + Assert(!conc_c.empty()); + conc = utils::mkAnd(conc_c); + infType = FlatFormInfer::ENDPOINT_EMP; + Assert(count > 0); + // swap, will enforce is empty past current + a = eqc[i]; + b = eqc[start]; + break; + } + inelig.push_back(eqc[i]); + } + } + else + { + Node curr = d_flat_form[a][count]; + Node curr_c = d_bsolver.getConstantEqc(curr); + Node ac = a[d_flat_form_index[a][count]]; + std::vector lexp; + Node lcurr = d_state.getLength(ac, lexp); + for (size_t i = start + 1; i < eqc_size; i++) + { + b = eqc[i]; + if (std::find(inelig.begin(), inelig.end(), b) != inelig.end()) + { + continue; + } + + if (count == d_flat_form[b].size()) + { + inelig.push_back(b); + Trace("strings-ff-debug") + << "Found endpoint in b = " << b << ", whose flat form is " + << d_flat_form[b] << std::endl; + // endpoint + std::vector conc_c; + for (size_t j = count; j < asize; j++) + { + conc_c.push_back(a[d_flat_form_index[a][j]].eqNode(d_emptyString)); + } + Assert(!conc_c.empty()); + conc = utils::mkAnd(conc_c); + infType = FlatFormInfer::ENDPOINT_EMP; + Assert(count > 0); + break; + } + else + { + Node cc = d_flat_form[b][count]; + if (cc != curr) + { + Node bc = b[d_flat_form_index[b][count]]; + inelig.push_back(b); + Assert(!d_state.areEqual(curr, cc)); + Node cc_c = d_bsolver.getConstantEqc(cc); + if (!curr_c.isNull() && !cc_c.isNull()) + { + // check for constant conflict + int index; + Node s = TheoryStringsRewriter::splitConstant( + cc_c, curr_c, index, isRev); + if (s.isNull()) + { + d_bsolver.explainConstantEqc(ac,curr,exp); + d_bsolver.explainConstantEqc(bc,cc,exp); + conc = d_false; + infType = FlatFormInfer::CONST; + break; + } + } + else if ((d_flat_form[a].size() - 1) == count + && (d_flat_form[b].size() - 1) == count) + { + conc = ac.eqNode(bc); + infType = FlatFormInfer::ENDPOINT_EQ; + break; + } + else + { + // if lengths are the same, apply LengthEq + std::vector lexp2; + Node lcc = d_state.getLength(bc, lexp2); + if (d_state.areEqual(lcurr, lcc)) + { + if (Trace.isOn("strings-ff-debug")) + { + Trace("strings-ff-debug") + << "Infer " << ac << " == " << bc << " since " << lcurr + << " == " << lcc << std::endl; + Trace("strings-ff-debug") + << "Explanation for " << lcurr << " is "; + for (size_t j = 0; j < lexp.size(); j++) + { + Trace("strings-ff-debug") << lexp[j] << std::endl; + } + Trace("strings-ff-debug") + << "Explanation for " << lcc << " is "; + for (size_t j = 0; j < lexp2.size(); j++) + { + Trace("strings-ff-debug") << lexp2[j] << std::endl; + } + } + + exp.insert(exp.end(), lexp.begin(), lexp.end()); + exp.insert(exp.end(), lexp2.begin(), lexp2.end()); + d_im.addToExplanation(lcurr, lcc, exp); + conc = ac.eqNode(bc); + infType = FlatFormInfer::UNIFY; + break; + } + } + } + } + } + } + if (!conc.isNull()) + { + Trace("strings-ff-debug") << "Found inference (" << infType + << "): " << conc << " based on equality " << a + << " == " << b << ", " << isRev << std::endl; + d_im.addToExplanation(a, b, exp); + // explain why prefixes up to now were the same + for (size_t j = 0; j < count; j++) + { + Trace("strings-ff-debug") << "Add at " << d_flat_form_index[a][j] << " " + << d_flat_form_index[b][j] << std::endl; + d_im.addToExplanation( + a[d_flat_form_index[a][j]], b[d_flat_form_index[b][j]], exp); + } + // explain why other components up to now are empty + for (unsigned t = 0; t < 2; t++) + { + Node c = t == 0 ? a : b; + ssize_t jj; + if (infType == FlatFormInfer::ENDPOINT_EQ + || (t == 1 && infType == FlatFormInfer::ENDPOINT_EMP)) + { + // explain all the empty components for F_EndpointEq, all for + // the short end for F_EndpointEmp + jj = isRev ? -1 : c.getNumChildren(); + } + else + { + jj = t == 0 ? d_flat_form_index[a][count] + : d_flat_form_index[b][count]; + } + ssize_t startj = isRev ? jj + 1 : 0; + ssize_t endj = isRev ? c.getNumChildren() : jj; + for (ssize_t j = startj; j < endj; j++) + { + if (d_state.areEqual(c[j], d_emptyString)) + { + d_im.addToExplanation(c[j], d_emptyString, exp); + } + } + } + // Notice that F_EndpointEmp is not typically applied, since + // strict prefix equality ( a.b = a ) where a,b non-empty + // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) + // when len(b)!=0. Although if we do not infer this conflict eagerly, + // it may be applied (see #3272). + std::stringstream ss; + ss << infType; + d_im.sendInference(exp, conc, ss.str().c_str()); + if (d_state.isInConflict()) + { + return; + } + break; + } + count++; + } while (inelig.size() < eqc.size()); +} + +Node CoreSolver::checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ){ + if( std::find( curr.begin(), curr.end(), eqc )!=curr.end() ){ + // a loop + return eqc; + }else if( std::find( d_strings_eqc.begin(), d_strings_eqc.end(), eqc )==d_strings_eqc.end() ){ + curr.push_back( eqc ); + //look at all terms in this equivalence class + eq::EqualityEngine* ee = d_state.getEqualityEngine(); + eq::EqClassIterator eqc_i = eq::EqClassIterator( eqc, ee ); + while( !eqc_i.isFinished() ) { + Node n = (*eqc_i); + if( !d_bsolver.isCongruent(n) ){ + if( n.getKind() == kind::STRING_CONCAT ){ + Trace("strings-cycle") << eqc << " check term : " << n << " in " << eqc << std::endl; + if( eqc!=d_emptyString ){ + d_eqc[eqc].push_back( n ); + } + for( unsigned i=0; i exp; + exp.push_back( n.eqNode( d_emptyString ) ); + d_im.sendInference( + exp, n[i].eqNode(d_emptyString), "I_CYCLE_E"); + return Node::null(); + } + }else{ + if( nr!=d_emptyString ){ + d_flat_form[n].push_back( nr ); + d_flat_form_index[n].push_back( i ); + } + //for non-empty eqc, recurse and see if we find a loop + Node ncy = checkCycles( nr, curr, exp ); + if( !ncy.isNull() ){ + Trace("strings-cycle") << eqc << " cycle: " << ncy << " at " << n << "[" << i << "] : " << n[i] << std::endl; + d_im.addToExplanation(n, eqc, exp); + d_im.addToExplanation(nr, n[i], exp); + if( ncy==eqc ){ + //can infer all other components must be empty + for( unsigned j=0; j nf_to_eqc; + std::map eqc_to_nf; + std::map eqc_to_exp; + for (const Node& eqc : d_strings_eqc) + { + Trace("strings-process-debug") << "- Verify normal forms are the same for " + << eqc << std::endl; + normalizeEquivalenceClass(eqc); + Trace("strings-debug") << "Finished normalizing eqc..." << std::endl; + if (d_im.hasProcessed()) + { + return; + } + NormalForm& nfe = getNormalForm(eqc); + Node nf_term = utils::mkNConcat(nfe.d_nf); + std::map::iterator itn = nf_to_eqc.find(nf_term); + if (itn != nf_to_eqc.end()) + { + NormalForm& nfe_eq = getNormalForm(itn->second); + // two equivalence classes have same normal form, merge + std::vector nf_exp; + nf_exp.push_back(utils::mkAnd(nfe.d_exp)); + nf_exp.push_back(eqc_to_exp[itn->second]); + Node eq = nfe.d_base.eqNode(nfe_eq.d_base); + d_im.sendInference(nf_exp, eq, "Normal_Form"); + if (d_im.hasProcessed()) + { + return; + } + } + else + { + nf_to_eqc[nf_term] = eqc; + eqc_to_nf[eqc] = nf_term; + eqc_to_exp[eqc] = utils::mkAnd(nfe.d_exp); + } + Trace("strings-process-debug") + << "Done verifying normal forms are the same for " << eqc << std::endl; + } + if (Trace.isOn("strings-nf")) + { + Trace("strings-nf") << "**** Normal forms are : " << std::endl; + for (std::map::iterator it = eqc_to_exp.begin(); + it != eqc_to_exp.end(); + ++it) + { + NormalForm& nf = getNormalForm(it->first); + Trace("strings-nf") << " N[" << it->first << "] (base " << nf.d_base + << ") = " << eqc_to_nf[it->first] << std::endl; + Trace("strings-nf") << " exp: " << it->second << std::endl; + } + Trace("strings-nf") << std::endl; + } +} + +//compute d_normal_forms_(base,exp,exp_depend)[eqc] +void CoreSolver::normalizeEquivalenceClass( Node eqc ) { + Trace("strings-process-debug") << "Process equivalence class " << eqc << std::endl; + if (d_state.areEqual(eqc, d_emptyString)) + { +#ifdef CVC4_ASSERTIONS + for( unsigned j=0; j normal_forms; + // map each term to its index in the above vector + std::map term_to_nf_index; + // get the normal forms + getNormalForms(eqc, normal_forms, term_to_nf_index); + if (d_im.hasProcessed()) + { + return; + } + // process the normal forms + processNEqc(normal_forms); + if (d_im.hasProcessed()) + { + return; + } + + //construct the normal form + Assert(!normal_forms.empty()); + unsigned nf_index = 0; + std::map::iterator it = term_to_nf_index.find(eqc); + // we prefer taking the normal form whose base is the equivalence + // class representative, since this leads to shorter explanations in + // some cases. + if (it != term_to_nf_index.end()) + { + nf_index = it->second; + } + d_normal_form[eqc] = normal_forms[nf_index]; + Trace("strings-process-debug") + << "Return process equivalence class " << eqc + << " : returned, size = " << d_normal_form[eqc].d_nf.size() + << std::endl; + } +} + +NormalForm& CoreSolver::getNormalForm(Node n) +{ + std::map::iterator itn = d_normal_form.find(n); + if (itn == d_normal_form.end()) + { + Trace("strings-warn") << "WARNING: returning empty normal form for " << n + << std::endl; + // Shouln't ask for normal forms of strings that weren't computed. This + // likely means that n is not a representative or not a term in the current + // context. We simply return a default normal form here in this case. + Assert(false); + return d_normal_form[n]; + } + return itn->second; +} + +Node CoreSolver::getNormalString(Node x, std::vector& nf_exp) +{ + if (!x.isConst()) + { + Node xr = d_state.getRepresentative(x); + std::map::iterator it = d_normal_form.find(xr); + if (it != d_normal_form.end()) + { + NormalForm& nf = it->second; + Node ret = utils::mkNConcat(nf.d_nf); + nf_exp.insert(nf_exp.end(), nf.d_exp.begin(), nf.d_exp.end()); + d_im.addToExplanation(x, nf.d_base, nf_exp); + Trace("strings-debug") + << "Term: " << x << " has a normal form " << ret << std::endl; + return ret; + } + // if x does not have a normal form, then it should not occur in the + // equality engine and hence should be its own representative. + Assert(xr == x); + if (x.getKind() == kind::STRING_CONCAT) + { + std::vector vec_nodes; + for (unsigned i = 0; i < x.getNumChildren(); i++) + { + Node nc = getNormalString(x[i], nf_exp); + vec_nodes.push_back(nc); + } + return utils::mkNConcat(vec_nodes); + } + } + return x; +} + +void CoreSolver::getNormalForms(Node eqc, + std::vector& normal_forms, + std::map& term_to_nf_index) +{ + //constant for equivalence class + Node eqc_non_c = eqc; + Trace("strings-process-debug") << "Get normal forms " << eqc << std::endl; + eq::EqualityEngine* ee = d_state.getEqualityEngine(); + eq::EqClassIterator eqc_i = eq::EqClassIterator( eqc, ee ); + while( !eqc_i.isFinished() ){ + Node n = (*eqc_i); + if( !d_bsolver.isCongruent(n) ){ + if (n.getKind() == CONST_STRING || n.getKind() == STRING_CONCAT) + { + Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl; + NormalForm nf_curr; + if (n.getKind() == CONST_STRING) + { + nf_curr.init(n); + } + else if (n.getKind() == STRING_CONCAT) + { + // set the base to n, we construct the other portions of nf_curr in + // the following. + nf_curr.d_base = n; + for( unsigned i=0; igetRepresentative( n[i] ); + // get the normal form for the component + NormalForm& nfr = getNormalForm(nr); + std::vector& nfrv = nfr.d_nf; + Trace("strings-process-debug") << "Normalizing subterm " << n[i] << " = " << nr << std::endl; + unsigned orig_size = nf_curr.d_nf.size(); + unsigned add_size = nfrv.size(); + //if not the empty string, add to current normal form + if (!nfrv.empty()) + { + // if in a build with assertions, we run the following block, + // which checks that normal forms do not have concat terms. + if (Configuration::isAssertionBuild()) + { + for (const Node& nn : nfrv) + { + if (Trace.isOn("strings-error")) + { + if (nn.getKind() == STRING_CONCAT) + { + Trace("strings-error") + << "Strings::Error: From eqc = " << eqc << ", " << n + << " index " << i << ", bad normal form : "; + for (unsigned rr = 0; rr < nfrv.size(); rr++) + { + Trace("strings-error") << nfrv[rr] << " "; + } + Trace("strings-error") << std::endl; + } + } + Assert(nn.getKind() != kind::STRING_CONCAT); + } + } + nf_curr.d_nf.insert(nf_curr.d_nf.end(), nfrv.begin(), nfrv.end()); + } + // Track explanation for the normal form. This is in two parts. + // First, we must carry the explanation of the normal form computed + // for the representative nr. + for (const Node& exp : nfr.d_exp) + { + // The explanation is only relevant for the subsegment it was + // previously relevant for, shifted now based on its relative + // placement in the normal form of n. + nf_curr.addToExplanation( + exp, + orig_size + nfr.d_expDep[exp][false], + orig_size + (add_size - nfr.d_expDep[exp][true])); + } + // Second, must explain that the component n[i] is equal to the + // base of the normal form for nr. + Node base = nfr.d_base; + if (base != n[i]) + { + Node eq = n[i].eqNode(base); + // The equality is relevant for the entire current segment + nf_curr.addToExplanation(eq, orig_size, orig_size + add_size); + } + } + // Now that we are finished with the loop, we convert forward indices + // to reverse indices in the explanation dependency information + int total_size = nf_curr.d_nf.size(); + for (std::pair >& ed : + nf_curr.d_expDep) + { + ed.second[true] = total_size - ed.second[true]; + Assert(ed.second[true] >= 0); + } + } + //if not equal to self + std::vector& currv = nf_curr.d_nf; + if (currv.size() > 1 + || (currv.size() == 1 && currv[0].getKind() == CONST_STRING)) + { + // if in a build with assertions, check that normal form is acyclic + if (Configuration::isAssertionBuild()) + { + if (currv.size() > 1) + { + for (unsigned i = 0; i < currv.size(); i++) + { + if (Trace.isOn("strings-error")) + { + Trace("strings-error") << "Cycle for normal form "; + utils::printConcatTrace(currv, "strings-error"); + Trace("strings-error") << "..." << currv[i] << std::endl; + } + Assert(!d_state.areEqual(currv[i], n)); + } + } + } + term_to_nf_index[n] = normal_forms.size(); + normal_forms.push_back(nf_curr); + }else{ + //this was redundant: combination of self + empty string(s) + Node nn = currv.size() == 0 ? d_emptyString : currv[0]; + Assert(d_state.areEqual(nn, eqc)); + } + }else{ + eqc_non_c = n; + } + } + ++eqc_i; + } + + if( normal_forms.empty() ) { + Trace("strings-solve-debug2") << "construct the normal form" << std::endl; + // This case happens when there are no non-trivial normal forms for this + // equivalence class. For example, given assertions: + // { x = y ++ z, x = y, z = "" } + // The equivalence class of { x, y, y ++ z } is such that the normal form + // of all terms is a variable (either x or y) in the equivalence class + // itself. Thus, the normal form of this equivalence class can be assigned + // to one of these variables. + // We use a non-concatenation term among the terms in this equivalence + // class, which is stored in eqc_non_c. The reason is this does not require + // an explanation, whereas e.g. y ++ z would require the explanation z = "" + // to justify its normal form is y. + Assert(eqc_non_c.getKind() != STRING_CONCAT); + NormalForm nf_triv; + nf_triv.init(eqc_non_c); + normal_forms.push_back(nf_triv); + }else{ + if(Trace.isOn("strings-solve")) { + Trace("strings-solve") << "--- Normal forms for equivalance class " << eqc << " : " << std::endl; + for (unsigned i = 0, size = normal_forms.size(); i < size; i++) + { + NormalForm& nf = normal_forms[i]; + Trace("strings-solve") << "#" << i << " (from " << nf.d_base << ") : "; + for (unsigned j = 0, sizej = nf.d_nf.size(); j < sizej; j++) + { + if(j>0) { + Trace("strings-solve") << ", "; + } + Trace("strings-solve") << nf.d_nf[j]; + } + Trace("strings-solve") << std::endl; + Trace("strings-solve") << " Explanation is : "; + if (nf.d_exp.size() == 0) + { + Trace("strings-solve") << "NONE"; + } else { + for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) + { + if(j>0) { + Trace("strings-solve") << " AND "; + } + Trace("strings-solve") << nf.d_exp[j]; + } + Trace("strings-solve") << std::endl; + Trace("strings-solve") << "WITH DEPENDENCIES : " << std::endl; + for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) + { + Node exp = nf.d_exp[j]; + Trace("strings-solve") << " " << exp << " -> "; + Trace("strings-solve") << nf.d_expDep[exp][false] << ","; + Trace("strings-solve") << nf.d_expDep[exp][true] << std::endl; + } + } + Trace("strings-solve") << std::endl; + + } + } else { + Trace("strings-solve") << "--- Single normal form for equivalence class " << eqc << std::endl; + } + + //if equivalence class is constant, approximate as containment, infer conflicts + Node c = d_bsolver.getConstantEqc( eqc ); + if( !c.isNull() ){ + Trace("strings-solve") << "Eqc is constant " << c << std::endl; + for (unsigned i = 0, size = normal_forms.size(); i < size; i++) + { + NormalForm& nf = normal_forms[i]; + int firstc, lastc; + if (!TheoryStringsRewriter::canConstantContainList( + c, nf.d_nf, firstc, lastc)) + { + Node n = nf.d_base; + //conflict + Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl; + //conflict, explanation is n = base ^ base = c ^ relevant porition of ( n = N[n] ) + std::vector< Node > exp; + d_bsolver.explainConstantEqc(n,eqc,exp); + // Notice although not implemented, this can be minimized based on + // firstc/lastc, normal_forms_exp_depend. + exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end()); + Node conc = d_false; + d_im.sendInference(exp, conc, "N_NCTN"); + } + } + } + } +} + +void CoreSolver::processNEqc(std::vector& normal_forms) +{ + //the possible inferences + std::vector< InferInfo > pinfer; + // loop over all pairs + for(unsigned i=0; i max_index)) + { + min_id = pinfer[i].d_id; + max_index = pinfer[i].d_index; + use_index = i; + set_use_index = true; + } + } + Trace("strings-solve") << "...choose #" << use_index << std::endl; + doInferInfo(pinfer[use_index]); +} + +void CoreSolver::processSimpleNEq(NormalForm& nfi, + NormalForm& nfj, + unsigned& index, + bool isRev, + unsigned rproc, + std::vector& pinfer) +{ + std::vector& nfiv = nfi.d_nf; + std::vector& nfjv = nfj.d_nf; + NodeManager* nm = NodeManager::currentNM(); + eq::EqualityEngine* ee = d_state.getEqualityEngine(); + Assert(rproc <= nfiv.size() && rproc <= nfjv.size()); + bool success; + do { + success = false; + //if we are at the end + if (index == (nfiv.size() - rproc) || index == (nfjv.size() - rproc)) + { + if (index == (nfiv.size() - rproc) && index == (nfjv.size() - rproc)) + { + //we're done + }else{ + //the remainder must be empty + NormalForm& nfk = index == (nfiv.size() - rproc) ? nfj : nfi; + std::vector& nfkv = nfk.d_nf; + unsigned index_k = index; + std::vector< Node > curr_exp; + NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, curr_exp); + while (!d_state.isInConflict() && index_k < (nfkv.size() - rproc)) + { + //can infer that this string must be empty + Node eq = nfkv[index_k].eqNode(d_emptyString); + //Trace("strings-lemma") << "Strings: Infer " << eq << " from " << eq_exp << std::endl; + Assert(!d_state.areEqual(d_emptyString, nfkv[index_k])); + d_im.sendInference(curr_exp, eq, "N_EndpointEmp"); + index_k++; + } + } + }else{ + Trace("strings-solve-debug") + << "Process " << nfiv[index] << " ... " << nfjv[index] << std::endl; + if (nfiv[index] == nfjv[index]) + { + Trace("strings-solve-debug") << "Simple Case 1 : strings are equal" << std::endl; + index++; + success = true; + }else{ + Assert(!d_state.areEqual(nfiv[index], nfjv[index])); + std::vector< Node > temp_exp; + Node length_term_i = d_state.getLength(nfiv[index], temp_exp); + Node length_term_j = d_state.getLength(nfjv[index], temp_exp); + // check length(nfiv[index]) == length(nfjv[index]) + if (d_state.areEqual(length_term_i, length_term_j)) + { + Trace("strings-solve-debug") << "Simple Case 2 : string lengths are equal" << std::endl; + Node eq = nfiv[index].eqNode(nfjv[index]); + //eq = Rewriter::rewrite( eq ); + Node length_eq = length_term_i.eqNode( length_term_j ); + //temp_exp.insert(temp_exp.end(), curr_exp.begin(), curr_exp.end() ); + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, temp_exp); + temp_exp.push_back(length_eq); + d_im.sendInference(temp_exp, eq, "N_Unify"); + return; + } + else if ((nfiv[index].getKind() != CONST_STRING + && index == nfiv.size() - rproc - 1) + || (nfjv[index].getKind() != CONST_STRING + && index == nfjv.size() - rproc - 1)) + { + Trace("strings-solve-debug") << "Simple Case 3 : at endpoint" << std::endl; + std::vector< Node > antec; + //antec.insert(antec.end(), curr_exp.begin(), curr_exp.end() ); + NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec); + std::vector< Node > eqn; + for( unsigned r=0; r<2; r++ ) { + NormalForm& nfk = r == 0 ? nfi : nfj; + std::vector& nfkv = nfk.d_nf; + std::vector< Node > eqnc; + for (unsigned index_l = index, size = (nfkv.size() - rproc); + index_l < size; + index_l++) + { + if(isRev) { + eqnc.insert(eqnc.begin(), nfkv[index_l]); + } else { + eqnc.push_back(nfkv[index_l]); + } + } + eqn.push_back(utils::mkNConcat(eqnc)); + } + if (!d_state.areEqual(eqn[0], eqn[1])) + { + d_im.sendInference( + antec, eqn[0].eqNode(eqn[1]), "N_EndpointEq", true); + return; + } + else + { + Assert(nfiv.size() == nfjv.size()); + index = nfiv.size() - rproc; + } + } + else if (nfiv[index].isConst() && nfjv[index].isConst()) + { + Node const_str = nfiv[index]; + Node other_str = nfjv[index]; + Trace("strings-solve-debug") << "Simple Case 3 : Const Split : " << const_str << " vs " << other_str << " at index " << index << ", isRev = " << isRev << std::endl; + unsigned len_short = const_str.getConst().size() <= other_str.getConst().size() ? const_str.getConst().size() : other_str.getConst().size(); + bool isSameFix = isRev ? const_str.getConst().rstrncmp(other_str.getConst(), len_short): const_str.getConst().strncmp(other_str.getConst(), len_short); + if( isSameFix ) { + //same prefix/suffix + bool constCmp = const_str.getConst().size() + < other_str.getConst().size(); + //k is the index of the string that is shorter + NormalForm& nfk = constCmp ? nfi : nfj; + std::vector& nfkv = nfk.d_nf; + NormalForm& nfl = constCmp ? nfj : nfi; + std::vector& nflv = nfl.d_nf; + Node remainderStr; + if( isRev ){ + int new_len = nflv[index].getConst().size() - len_short; + remainderStr = nm->mkConst( + nflv[index].getConst().substr(0, new_len)); + }else{ + remainderStr = + nm->mkConst(nflv[index].getConst().substr(len_short)); + } + Trace("strings-solve-debug-test") + << "Break normal form of " << nflv[index] << " into " + << nfkv[index] << ", " << remainderStr << std::endl; + nfl.splitConstant(index, nfkv[index], remainderStr); + index++; + success = true; + }else{ + //conflict + std::vector< Node > antec; + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, antec); + d_im.sendInference(antec, d_false, "N_Const", true); + return; + } + }else{ + //construct the candidate inference "info" + InferInfo info; + info.d_index = index; + //for debugging + info.d_i = nfi.d_base; + info.d_j = nfj.d_base; + info.d_rev = isRev; + bool info_valid = false; + Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc); + std::vector< Node > lexp; + Node length_term_i = d_state.getLength(nfiv[index], lexp); + Node length_term_j = d_state.getLength(nfjv[index], lexp); + //split on equality between string lengths (note that splitting on equality between strings is worse since it is harder to process) + if (!d_state.areDisequal(length_term_i, length_term_j) + && !d_state.areEqual(length_term_i, length_term_j) + && !nfiv[index].isConst() && !nfjv[index].isConst()) + { // AJR: remove the latter 2 conditions? + Trace("strings-solve-debug") << "Non-simple Case 1 : string lengths neither equal nor disequal" << std::endl; + //try to make the lengths equal via splitting on demand + Node length_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j ); + length_eq = Rewriter::rewrite( length_eq ); + //set info + info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, length_eq, length_eq.negate() ); + info.d_pending_phase[ length_eq ] = true; + info.d_id = INFER_LEN_SPLIT; + info_valid = true; + }else{ + Trace("strings-solve-debug") << "Non-simple Case 2 : must compare strings" << std::endl; + int loop_in_i = -1; + int loop_in_j = -1; + ProcessLoopResult plr = ProcessLoopResult::SKIPPED; + if (detectLoop(nfi, nfj, index, loop_in_i, loop_in_j, rproc)) + { + if( !isRev ){ //FIXME + NormalForm::getExplanationForPrefixEq( + nfi, nfj, -1, -1, info.d_ant); + // set info + plr = processLoop(loop_in_i != -1 ? nfi : nfj, + loop_in_i != -1 ? nfj : nfi, + loop_in_i != -1 ? loop_in_i : loop_in_j, + index, + info); + if (plr == ProcessLoopResult::INFERENCE) + { + info_valid = true; + } + } + } + + if (plr == ProcessLoopResult::SKIPPED) + { + //AJR: length entailment here? + if (nfiv[index].isConst() || nfjv[index].isConst()) + { + NormalForm& nfc = nfiv[index].isConst() ? nfi : nfj; + std::vector& nfcv = nfc.d_nf; + NormalForm& nfnc = nfiv[index].isConst() ? nfj : nfi; + std::vector& nfncv = nfnc.d_nf; + Node other_str = nfncv[index]; + Assert(other_str.getKind() != kind::CONST_STRING) + << "Other string is not constant."; + Assert(other_str.getKind() != kind::STRING_CONCAT) + << "Other string is not CONCAT."; + if( !ee->areDisequal( other_str, d_emptyString, true ) ){ + Node eq = other_str.eqNode( d_emptyString ); + eq = Rewriter::rewrite(eq); + if (eq.isConst()) + { + // If the equality rewrites to a constant, we must use the + // purify variable for this string to communicate that + // we have inferred whether it is empty. + Node p = d_skCache.mkSkolemCached( + other_str, SkolemCache::SK_PURIFY, "lsym"); + Node pEq = p.eqNode(d_emptyString); + // should not be constant + Assert(!Rewriter::rewrite(pEq).isConst()); + // infer the purification equality, and the (dis)equality + // with the empty string in the direction that the rewriter + // inferred + info.d_conc = + nm->mkNode(AND, + p.eqNode(other_str), + !eq.getConst() ? pEq.negate() : pEq); + info.d_id = INFER_INFER_EMP; + } + else + { + info.d_conc = nm->mkNode(OR, eq, eq.negate()); + info.d_id = INFER_LEN_SPLIT_EMP; + } + //set info + info_valid = true; + }else{ + if( !isRev ){ //FIXME + Node xnz = other_str.eqNode( d_emptyString ).negate(); + unsigned index_nc_k = index+1; + unsigned start_index_nc_k = index+1; + Node next_const_str = + TheoryStringsRewriter::getNextConstantAt( + nfncv, start_index_nc_k, index_nc_k, false); + if( !next_const_str.isNull() ) { + unsigned index_c_k = index; + Node const_str = + TheoryStringsRewriter::collectConstantStringAt( + nfcv, index_c_k, false); + Assert(!const_str.isNull()); + CVC4::String stra = const_str.getConst(); + CVC4::String strb = next_const_str.getConst(); + //since non-empty, we start with charecter #1 + size_t p; + if( isRev ){ + CVC4::String stra1 = stra.prefix( stra.size()-1 ); + p = stra.size() - stra1.roverlap(strb); + Trace("strings-csp-debug") << "Compute roverlap : " << const_str << " " << next_const_str << std::endl; + size_t p2 = stra1.rfind(strb); + p = p2==std::string::npos ? p : ( p>p2+1? p2+1 : p ); + Trace("strings-csp-debug") << "overlap : " << stra1 << " " << strb << " returned " << p << " " << p2 << " " << (p2==std::string::npos) << std::endl; + }else{ + CVC4::String stra1 = stra.substr( 1 ); + p = stra.size() - stra1.overlap(strb); + Trace("strings-csp-debug") << "Compute overlap : " << const_str << " " << next_const_str << std::endl; + size_t p2 = stra1.find(strb); + p = p2==std::string::npos ? p : ( p>p2+1? p2+1 : p ); + Trace("strings-csp-debug") << "overlap : " << stra1 << " " << strb << " returned " << p << " " << p2 << " " << (p2==std::string::npos) << std::endl; + } + if( p>1 ){ + if( start_index_nc_k==index+1 ){ + info.d_ant.push_back(xnz); + NormalForm::getExplanationForPrefixEq( + nfc, nfnc, index_c_k, index_nc_k, info.d_ant); + Node prea = p==stra.size() ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( p ) : stra.prefix( p ) ); + Node sk = d_skCache.mkSkolemCached( + other_str, + prea, + isRev ? SkolemCache::SK_ID_C_SPT_REV + : SkolemCache::SK_ID_C_SPT, + "c_spt"); + Trace("strings-csp") << "Const Split: " << prea << " is removed from " << stra << " due to " << strb << ", p=" << p << std::endl; + //set info + info.d_conc = other_str.eqNode( + isRev ? utils::mkNConcat(sk, prea) + : utils::mkNConcat(prea, sk)); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); + info.d_id = INFER_SSPLIT_CST_PROP; + info_valid = true; + } + } + } + if( !info_valid ){ + info.d_ant.push_back( xnz ); + Node const_str = nfcv[index]; + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, info.d_ant); + CVC4::String stra = const_str.getConst(); + if( options::stringBinaryCsp() && stra.size()>3 ){ + //split string in half + Node c_firstHalf = NodeManager::currentNM()->mkConst( isRev ? stra.substr( stra.size()/2 ) : stra.substr(0, stra.size()/2 ) ); + Node sk = d_skCache.mkSkolemCached( + other_str, + c_firstHalf, + isRev ? SkolemCache::SK_ID_VC_BIN_SPT_REV + : SkolemCache::SK_ID_VC_BIN_SPT, + "cb_spt"); + Trace("strings-csp") << "Const Split: " << c_firstHalf << " is removed from " << const_str << " (binary) " << std::endl; + info.d_conc = nm->mkNode( + OR, + other_str.eqNode( + isRev ? utils::mkNConcat(sk, c_firstHalf) + : utils::mkNConcat(c_firstHalf, sk)), + nm->mkNode( + AND, + sk.eqNode(d_emptyString).negate(), + c_firstHalf.eqNode( + isRev ? utils::mkNConcat(sk, other_str) + : utils::mkNConcat(other_str, sk)))); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); + info.d_id = INFER_SSPLIT_CST_BINARY; + info_valid = true; + }else{ + // normal v/c split + Node firstChar = stra.size() == 1 ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( 1 ) : stra.prefix( 1 ) ); + Node sk = d_skCache.mkSkolemCached( + other_str, + isRev ? SkolemCache::SK_ID_VC_SPT_REV + : SkolemCache::SK_ID_VC_SPT, + "c_spt"); + Trace("strings-csp") << "Const Split: " << firstChar << " is removed from " << const_str << " (serial) " << std::endl; + info.d_conc = other_str.eqNode( + isRev ? utils::mkNConcat(sk, firstChar) + : utils::mkNConcat(firstChar, sk)); + info.d_new_skolem[LENGTH_SPLIT].push_back(sk); + info.d_id = INFER_SSPLIT_CST; + info_valid = true; + } + } + } + } + }else{ + int lentTestSuccess = -1; + Node lentTestExp; + if( options::stringCheckEntailLen() ){ + //check entailment + for( unsigned e=0; e<2; e++ ){ + Node t = e == 0 ? nfiv[index] : nfjv[index]; + //do not infer constants are larger than variables + if( t.getKind()!=kind::CONST_STRING ){ + Node lt1 = e==0 ? length_term_i : length_term_j; + Node lt2 = e==0 ? length_term_j : length_term_i; + Node ent_lit = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, lt1, lt2 ) ); + std::pair et = d_state.entailmentCheck( + options::TheoryOfMode::THEORY_OF_TYPE_BASED, ent_lit); + if( et.first ){ + Trace("strings-entail") << "Strings entailment : " << ent_lit << " is entailed in the current context." << std::endl; + Trace("strings-entail") << " explanation was : " << et.second << std::endl; + lentTestSuccess = e; + lentTestExp = et.second; + break; + } + } + } + } + + NormalForm::getExplanationForPrefixEq( + nfi, nfj, index, index, info.d_ant); + //x!=e /\ y!=e + for(unsigned xory=0; xory<2; xory++) { + Node x = xory == 0 ? nfiv[index] : nfjv[index]; + Node xgtz = x.eqNode( d_emptyString ).negate(); + if( ee->areDisequal( x, d_emptyString, true ) ) { + info.d_ant.push_back( xgtz ); + } else { + info.d_antn.push_back( xgtz ); + } + } + Node sk = d_skCache.mkSkolemCached( + nfiv[index], + nfjv[index], + isRev ? SkolemCache::SK_ID_V_SPT_REV + : SkolemCache::SK_ID_V_SPT, + "v_spt"); + // must add length requirement + info.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); + Node eq1 = nfiv[index].eqNode( + isRev ? utils::mkNConcat(sk, nfjv[index]) + : utils::mkNConcat(nfjv[index], sk)); + Node eq2 = nfjv[index].eqNode( + isRev ? utils::mkNConcat(sk, nfiv[index]) + : utils::mkNConcat(nfiv[index], sk)); + + if( lentTestSuccess!=-1 ){ + info.d_antn.push_back( lentTestExp ); + info.d_conc = lentTestSuccess==0 ? eq1 : eq2; + info.d_id = INFER_SSPLIT_VAR_PROP; + info_valid = true; + }else{ + Node ldeq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j ).negate(); + if( ee->areDisequal( length_term_i, length_term_j, true ) ){ + info.d_ant.push_back( ldeq ); + }else{ + info.d_antn.push_back(ldeq); + } + //set info + info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, eq1, eq2 ); + info.d_id = INFER_SSPLIT_VAR; + info_valid = true; + } + } + } + } + if( info_valid ){ + pinfer.push_back( info ); + Assert(!success); + } + } + } + } + }while( success ); +} + +bool CoreSolver::detectLoop(NormalForm& nfi, + NormalForm& nfj, + int index, + int& loop_in_i, + int& loop_in_j, + unsigned rproc) +{ + int has_loop[2] = { -1, -1 }; + if( options::stringLB() != 2 ) { + for( unsigned r=0; r<2; r++ ) { + NormalForm& nf = r == 0 ? nfi : nfj; + NormalForm& nfo = r == 0 ? nfj : nfi; + std::vector& nfv = nf.d_nf; + std::vector& nfov = nfo.d_nf; + if (!nfov[index].isConst()) + { + for (unsigned lp = index + 1; lp < nfv.size() - rproc; lp++) + { + if (nfv[lp] == nfov[index]) + { + has_loop[r] = lp; + break; + } + } + } + } + } + if( has_loop[0]!=-1 || has_loop[1]!=-1 ) { + loop_in_i = has_loop[0]; + loop_in_j = has_loop[1]; + return true; + } else { + Trace("strings-solve-debug") << "No loops detected." << std::endl; + return false; + } +} + +//xs(zy)=t(yz)xr +CoreSolver::ProcessLoopResult CoreSolver::processLoop(NormalForm& nfi, + NormalForm& nfj, + int loop_index, + int index, + InferInfo& info) +{ + if (options::stringProcessLoopMode() == options::ProcessLoopMode::ABORT) + { + throw LogicException("Looping word equation encountered."); + } + else if (options::stringProcessLoopMode() == options::ProcessLoopMode::NONE) + { + d_im.setIncomplete(); + return ProcessLoopResult::SKIPPED; + } + + NodeManager* nm = NodeManager::currentNM(); + Node conc; + const std::vector& veci = nfi.d_nf; + const std::vector& vecoi = nfj.d_nf; + + Trace("strings-loop") << "Detected possible loop for " << veci[loop_index] + << std::endl; + Trace("strings-loop") << " ... (X)= " << vecoi[index] << std::endl; + Trace("strings-loop") << " ... T(Y.Z)= "; + std::vector vec_t(veci.begin() + index, veci.begin() + loop_index); + Node t_yz = utils::mkNConcat(vec_t); + Trace("strings-loop") << " (" << t_yz << ")" << std::endl; + Trace("strings-loop") << " ... S(Z.Y)= "; + std::vector vec_s(vecoi.begin() + index + 1, vecoi.end()); + Node s_zy = utils::mkNConcat(vec_s); + Trace("strings-loop") << s_zy << std::endl; + Trace("strings-loop") << " ... R= "; + std::vector vec_r(veci.begin() + loop_index + 1, veci.end()); + Node r = utils::mkNConcat(vec_r); + Trace("strings-loop") << r << std::endl; + + if (s_zy.isConst() && r.isConst() && r != d_emptyString) + { + int c; + bool flag = true; + if (s_zy.getConst().tailcmp(r.getConst(), c)) + { + if (c >= 0) + { + s_zy = nm->mkConst(s_zy.getConst().substr(0, c)); + r = d_emptyString; + vec_r.clear(); + Trace("strings-loop") << "Strings::Loop: Refactor S(Z.Y)= " << s_zy + << ", c=" << c << std::endl; + flag = false; + } + } + if (flag) + { + Trace("strings-loop") << "Strings::Loop: tails are different." + << std::endl; + d_im.sendInference(info.d_ant, conc, "Loop Conflict", true); + return ProcessLoopResult::CONFLICT; + } + } + + Node split_eq; + for (unsigned r = 0; r < 2; r++) + { + Node t = r == 0 ? veci[loop_index] : t_yz; + split_eq = t.eqNode(d_emptyString); + Node split_eqr = Rewriter::rewrite(split_eq); + // the equality could rewrite to false + if (!split_eqr.isConst()) + { + if (!d_state.areDisequal(t, d_emptyString)) + { + // try to make t equal to empty to avoid loop + info.d_conc = nm->mkNode(kind::OR, split_eq, split_eq.negate()); + info.d_id = INFER_LEN_SPLIT_EMP; + return ProcessLoopResult::INFERENCE; + } + else + { + info.d_ant.push_back(split_eq.negate()); + } + } + else + { + Assert(!split_eqr.getConst()); + } + } + + Node ant = d_im.mkExplain(info.d_ant); + info.d_ant.clear(); + info.d_antn.push_back(ant); + + Node str_in_re; + if (s_zy == t_yz && r == d_emptyString && s_zy.isConst() + && s_zy.getConst().isRepeated()) + { + Node rep_c = nm->mkConst(s_zy.getConst().substr(0, 1)); + Trace("strings-loop") << "Special case (X)=" << vecoi[index] << " " + << std::endl; + Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl; + // special case + str_in_re = nm->mkNode( + STRING_IN_REGEXP, + vecoi[index], + nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, rep_c))); + conc = str_in_re; + } + else if (t_yz.isConst()) + { + Trace("strings-loop") << "Strings::Loop: Const Normal Breaking." + << std::endl; + CVC4::String s = t_yz.getConst(); + unsigned size = s.size(); + std::vector vconc; + for (unsigned len = 1; len <= size; len++) + { + Node y = nm->mkConst(s.substr(0, len)); + Node z = nm->mkConst(s.substr(len, size - len)); + Node restr = s_zy; + Node cc; + if (r != d_emptyString) + { + std::vector v2(vec_r); + v2.insert(v2.begin(), y); + v2.insert(v2.begin(), z); + restr = utils::mkNConcat(z, y); + cc = Rewriter::rewrite(s_zy.eqNode(utils::mkNConcat(v2))); + } + else + { + cc = Rewriter::rewrite(s_zy.eqNode(utils::mkNConcat(z, y))); + } + if (cc == d_false) + { + continue; + } + Node conc2 = nm->mkNode( + STRING_IN_REGEXP, + vecoi[index], + nm->mkNode( + REGEXP_CONCAT, + nm->mkNode(STRING_TO_REGEXP, y), + nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, restr)))); + cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2); + vconc.push_back(cc); + } + conc = vconc.size() == 0 ? Node::null() : vconc.size() == 1 + ? vconc[0] + : nm->mkNode(kind::OR, vconc); + } + else + { + if (options::stringProcessLoopMode() + == options::ProcessLoopMode::SIMPLE_ABORT) + { + throw LogicException("Normal looping word equation encountered."); + } + else if (options::stringProcessLoopMode() + == options::ProcessLoopMode::SIMPLE) + { + d_im.setIncomplete(); + return ProcessLoopResult::SKIPPED; + } + + Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." + << std::endl; + // right + Node sk_w = d_skCache.mkSkolem("w_loop"); + Node sk_y = d_skCache.mkSkolem("y_loop"); + d_im.registerLength(sk_y, LENGTH_GEQ_ONE); + Node sk_z = d_skCache.mkSkolem("z_loop"); + // t1 * ... * tn = y * z + Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z)); + // s1 * ... * sk = z * y * r + vec_r.insert(vec_r.begin(), sk_y); + vec_r.insert(vec_r.begin(), sk_z); + Node conc2 = s_zy.eqNode(utils::mkNConcat(vec_r)); + Node conc3 = vecoi[index].eqNode(utils::mkNConcat(sk_y, sk_w)); + Node restr = r == d_emptyString ? s_zy : utils::mkNConcat(sk_z, sk_y); + str_in_re = + nm->mkNode(kind::STRING_IN_REGEXP, + sk_w, + nm->mkNode(kind::REGEXP_STAR, + nm->mkNode(kind::STRING_TO_REGEXP, restr))); + + std::vector vec_conc; + vec_conc.push_back(conc1); + vec_conc.push_back(conc2); + vec_conc.push_back(conc3); + vec_conc.push_back(str_in_re); + // vec_conc.push_back(sk_y.eqNode(d_emptyString).negate());//by mkskolems + conc = nm->mkNode(kind::AND, vec_conc); + } // normal case + + // we will be done + info.d_conc = conc; + info.d_id = INFER_FLOOP; + info.d_nf_pair[0] = nfi.d_base; + info.d_nf_pair[1] = nfj.d_base; + return ProcessLoopResult::INFERENCE; +} + +//return true for lemma, false if we succeed +void CoreSolver::processDeq( Node ni, Node nj ) { + NodeManager* nm = NodeManager::currentNM(); + NormalForm& nfni = getNormalForm(ni); + NormalForm& nfnj = getNormalForm(nj); + eq::EqualityEngine* ee = d_state.getEqualityEngine(); + if (nfni.d_nf.size() > 1 || nfnj.d_nf.size() > 1) + { + std::vector< Node > nfi; + nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); + std::vector< Node > nfj; + nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); + + int revRet = processReverseDeq( nfi, nfj, ni, nj ); + if( revRet!=0 ){ + return; + } + + nfi.clear(); + nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); + nfj.clear(); + nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); + + unsigned index = 0; + while( index lexp; + Node li = d_state.getLength(i, lexp); + Node lj = d_state.getLength(j, lexp); + if (d_state.areDisequal(li, lj)) + { + if( i.getKind()==kind::CONST_STRING || j.getKind()==kind::CONST_STRING ){ + //check if empty + Node const_k = i.getKind() == kind::CONST_STRING ? i : j; + Node nconst_k = i.getKind() == kind::CONST_STRING ? j : i; + Node lnck = i.getKind() == kind::CONST_STRING ? lj : li; + if( !ee->areDisequal( nconst_k, d_emptyString, true ) ){ + Node eq = nconst_k.eqNode( d_emptyString ); + Node conc = NodeManager::currentNM()->mkNode( kind::OR, eq, eq.negate() ); + d_im.sendInference(d_emptyVec, conc, "D-DISL-Emp-Split"); + return; + }else{ + //split on first character + CVC4::String str = const_k.getConst(); + Node firstChar = str.size() == 1 ? const_k : NodeManager::currentNM()->mkConst( str.prefix( 1 ) ); + if (d_state.areEqual(lnck, d_one)) + { + if (d_state.areDisequal(firstChar, nconst_k)) + { + return; + } + else if (!d_state.areEqual(firstChar, nconst_k)) + { + //splitting on demand : try to make them disequal + if (d_im.sendSplit( + firstChar, nconst_k, "S-Split(DEQL-Const)", false)) + { + return; + } + } + } + else + { + Node sk = d_skCache.mkSkolemCached( + nconst_k, SkolemCache::SK_ID_DC_SPT, "dc_spt"); + d_im.registerLength(sk, LENGTH_ONE); + Node skr = + d_skCache.mkSkolemCached(nconst_k, + SkolemCache::SK_ID_DC_SPT_REM, + "dc_spt_rem"); + Node eq1 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk, skr ) ); + eq1 = Rewriter::rewrite( eq1 ); + Node eq2 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, firstChar, skr ) ); + std::vector< Node > antec; + antec.insert( + antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + antec.insert( + antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); + antec.push_back( nconst_k.eqNode( d_emptyString ).negate() ); + d_im.sendInference( + antec, + nm->mkNode( + OR, + nm->mkNode(AND, eq1, sk.eqNode(firstChar).negate()), + eq2), + "D-DISL-CSplit"); + d_im.sendPhaseRequirement(eq1, true); + return; + } + } + }else{ + Trace("strings-solve") << "Non-Simple Case 1 : add lemma " << std::endl; + //must add lemma + std::vector< Node > antec; + std::vector< Node > antec_new_lits; + antec.insert(antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); + //check disequal + if (d_state.areDisequal(ni, nj)) + { + antec.push_back( ni.eqNode( nj ).negate() ); + } + else + { + antec_new_lits.push_back( ni.eqNode( nj ).negate() ); + } + antec_new_lits.push_back( li.eqNode( lj ).negate() ); + std::vector< Node > conc; + Node sk1 = d_skCache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); + Node sk2 = d_skCache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); + Node sk3 = d_skCache.mkSkolemCached( + i, j, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); + d_im.registerLength(sk3, LENGTH_GEQ_ONE); + //Node nemp = sk3.eqNode(d_emptyString).negate(); + //conc.push_back(nemp); + Node lsk1 = utils::mkNLength(sk1); + conc.push_back( lsk1.eqNode( li ) ); + Node lsk2 = utils::mkNLength(sk2); + conc.push_back( lsk2.eqNode( lj ) ); + conc.push_back(nm->mkNode(OR, + j.eqNode(utils::mkNConcat(sk1, sk3)), + i.eqNode(utils::mkNConcat(sk2, sk3)))); + d_im.sendInference( + antec, antec_new_lits, nm->mkNode(AND, conc), "D-DISL-Split"); + return; + } + } + else if (d_state.areEqual(li, lj)) + { + Assert(!d_state.areDisequal(i, j)); + //splitting on demand : try to make them disequal + if (d_im.sendSplit(i, j, "S-Split(DEQL)", false)) + { + return; + } + } + else + { + //splitting on demand : try to make lengths equal + if (d_im.sendSplit(li, lj, "D-Split")) + { + return; + } + } + } + index++; + } + } + Assert(false); + } +} + +int CoreSolver::processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ) { + //reverse normal form of i, j + std::reverse( nfi.begin(), nfi.end() ); + std::reverse( nfj.begin(), nfj.end() ); + + unsigned index = 0; + int ret = processSimpleDeq( nfi, nfj, ni, nj, index, true ); + + //reverse normal form of i, j + std::reverse( nfi.begin(), nfi.end() ); + std::reverse( nfj.begin(), nfj.end() ); + + return ret; +} + +int CoreSolver::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ){ + // See if one side is constant, if so, the disequality ni != nj is satisfied + // since ni does not contain nj or vice versa. + // This is only valid when isRev is false, since when isRev=true, the contents + // of normal form vectors nfi and nfj are reversed. + if (!isRev) + { + for (unsigned i = 0; i < 2; i++) + { + Node c = d_bsolver.getConstantEqc(i == 0 ? ni : nj); + if (!c.isNull()) + { + int findex, lindex; + if (!TheoryStringsRewriter::canConstantContainList( + c, i == 0 ? nfj : nfi, findex, lindex)) + { + Trace("strings-solve-debug") + << "Disequality: constant cannot contain list" << std::endl; + return 1; + } + } + } + } + NormalForm& nfni = getNormalForm(ni); + NormalForm& nfnj = getNormalForm(nj); + while( index=nfi.size() || index>=nfj.size() ){ + Trace("strings-solve-debug") << "Disequality normalize empty" << std::endl; + std::vector< Node > ant; + //we have a conflict : because the lengths are equal, the remainder needs to be empty, which will lead to a conflict + Node lni = d_state.getLengthExp(ni, ant, nfni.d_base); + Node lnj = d_state.getLengthExp(nj, ant, nfnj.d_base); + ant.push_back( lni.eqNode( lnj ) ); + ant.insert(ant.end(), nfni.d_exp.begin(), nfni.d_exp.end()); + ant.insert(ant.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); + std::vector< Node > cc; + std::vector< Node >& nfk = index>=nfi.size() ? nfj : nfi; + for( unsigned index_k=index; index_kmkNode( kind::AND, cc ); + conc = Rewriter::rewrite( conc ); + d_im.sendInference(ant, conc, "Disequality Normalize Empty", true); + return -1; + }else{ + Node i = nfi[index]; + Node j = nfj[index]; + Trace("strings-solve-debug") << "...Processing(QED) " << i << " " << j << std::endl; + if (!d_state.areEqual(i, j)) + { + if( i.getKind()==kind::CONST_STRING && j.getKind()==kind::CONST_STRING ) { + unsigned int len_short = i.getConst().size() < j.getConst().size() ? i.getConst().size() : j.getConst().size(); + bool isSameFix = isRev ? i.getConst().rstrncmp(j.getConst(), len_short): i.getConst().strncmp(j.getConst(), len_short); + if( isSameFix ) { + //same prefix/suffix + //k is the index of the string that is shorter + Node nk = i.getConst().size() < j.getConst().size() ? i : j; + Node nl = i.getConst().size() < j.getConst().size() ? j : i; + Node remainderStr; + if( isRev ){ + int new_len = nl.getConst().size() - len_short; + remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr(0, new_len) ); + Trace("strings-solve-debug-test") << "Rev. Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; + } else { + remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr( len_short ) ); + Trace("strings-solve-debug-test") << "Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; + } + if( i.getConst().size() < j.getConst().size() ) { + nfj.insert( nfj.begin() + index + 1, remainderStr ); + nfj[index] = nfi[index]; + } else { + nfi.insert( nfi.begin() + index + 1, remainderStr ); + nfi[index] = nfj[index]; + } + }else{ + return 1; + } + }else{ + std::vector< Node > lexp; + Node li = d_state.getLength(i, lexp); + Node lj = d_state.getLength(j, lexp); + if (d_state.areEqual(li, lj) && d_state.areDisequal(i, j)) + { + Trace("strings-solve") << "Simple Case 2 : found equal length disequal sub strings " << i << " " << j << std::endl; + //we are done: D-Remove + return 1; + } + else + { + return 0; + } + } + } + index++; + } + } + return 0; +} + +void CoreSolver::addNormalFormPair( Node n1, Node n2 ){ + if (n1>n2) + { + addNormalFormPair(n2,n1); + return; + } + if( !isNormalFormPair( n1, n2 ) ){ + int index = 0; + NodeIntMap::const_iterator it = d_nf_pairs.find( n1 ); + if( it!=d_nf_pairs.end() ){ + index = (*it).second; + } + d_nf_pairs[n1] = index + 1; + if( index<(int)d_nf_pairs_data[n1].size() ){ + d_nf_pairs_data[n1][index] = n2; + }else{ + d_nf_pairs_data[n1].push_back( n2 ); + } + Assert(isNormalFormPair(n1, n2)); + } else { + Trace("strings-nf-debug") << "Already a normal form pair " << n1 << " " << n2 << std::endl; + } +} + +bool CoreSolver::isNormalFormPair( Node n1, Node n2 ) { + if (n1>n2) + { + return isNormalFormPair(n2,n1); + } + //Trace("strings-debug") << "is normal form pair. " << n1 << " " << n2 << std::endl; + NodeIntMap::const_iterator it = d_nf_pairs.find( n1 ); + if( it!=d_nf_pairs.end() ){ + Assert(d_nf_pairs_data.find(n1) != d_nf_pairs_data.end()); + for( int i=0; i<(*it).second; i++ ){ + Assert(i < (int)d_nf_pairs_data[n1].size()); + if( d_nf_pairs_data[n1][i]==n2 ){ + return true; + } + } + } + return false; +} + +void CoreSolver::checkNormalFormsDeq() +{ + eq::EqualityEngine* ee = d_state.getEqualityEngine(); + std::vector< std::vector< Node > > cols; + std::vector< Node > lts; + std::map< Node, std::map< Node, bool > > processed; + + const context::CDList& deqs = d_state.getDisequalityList(); + + //for each pair of disequal strings, must determine whether their lengths are equal or disequal + for (const Node& eq : deqs) + { + Node n[2]; + for( unsigned i=0; i<2; i++ ){ + n[i] = ee->getRepresentative( eq[i] ); + } + if( processed[n[0]].find( n[1] )==processed[n[0]].end() ){ + processed[n[0]][n[1]] = true; + Node lt[2]; + for( unsigned i=0; i<2; i++ ){ + EqcInfo* ei = d_state.getOrMakeEqcInfo(n[i], false); + lt[i] = ei ? ei->d_lengthTerm : Node::null(); + if( lt[i].isNull() ){ + lt[i] = eq[i]; + } + lt[i] = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt[i] ); + } + if (!d_state.areEqual(lt[0], lt[1]) && !d_state.areDisequal(lt[0], lt[1])) + { + d_im.sendSplit(lt[0], lt[1], "DEQ-LENGTH-SP"); + } + } + } + + if (!d_im.hasProcessed()) + { + d_state.separateByLength(d_strings_eqc, cols, lts); + for( unsigned i=0; i 1 && !d_im.hasPendingLemma()) + { + if (Trace.isOn("strings-solve")) + { + Trace("strings-solve") << "- Verify disequalities are processed for " + << cols[i][0] << ", normal form : "; + utils::printConcatTrace(getNormalForm(cols[i][0]).d_nf, "strings-solve"); + Trace("strings-solve") + << "... #eql = " << cols[i].size() << std::endl; + } + //must ensure that normal forms are disequal + for( unsigned j=0; jd_lengthTerm : Node::null(); + if (lt.isNull()) + { + Trace("strings-process-debug") + << "No length term for eqc " << d_strings_eqc[i] << std::endl; + continue; + } + Node llt = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, lt); + // now, check if length normalization has occurred + if (ei->d_normalizedLength.get().isNull()) + { + Node nf = utils::mkNConcat(nfi.d_nf); + if (Trace.isOn("strings-process-debug")) + { + Trace("strings-process-debug") + << " normal form is " << nf << " from base " << nfi.d_base + << std::endl; + Trace("strings-process-debug") << " normal form exp is: " << std::endl; + for (const Node& exp : nfi.d_exp) + { + Trace("strings-process-debug") << " " << exp << std::endl; + } + } + + // if not, add the lemma + std::vector ant; + ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end()); + ant.push_back(nfi.d_base.eqNode(lt)); + Node lc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, nf); + Node lcr = Rewriter::rewrite(lc); + Trace("strings-process-debug") + << "Rewrote length " << lc << " to " << lcr << std::endl; + if (!d_state.areEqual(llt, lcr)) + { + Node eq = llt.eqNode(lcr); + ei->d_normalizedLength.set(eq); + d_im.sendInference(ant, eq, "LEN-NORM", true); + } + } + } +} + +void CoreSolver::doInferInfo(const InferInfo& ii) +{ + // send the inference + if (!ii.d_nf_pair[0].isNull()) + { + Assert(!ii.d_nf_pair[1].isNull()); + addNormalFormPair(ii.d_nf_pair[0], ii.d_nf_pair[1]); + } + // send the inference + d_im.sendInference(ii); + // Register the new skolems from this inference. We register them here + // (lazily), since the code above has now decided to use the inference + // at use_index that involves them. + for (const std::pair >& sks : + ii.d_new_skolem) + { + for (const Node& n : sks.second) + { + d_im.registerLength(n, sks.first); + } + } +} + + +}/* CVC4::theory::strings namespace */ +}/* CVC4::theory namespace */ +}/* CVC4 namespace */ diff --git a/src/theory/strings/core_solver.h b/src/theory/strings/core_solver.h new file mode 100644 index 000000000..d18f109b2 --- /dev/null +++ b/src/theory/strings/core_solver.h @@ -0,0 +1,377 @@ +/********************* */ +/*! \file theory_strings.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Core solver for the theory of strings, responsible for reasoning + ** string concatenation plus length constraints. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__THEORY__STRINGS__CORE_SOLVER_H +#define CVC4__THEORY__STRINGS__CORE_SOLVER_H + +#include "context/cdhashset.h" +#include "context/cdlist.h" +#include "theory/strings/base_solver.h" +#include "theory/strings/infer_info.h" +#include "theory/strings/inference_manager.h" +#include "theory/strings/normal_form.h" +#include "theory/strings/skolem_cache.h" +#include "theory/strings/solver_state.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +/** The core solver for the theory of strings + * + * This implements techniques for handling (dis)equalities involving + * string concatenation terms based on the procedure by Liang et al CAV 2014. + */ +class CoreSolver +{ + friend class InferenceManager; + typedef context::CDHashMap NodeIntMap; + + public: + CoreSolver(context::Context* c, + context::UserContext* u, + SolverState& s, + InferenceManager& im, + SkolemCache& skc, + BaseSolver& bs); + ~CoreSolver(); + + //-----------------------inference steps + /** check cycles + * + * This inference schema ensures that a containment ordering < over the + * string equivalence classes is acyclic. We define this ordering < such that + * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2 + * if there exists a ti whose flat form (see below) is [w1...sj...wk] for + * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat + * form components that do not constitute this chain, e.g. (w1...wk) \ sj + * in the flat form above, must be empty. + * + * For more details, see the inference S-Cycle in Liang et al CAV 2014. + */ + void checkCycles(); + /** check flat forms + * + * This applies an inference schema based on "flat forms". The flat form of a + * string term t is a vector of representative terms [r1, ..., rn] such that + * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to + * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of + * the equivalence class containing t1. For example, if t is y ++ z ++ z, + * E is { y = "", w = z }, and w is the representative of the equivalence + * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms + * in the same equivalence classes with flat forms [r1...rn] and [s1...sm]. + * We may infer various facts based on this pair of terms. For example: + * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si), + * rn = sn, if n=m and rj == sj for each j < n, + * ri = empty, if n=m+1 and ri == rj for each i=1,...,m. + * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences + * respectively. + * + * Notice that this inference scheme is an optimization and not needed for + * model-soundness. The motivation for this schema is that it is simpler than + * checkNormalFormsEq, which can be seen as a recursive version of this + * schema (see difference of "normal form" vs "flat form" below), and + * checkNormalFormsEq is complete, in the sense that if it passes with no + * inferences, we are ensured that all string equalities in the current + * context are satisfied. + * + * Must call checkCycles before this function in a strategy. + */ + void checkFlatForms(); + /** check normal forms equalities + * + * This applies an inference schema based on "normal forms". The normal form + * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn}, + * where t1...tn are concatenation terms is a vector of representative terms + * [r1, ..., rm] such that: + * (1) if n=0, then m=1 and r1 is the representative of e, + * (2) if n>0, say + * t1 = t^1_1 ++ ... ++ t^1_m_1 + * ... + * tn = t^1_n ++ ... ++ t^_m_n + * for *each* i=1, ..., n, the result of concenating the normal forms of + * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class + * can be assigned a normal form, then all equalities between ti and tj are + * satisfied by all models that correspond to extensions of the current + * assignment. For further detail on this terminology, see Liang et al + * CAV 2014. + * + * Notice that all constant words are implicitly considered concatentation + * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c". + * + * At a high level, we build normal forms for equivalence classes bottom-up, + * starting with equivalence classes that are minimal with respect to the + * containment ordering < computed during checkCycles. While computing a + * normal form for an equivalence class, we may infer equalities between + * components of strings that must be equal (e.g. x=y when x++z == y++w when + * len(x)==len(y) is asserted), derive conflicts if two strings have disequal + * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split + * string terms into smaller components using fresh skolem variables (see + * Inference values with names "SPLIT"). We also may introduce regular + * expression constraints in this method for looping word equations (see + * the Inference INFER_FLOOP). + * + * If this inference schema returns no facts, lemmas, or conflicts, then + * we have successfully assigned normal forms for all equivalence classes, as + * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or + * conflict based on inferences in the Inference enumeration above. + */ + void checkNormalFormsEq(); + /** check normal forms disequalities + * + * This inference schema can be seen as the converse of the above schema. In + * particular, it ensures that each pair of distinct equivalence classes + * e1 and e2 have distinct normal forms. + * + * This method considers all pairs of distinct equivalence classes (e1,e2) + * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It + * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn] + * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are + * disequal and have the same length, then x1 and x2 have distinct normal + * forms. Otherwise, we may add splitting lemmas on the length of ri and si, + * or split on an equality between ri and si. + * + * If this inference schema returns no facts, lemmas, or conflicts, then all + * disequalities between string terms are satisfied by all models that are + * extensions of the current assignment. + */ + void checkNormalFormsDeq(); + /** check lengths for equivalence classes + * + * This inference schema adds lemmas of the form: + * E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) ) + * where [r1, ..., rn] is the normal form of the equivalence class containing + * x. This schema is not required for correctness but experimentally has + * shown to be helpful. + */ + void checkLengthsEqc(); + //-----------------------end inference steps + + //--------------------------- query functions + /** + * Get normal form for string term n. For details on this data structure, + * see theory/strings/normal_form.h. + * + * This query is valid after a successful call to checkNormalFormsEq, e.g. + * a call where the inference manager was not given any lemmas or inferences. + */ + NormalForm& getNormalForm(Node n); + /** get normal string + * + * This method returns the node that is equivalent to the normal form of x, + * and adds the corresponding explanation to nf_exp. + * + * For example, if x = y ++ z is an assertion in the current context, then + * this method returns the term y ++ z and adds x = y ++ z to nf_exp. + */ + Node getNormalString(Node x, std::vector& nf_exp); + //-------------------------- end query functions + private: + /** + * This processes the infer info ii as an inference. In more detail, it calls + * the inference manager to process the inference, it introduces Skolems, and + * updates the set of normal form pairs. + */ + void doInferInfo(const InferInfo& ii); + /** Add that (n1,n2) is a normal form pair in the current context. */ + void addNormalFormPair(Node n1, Node n2); + /** Is (n1,n2) a normal form pair in the current context? */ + bool isNormalFormPair(Node n1, Node n2); + + //--------------------------for checkFlatForm + /** + * This checks whether there are flat form inferences between eqc[start] and + * eqc[j] for some j>start. If the flag isRev is true, we check for flat form + * interferences in the reverse direction of the flat forms (note: + * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev` + * is true). For more details, see checkFlatForms below. + */ + void checkFlatForm(std::vector& eqc, size_t start, bool isRev); + /** debug print current flat forms on trace tc */ + void debugPrintFlatForms(const char* tc); + //--------------------------end for checkFlatForm + + //--------------------------for checkCycles + Node checkCycles(Node eqc, std::vector& curr, std::vector& exp); + //--------------------------end for checkCycles + + //--------------------------for checkNormalFormsEq + /** normalize equivalence class + * + * This method attempts to build a "normal form" for the equivalence class + * of string term n (for more details on normal forms, see normal_form.h + * or see Liang et al CAV 2014). In particular, this method checks whether the + * current normal form for each term in this equivalence class is identical. + * If it is not, then we add an inference via sendInference and abort the + * call. + */ + void normalizeEquivalenceClass(Node n); + /** + * For each term in the equivalence class of eqc, this adds data regarding its + * normal form to normal_forms. The map term_to_nf_index maps terms to the + * index in normal_forms where their normal form data is located. + */ + void getNormalForms(Node eqc, + std::vector& normal_forms, + std::map& term_to_nf_index); + /** process normalize equivalence class + * + * This is called when an equivalence class contains a set of terms that + * have normal forms given by the argument normal_forms. It either + * verifies that all normal forms in this vector are identical, or otherwise + * adds a conflict, lemma, or inference via the sendInference method. + * + * To prioritize one inference versus another, it builds a set of possible + * inferences, at most two for each pair of distinct normal forms, + * corresponding to processing the normal form pair in the (forward, reverse) + * directions. Once all possible inferences are recorded, it executes the + * one with highest priority based on the enumeration type Inference. + */ + void processNEqc(std::vector& normal_forms); + /** process simple normal equality + * + * This method is called when two equal terms have normal forms nfi and nfj. + * It adds (typically at most one) possible inference to the vector pinfer. + * This inference is in the form of an InferInfo object, which stores the + * necessary information regarding how to process the inference. + * + * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that + * we are currently checking. This method will increment this index until + * it finds an index where these vectors differ, or until it reaches the + * end of these vectors. + * isRev: Whether we are processing the normal forms in reverse direction. + * Notice in this case the normal form vectors have been reversed, hence, + * many operations are identical to the forward case, e.g. index is + * incremented not decremented, while others require special care, e.g. + * constant strings "ABC" in the normal form vectors are not reversed to + * "CBA" and hence all operations should assume a flipped semantics for + * constants when isRev is true, + * rproc: the number of string components on the suffix of the normal form of + * nfi and nfj that were already processed. This is used when using + * fowards/backwards traversals of normal forms to ensure that duplicate + * inferences are not processed. + * pinfer: the set of possible inferences we add to. + */ + void processSimpleNEq(NormalForm& nfi, + NormalForm& nfj, + unsigned& index, + bool isRev, + unsigned rproc, + std::vector& pinfer); + //--------------------------end for checkNormalFormsEq + + //--------------------------for checkNormalFormsEq with loops + bool detectLoop(NormalForm& nfi, + NormalForm& nfj, + int index, + int& loop_in_i, + int& loop_in_j, + unsigned rproc); + + /** + * Result of processLoop() below. + */ + enum class ProcessLoopResult + { + /** Loop processing made an inference */ + INFERENCE, + /** Loop processing detected a conflict */ + CONFLICT, + /** Loop not processed or no loop detected */ + SKIPPED, + }; + + ProcessLoopResult processLoop(NormalForm& nfi, + NormalForm& nfj, + int loop_index, + int index, + InferInfo& info); + //--------------------------end for checkNormalFormsEq with loops + + //--------------------------for checkNormalFormsDeq + void processDeq(Node n1, Node n2); + int processReverseDeq(std::vector& nfi, + std::vector& nfj, + Node ni, + Node nj); + int processSimpleDeq(std::vector& nfi, + std::vector& nfj, + Node ni, + Node nj, + unsigned& index, + bool isRev); + //--------------------------end for checkNormalFormsDeq + + /** The solver state object */ + SolverState& d_state; + /** The (custom) output channel of the theory of strings */ + InferenceManager& d_im; + /** cache of all skolems */ + SkolemCache& d_skCache; + /** reference to the base solver, used for certain queries */ + BaseSolver& d_bsolver; + /** Commonly used constants */ + Node d_emptyString; + Node d_true; + Node d_false; + Node d_zero; + Node d_one; + Node d_neg_one; + /** empty vector (used for trivial explanations) */ + std::vector d_emptyVec; + /** + * The list of equivalence classes of type string. These are ordered based + * on the ordering described in checkCycles. + */ + std::vector d_strings_eqc; + /** map from terms to their normal forms */ + std::map d_normal_form; + /** + * In certain cases, we know that two terms are equivalent despite + * not having to verify their normal forms are identical. For example, + * after applying the R-Loop rule to two terms a and b, we know they + * are entailed to be equal in the current context without having + * to look at their normal forms. We call (a,b) a normal form pair. + * + * We map representative terms a to a list of nodes b1,...,bn such that + * (a,b1) ... (a, bn) are normal form pairs. This list is SAT-context + * dependent. We use a context-dependent integer along with a context + * indepedent map from nodes to lists of nodes to model this, given by + * the two data members below. + */ + NodeIntMap d_nf_pairs; + std::map > d_nf_pairs_data; + /** list of non-congruent concat terms in each equivalence class */ + std::map > d_eqc; + /** + * The flat form for each equivalence class. For a term (str.++ t1 ... tn), + * this is a list of the form (r_{i1} ... r_{im}) where each empty t1...tn + * is dropped and the others are replaced in order with their representative. + */ + std::map > d_flat_form; + /** + * For each component r_{i1} ... r_{im} in a flat form, this stores + * the argument number of the t1 ... tn they were generated from. + */ + std::map > d_flat_form_index; +}; /* class CoreSolver */ + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* CVC4__THEORY__STRINGS__CORE_SOLVER_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 7ebc5f35f..33bde3162 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -72,7 +72,6 @@ TheoryStrings::TheoryStrings(context::Context* c, d_equalityEngine(d_notify, c, "theory::strings", true), d_state(c, d_equalityEngine, d_valuation), d_im(*this, c, u, d_state, out), - d_nf_pairs(c), d_pregistered_terms_cache(u), d_registered_terms_cache(u), d_preproc(&d_sk_cache, u), @@ -83,6 +82,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_has_extf(c, false), d_has_str_code(false), d_bsolver(c, u, d_state, d_im), + d_csolver(c, u, d_state, d_im, d_sk_cache, d_bsolver), d_regexp_solver(*this, d_state, d_im, c, u), d_input_vars(u), d_input_var_lsum(u), @@ -155,35 +155,7 @@ bool TheoryStrings::areCareDisequal( TNode x, TNode y ) { Node TheoryStrings::getNormalString(Node x, std::vector& nf_exp) { - if (!x.isConst()) - { - Node xr = d_state.getRepresentative(x); - std::map::iterator it = d_normal_form.find(xr); - if (it != d_normal_form.end()) - { - NormalForm& nf = it->second; - Node ret = utils::mkNConcat(nf.d_nf); - nf_exp.insert(nf_exp.end(), nf.d_exp.begin(), nf.d_exp.end()); - d_im.addToExplanation(x, nf.d_base, nf_exp); - Trace("strings-debug") - << "Term: " << x << " has a normal form " << ret << std::endl; - return ret; - } - // if x does not have a normal form, then it should not occur in the - // equality engine and hence should be its own representative. - Assert(xr == x); - if (x.getKind() == kind::STRING_CONCAT) - { - std::vector vec_nodes; - for (unsigned i = 0; i < x.getNumChildren(); i++) - { - Node nc = getNormalString(x[i], nf_exp); - vec_nodes.push_back(nc); - } - return utils::mkNConcat(vec_nodes); - } - } - return x; + return d_csolver.getNormalString(x, nf_exp); } void TheoryStrings::setMasterEqualityEngine(eq::EqualityEngine* eq) { @@ -282,8 +254,8 @@ Node TheoryStrings::getCurrentSubstitutionFor(int effort, { Assert(effort < 3); // normal forms - NormalForm& nfnr = getNormalForm(nr); - Node ns = getNormalString(nfnr.d_base, exp); + NormalForm& nfnr = d_csolver.getNormalForm(nr); + Node ns = d_csolver.getNormalString(nfnr.d_base, exp); Trace("strings-subs") << " normal eqc : " << ns << " " << nfnr.d_base << " " << nr << std::endl; if (!nfnr.d_base.isNull()) @@ -550,7 +522,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) //check if col[i][j] has only variables if (!eqc.isConst()) { - NormalForm& nfe = getNormalForm(eqc); + NormalForm& nfe = d_csolver.getNormalForm(eqc); if (nfe.d_nf.size() == 1) { // does it have a code and the length of these equivalence classes are @@ -671,7 +643,7 @@ bool TheoryStrings::collectModelInfo(TheoryModel* m) //step 4 : assign constants to all other equivalence classes for( unsigned i=0; i& exp) const } } -void TheoryStrings::debugPrintFlatForms( const char * tc ){ - for( unsigned k=0; k1 ){ - Trace( tc ) << "EQC [" << eqc << "]" << std::endl; - }else{ - Trace( tc ) << "eqc [" << eqc << "]"; - } - Node c = d_bsolver.getConstantEqc(eqc); - if (!c.isNull()) - { - Trace(tc) << " C: " << c; - if( d_eqc[eqc].size()>1 ){ - Trace( tc ) << std::endl; - } - } - if( d_eqc[eqc].size()>1 ){ - for( unsigned i=0; i d_const_length; - bool operator() (Node i, Node j) { - std::map< Node, unsigned >::iterator it_i = d_const_length.find( i ); - std::map< Node, unsigned >::iterator it_j = d_const_length.find( j ); - if( it_i==d_const_length.end() ){ - if( it_j==d_const_length.end() ){ - return isecondsecond; - } - } - } -}; - -void TheoryStrings::checkCycles() -{ - // first check for cycles, while building ordering of equivalence classes - d_flat_form.clear(); - d_flat_form_index.clear(); - d_eqc.clear(); - // Rebuild strings eqc based on acyclic ordering, first copy the equivalence - // classes from the base solver. - std::vector eqc = d_bsolver.getStringEqc(); - d_strings_eqc.clear(); - if( options::stringBinaryCsp() ){ - //sort: process smallest constants first (necessary if doing binary splits) - sortConstLength scl; - for( unsigned i=0; i().size(); - } - } - std::sort( eqc.begin(), eqc.end(), scl ); - } - for( unsigned i=0; i curr; - std::vector< Node > exp; - checkCycles( eqc[i], curr, exp ); - if (d_im.hasProcessed()) - { - return; - } - } -} - -void TheoryStrings::checkFlatForms() -{ - // debug print flat forms - if (Trace.isOn("strings-ff")) - { - Trace("strings-ff") << "Flat forms : " << std::endl; - debugPrintFlatForms("strings-ff"); - } - - // inferences without recursively expanding flat forms - - //(1) approximate equality by containment, infer conflicts - for (const Node& eqc : d_strings_eqc) - { - Node c = d_bsolver.getConstantEqc(eqc); - if (!c.isNull()) - { - // if equivalence class is constant, all component constants in flat forms - // must be contained in it, in order - std::map >::iterator it = d_eqc.find(eqc); - if (it != d_eqc.end()) - { - for (const Node& n : it->second) - { - int firstc, lastc; - if (!TheoryStringsRewriter::canConstantContainList( - c, d_flat_form[n], firstc, lastc)) - { - Trace("strings-ff-debug") << "Flat form for " << n - << " cannot be contained in constant " - << c << std::endl; - Trace("strings-ff-debug") << " indices = " << firstc << "/" - << lastc << std::endl; - // conflict, explanation is n = base ^ base = c ^ relevant portion - // of ( n = f[n] ) - std::vector exp; - d_bsolver.explainConstantEqc(n, eqc, exp); - for (int e = firstc; e <= lastc; e++) - { - if (d_flat_form[n][e].isConst()) - { - Assert(e >= 0 && e < (int)d_flat_form_index[n].size()); - Assert(d_flat_form_index[n][e] >= 0 - && d_flat_form_index[n][e] < (int)n.getNumChildren()); - d_im.addToExplanation( - d_flat_form[n][e], n[d_flat_form_index[n][e]], exp); - } - } - Node conc = d_false; - d_im.sendInference(exp, conc, "F_NCTN"); - return; - } - } - } - } - } - - //(2) scan lists, unification to infer conflicts and equalities - for (const Node& eqc : d_strings_eqc) - { - std::map >::iterator it = d_eqc.find(eqc); - if (it == d_eqc.end() || it->second.size() <= 1) - { - continue; - } - // iterate over start index - for (unsigned start = 0; start < it->second.size() - 1; start++) - { - for (unsigned r = 0; r < 2; r++) - { - bool isRev = r == 1; - checkFlatForm(it->second, start, isRev); - if (d_state.isInConflict()) - { - return; - } - - for (const Node& n : it->second) - { - std::reverse(d_flat_form[n].begin(), d_flat_form[n].end()); - std::reverse(d_flat_form_index[n].begin(), - d_flat_form_index[n].end()); - } - } - } - } -} - -namespace { - -enum class FlatFormInfer -{ - NONE, - CONST, - UNIFY, - ENDPOINT_EMP, - ENDPOINT_EQ, -}; - -std::ostream& operator<<(std::ostream& os, FlatFormInfer inf) -{ - switch (inf) - { - case FlatFormInfer::NONE: os << ""; break; - case FlatFormInfer::CONST: os << "F_Const"; break; - case FlatFormInfer::UNIFY: os << "F_Unify"; break; - case FlatFormInfer::ENDPOINT_EMP: os << "F_EndpointEmp"; break; - case FlatFormInfer::ENDPOINT_EQ: os << "F_EndpointEq"; break; - default: os << ""; break; - } - return os; -} - -} // namespace - -void TheoryStrings::checkFlatForm(std::vector& eqc, - size_t start, - bool isRev) -{ - size_t count = 0; - // We check for flat form inferences involving `eqc[start]` and terms past - // `start`. If there was a flat form inference involving `eqc[start]` and a - // term at a smaller index `i`, we would have found it with when `start` was - // `i`. Thus, we mark the preceeding terms in the equivalence class as - // ineligible. - std::vector inelig(eqc.begin(), eqc.begin() + start + 1); - Node a = eqc[start]; - Trace("strings-ff-debug") - << "Check flat form for a = " << a << ", whose flat form is " - << d_flat_form[a] << ")" << std::endl; - Node b; - do - { - std::vector exp; - Node conc; - FlatFormInfer infType = FlatFormInfer::NONE; - size_t eqc_size = eqc.size(); - size_t asize = d_flat_form[a].size(); - if (count == asize) - { - for (size_t i = start + 1; i < eqc_size; i++) - { - b = eqc[i]; - if (std::find(inelig.begin(), inelig.end(), b) != inelig.end()) - { - continue; - } - - size_t bsize = d_flat_form[b].size(); - if (count < bsize) - { - Trace("strings-ff-debug") - << "Found endpoint (in a) with non-empty b = " << b - << ", whose flat form is " << d_flat_form[b] << std::endl; - // endpoint - std::vector conc_c; - for (unsigned j = count; j < bsize; j++) - { - conc_c.push_back(b[d_flat_form_index[b][j]].eqNode(d_emptyString)); - } - Assert(!conc_c.empty()); - conc = utils::mkAnd(conc_c); - infType = FlatFormInfer::ENDPOINT_EMP; - Assert(count > 0); - // swap, will enforce is empty past current - a = eqc[i]; - b = eqc[start]; - break; - } - inelig.push_back(eqc[i]); - } - } - else - { - Node curr = d_flat_form[a][count]; - Node curr_c = d_bsolver.getConstantEqc(curr); - Node ac = a[d_flat_form_index[a][count]]; - std::vector lexp; - Node lcurr = d_state.getLength(ac, lexp); - for (size_t i = start + 1; i < eqc_size; i++) - { - b = eqc[i]; - if (std::find(inelig.begin(), inelig.end(), b) != inelig.end()) - { - continue; - } - - if (count == d_flat_form[b].size()) - { - inelig.push_back(b); - Trace("strings-ff-debug") - << "Found endpoint in b = " << b << ", whose flat form is " - << d_flat_form[b] << std::endl; - // endpoint - std::vector conc_c; - for (size_t j = count; j < asize; j++) - { - conc_c.push_back(a[d_flat_form_index[a][j]].eqNode(d_emptyString)); - } - Assert(!conc_c.empty()); - conc = utils::mkAnd(conc_c); - infType = FlatFormInfer::ENDPOINT_EMP; - Assert(count > 0); - break; - } - else - { - Node cc = d_flat_form[b][count]; - if (cc != curr) - { - Node bc = b[d_flat_form_index[b][count]]; - inelig.push_back(b); - Assert(!d_state.areEqual(curr, cc)); - Node cc_c = d_bsolver.getConstantEqc(cc); - if (!curr_c.isNull() && !cc_c.isNull()) - { - // check for constant conflict - int index; - Node s = TheoryStringsRewriter::splitConstant( - cc_c, curr_c, index, isRev); - if (s.isNull()) - { - d_bsolver.explainConstantEqc(ac, curr, exp); - d_bsolver.explainConstantEqc(bc, cc, exp); - conc = d_false; - infType = FlatFormInfer::CONST; - break; - } - } - else if ((d_flat_form[a].size() - 1) == count - && (d_flat_form[b].size() - 1) == count) - { - conc = ac.eqNode(bc); - infType = FlatFormInfer::ENDPOINT_EQ; - break; - } - else - { - // if lengths are the same, apply LengthEq - std::vector lexp2; - Node lcc = d_state.getLength(bc, lexp2); - if (d_state.areEqual(lcurr, lcc)) - { - if (Trace.isOn("strings-ff-debug")) - { - Trace("strings-ff-debug") - << "Infer " << ac << " == " << bc << " since " << lcurr - << " == " << lcc << std::endl; - Trace("strings-ff-debug") - << "Explanation for " << lcurr << " is "; - for (size_t j = 0; j < lexp.size(); j++) - { - Trace("strings-ff-debug") << lexp[j] << std::endl; - } - Trace("strings-ff-debug") - << "Explanation for " << lcc << " is "; - for (size_t j = 0; j < lexp2.size(); j++) - { - Trace("strings-ff-debug") << lexp2[j] << std::endl; - } - } - - exp.insert(exp.end(), lexp.begin(), lexp.end()); - exp.insert(exp.end(), lexp2.begin(), lexp2.end()); - d_im.addToExplanation(lcurr, lcc, exp); - conc = ac.eqNode(bc); - infType = FlatFormInfer::UNIFY; - break; - } - } - } - } - } - } - if (!conc.isNull()) - { - Trace("strings-ff-debug") << "Found inference (" << infType - << "): " << conc << " based on equality " << a - << " == " << b << ", " << isRev << std::endl; - d_im.addToExplanation(a, b, exp); - // explain why prefixes up to now were the same - for (size_t j = 0; j < count; j++) - { - Trace("strings-ff-debug") << "Add at " << d_flat_form_index[a][j] << " " - << d_flat_form_index[b][j] << std::endl; - d_im.addToExplanation( - a[d_flat_form_index[a][j]], b[d_flat_form_index[b][j]], exp); - } - // explain why other components up to now are empty - for (unsigned t = 0; t < 2; t++) - { - Node c = t == 0 ? a : b; - ssize_t jj; - if (infType == FlatFormInfer::ENDPOINT_EQ - || (t == 1 && infType == FlatFormInfer::ENDPOINT_EMP)) - { - // explain all the empty components for F_EndpointEq, all for - // the short end for F_EndpointEmp - jj = isRev ? -1 : c.getNumChildren(); - } - else - { - jj = t == 0 ? d_flat_form_index[a][count] - : d_flat_form_index[b][count]; - } - ssize_t startj = isRev ? jj + 1 : 0; - ssize_t endj = isRev ? c.getNumChildren() : jj; - for (ssize_t j = startj; j < endj; j++) - { - if (d_state.areEqual(c[j], d_emptyString)) - { - d_im.addToExplanation(c[j], d_emptyString, exp); - } - } - } - // Notice that F_EndpointEmp is not typically applied, since - // strict prefix equality ( a.b = a ) where a,b non-empty - // is conflicting by arithmetic len(a.b)=len(a)+len(b)!=len(a) - // when len(b)!=0. Although if we do not infer this conflict eagerly, - // it may be applied (see #3272). - std::stringstream ss; - ss << infType; - d_im.sendInference(exp, conc, ss.str().c_str()); - if (d_state.isInConflict()) - { - return; - } - break; - } - count++; - } while (inelig.size() < eqc.size()); -} - -Node TheoryStrings::checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ){ - if( std::find( curr.begin(), curr.end(), eqc )!=curr.end() ){ - // a loop - return eqc; - }else if( std::find( d_strings_eqc.begin(), d_strings_eqc.end(), eqc )==d_strings_eqc.end() ){ - curr.push_back( eqc ); - //look at all terms in this equivalence class - eq::EqClassIterator eqc_i = eq::EqClassIterator( eqc, &d_equalityEngine ); - while( !eqc_i.isFinished() ) { - Node n = (*eqc_i); - if (!d_bsolver.isCongruent(n)) - { - if( n.getKind() == kind::STRING_CONCAT ){ - Trace("strings-cycle") << eqc << " check term : " << n << " in " << eqc << std::endl; - if (eqc != d_emptyString) - { - d_eqc[eqc].push_back( n ); - } - for( unsigned i=0; i exp; - exp.push_back( n.eqNode( d_emptyString ) ); - d_im.sendInference( - exp, n[i].eqNode(d_emptyString), "I_CYCLE_E"); - return Node::null(); - } - } - else - { - if (nr != d_emptyString) - { - d_flat_form[n].push_back( nr ); - d_flat_form_index[n].push_back( i ); - } - //for non-empty eqc, recurse and see if we find a loop - Node ncy = checkCycles( nr, curr, exp ); - if( !ncy.isNull() ){ - Trace("strings-cycle") << eqc << " cycle: " << ncy << " at " << n << "[" << i << "] : " << n[i] << std::endl; - d_im.addToExplanation(n, eqc, exp); - d_im.addToExplanation(nr, n[i], exp); - if( ncy==eqc ){ - //can infer all other components must be empty - for( unsigned j=0; j& seqc = d_bsolver.getStringEqc(); + for (const Node& eqc : seqc) { eq::EqClassIterator eqc_i = eq::EqClassIterator(eqc, &d_equalityEngine); while (!eqc_i.isFinished()) @@ -2156,66 +1614,6 @@ void TheoryStrings::checkRegisterTermsPreNormalForm() } } -void TheoryStrings::checkNormalFormsEq() -{ - // calculate normal forms for each equivalence class, possibly adding - // splitting lemmas - d_normal_form.clear(); - std::map nf_to_eqc; - std::map eqc_to_nf; - std::map eqc_to_exp; - for (const Node& eqc : d_strings_eqc) - { - Trace("strings-process-debug") << "- Verify normal forms are the same for " - << eqc << std::endl; - normalizeEquivalenceClass(eqc); - Trace("strings-debug") << "Finished normalizing eqc..." << std::endl; - if (d_im.hasProcessed()) - { - return; - } - NormalForm& nfe = getNormalForm(eqc); - Node nf_term = utils::mkNConcat(nfe.d_nf); - std::map::iterator itn = nf_to_eqc.find(nf_term); - if (itn != nf_to_eqc.end()) - { - NormalForm& nfe_eq = getNormalForm(itn->second); - // two equivalence classes have same normal form, merge - std::vector nf_exp; - nf_exp.push_back(utils::mkAnd(nfe.d_exp)); - nf_exp.push_back(eqc_to_exp[itn->second]); - Node eq = nfe.d_base.eqNode(nfe_eq.d_base); - d_im.sendInference(nf_exp, eq, "Normal_Form"); - if (d_im.hasProcessed()) - { - return; - } - } - else - { - nf_to_eqc[nf_term] = eqc; - eqc_to_nf[eqc] = nf_term; - eqc_to_exp[eqc] = utils::mkAnd(nfe.d_exp); - } - Trace("strings-process-debug") - << "Done verifying normal forms are the same for " << eqc << std::endl; - } - if (Trace.isOn("strings-nf")) - { - Trace("strings-nf") << "**** Normal forms are : " << std::endl; - for (std::map::iterator it = eqc_to_exp.begin(); - it != eqc_to_exp.end(); - ++it) - { - NormalForm& nf = getNormalForm(it->first); - Trace("strings-nf") << " N[" << it->first << "] (base " << nf.d_base - << ") = " << eqc_to_nf[it->first] << std::endl; - Trace("strings-nf") << " exp: " << it->second << std::endl; - } - Trace("strings-nf") << std::endl; - } -} - void TheoryStrings::checkCodes() { // ensure that lemmas regarding str.code been added for each constant string @@ -2229,9 +1627,10 @@ void TheoryStrings::checkCodes() // str.code applied to the proxy variables for each equivalence classes that // are constants of size one std::vector const_codes; - for (const Node& eqc : d_strings_eqc) + const std::vector& seqc = d_bsolver.getStringEqc(); + for (const Node& eqc : seqc) { - NormalForm& nfe = getNormalForm(eqc); + NormalForm& nfe = d_csolver.getNormalForm(eqc); if (nfe.d_nf.size() == 1 && nfe.d_nf[0].isConst()) { Node c = nfe.d_nf[0]; @@ -2290,1411 +1689,40 @@ void TheoryStrings::checkCodes() } } -//compute d_normal_forms_(base,exp,exp_depend)[eqc] -void TheoryStrings::normalizeEquivalenceClass( Node eqc ) { - Trace("strings-process-debug") << "Process equivalence class " << eqc << std::endl; - if (d_state.areEqual(eqc, d_emptyString)) - { -#ifdef CVC4_ASSERTIONS - for( unsigned j=0; j normal_forms; - // map each term to its index in the above vector - std::map term_to_nf_index; - // get the normal forms - getNormalForms(eqc, normal_forms, term_to_nf_index); - if (d_im.hasProcessed()) - { - return; - } - // process the normal forms - processNEqc(normal_forms); - if (d_im.hasProcessed()) + if (options::stringEagerLen()) { - return; + do_register = effort == 0; } - - //construct the normal form - Assert(!normal_forms.empty()); - unsigned nf_index = 0; - std::map::iterator it = term_to_nf_index.find(eqc); - // we prefer taking the normal form whose base is the equivalence - // class representative, since this leads to shorter explanations in - // some cases. - if (it != term_to_nf_index.end()) + else { - nf_index = it->second; + do_register = effort > 0 || n.getKind() != STRING_CONCAT; } - d_normal_form[eqc] = normal_forms[nf_index]; - Trace("strings-process-debug") - << "Return process equivalence class " << eqc - << " : returned, size = " << d_normal_form[eqc].d_nf.size() - << std::endl; } -} - -NormalForm& TheoryStrings::getNormalForm(Node n) -{ - std::map::iterator itn = d_normal_form.find(n); - if (itn == d_normal_form.end()) + if (!do_register) { - Trace("strings-warn") << "WARNING: returning empty normal form for " << n - << std::endl; - // Shouln't ask for normal forms of strings that weren't computed. This - // likely means that n is not a representative or not a term in the current - // context. We simply return a default normal form here in this case. - Assert(false); - return d_normal_form[n]; + return; } - return itn->second; -} - -void TheoryStrings::getNormalForms(Node eqc, - std::vector& normal_forms, - std::map& term_to_nf_index) -{ - //constant for equivalence class - Node eqc_non_c = eqc; - Trace("strings-process-debug") << "Get normal forms " << eqc << std::endl; - eq::EqClassIterator eqc_i = eq::EqClassIterator( eqc, &d_equalityEngine ); - while( !eqc_i.isFinished() ){ - Node n = (*eqc_i); - if (!d_bsolver.isCongruent(n)) - { - if (n.getKind() == CONST_STRING || n.getKind() == STRING_CONCAT) - { - Trace("strings-process-debug") << "Get Normal Form : Process term " << n << " in eqc " << eqc << std::endl; - NormalForm nf_curr; - if (n.getKind() == CONST_STRING) - { - nf_curr.init(n); - } - else if (n.getKind() == STRING_CONCAT) - { - // set the base to n, we construct the other portions of nf_curr in - // the following. - nf_curr.d_base = n; - for( unsigned i=0; i& nfrv = nfr.d_nf; - Trace("strings-process-debug") << "Normalizing subterm " << n[i] << " = " << nr << std::endl; - unsigned orig_size = nf_curr.d_nf.size(); - unsigned add_size = nfrv.size(); - //if not the empty string, add to current normal form - if (!nfrv.empty()) - { - // if in a build with assertions, we run the following block, - // which checks that normal forms do not have concat terms. - if (Configuration::isAssertionBuild()) - { - for (const Node& nn : nfrv) - { - if (Trace.isOn("strings-error")) - { - if (nn.getKind() == STRING_CONCAT) - { - Trace("strings-error") - << "Strings::Error: From eqc = " << eqc << ", " << n - << " index " << i << ", bad normal form : "; - for (unsigned rr = 0; rr < nfrv.size(); rr++) - { - Trace("strings-error") << nfrv[rr] << " "; - } - Trace("strings-error") << std::endl; - } - } - Assert(nn.getKind() != kind::STRING_CONCAT); - } - } - nf_curr.d_nf.insert(nf_curr.d_nf.end(), nfrv.begin(), nfrv.end()); - } - // Track explanation for the normal form. This is in two parts. - // First, we must carry the explanation of the normal form computed - // for the representative nr. - for (const Node& exp : nfr.d_exp) - { - // The explanation is only relevant for the subsegment it was - // previously relevant for, shifted now based on its relative - // placement in the normal form of n. - nf_curr.addToExplanation( - exp, - orig_size + nfr.d_expDep[exp][false], - orig_size + (add_size - nfr.d_expDep[exp][true])); - } - // Second, must explain that the component n[i] is equal to the - // base of the normal form for nr. - Node base = nfr.d_base; - if (base != n[i]) - { - Node eq = n[i].eqNode(base); - // The equality is relevant for the entire current segment - nf_curr.addToExplanation(eq, orig_size, orig_size + add_size); - } - } - // Now that we are finished with the loop, we convert forward indices - // to reverse indices in the explanation dependency information - int total_size = nf_curr.d_nf.size(); - for (std::pair >& ed : - nf_curr.d_expDep) - { - ed.second[true] = total_size - ed.second[true]; - Assert(ed.second[true] >= 0); - } - } - //if not equal to self - std::vector& currv = nf_curr.d_nf; - if (currv.size() > 1 - || (currv.size() == 1 && currv[0].getKind() == CONST_STRING)) - { - // if in a build with assertions, check that normal form is acyclic - if (Configuration::isAssertionBuild()) - { - if (currv.size() > 1) - { - for (unsigned i = 0; i < currv.size(); i++) - { - if (Trace.isOn("strings-error")) - { - Trace("strings-error") << "Cycle for normal form "; - utils::printConcatTrace(currv, "strings-error"); - Trace("strings-error") << "..." << currv[i] << std::endl; - } - Assert(!d_state.areEqual(currv[i], n)); - } - } - } - term_to_nf_index[n] = normal_forms.size(); - normal_forms.push_back(nf_curr); - }else{ - //this was redundant: combination of self + empty string(s) - Node nn = currv.size() == 0 ? d_emptyString : currv[0]; - Assert(d_state.areEqual(nn, eqc)); - } - }else{ - eqc_non_c = n; - } - } - ++eqc_i; - } - - if( normal_forms.empty() ) { - Trace("strings-solve-debug2") << "construct the normal form" << std::endl; - // This case happens when there are no non-trivial normal forms for this - // equivalence class. For example, given assertions: - // { x = y ++ z, x = y, z = "" } - // The equivalence class of { x, y, y ++ z } is such that the normal form - // of all terms is a variable (either x or y) in the equivalence class - // itself. Thus, the normal form of this equivalence class can be assigned - // to one of these variables. - // We use a non-concatenation term among the terms in this equivalence - // class, which is stored in eqc_non_c. The reason is this does not require - // an explanation, whereas e.g. y ++ z would require the explanation z = "" - // to justify its normal form is y. - Assert(eqc_non_c.getKind() != STRING_CONCAT); - NormalForm nf_triv; - nf_triv.init(eqc_non_c); - normal_forms.push_back(nf_triv); - }else{ - if(Trace.isOn("strings-solve")) { - Trace("strings-solve") << "--- Normal forms for equivalance class " << eqc << " : " << std::endl; - for (unsigned i = 0, size = normal_forms.size(); i < size; i++) - { - NormalForm& nf = normal_forms[i]; - Trace("strings-solve") << "#" << i << " (from " << nf.d_base << ") : "; - for (unsigned j = 0, sizej = nf.d_nf.size(); j < sizej; j++) - { - if(j>0) { - Trace("strings-solve") << ", "; - } - Trace("strings-solve") << nf.d_nf[j]; - } - Trace("strings-solve") << std::endl; - Trace("strings-solve") << " Explanation is : "; - if (nf.d_exp.size() == 0) - { - Trace("strings-solve") << "NONE"; - } else { - for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) - { - if(j>0) { - Trace("strings-solve") << " AND "; - } - Trace("strings-solve") << nf.d_exp[j]; - } - Trace("strings-solve") << std::endl; - Trace("strings-solve") << "WITH DEPENDENCIES : " << std::endl; - for (unsigned j = 0, sizej = nf.d_exp.size(); j < sizej; j++) - { - Node exp = nf.d_exp[j]; - Trace("strings-solve") << " " << exp << " -> "; - Trace("strings-solve") << nf.d_expDep[exp][false] << ","; - Trace("strings-solve") << nf.d_expDep[exp][true] << std::endl; - } - } - Trace("strings-solve") << std::endl; - - } - } else { - Trace("strings-solve") << "--- Single normal form for equivalence class " << eqc << std::endl; - } - - //if equivalence class is constant, approximate as containment, infer conflicts - Node c = d_bsolver.getConstantEqc(eqc); - if( !c.isNull() ){ - Trace("strings-solve") << "Eqc is constant " << c << std::endl; - for (unsigned i = 0, size = normal_forms.size(); i < size; i++) - { - NormalForm& nf = normal_forms[i]; - int firstc, lastc; - if (!TheoryStringsRewriter::canConstantContainList( - c, nf.d_nf, firstc, lastc)) - { - Node n = nf.d_base; - //conflict - Trace("strings-solve") << "Normal form for " << n << " cannot be contained in constant " << c << std::endl; - //conflict, explanation is n = base ^ base = c ^ relevant porition of ( n = N[n] ) - std::vector< Node > exp; - d_bsolver.explainConstantEqc(n, eqc, exp); - //TODO: this can be minimized based on firstc/lastc, normal_forms_exp_depend - exp.insert(exp.end(), nf.d_exp.begin(), nf.d_exp.end()); - Node conc = d_false; - d_im.sendInference(exp, conc, "N_NCTN"); - } - } - } - } -} - -void TheoryStrings::processNEqc(std::vector& normal_forms) -{ - //the possible inferences - std::vector< InferInfo > pinfer; - // loop over all pairs - for(unsigned i=0; i max_index)) - { - min_id = pinfer[i].d_id; - max_index = pinfer[i].d_index; - use_index = i; - set_use_index = true; - } - } - Trace("strings-solve") << "...choose #" << use_index << std::endl; - doInferInfo(pinfer[use_index]); -} - -void TheoryStrings::doInferInfo(const InferInfo& ii) -{ - // send the inference - if (!ii.d_nf_pair[0].isNull()) - { - Assert(!ii.d_nf_pair[1].isNull()); - addNormalFormPair(ii.d_nf_pair[0], ii.d_nf_pair[1]); - } - // send the inference - d_im.sendInference(ii); - // Register the new skolems from this inference. We register them here - // (lazily), since the code above has now decided to use the inference - // at use_index that involves them. - for (const std::pair >& sks : - ii.d_new_skolem) - { - for (const Node& n : sks.second) - { - d_im.registerLength(n, sks.first); - } - } -} - -void TheoryStrings::processSimpleNEq(NormalForm& nfi, - NormalForm& nfj, - unsigned& index, - bool isRev, - unsigned rproc, - std::vector& pinfer) -{ - std::vector& nfiv = nfi.d_nf; - std::vector& nfjv = nfj.d_nf; - NodeManager* nm = NodeManager::currentNM(); - Assert(rproc <= nfiv.size() && rproc <= nfjv.size()); - bool success; - do { - success = false; - //if we are at the end - if (index == (nfiv.size() - rproc) || index == (nfjv.size() - rproc)) - { - if (index == (nfiv.size() - rproc) && index == (nfjv.size() - rproc)) - { - //we're done - }else{ - //the remainder must be empty - NormalForm& nfk = index == (nfiv.size() - rproc) ? nfj : nfi; - std::vector& nfkv = nfk.d_nf; - unsigned index_k = index; - std::vector< Node > curr_exp; - NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, curr_exp); - while (!d_state.isInConflict() && index_k < (nfkv.size() - rproc)) - { - //can infer that this string must be empty - Node eq = nfkv[index_k].eqNode(d_emptyString); - //Trace("strings-lemma") << "Strings: Infer " << eq << " from " << eq_exp << std::endl; - Assert(!d_state.areEqual(d_emptyString, nfkv[index_k])); - d_im.sendInference(curr_exp, eq, "N_EndpointEmp"); - index_k++; - } - } - }else{ - Trace("strings-solve-debug") - << "Process " << nfiv[index] << " ... " << nfjv[index] << std::endl; - if (nfiv[index] == nfjv[index]) - { - Trace("strings-solve-debug") << "Simple Case 1 : strings are equal" << std::endl; - index++; - success = true; - }else{ - Assert(!d_state.areEqual(nfiv[index], nfjv[index])); - std::vector< Node > temp_exp; - Node length_term_i = d_state.getLength(nfiv[index], temp_exp); - Node length_term_j = d_state.getLength(nfjv[index], temp_exp); - // check length(nfiv[index]) == length(nfjv[index]) - if (d_state.areEqual(length_term_i, length_term_j)) - { - Trace("strings-solve-debug") << "Simple Case 2 : string lengths are equal" << std::endl; - Node eq = nfiv[index].eqNode(nfjv[index]); - //eq = Rewriter::rewrite( eq ); - Node length_eq = length_term_i.eqNode( length_term_j ); - //temp_exp.insert(temp_exp.end(), curr_exp.begin(), curr_exp.end() ); - NormalForm::getExplanationForPrefixEq( - nfi, nfj, index, index, temp_exp); - temp_exp.push_back(length_eq); - d_im.sendInference(temp_exp, eq, "N_Unify"); - return; - } - else if ((nfiv[index].getKind() != CONST_STRING - && index == nfiv.size() - rproc - 1) - || (nfjv[index].getKind() != CONST_STRING - && index == nfjv.size() - rproc - 1)) - { - Trace("strings-solve-debug") << "Simple Case 3 : at endpoint" << std::endl; - std::vector< Node > antec; - //antec.insert(antec.end(), curr_exp.begin(), curr_exp.end() ); - NormalForm::getExplanationForPrefixEq(nfi, nfj, -1, -1, antec); - std::vector< Node > eqn; - for( unsigned r=0; r<2; r++ ) { - NormalForm& nfk = r == 0 ? nfi : nfj; - std::vector& nfkv = nfk.d_nf; - std::vector< Node > eqnc; - for (unsigned index_l = index, size = (nfkv.size() - rproc); - index_l < size; - index_l++) - { - if(isRev) { - eqnc.insert(eqnc.begin(), nfkv[index_l]); - } else { - eqnc.push_back(nfkv[index_l]); - } - } - eqn.push_back(utils::mkNConcat(eqnc)); - } - if (!d_state.areEqual(eqn[0], eqn[1])) - { - d_im.sendInference( - antec, eqn[0].eqNode(eqn[1]), "N_EndpointEq", true); - return; - } - else - { - Assert(nfiv.size() == nfjv.size()); - index = nfiv.size() - rproc; - } - } - else if (nfiv[index].isConst() && nfjv[index].isConst()) - { - Node const_str = nfiv[index]; - Node other_str = nfjv[index]; - Trace("strings-solve-debug") << "Simple Case 3 : Const Split : " << const_str << " vs " << other_str << " at index " << index << ", isRev = " << isRev << std::endl; - unsigned len_short = const_str.getConst().size() <= other_str.getConst().size() ? const_str.getConst().size() : other_str.getConst().size(); - bool isSameFix = isRev ? const_str.getConst().rstrncmp(other_str.getConst(), len_short): const_str.getConst().strncmp(other_str.getConst(), len_short); - if( isSameFix ) { - //same prefix/suffix - bool constCmp = const_str.getConst().size() - < other_str.getConst().size(); - //k is the index of the string that is shorter - NormalForm& nfk = constCmp ? nfi : nfj; - std::vector& nfkv = nfk.d_nf; - NormalForm& nfl = constCmp ? nfj : nfi; - std::vector& nflv = nfl.d_nf; - Node remainderStr; - if( isRev ){ - int new_len = nflv[index].getConst().size() - len_short; - remainderStr = nm->mkConst( - nflv[index].getConst().substr(0, new_len)); - }else{ - remainderStr = - nm->mkConst(nflv[index].getConst().substr(len_short)); - } - Trace("strings-solve-debug-test") - << "Break normal form of " << nflv[index] << " into " - << nfkv[index] << ", " << remainderStr << std::endl; - nfl.splitConstant(index, nfkv[index], remainderStr); - index++; - success = true; - }else{ - //conflict - std::vector< Node > antec; - NormalForm::getExplanationForPrefixEq( - nfi, nfj, index, index, antec); - d_im.sendInference(antec, d_false, "N_Const", true); - return; - } - }else{ - //construct the candidate inference "info" - InferInfo info; - info.d_index = index; - //for debugging - info.d_i = nfi.d_base; - info.d_j = nfj.d_base; - info.d_rev = isRev; - bool info_valid = false; - Assert(index < nfiv.size() - rproc && index < nfjv.size() - rproc); - std::vector< Node > lexp; - Node length_term_i = d_state.getLength(nfiv[index], lexp); - Node length_term_j = d_state.getLength(nfjv[index], lexp); - //split on equality between string lengths (note that splitting on equality between strings is worse since it is harder to process) - if (!d_state.areDisequal(length_term_i, length_term_j) - && !d_state.areEqual(length_term_i, length_term_j) - && !nfiv[index].isConst() && !nfjv[index].isConst()) - { // AJR: remove the latter 2 conditions? - Trace("strings-solve-debug") << "Non-simple Case 1 : string lengths neither equal nor disequal" << std::endl; - //try to make the lengths equal via splitting on demand - Node length_eq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j ); - length_eq = Rewriter::rewrite( length_eq ); - //set info - info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, length_eq, length_eq.negate() ); - info.d_pending_phase[ length_eq ] = true; - info.d_id = INFER_LEN_SPLIT; - info_valid = true; - }else{ - Trace("strings-solve-debug") << "Non-simple Case 2 : must compare strings" << std::endl; - int loop_in_i = -1; - int loop_in_j = -1; - ProcessLoopResult plr = ProcessLoopResult::SKIPPED; - if (detectLoop(nfi, nfj, index, loop_in_i, loop_in_j, rproc)) - { - if( !isRev ){ //FIXME - NormalForm::getExplanationForPrefixEq( - nfi, nfj, -1, -1, info.d_ant); - // set info - plr = processLoop(loop_in_i != -1 ? nfi : nfj, - loop_in_i != -1 ? nfj : nfi, - loop_in_i != -1 ? loop_in_i : loop_in_j, - index, - info); - if (plr == ProcessLoopResult::INFERENCE) - { - info_valid = true; - } - } - } - - if (plr == ProcessLoopResult::SKIPPED) - { - //AJR: length entailment here? - if (nfiv[index].isConst() || nfjv[index].isConst()) - { - NormalForm& nfc = nfiv[index].isConst() ? nfi : nfj; - std::vector& nfcv = nfc.d_nf; - NormalForm& nfnc = nfiv[index].isConst() ? nfj : nfi; - std::vector& nfncv = nfnc.d_nf; - Node other_str = nfncv[index]; - Assert(other_str.getKind() != kind::CONST_STRING) - << "Other string is not constant."; - Assert(other_str.getKind() != kind::STRING_CONCAT) - << "Other string is not CONCAT."; - if( !d_equalityEngine.areDisequal( other_str, d_emptyString, true ) ){ - Node eq = other_str.eqNode( d_emptyString ); - eq = Rewriter::rewrite(eq); - if (eq.isConst()) - { - // If the equality rewrites to a constant, we must use the - // purify variable for this string to communicate that - // we have inferred whether it is empty. - Node p = d_sk_cache.mkSkolemCached( - other_str, SkolemCache::SK_PURIFY, "lsym"); - Node pEq = p.eqNode(d_emptyString); - // should not be constant - Assert(!Rewriter::rewrite(pEq).isConst()); - // infer the purification equality, and the (dis)equality - // with the empty string in the direction that the rewriter - // inferred - info.d_conc = - nm->mkNode(AND, - p.eqNode(other_str), - !eq.getConst() ? pEq.negate() : pEq); - info.d_id = INFER_INFER_EMP; - } - else - { - info.d_conc = nm->mkNode(OR, eq, eq.negate()); - info.d_id = INFER_LEN_SPLIT_EMP; - } - //set info - info_valid = true; - }else{ - if( !isRev ){ //FIXME - Node xnz = other_str.eqNode( d_emptyString ).negate(); - unsigned index_nc_k = index+1; - unsigned start_index_nc_k = index+1; - Node next_const_str = - TheoryStringsRewriter::getNextConstantAt( - nfncv, start_index_nc_k, index_nc_k, false); - if( !next_const_str.isNull() ) { - unsigned index_c_k = index; - Node const_str = - TheoryStringsRewriter::collectConstantStringAt( - nfcv, index_c_k, false); - Assert(!const_str.isNull()); - CVC4::String stra = const_str.getConst(); - CVC4::String strb = next_const_str.getConst(); - //since non-empty, we start with charecter #1 - size_t p; - if( isRev ){ - CVC4::String stra1 = stra.prefix( stra.size()-1 ); - p = stra.size() - stra1.roverlap(strb); - Trace("strings-csp-debug") << "Compute roverlap : " << const_str << " " << next_const_str << std::endl; - size_t p2 = stra1.rfind(strb); - p = p2==std::string::npos ? p : ( p>p2+1? p2+1 : p ); - Trace("strings-csp-debug") << "overlap : " << stra1 << " " << strb << " returned " << p << " " << p2 << " " << (p2==std::string::npos) << std::endl; - }else{ - CVC4::String stra1 = stra.substr( 1 ); - p = stra.size() - stra1.overlap(strb); - Trace("strings-csp-debug") << "Compute overlap : " << const_str << " " << next_const_str << std::endl; - size_t p2 = stra1.find(strb); - p = p2==std::string::npos ? p : ( p>p2+1? p2+1 : p ); - Trace("strings-csp-debug") << "overlap : " << stra1 << " " << strb << " returned " << p << " " << p2 << " " << (p2==std::string::npos) << std::endl; - } - if( p>1 ){ - if( start_index_nc_k==index+1 ){ - info.d_ant.push_back(xnz); - NormalForm::getExplanationForPrefixEq( - nfc, nfnc, index_c_k, index_nc_k, info.d_ant); - Node prea = p==stra.size() ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( p ) : stra.prefix( p ) ); - Node sk = d_sk_cache.mkSkolemCached( - other_str, - prea, - isRev ? SkolemCache::SK_ID_C_SPT_REV - : SkolemCache::SK_ID_C_SPT, - "c_spt"); - Trace("strings-csp") << "Const Split: " << prea << " is removed from " << stra << " due to " << strb << ", p=" << p << std::endl; - //set info - info.d_conc = other_str.eqNode( - isRev ? utils::mkNConcat(sk, prea) - : utils::mkNConcat(prea, sk)); - info.d_new_skolem[LENGTH_SPLIT].push_back(sk); - info.d_id = INFER_SSPLIT_CST_PROP; - info_valid = true; - } - } - } - if( !info_valid ){ - info.d_ant.push_back( xnz ); - Node const_str = nfcv[index]; - NormalForm::getExplanationForPrefixEq( - nfi, nfj, index, index, info.d_ant); - CVC4::String stra = const_str.getConst(); - if( options::stringBinaryCsp() && stra.size()>3 ){ - //split string in half - Node c_firstHalf = NodeManager::currentNM()->mkConst( isRev ? stra.substr( stra.size()/2 ) : stra.substr(0, stra.size()/2 ) ); - Node sk = d_sk_cache.mkSkolemCached( - other_str, - c_firstHalf, - isRev ? SkolemCache::SK_ID_VC_BIN_SPT_REV - : SkolemCache::SK_ID_VC_BIN_SPT, - "cb_spt"); - Trace("strings-csp") << "Const Split: " << c_firstHalf << " is removed from " << const_str << " (binary) " << std::endl; - info.d_conc = nm->mkNode( - OR, - other_str.eqNode( - isRev ? utils::mkNConcat(sk, c_firstHalf) - : utils::mkNConcat(c_firstHalf, sk)), - nm->mkNode( - AND, - sk.eqNode(d_emptyString).negate(), - c_firstHalf.eqNode( - isRev ? utils::mkNConcat(sk, other_str) - : utils::mkNConcat(other_str, sk)))); - info.d_new_skolem[LENGTH_SPLIT].push_back(sk); - info.d_id = INFER_SSPLIT_CST_BINARY; - info_valid = true; - }else{ - // normal v/c split - Node firstChar = stra.size() == 1 ? const_str : NodeManager::currentNM()->mkConst( isRev ? stra.suffix( 1 ) : stra.prefix( 1 ) ); - Node sk = d_sk_cache.mkSkolemCached( - other_str, - isRev ? SkolemCache::SK_ID_VC_SPT_REV - : SkolemCache::SK_ID_VC_SPT, - "c_spt"); - Trace("strings-csp") << "Const Split: " << firstChar << " is removed from " << const_str << " (serial) " << std::endl; - info.d_conc = other_str.eqNode( - isRev ? utils::mkNConcat(sk, firstChar) - : utils::mkNConcat(firstChar, sk)); - info.d_new_skolem[LENGTH_SPLIT].push_back(sk); - info.d_id = INFER_SSPLIT_CST; - info_valid = true; - } - } - } - } - }else{ - int lentTestSuccess = -1; - Node lentTestExp; - if( options::stringCheckEntailLen() ){ - //check entailment - for( unsigned e=0; e<2; e++ ){ - Node t = e == 0 ? nfiv[index] : nfjv[index]; - //do not infer constants are larger than variables - if( t.getKind()!=kind::CONST_STRING ){ - Node lt1 = e==0 ? length_term_i : length_term_j; - Node lt2 = e==0 ? length_term_j : length_term_i; - Node ent_lit = Rewriter::rewrite( NodeManager::currentNM()->mkNode( kind::GT, lt1, lt2 ) ); - std::pair et = d_state.entailmentCheck( - options::TheoryOfMode::THEORY_OF_TYPE_BASED, ent_lit); - if( et.first ){ - Trace("strings-entail") << "Strings entailment : " << ent_lit << " is entailed in the current context." << std::endl; - Trace("strings-entail") << " explanation was : " << et.second << std::endl; - lentTestSuccess = e; - lentTestExp = et.second; - break; - } - } - } - } - - NormalForm::getExplanationForPrefixEq( - nfi, nfj, index, index, info.d_ant); - //x!=e /\ y!=e - for(unsigned xory=0; xory<2; xory++) { - Node x = xory == 0 ? nfiv[index] : nfjv[index]; - Node xgtz = x.eqNode( d_emptyString ).negate(); - if( d_equalityEngine.areDisequal( x, d_emptyString, true ) ) { - info.d_ant.push_back( xgtz ); - } else { - info.d_antn.push_back( xgtz ); - } - } - Node sk = d_sk_cache.mkSkolemCached( - nfiv[index], - nfjv[index], - isRev ? SkolemCache::SK_ID_V_SPT_REV - : SkolemCache::SK_ID_V_SPT, - "v_spt"); - // must add length requirement - info.d_new_skolem[LENGTH_GEQ_ONE].push_back(sk); - Node eq1 = nfiv[index].eqNode( - isRev ? utils::mkNConcat(sk, nfjv[index]) - : utils::mkNConcat(nfjv[index], sk)); - Node eq2 = nfjv[index].eqNode( - isRev ? utils::mkNConcat(sk, nfiv[index]) - : utils::mkNConcat(nfiv[index], sk)); - - if( lentTestSuccess!=-1 ){ - info.d_antn.push_back( lentTestExp ); - info.d_conc = lentTestSuccess==0 ? eq1 : eq2; - info.d_id = INFER_SSPLIT_VAR_PROP; - info_valid = true; - }else{ - Node ldeq = NodeManager::currentNM()->mkNode( kind::EQUAL, length_term_i, length_term_j ).negate(); - if( d_equalityEngine.areDisequal( length_term_i, length_term_j, true ) ){ - info.d_ant.push_back( ldeq ); - }else{ - info.d_antn.push_back(ldeq); - } - //set info - info.d_conc = NodeManager::currentNM()->mkNode( kind::OR, eq1, eq2 ); - info.d_id = INFER_SSPLIT_VAR; - info_valid = true; - } - } - } - } - if( info_valid ){ - pinfer.push_back( info ); - Assert(!success); - } - } - } - } - }while( success ); -} - -bool TheoryStrings::detectLoop(NormalForm& nfi, - NormalForm& nfj, - int index, - int& loop_in_i, - int& loop_in_j, - unsigned rproc) -{ - int has_loop[2] = { -1, -1 }; - if( options::stringLB() != 2 ) { - for( unsigned r=0; r<2; r++ ) { - NormalForm& nf = r == 0 ? nfi : nfj; - NormalForm& nfo = r == 0 ? nfj : nfi; - std::vector& nfv = nf.d_nf; - std::vector& nfov = nfo.d_nf; - if (!nfov[index].isConst()) - { - for (unsigned lp = index + 1; lp < nfv.size() - rproc; lp++) - { - if (nfv[lp] == nfov[index]) - { - has_loop[r] = lp; - break; - } - } - } - } - } - if( has_loop[0]!=-1 || has_loop[1]!=-1 ) { - loop_in_i = has_loop[0]; - loop_in_j = has_loop[1]; - return true; - } else { - Trace("strings-solve-debug") << "No loops detected." << std::endl; - return false; - } -} - -//xs(zy)=t(yz)xr -TheoryStrings::ProcessLoopResult TheoryStrings::processLoop(NormalForm& nfi, - NormalForm& nfj, - int loop_index, - int index, - InferInfo& info) -{ - if (options::stringProcessLoopMode() == options::ProcessLoopMode::ABORT) - { - throw LogicException("Looping word equation encountered."); - } - else if (options::stringProcessLoopMode() == options::ProcessLoopMode::NONE) - { - d_out->setIncomplete(); - return ProcessLoopResult::SKIPPED; - } - - NodeManager* nm = NodeManager::currentNM(); - Node conc; - const std::vector& veci = nfi.d_nf; - const std::vector& vecoi = nfj.d_nf; - - Trace("strings-loop") << "Detected possible loop for " << veci[loop_index] - << std::endl; - Trace("strings-loop") << " ... (X)= " << vecoi[index] << std::endl; - Trace("strings-loop") << " ... T(Y.Z)= "; - std::vector vec_t(veci.begin() + index, veci.begin() + loop_index); - Node t_yz = utils::mkNConcat(vec_t); - Trace("strings-loop") << " (" << t_yz << ")" << std::endl; - Trace("strings-loop") << " ... S(Z.Y)= "; - std::vector vec_s(vecoi.begin() + index + 1, vecoi.end()); - Node s_zy = utils::mkNConcat(vec_s); - Trace("strings-loop") << s_zy << std::endl; - Trace("strings-loop") << " ... R= "; - std::vector vec_r(veci.begin() + loop_index + 1, veci.end()); - Node r = utils::mkNConcat(vec_r); - Trace("strings-loop") << r << std::endl; - - if (s_zy.isConst() && r.isConst() && r != d_emptyString) - { - int c; - bool flag = true; - if (s_zy.getConst().tailcmp(r.getConst(), c)) - { - if (c >= 0) - { - s_zy = nm->mkConst(s_zy.getConst().substr(0, c)); - r = d_emptyString; - vec_r.clear(); - Trace("strings-loop") << "Strings::Loop: Refactor S(Z.Y)= " << s_zy - << ", c=" << c << std::endl; - flag = false; - } - } - if (flag) - { - Trace("strings-loop") << "Strings::Loop: tails are different." - << std::endl; - d_im.sendInference(info.d_ant, conc, "Loop Conflict", true); - return ProcessLoopResult::CONFLICT; - } - } - - Node split_eq; - for (unsigned r = 0; r < 2; r++) - { - Node t = r == 0 ? veci[loop_index] : t_yz; - split_eq = t.eqNode(d_emptyString); - Node split_eqr = Rewriter::rewrite(split_eq); - // the equality could rewrite to false - if (!split_eqr.isConst()) - { - if (!d_state.areDisequal(t, d_emptyString)) - { - // try to make t equal to empty to avoid loop - info.d_conc = nm->mkNode(kind::OR, split_eq, split_eq.negate()); - info.d_id = INFER_LEN_SPLIT_EMP; - return ProcessLoopResult::INFERENCE; - } - else - { - info.d_ant.push_back(split_eq.negate()); - } - } - else - { - Assert(!split_eqr.getConst()); - } - } - - Node ant = d_im.mkExplain(info.d_ant); - info.d_ant.clear(); - info.d_antn.push_back(ant); - - Node str_in_re; - if (s_zy == t_yz && r == d_emptyString && s_zy.isConst() - && s_zy.getConst().isRepeated()) - { - Node rep_c = nm->mkConst(s_zy.getConst().substr(0, 1)); - Trace("strings-loop") << "Special case (X)=" << vecoi[index] << " " - << std::endl; - Trace("strings-loop") << "... (C)=" << rep_c << " " << std::endl; - // special case - str_in_re = nm->mkNode( - STRING_IN_REGEXP, - vecoi[index], - nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, rep_c))); - conc = str_in_re; - } - else if (t_yz.isConst()) - { - Trace("strings-loop") << "Strings::Loop: Const Normal Breaking." - << std::endl; - CVC4::String s = t_yz.getConst(); - unsigned size = s.size(); - std::vector vconc; - for (unsigned len = 1; len <= size; len++) - { - Node y = nm->mkConst(s.substr(0, len)); - Node z = nm->mkConst(s.substr(len, size - len)); - Node restr = s_zy; - Node cc; - if (r != d_emptyString) - { - std::vector v2(vec_r); - v2.insert(v2.begin(), y); - v2.insert(v2.begin(), z); - restr = utils::mkNConcat(z, y); - cc = Rewriter::rewrite(s_zy.eqNode(utils::mkNConcat(v2))); - } - else - { - cc = Rewriter::rewrite(s_zy.eqNode(utils::mkNConcat(z, y))); - } - if (cc == d_false) - { - continue; - } - Node conc2 = nm->mkNode( - STRING_IN_REGEXP, - vecoi[index], - nm->mkNode( - REGEXP_CONCAT, - nm->mkNode(STRING_TO_REGEXP, y), - nm->mkNode(REGEXP_STAR, nm->mkNode(STRING_TO_REGEXP, restr)))); - cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2); - vconc.push_back(cc); - } - conc = vconc.size() == 0 ? Node::null() : vconc.size() == 1 - ? vconc[0] - : nm->mkNode(kind::OR, vconc); - } - else - { - if (options::stringProcessLoopMode() - == options::ProcessLoopMode::SIMPLE_ABORT) - { - throw LogicException("Normal looping word equation encountered."); - } - else if (options::stringProcessLoopMode() - == options::ProcessLoopMode::SIMPLE) - { - d_out->setIncomplete(); - return ProcessLoopResult::SKIPPED; - } - - Trace("strings-loop") << "Strings::Loop: Normal Loop Breaking." - << std::endl; - // right - Node sk_w = d_sk_cache.mkSkolem("w_loop"); - Node sk_y = d_sk_cache.mkSkolem("y_loop"); - d_im.registerLength(sk_y, LENGTH_GEQ_ONE); - Node sk_z = d_sk_cache.mkSkolem("z_loop"); - // t1 * ... * tn = y * z - Node conc1 = t_yz.eqNode(utils::mkNConcat(sk_y, sk_z)); - // s1 * ... * sk = z * y * r - vec_r.insert(vec_r.begin(), sk_y); - vec_r.insert(vec_r.begin(), sk_z); - Node conc2 = s_zy.eqNode(utils::mkNConcat(vec_r)); - Node conc3 = vecoi[index].eqNode(utils::mkNConcat(sk_y, sk_w)); - Node restr = r == d_emptyString ? s_zy : utils::mkNConcat(sk_z, sk_y); - str_in_re = - nm->mkNode(kind::STRING_IN_REGEXP, - sk_w, - nm->mkNode(kind::REGEXP_STAR, - nm->mkNode(kind::STRING_TO_REGEXP, restr))); - - std::vector vec_conc; - vec_conc.push_back(conc1); - vec_conc.push_back(conc2); - vec_conc.push_back(conc3); - vec_conc.push_back(str_in_re); - // vec_conc.push_back(sk_y.eqNode(d_emptyString).negate());//by mkskolems - conc = nm->mkNode(kind::AND, vec_conc); - } // normal case - - // we will be done - info.d_conc = conc; - info.d_id = INFER_FLOOP; - info.d_nf_pair[0] = nfi.d_base; - info.d_nf_pair[1] = nfj.d_base; - return ProcessLoopResult::INFERENCE; -} - -//return true for lemma, false if we succeed -void TheoryStrings::processDeq( Node ni, Node nj ) { - NodeManager* nm = NodeManager::currentNM(); - NormalForm& nfni = getNormalForm(ni); - NormalForm& nfnj = getNormalForm(nj); - if (nfni.d_nf.size() > 1 || nfnj.d_nf.size() > 1) - { - std::vector< Node > nfi; - nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); - std::vector< Node > nfj; - nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); - - int revRet = processReverseDeq( nfi, nfj, ni, nj ); - if( revRet!=0 ){ - return; - } - - nfi.clear(); - nfi.insert(nfi.end(), nfni.d_nf.begin(), nfni.d_nf.end()); - nfj.clear(); - nfj.insert(nfj.end(), nfnj.d_nf.begin(), nfnj.d_nf.end()); - - unsigned index = 0; - while( index lexp; - Node li = d_state.getLength(i, lexp); - Node lj = d_state.getLength(j, lexp); - if (d_state.areDisequal(li, lj)) - { - if( i.getKind()==kind::CONST_STRING || j.getKind()==kind::CONST_STRING ){ - //check if empty - Node const_k = i.getKind() == kind::CONST_STRING ? i : j; - Node nconst_k = i.getKind() == kind::CONST_STRING ? j : i; - Node lnck = i.getKind() == kind::CONST_STRING ? lj : li; - if( !d_equalityEngine.areDisequal( nconst_k, d_emptyString, true ) ){ - Node eq = nconst_k.eqNode( d_emptyString ); - Node conc = NodeManager::currentNM()->mkNode( kind::OR, eq, eq.negate() ); - d_im.sendInference(d_empty_vec, conc, "D-DISL-Emp-Split"); - return; - }else{ - //split on first character - CVC4::String str = const_k.getConst(); - Node firstChar = str.size() == 1 ? const_k : NodeManager::currentNM()->mkConst( str.prefix( 1 ) ); - if (d_state.areEqual(lnck, d_one)) - { - if (d_state.areDisequal(firstChar, nconst_k)) - { - return; - } - else if (!d_state.areEqual(firstChar, nconst_k)) - { - //splitting on demand : try to make them disequal - if (d_im.sendSplit( - firstChar, nconst_k, "S-Split(DEQL-Const)", false)) - { - return; - } - } - } - else - { - Node sk = d_sk_cache.mkSkolemCached( - nconst_k, SkolemCache::SK_ID_DC_SPT, "dc_spt"); - d_im.registerLength(sk, LENGTH_ONE); - Node skr = - d_sk_cache.mkSkolemCached(nconst_k, - SkolemCache::SK_ID_DC_SPT_REM, - "dc_spt_rem"); - Node eq1 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, sk, skr ) ); - eq1 = Rewriter::rewrite( eq1 ); - Node eq2 = nconst_k.eqNode( NodeManager::currentNM()->mkNode( kind::STRING_CONCAT, firstChar, skr ) ); - std::vector< Node > antec; - antec.insert( - antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert( - antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - antec.push_back( nconst_k.eqNode( d_emptyString ).negate() ); - d_im.sendInference( - antec, - nm->mkNode( - OR, - nm->mkNode(AND, eq1, sk.eqNode(firstChar).negate()), - eq2), - "D-DISL-CSplit"); - d_im.sendPhaseRequirement(eq1, true); - return; - } - } - }else{ - Trace("strings-solve") << "Non-Simple Case 1 : add lemma " << std::endl; - //must add lemma - std::vector< Node > antec; - std::vector< Node > antec_new_lits; - antec.insert(antec.end(), nfni.d_exp.begin(), nfni.d_exp.end()); - antec.insert(antec.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - //check disequal - if (d_state.areDisequal(ni, nj)) - { - antec.push_back( ni.eqNode( nj ).negate() ); - } - else - { - antec_new_lits.push_back( ni.eqNode( nj ).negate() ); - } - antec_new_lits.push_back( li.eqNode( lj ).negate() ); - std::vector< Node > conc; - Node sk1 = d_sk_cache.mkSkolemCached( - i, j, SkolemCache::SK_ID_DEQ_X, "x_dsplit"); - Node sk2 = d_sk_cache.mkSkolemCached( - i, j, SkolemCache::SK_ID_DEQ_Y, "y_dsplit"); - Node sk3 = d_sk_cache.mkSkolemCached( - i, j, SkolemCache::SK_ID_DEQ_Z, "z_dsplit"); - d_im.registerLength(sk3, LENGTH_GEQ_ONE); - //Node nemp = sk3.eqNode(d_emptyString).negate(); - //conc.push_back(nemp); - Node lsk1 = utils::mkNLength(sk1); - conc.push_back( lsk1.eqNode( li ) ); - Node lsk2 = utils::mkNLength(sk2); - conc.push_back( lsk2.eqNode( lj ) ); - conc.push_back(nm->mkNode(OR, - j.eqNode(utils::mkNConcat(sk1, sk3)), - i.eqNode(utils::mkNConcat(sk2, sk3)))); - d_im.sendInference( - antec, antec_new_lits, nm->mkNode(AND, conc), "D-DISL-Split"); - ++(d_statistics.d_deq_splits); - return; - } - } - else if (d_state.areEqual(li, lj)) - { - Assert(!d_state.areDisequal(i, j)); - //splitting on demand : try to make them disequal - if (d_im.sendSplit(i, j, "S-Split(DEQL)", false)) - { - return; - } - } - else - { - //splitting on demand : try to make lengths equal - if (d_im.sendSplit(li, lj, "D-Split")) - { - return; - } - } - } - index++; - } - } - Assert(false); - } -} - -int TheoryStrings::processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ) { - //reverse normal form of i, j - std::reverse( nfi.begin(), nfi.end() ); - std::reverse( nfj.begin(), nfj.end() ); - - unsigned index = 0; - int ret = processSimpleDeq( nfi, nfj, ni, nj, index, true ); - - //reverse normal form of i, j - std::reverse( nfi.begin(), nfi.end() ); - std::reverse( nfj.begin(), nfj.end() ); - - return ret; -} - -int TheoryStrings::processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ){ - // See if one side is constant, if so, the disequality ni != nj is satisfied - // since ni does not contain nj or vice versa. - // This is only valid when isRev is false, since when isRev=true, the contents - // of normal form vectors nfi and nfj are reversed. - if (!isRev) - { - for (unsigned i = 0; i < 2; i++) - { - Node c = d_bsolver.getConstantEqc(i == 0 ? ni : nj); - if (!c.isNull()) - { - int findex, lindex; - if (!TheoryStringsRewriter::canConstantContainList( - c, i == 0 ? nfj : nfi, findex, lindex)) - { - Trace("strings-solve-debug") - << "Disequality: constant cannot contain list" << std::endl; - return 1; - } - } - } - } - NormalForm& nfni = getNormalForm(ni); - NormalForm& nfnj = getNormalForm(nj); - while( index=nfi.size() || index>=nfj.size() ){ - Trace("strings-solve-debug") << "Disequality normalize empty" << std::endl; - std::vector< Node > ant; - //we have a conflict : because the lengths are equal, the remainder needs to be empty, which will lead to a conflict - Node lni = d_state.getLengthExp(ni, ant, nfni.d_base); - Node lnj = d_state.getLengthExp(nj, ant, nfnj.d_base); - ant.push_back( lni.eqNode( lnj ) ); - ant.insert(ant.end(), nfni.d_exp.begin(), nfni.d_exp.end()); - ant.insert(ant.end(), nfnj.d_exp.begin(), nfnj.d_exp.end()); - std::vector< Node > cc; - std::vector< Node >& nfk = index>=nfi.size() ? nfj : nfi; - for( unsigned index_k=index; index_kmkNode( kind::AND, cc ); - conc = Rewriter::rewrite( conc ); - d_im.sendInference(ant, conc, "Disequality Normalize Empty", true); - return -1; - }else{ - Node i = nfi[index]; - Node j = nfj[index]; - Trace("strings-solve-debug") << "...Processing(QED) " << i << " " << j << std::endl; - if (!d_state.areEqual(i, j)) - { - if( i.getKind()==kind::CONST_STRING && j.getKind()==kind::CONST_STRING ) { - unsigned int len_short = i.getConst().size() < j.getConst().size() ? i.getConst().size() : j.getConst().size(); - bool isSameFix = isRev ? i.getConst().rstrncmp(j.getConst(), len_short): i.getConst().strncmp(j.getConst(), len_short); - if( isSameFix ) { - //same prefix/suffix - //k is the index of the string that is shorter - Node nk = i.getConst().size() < j.getConst().size() ? i : j; - Node nl = i.getConst().size() < j.getConst().size() ? j : i; - Node remainderStr; - if( isRev ){ - int new_len = nl.getConst().size() - len_short; - remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr(0, new_len) ); - Trace("strings-solve-debug-test") << "Rev. Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; - } else { - remainderStr = NodeManager::currentNM()->mkConst( nl.getConst().substr( len_short ) ); - Trace("strings-solve-debug-test") << "Break normal form of " << nl << " into " << nk << ", " << remainderStr << std::endl; - } - if( i.getConst().size() < j.getConst().size() ) { - nfj.insert( nfj.begin() + index + 1, remainderStr ); - nfj[index] = nfi[index]; - } else { - nfi.insert( nfi.begin() + index + 1, remainderStr ); - nfi[index] = nfj[index]; - } - }else{ - return 1; - } - }else{ - std::vector< Node > lexp; - Node li = d_state.getLength(i, lexp); - Node lj = d_state.getLength(j, lexp); - if (d_state.areEqual(li, lj) && d_state.areDisequal(i, j)) - { - Trace("strings-solve") << "Simple Case 2 : found equal length disequal sub strings " << i << " " << j << std::endl; - //we are done: D-Remove - return 1; - } - else - { - return 0; - } - } - } - index++; - } - } - return 0; -} - -void TheoryStrings::addNormalFormPair( Node n1, Node n2 ){ - if( !isNormalFormPair( n1, n2 ) ){ - int index = 0; - NodeIntMap::const_iterator it = d_nf_pairs.find( n1 ); - if( it!=d_nf_pairs.end() ){ - index = (*it).second; - } - d_nf_pairs[n1] = index + 1; - if( index<(int)d_nf_pairs_data[n1].size() ){ - d_nf_pairs_data[n1][index] = n2; - }else{ - d_nf_pairs_data[n1].push_back( n2 ); - } - Assert(isNormalFormPair(n1, n2)); - } else { - Trace("strings-nf-debug") << "Already a normal form pair " << n1 << " " << n2 << std::endl; - } -} - -bool TheoryStrings::isNormalFormPair( Node n1, Node n2 ) { - //TODO: modulo equality? - return isNormalFormPair2( n1, n2 ) || isNormalFormPair2( n2, n1 ); -} - -bool TheoryStrings::isNormalFormPair2( Node n1, Node n2 ) { - //Trace("strings-debug") << "is normal form pair. " << n1 << " " << n2 << std::endl; - NodeIntMap::const_iterator it = d_nf_pairs.find( n1 ); - if( it!=d_nf_pairs.end() ){ - Assert(d_nf_pairs_data.find(n1) != d_nf_pairs_data.end()); - for( int i=0; i<(*it).second; i++ ){ - Assert(i < (int)d_nf_pairs_data[n1].size()); - if( d_nf_pairs_data[n1][i]==n2 ){ - return true; - } - } - } - return false; -} - -void TheoryStrings::registerTerm( Node n, int effort ) { - TypeNode tn = n.getType(); - bool do_register = true; - if (!tn.isString()) - { - if (options::stringEagerLen()) - { - do_register = effort == 0; - } - else - { - do_register = effort > 0 || n.getKind() != STRING_CONCAT; - } - } - if (!do_register) - { - return; - } - if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end()) - { - return; - } - d_registered_terms_cache.insert(n); - NodeManager* nm = NodeManager::currentNM(); - Debug("strings-register") << "TheoryStrings::registerTerm() " << n - << ", effort = " << effort << std::endl; - if (tn.isString()) - { - // register length information: - // for variables, split on empty vs positive length - // for concat/const/replace, introduce proxy var and state length relation - Node lsum; - if (n.getKind() != STRING_CONCAT && n.getKind() != CONST_STRING) + if (d_registered_terms_cache.find(n) != d_registered_terms_cache.end()) + { + return; + } + d_registered_terms_cache.insert(n); + NodeManager* nm = NodeManager::currentNM(); + Debug("strings-register") << "TheoryStrings::registerTerm() " << n + << ", effort = " << effort << std::endl; + if (tn.isString()) + { + // register length information: + // for variables, split on empty vs positive length + // for concat/const/replace, introduce proxy var and state length relation + Node lsum; + if (n.getKind() != STRING_CONCAT && n.getKind() != CONST_STRING) { Node lsumb = nm->mkNode(STRING_LENGTH, n); lsum = Rewriter::rewrite(lsumb); @@ -3783,147 +1811,12 @@ void TheoryStrings::registerTerm( Node n, int effort ) { } } -void TheoryStrings::checkNormalFormsDeq() -{ - std::vector< std::vector< Node > > cols; - std::vector< Node > lts; - std::map< Node, std::map< Node, bool > > processed; - - const NodeList& deqs = d_state.getDisequalityList(); - - //for each pair of disequal strings, must determine whether their lengths are equal or disequal - for (const Node& eq : deqs) - { - Node n[2]; - for( unsigned i=0; i<2; i++ ){ - n[i] = d_equalityEngine.getRepresentative( eq[i] ); - } - if( processed[n[0]].find( n[1] )==processed[n[0]].end() ){ - processed[n[0]][n[1]] = true; - Node lt[2]; - for( unsigned i=0; i<2; i++ ){ - EqcInfo* ei = d_state.getOrMakeEqcInfo(n[i], false); - lt[i] = ei ? ei->d_lengthTerm : Node::null(); - if( lt[i].isNull() ){ - lt[i] = eq[i]; - } - lt[i] = NodeManager::currentNM()->mkNode( kind::STRING_LENGTH, lt[i] ); - } - if (!d_state.areEqual(lt[0], lt[1]) && !d_state.areDisequal(lt[0], lt[1])) - { - d_im.sendSplit(lt[0], lt[1], "DEQ-LENGTH-SP"); - } - } - } - - if (!d_im.hasProcessed()) - { - d_state.separateByLength(d_strings_eqc, cols, lts); - for( unsigned i=0; i 1 && !d_im.hasPendingLemma()) - { - if (Trace.isOn("strings-solve")) - { - Trace("strings-solve") << "- Verify disequalities are processed for " - << cols[i][0] << ", normal form : "; - utils::printConcatTrace(getNormalForm(cols[i][0]).d_nf, - "strings-solve"); - Trace("strings-solve") - << "... #eql = " << cols[i].size() << std::endl; - } - //must ensure that normal forms are disequal - for( unsigned j=0; jd_lengthTerm : Node::null(); - if (lt.isNull()) - { - Trace("strings-process-debug") - << "No length term for eqc " << d_strings_eqc[i] << " " - << d_eqc_to_len_term[d_strings_eqc[i]] << std::endl; - continue; - } - Node llt = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, lt); - // now, check if length normalization has occurred - if (ei->d_normalizedLength.get().isNull()) - { - Node nf = utils::mkNConcat(nfi.d_nf); - if (Trace.isOn("strings-process-debug")) - { - Trace("strings-process-debug") - << " normal form is " << nf << " from base " << nfi.d_base - << std::endl; - Trace("strings-process-debug") << " normal form exp is: " << std::endl; - for (const Node& exp : nfi.d_exp) - { - Trace("strings-process-debug") << " " << exp << std::endl; - } - } - - // if not, add the lemma - std::vector ant; - ant.insert(ant.end(), nfi.d_exp.begin(), nfi.d_exp.end()); - ant.push_back(nfi.d_base.eqNode(lt)); - Node lc = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, nf); - Node lcr = Rewriter::rewrite(lc); - Trace("strings-process-debug") - << "Rewrote length " << lc << " to " << lcr << std::endl; - if (!d_state.areEqual(llt, lcr)) - { - Node eq = llt.eqNode(lcr); - ei->d_normalizedLength.set(eq); - d_im.sendInference(ant, eq, "LEN-NORM", true); - } - } - } -} void TheoryStrings::checkRegisterTermsNormalForms() { - for (const Node& eqc : d_strings_eqc) + const std::vector& seqc = d_bsolver.getStringEqc(); + for (const Node& eqc : seqc) { - NormalForm& nfi = getNormalForm(eqc); + NormalForm& nfi = d_csolver.getNormalForm(eqc); // check if there is a length term for this equivalence class EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false); Node lt = ei ? ei->d_lengthTerm : Node::null(); @@ -3942,9 +1835,10 @@ void TheoryStrings::checkCardinality() { //AJR: this will create a partition of eqc, where each collection has length that are pairwise propagated to be equal. // we do not require disequalities between the lengths of each collection, since we split on disequalities between lengths of string terms that are disequal (DEQ-LENGTH-SP). // TODO: revisit this? + const std::vector& seqc = d_bsolver.getStringEqc(); std::vector< std::vector< Node > > cols; std::vector< Node > lts; - d_state.separateByLength(d_strings_eqc, cols, lts); + d_state.separateByLength(seqc, cols, lts); Trace("strings-card") << "Check cardinality...." << std::endl; for( unsigned i = 0; i d_normal_form; - /** get normal form */ - NormalForm& getNormalForm(Node n); - //map of pairs of terms that have the same normal form - NodeIntMap d_nf_pairs; - std::map< Node, std::vector< Node > > d_nf_pairs_data; - void addNormalFormPair( Node n1, Node n2 ); - bool isNormalFormPair( Node n1, Node n2 ); - bool isNormalFormPair2( Node n1, Node n2 ); // preReg cache NodeSet d_pregistered_terms_cache; NodeSet d_registered_terms_cache; @@ -261,13 +252,7 @@ private: Node getCurrentSubstitutionFor(int effort, Node n, std::vector& exp); std::map< Node, Node > d_eqc_to_len_term; - std::vector< Node > d_strings_eqc; - //list of non-congruent concat terms in each eqc - std::map< Node, std::vector< Node > > d_eqc; - std::map< Node, std::vector< Node > > d_flat_form; - std::map< Node, std::vector< int > > d_flat_form_index; - void debugPrintFlatForms( const char * tc ); ///////////////////////////////////////////////////////////////////////////// // MODEL GENERATION ///////////////////////////////////////////////////////////////////////////// @@ -393,120 +378,6 @@ private: void checkExtfInference(Node n, Node nr, ExtfInfoTmp& in, int effort); //--------------------------end for checkExtfEval - //--------------------------for checkFlatForm - /** - * This checks whether there are flat form inferences between eqc[start] and - * eqc[j] for some j>start. If the flag isRev is true, we check for flat form - * interferences in the reverse direction of the flat forms (note: - * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev` - * is true). For more details, see checkFlatForms below. - */ - void checkFlatForm(std::vector& eqc, size_t start, bool isRev); - //--------------------------end for checkFlatForm - - //--------------------------for checkCycles - Node checkCycles( Node eqc, std::vector< Node >& curr, std::vector< Node >& exp ); - //--------------------------end for checkCycles - - //--------------------------for checkNormalFormsEq - /** normalize equivalence class - * - * This method attempts to build a "normal form" for the equivalence class - * of string term n (for more details on normal forms, see normal_form.h - * or see Liang et al CAV 2014). In particular, this method checks whether the - * current normal form for each term in this equivalence class is identical. - * If it is not, then we add an inference via sendInference and abort the - * call. - */ - void normalizeEquivalenceClass( Node n ); - /** - * For each term in the equivalence class of eqc, this adds data regarding its - * normal form to normal_forms. The map term_to_nf_index maps terms to the - * index in normal_forms where their normal form data is located. - */ - void getNormalForms(Node eqc, - std::vector& normal_forms, - std::map& term_to_nf_index); - /** process normalize equivalence class - * - * This is called when an equivalence class contains a set of terms that - * have normal forms given by the argument normal_forms. It either - * verifies that all normal forms in this vector are identical, or otherwise - * adds a conflict, lemma, or inference via the sendInference method. - * - * To prioritize one inference versus another, it builds a set of possible - * inferences, at most two for each pair of distinct normal forms, - * corresponding to processing the normal form pair in the (forward, reverse) - * directions. Once all possible inferences are recorded, it executes the - * one with highest priority based on the enumeration type Inference. - */ - void processNEqc(std::vector& normal_forms); - /** process simple normal equality - * - * This method is called when two equal terms have normal forms nfi and nfj. - * It adds (typically at most one) possible inference to the vector pinfer. - * This inference is in the form of an InferInfo object, which stores the - * necessary information regarding how to process the inference. - * - * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that - * we are currently checking. This method will increment this index until - * it finds an index where these vectors differ, or until it reaches the - * end of these vectors. - * isRev: Whether we are processing the normal forms in reverse direction. - * Notice in this case the normal form vectors have been reversed, hence, - * many operations are identical to the forward case, e.g. index is - * incremented not decremented, while others require special care, e.g. - * constant strings "ABC" in the normal form vectors are not reversed to - * "CBA" and hence all operations should assume a flipped semantics for - * constants when isRev is true, - * rproc: the number of string components on the suffix of the normal form of - * nfi and nfj that were already processed. This is used when using - * fowards/backwards traversals of normal forms to ensure that duplicate - * inferences are not processed. - * pinfer: the set of possible inferences we add to. - */ - void processSimpleNEq(NormalForm& nfi, - NormalForm& nfj, - unsigned& index, - bool isRev, - unsigned rproc, - std::vector& pinfer); - //--------------------------end for checkNormalFormsEq - - //--------------------------for checkNormalFormsEq with loops - bool detectLoop(NormalForm& nfi, - NormalForm& nfj, - int index, - int& loop_in_i, - int& loop_in_j, - unsigned rproc); - - /** - * Result of processLoop() below. - */ - enum class ProcessLoopResult - { - /** Loop processing made an inference */ - INFERENCE, - /** Loop processing detected a conflict */ - CONFLICT, - /** Loop not processed or no loop detected */ - SKIPPED, - }; - - ProcessLoopResult processLoop(NormalForm& nfi, - NormalForm& nfj, - int loop_index, - int index, - InferInfo& info); - //--------------------------end for checkNormalFormsEq with loops - - //--------------------------for checkNormalFormsDeq - void processDeq( Node n1, Node n2 ); - int processReverseDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj ); - int processSimpleDeq( std::vector< Node >& nfi, std::vector< Node >& nfj, Node ni, Node nj, unsigned& index, bool isRev ); - //--------------------------end for checkNormalFormsDeq - private: void addCarePairs(TNodeTrie* t1, TNodeTrie* t2, @@ -553,12 +424,6 @@ private: * of atom, including calls to registerTerm. */ void assertPendingFact(Node atom, bool polarity, Node exp); - /** - * This processes the infer info ii as an inference. In more detail, it calls - * the inference manager to process the inference, it introduces Skolems, and - * updates the set of normal form pairs. - */ - void doInferInfo(const InferInfo& ii); /** Register term * @@ -587,6 +452,11 @@ private: * inferring constants for equivalence classes. */ BaseSolver d_bsolver; + /** + * The core solver, responsible for reasoning about string concatenation + * with length constraints. + */ + CoreSolver d_csolver; /** regular expression solver module */ RegExpSolver d_regexp_solver; /** regular expression elimination module */ @@ -671,111 +541,12 @@ private: * effort=3, we apply context-dependent simplification based on model values. */ void checkExtfEval(int effort); - /** check cycles - * - * This inference schema ensures that a containment ordering < over the - * string equivalence classes is acyclic. We define this ordering < such that - * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2 - * if there exists a ti whose flat form (see below) is [w1...sj...wk] for - * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat - * form components that do not constitute this chain, e.g. (w1...wk) \ sj - * in the flat form above, must be empty. - * - * For more details, see the inference S-Cycle in Liang et al CAV 2014. - */ - void checkCycles(); - /** check flat forms - * - * This applies an inference schema based on "flat forms". The flat form of a - * string term t is a vector of representative terms [r1, ..., rn] such that - * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to - * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of - * the equivalence class containing t1. For example, if t is y ++ z ++ z, - * E is { y = "", w = z }, and w is the representative of the equivalence - * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms - * in the same equivalence classes with flat forms [r1...rn] and [s1...sm]. - * We may infer various facts based on this pair of terms. For example: - * ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si), - * rn = sn, if n=m and rj == sj for each j < n, - * ri = empty, if n=m+1 and ri == rj for each i=1,...,m. - * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences - * respectively. - * - * Notice that this inference scheme is an optimization and not needed for - * model-soundness. The motivation for this schema is that it is simpler than - * checkNormalFormsEq, which can be seen as a recursive version of this - * schema (see difference of "normal form" vs "flat form" below), and - * checkNormalFormsEq is complete, in the sense that if it passes with no - * inferences, we are ensured that all string equalities in the current - * context are satisfied. - * - * Must call checkCycles before this function in a strategy. - */ - void checkFlatForms(); /** check register terms pre-normal forms * * This calls registerTerm(n,2) on all non-congruent strings in the * equality engine of this class. */ void checkRegisterTermsPreNormalForm(); - /** check normal forms equalities - * - * This applies an inference schema based on "normal forms". The normal form - * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn}, - * where t1...tn are concatenation terms is a vector of representative terms - * [r1, ..., rm] such that: - * (1) if n=0, then m=1 and r1 is the representative of e, - * (2) if n>0, say - * t1 = t^1_1 ++ ... ++ t^1_m_1 - * ... - * tn = t^1_n ++ ... ++ t^_m_n - * for *each* i=1, ..., n, the result of concenating the normal forms of - * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class - * can be assigned a normal form, then all equalities between ti and tj are - * satisfied by all models that correspond to extensions of the current - * assignment. For further detail on this terminology, see Liang et al - * CAV 2014. - * - * Notice that all constant words are implicitly considered concatentation - * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c". - * - * At a high level, we build normal forms for equivalence classes bottom-up, - * starting with equivalence classes that are minimal with respect to the - * containment ordering < computed during checkCycles. While computing a - * normal form for an equivalence class, we may infer equalities between - * components of strings that must be equal (e.g. x=y when x++z == y++w when - * len(x)==len(y) is asserted), derive conflicts if two strings have disequal - * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split - * string terms into smaller components using fresh skolem variables (see - * Inference values with names "SPLIT"). We also may introduce regular - * expression constraints in this method for looping word equations (see - * the Inference INFER_FLOOP). - * - * If this inference schema returns no facts, lemmas, or conflicts, then - * we have successfully assigned normal forms for all equivalence classes, as - * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or - * conflict based on inferences in the Inference enumeration above. - */ - void checkNormalFormsEq(); - /** check normal forms disequalities - * - * This inference schema can be seen as the converse of the above schema. In - * particular, it ensures that each pair of distinct equivalence classes - * e1 and e2 have distinct normal forms. - * - * This method considers all pairs of distinct equivalence classes (e1,e2) - * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It - * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn] - * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are - * disequal and have the same length, then x1 and x2 have distinct normal - * forms. Otherwise, we may add splitting lemmas on the length of ri and si, - * or split on an equality between ri and si. - * - * If this inference schema returns no facts, lemmas, or conflicts, then all - * disequalities between string terms are satisfied by all models that are - * extensions of the current assignment. - */ - void checkNormalFormsDeq(); /** check codes * * This inference schema ensures that constraints between str.code terms -- 2.30.2