From b596df5c4d0745cc8d290d1a7715bbbece015d3c Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Sun, 24 Mar 2019 14:26:25 -0500 Subject: [PATCH] Split regular expression solver (#2891) --- src/CMakeLists.txt | 2 + src/theory/strings/regexp_solver.cpp | 547 +++++++++++++++++++++++ src/theory/strings/regexp_solver.h | 106 +++++ src/theory/strings/theory_strings.cpp | 596 +++----------------------- src/theory/strings/theory_strings.h | 162 +++++-- 5 files changed, 830 insertions(+), 583 deletions(-) create mode 100644 src/theory/strings/regexp_solver.cpp create mode 100644 src/theory/strings/regexp_solver.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 244845fda..460c6c14d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -629,6 +629,8 @@ libcvc4_add_sources( theory/strings/regexp_elim.h theory/strings/regexp_operation.cpp theory/strings/regexp_operation.h + theory/strings/regexp_solver.cpp + theory/strings/regexp_solver.h theory/strings/skolem_cache.cpp theory/strings/skolem_cache.h theory/strings/theory_strings.cpp diff --git a/src/theory/strings/regexp_solver.cpp b/src/theory/strings/regexp_solver.cpp new file mode 100644 index 000000000..bb3a6f516 --- /dev/null +++ b/src/theory/strings/regexp_solver.cpp @@ -0,0 +1,547 @@ +/********************* */ +/*! \file regexp_solver.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the regular expression solver for the theory of + ** strings. + ** + **/ + +#include "theory/strings/regexp_solver.h" + +#include + +#include "options/strings_options.h" +#include "theory/strings/theory_strings.h" +#include "theory/strings/theory_strings_rewriter.h" +#include "theory/theory_model.h" + +using namespace std; +using namespace CVC4::context; +using namespace CVC4::kind; + +namespace CVC4 { +namespace theory { +namespace strings { + +RegExpSolver::RegExpSolver(TheoryStrings& p, + context::Context* c, + context::UserContext* u) + : d_parent(p), + d_regexp_memberships(c), + d_regexp_ucached(u), + d_regexp_ccached(c), + d_pos_memberships(c), + d_neg_memberships(c), + d_inter_cache(c), + d_inter_index(c), + d_processed_memberships(c) +{ + d_emptyString = NodeManager::currentNM()->mkConst(::CVC4::String("")); + std::vector nvec; + d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY, nvec); + d_true = NodeManager::currentNM()->mkConst(true); + d_false = NodeManager::currentNM()->mkConst(false); +} + +unsigned RegExpSolver::getNumMemberships(Node n, bool isPos) +{ + if (isPos) + { + NodeUIntMap::const_iterator it = d_pos_memberships.find(n); + if (it != d_pos_memberships.end()) + { + return (*it).second; + } + } + else + { + NodeUIntMap::const_iterator it = d_neg_memberships.find(n); + if (it != d_neg_memberships.end()) + { + return (*it).second; + } + } + return 0; +} + +Node RegExpSolver::getMembership(Node n, bool isPos, unsigned i) +{ + return isPos ? d_pos_memberships_data[n][i] : d_neg_memberships_data[n][i]; +} + +Node RegExpSolver::mkAnd(Node c1, Node c2) +{ + return NodeManager::currentNM()->mkNode(AND, c1, c2); +} + +void RegExpSolver::check() +{ + bool addedLemma = false; + bool changed = false; + std::vector processed; + std::vector cprocessed; + + Trace("regexp-debug") << "Checking Memberships ... " << std::endl; + for (NodeUIntMap::const_iterator itr_xr = d_pos_memberships.begin(); + itr_xr != d_pos_memberships.end(); + ++itr_xr) + { + bool spflag = false; + Node x = (*itr_xr).first; + Trace("regexp-debug") << "Checking Memberships for " << x << std::endl; + if (d_inter_index.find(x) == d_inter_index.end()) + { + d_inter_index[x] = 0; + } + int cur_inter_idx = d_inter_index[x]; + unsigned n_pmem = (*itr_xr).second; + Assert(getNumMemberships(x, true) == n_pmem); + if (cur_inter_idx != (int)n_pmem) + { + if (n_pmem == 1) + { + d_inter_cache[x] = getMembership(x, true, 0); + d_inter_index[x] = 1; + Trace("regexp-debug") << "... only one choice " << std::endl; + } + else if (n_pmem > 1) + { + Node r; + if (d_inter_cache.find(x) != d_inter_cache.end()) + { + r = d_inter_cache[x]; + } + if (r.isNull()) + { + r = getMembership(x, true, 0); + cur_inter_idx = 1; + } + + unsigned k_start = cur_inter_idx; + Trace("regexp-debug") << "... staring from : " << cur_inter_idx + << ", we have " << n_pmem << std::endl; + for (unsigned k = k_start; k < n_pmem; k++) + { + Node r2 = getMembership(x, true, k); + r = d_regexp_opr.intersect(r, r2, spflag); + if (spflag) + { + break; + } + else if (r == d_emptyRegexp) + { + std::vector vec_nodes; + for (unsigned kk = 0; kk <= k; kk++) + { + Node rr = getMembership(x, true, kk); + Node n = + NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, x, rr); + vec_nodes.push_back(n); + } + Node conc; + d_parent.sendInference(vec_nodes, conc, "INTERSECT CONFLICT", true); + addedLemma = true; + break; + } + if (d_parent.inConflict()) + { + break; + } + } + // updates + if (!d_parent.inConflict() && !spflag) + { + d_inter_cache[x] = r; + d_inter_index[x] = (int)n_pmem; + } + } + } + } + + Trace("regexp-debug") + << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma + << std::endl; + if (!addedLemma) + { + NodeManager* nm = NodeManager::currentNM(); + for (unsigned i = 0; i < d_regexp_memberships.size(); i++) + { + // check regular expression membership + Node assertion = d_regexp_memberships[i]; + Trace("regexp-debug") + << "Check : " << assertion << " " + << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) << " " + << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) + << std::endl; + if (d_regexp_ucached.find(assertion) == d_regexp_ucached.end() + && d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) + { + Trace("strings-regexp") + << "We have regular expression assertion : " << assertion + << std::endl; + Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; + bool polarity = assertion.getKind() != NOT; + bool flag = true; + Node x = atom[0]; + Node r = atom[1]; + std::vector rnfexp; + + if (!x.isConst()) + { + x = d_parent.getNormalString(x, rnfexp); + changed = true; + } + if (!d_regexp_opr.checkConstRegExp(r)) + { + r = getNormalSymRegExp(r, rnfexp); + changed = true; + } + Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " + << x << " IN " << r << std::endl; + if (changed) + { + Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, x, r)); + if (!polarity) + { + tmp = tmp.negate(); + } + if (tmp == d_true) + { + d_regexp_ccached.insert(assertion); + continue; + } + else if (tmp == d_false) + { + std::vector exp_n; + exp_n.push_back(assertion); + Node conc = Node::null(); + d_parent.sendInference(rnfexp, exp_n, conc, "REGEXP NF Conflict"); + addedLemma = true; + break; + } + } + + if (polarity) + { + flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); + } + else + { + if (!options::stringExp()) + { + throw LogicException( + "Strings Incomplete (due to Negative Membership) by default, " + "try --strings-exp option."); + } + } + if (flag) + { + // check if the term is atomic + Node xr = d_parent.getRepresentative(x); + Trace("strings-regexp") + << "Unroll/simplify membership of atomic term " << xr + << std::endl; + // if so, do simple unrolling + std::vector nvec; + if (nvec.empty()) + { + d_regexp_opr.simplify(atom, nvec, polarity); + } + std::vector exp_n; + exp_n.push_back(assertion); + Node conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); + conc = Rewriter::rewrite(conc); + d_parent.sendInference(rnfexp, exp_n, conc, "REGEXP_Unfold"); + addedLemma = true; + if (changed) + { + cprocessed.push_back(assertion); + } + else + { + processed.push_back(assertion); + } + } + } + if (d_parent.inConflict()) + { + break; + } + } + } + if (addedLemma) + { + if (!d_parent.inConflict()) + { + for (unsigned i = 0; i < processed.size(); i++) + { + Trace("strings-regexp") + << "...add " << processed[i] << " to u-cache." << std::endl; + d_regexp_ucached.insert(processed[i]); + } + for (unsigned i = 0; i < cprocessed.size(); i++) + { + Trace("strings-regexp") + << "...add " << cprocessed[i] << " to c-cache." << std::endl; + d_regexp_ccached.insert(cprocessed[i]); + } + } + } +} + +bool RegExpSolver::checkPDerivative( + Node x, Node r, Node atom, bool& addedLemma, std::vector& nf_exp) +{ + if (d_parent.areEqual(x, d_emptyString)) + { + Node exp; + switch (d_regexp_opr.delta(r, exp)) + { + case 0: + { + std::vector exp_n; + exp_n.push_back(atom); + exp_n.push_back(x.eqNode(d_emptyString)); + d_parent.sendInference(nf_exp, exp_n, exp, "RegExp Delta"); + addedLemma = true; + d_regexp_ccached.insert(atom); + return false; + } + case 1: + { + d_regexp_ccached.insert(atom); + break; + } + case 2: + { + std::vector exp_n; + exp_n.push_back(atom); + exp_n.push_back(x.eqNode(d_emptyString)); + Node conc; + d_parent.sendInference(nf_exp, exp_n, conc, "RegExp Delta CONFLICT"); + addedLemma = true; + d_regexp_ccached.insert(atom); + return false; + } + default: + // Impossible + break; + } + } + else + { + if (deriveRegExp(x, r, atom, nf_exp)) + { + addedLemma = true; + d_regexp_ccached.insert(atom); + return false; + } + } + return true; +} + +CVC4::String RegExpSolver::getHeadConst(Node x) +{ + if (x.isConst()) + { + return x.getConst(); + } + else if (x.getKind() == STRING_CONCAT) + { + if (x[0].isConst()) + { + return x[0].getConst(); + } + } + return d_emptyString.getConst(); +} + +bool RegExpSolver::deriveRegExp(Node x, + Node r, + Node atom, + std::vector& ant) +{ + Assert(x != d_emptyString); + Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x + << ", r= " << r << std::endl; + CVC4::String s = getHeadConst(x); + if (!s.isEmptyString() && d_regexp_opr.checkConstRegExp(r)) + { + Node conc = Node::null(); + Node dc = r; + bool flag = true; + for (unsigned i = 0; i < s.size(); ++i) + { + CVC4::String c = s.substr(i, 1); + Node dc2; + int rt = d_regexp_opr.derivativeS(dc, c, dc2); + dc = dc2; + if (rt == 2) + { + // CONFLICT + flag = false; + break; + } + } + // send lemma + if (flag) + { + if (x.isConst()) + { + Assert(false, + "Impossible: RegExpSolver::deriveRegExp: const string in const " + "regular expression."); + return false; + } + else + { + Assert(x.getKind() == STRING_CONCAT); + std::vector vec_nodes; + for (unsigned int i = 1; i < x.getNumChildren(); ++i) + { + vec_nodes.push_back(x[i]); + } + Node left = TheoryStringsRewriter::mkConcat(STRING_CONCAT, vec_nodes); + left = Rewriter::rewrite(left); + conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); + } + } + std::vector exp_n; + exp_n.push_back(atom); + d_parent.sendInference(ant, exp_n, conc, "RegExp-Derive"); + return true; + } + return false; +} + +void RegExpSolver::addMembership(Node assertion) +{ + bool polarity = assertion.getKind() != NOT; + TNode atom = polarity ? assertion : assertion[0]; + Node x = atom[0]; + Node r = atom[1]; + if (polarity) + { + unsigned index = 0; + NodeUIntMap::const_iterator it = d_pos_memberships.find(x); + if (it != d_pos_memberships.end()) + { + index = (*it).second; + for (unsigned k = 0; k < index; k++) + { + if (k < d_pos_memberships_data[x].size()) + { + if (d_pos_memberships_data[x][k] == r) + { + return; + } + } + else + { + break; + } + } + } + d_pos_memberships[x] = index + 1; + if (index < d_pos_memberships_data[x].size()) + { + d_pos_memberships_data[x][index] = r; + } + else + { + d_pos_memberships_data[x].push_back(r); + } + } + else if (!options::stringIgnNegMembership()) + { + unsigned index = 0; + NodeUIntMap::const_iterator it = d_neg_memberships.find(x); + if (it != d_neg_memberships.end()) + { + index = (*it).second; + for (unsigned k = 0; k < index; k++) + { + if (k < d_neg_memberships_data[x].size()) + { + if (d_neg_memberships_data[x][k] == r) + { + return; + } + } + else + { + break; + } + } + } + d_neg_memberships[x] = index + 1; + if (index < d_neg_memberships_data[x].size()) + { + d_neg_memberships_data[x][index] = r; + } + else + { + d_neg_memberships_data[x].push_back(r); + } + } + // old + if (polarity || !options::stringIgnNegMembership()) + { + d_regexp_memberships.push_back(assertion); + } +} + +Node RegExpSolver::getNormalSymRegExp(Node r, std::vector& nf_exp) +{ + Node ret = r; + switch (r.getKind()) + { + case REGEXP_EMPTY: + case REGEXP_SIGMA: break; + case STRING_TO_REGEXP: + { + if (!r[0].isConst()) + { + Node tmp = d_parent.getNormalString(r[0], nf_exp); + if (tmp != r[0]) + { + ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); + } + } + break; + } + case REGEXP_CONCAT: + case REGEXP_UNION: + case REGEXP_INTER: + case REGEXP_STAR: + { + std::vector vec_nodes; + for (const Node& cr : r) + { + vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); + } + ret = Rewriter::rewrite( + NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); + break; + } + default: + { + Trace("strings-error") << "Unsupported term: " << r + << " in normalization SymRegExp." << std::endl; + Assert(false); + } + } + return ret; +} + +} // namespace strings +} // namespace theory +} // namespace CVC4 diff --git a/src/theory/strings/regexp_solver.h b/src/theory/strings/regexp_solver.h new file mode 100644 index 000000000..4b8f5bb73 --- /dev/null +++ b/src/theory/strings/regexp_solver.h @@ -0,0 +1,106 @@ +/********************* */ +/*! \file regexp_solver.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Regular expression solver for the theory of strings. + ** + **/ + +#include "cvc4_private.h" + +#ifndef __CVC4__THEORY__STRINGS__REGEXP_SOLVER_H +#define __CVC4__THEORY__STRINGS__REGEXP_SOLVER_H + +#include +#include "context/cdhashset.h" +#include "context/cdlist.h" +#include "context/context.h" +#include "expr/node.h" +#include "theory/strings/regexp_operation.h" +#include "util/regexp.h" + +namespace CVC4 { +namespace theory { +namespace strings { + +class TheoryStrings; + +class RegExpSolver +{ + typedef context::CDList NodeList; + typedef context::CDHashMap NodeBoolMap; + typedef context::CDHashMap NodeIntMap; + typedef context::CDHashMap NodeUIntMap; + typedef context::CDHashMap NodeNodeMap; + typedef context::CDHashSet NodeSet; + + public: + RegExpSolver(TheoryStrings& p, context::Context* c, context::UserContext* u); + ~RegExpSolver() {} + + /** add membership + * + * This informs this class that assertion is asserted in the current context. + * We expect that assertion is a (possibly negated) regular expression + * membership. + */ + void addMembership(Node assertion); + /** check + * + * Tells this solver to check whether the regular expressions asserted to it + * are consistent. If they are not, then this class will call the + * sendInference method of its parent TheoryString object, indicating that + * it requires a conflict or lemma to be processed. + */ + void check(); + + private: + // Constants + Node d_emptyString; + Node d_emptyRegexp; + Node d_true; + Node d_false; + /** the parent of this object */ + TheoryStrings& d_parent; + // check membership constraints + Node mkAnd(Node c1, Node c2); + bool checkPDerivative( + Node x, Node r, Node atom, bool& addedLemma, std::vector& nf_exp); + Node getMembership(Node n, bool isPos, unsigned i); + unsigned getNumMemberships(Node n, bool isPos); + CVC4::String getHeadConst(Node x); + bool deriveRegExp(Node x, Node r, Node atom, std::vector& ant); + Node getNormalSymRegExp(Node r, std::vector& nf_exp); + // regular expression memberships + NodeList d_regexp_memberships; + NodeSet d_regexp_ucached; + NodeSet d_regexp_ccached; + // stored assertions + NodeUIntMap d_pos_memberships; + std::map > d_pos_memberships_data; + NodeUIntMap d_neg_memberships; + std::map > d_neg_memberships_data; + // semi normal forms for symbolic expression + std::map d_nf_regexps; + std::map > d_nf_regexps_exp; + // intersection + NodeNodeMap d_inter_cache; + NodeIntMap d_inter_index; + // processed memberships + NodeSet d_processed_memberships; + /** regular expression operation module */ + RegExpOpr d_regexp_opr; +}; /* class TheoryStrings */ + +} // namespace strings +} // namespace theory +} // namespace CVC4 + +#endif /* __CVC4__THEORY__STRINGS__THEORY_STRINGS_H */ diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 18f1f801a..4e939e47b 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -121,15 +121,7 @@ TheoryStrings::TheoryStrings(context::Context* c, d_functionsTerms(c), d_has_extf(c, false), d_has_str_code(false), - d_regexp_memberships(c), - d_regexp_ucached(u), - d_regexp_ccached(c), - d_pos_memberships(c), - d_neg_memberships(c), - d_inter_cache(c), - d_inter_index(c), - d_processed_memberships(c), - d_regexp_ant(c), + d_regexp_solver(*this, c, u), d_input_vars(u), d_input_var_lsum(u), d_cardinality_lits(u), @@ -169,8 +161,6 @@ TheoryStrings::TheoryStrings(context::Context* c, d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) ); d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1)); d_emptyString = NodeManager::currentNM()->mkConst( ::CVC4::String("") ); - std::vector< Node > nvec; - d_emptyRegexp = NodeManager::currentNM()->mkNode( kind::REGEXP_EMPTY, nvec ); d_true = NodeManager::currentNM()->mkConst( true ); d_false = NodeManager::currentNM()->mkConst( false ); @@ -258,6 +248,39 @@ Node TheoryStrings::getLength( Node t, std::vector< Node >& exp ) { return getLengthExp( t, exp, t ); } +Node TheoryStrings::getNormalString(Node x, std::vector& nf_exp) +{ + if (!x.isConst()) + { + Node xr = getRepresentative(x); + if (d_normal_forms.find(xr) != d_normal_forms.end()) + { + Node ret = mkConcat(d_normal_forms[xr]); + nf_exp.insert(nf_exp.end(), + d_normal_forms_exp[xr].begin(), + d_normal_forms_exp[xr].end()); + addToExplanation(x, d_normal_forms_base[xr], nf_exp); + Trace("strings-debug") + << "Term: " << x << " has a normal form " << ret << std::endl; + return ret; + } + // if x does not have a normal form, then it should not occur in the + // equality engine and hence should be its own representative. + Assert(xr == x); + if (x.getKind() == kind::STRING_CONCAT) + { + std::vector vec_nodes; + for (unsigned i = 0; i < x.getNumChildren(); i++) + { + Node nc = getNormalString(x[i], nf_exp); + vec_nodes.push_back(nc); + } + return mkConcat(vec_nodes); + } + } + return x; +} + void TheoryStrings::setMasterEqualityEngine(eq::EqualityEngine* eq) { d_equalityEngine.setMasterEqualityEngine(eq); } @@ -1030,6 +1053,30 @@ void TheoryStrings::checkExtfReductions( int effort ) { } } +void TheoryStrings::checkMemberships() +{ + // add the memberships + std::vector mems = getExtTheory()->getActive(kind::STRING_IN_REGEXP); + for (unsigned i = 0; i < mems.size(); i++) + { + Node n = mems[i]; + Assert(d_extf_info_tmp.find(n) != d_extf_info_tmp.end()); + if (!d_extf_info_tmp[n].d_const.isNull()) + { + bool pol = d_extf_info_tmp[n].d_const.getConst(); + Trace("strings-process-debug") + << " add membership : " << n << ", pol = " << pol << std::endl; + d_regexp_solver.addMembership(pol ? n : n.negate()); + } + else + { + Trace("strings-process-debug") + << " irrelevant (non-asserted) membership : " << n << std::endl; + } + } + d_regexp_solver.check(); +} + TheoryStrings::EqcInfo::EqcInfo(context::Context* c) : d_length_term(c), d_code_term(c), @@ -3497,7 +3544,6 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( nm->mkNode(kind::REGEXP_STAR, nm->mkNode(kind::STRING_TO_REGEXP, restr)))); cc = cc == d_true ? conc2 : nm->mkNode(kind::AND, cc, conc2); - d_regexp_ant[conc2] = ant; vconc.push_back(cc); } conc = vconc.size() == 0 ? Node::null() : vconc.size() == 1 @@ -3547,11 +3593,6 @@ TheoryStrings::ProcessLoopResult TheoryStrings::processLoop( conc = nm->mkNode(kind::AND, vec_conc); } // normal case - // set its antecedant to ant, to say when it is relevant - if (!str_in_re.isNull()) - { - d_regexp_ant[str_in_re] = ant; - } // we will be done info.d_conc = conc; info.d_id = INFER_FLOOP; @@ -4722,527 +4763,6 @@ TheoryStrings::Statistics::~Statistics(){ smtStatisticsRegistry()->unregisterStat(&d_loop_lemmas); } - - - - - - - - - - - - - - - - - - - -//// Regular Expressions - - -unsigned TheoryStrings::getNumMemberships( Node n, bool isPos ) { - if( isPos ){ - NodeIntMap::const_iterator it = d_pos_memberships.find( n ); - if( it!=d_pos_memberships.end() ){ - return (*it).second; - } - }else{ - NodeIntMap::const_iterator it = d_neg_memberships.find( n ); - if( it!=d_neg_memberships.end() ){ - return (*it).second; - } - } - return 0; -} - -Node TheoryStrings::getMembership( Node n, bool isPos, unsigned i ) { - return isPos ? d_pos_memberships_data[n][i] : d_neg_memberships_data[n][i]; -} - -Node TheoryStrings::mkRegExpAntec(Node atom, Node ant) { - if(d_regexp_ant.find(atom) == d_regexp_ant.end()) { - return NodeManager::currentNM()->mkNode(kind::AND, ant, atom); - } else { - Node n = d_regexp_ant[atom]; - return NodeManager::currentNM()->mkNode(kind::AND, ant, n); - } -} - -void TheoryStrings::checkMemberships() { - //add the memberships - std::vector mems = getExtTheory()->getActive(kind::STRING_IN_REGEXP); - for (unsigned i = 0; i < mems.size(); i++) { - Node n = mems[i]; - Assert( d_extf_info_tmp.find( n )!=d_extf_info_tmp.end() ); - if (!d_extf_info_tmp[n].d_const.isNull()) - { - bool pol = d_extf_info_tmp[n].d_const.getConst(); - Trace("strings-process-debug") << " add membership : " << n << ", pol = " << pol << std::endl; - addMembership( pol ? n : n.negate() ); - }else{ - Trace("strings-process-debug") << " irrelevant (non-asserted) membership : " << n << std::endl; - } - } - - bool addedLemma = false; - bool changed = false; - std::vector< Node > processed; - std::vector< Node > cprocessed; - - Trace("regexp-debug") << "Checking Memberships ... " << std::endl; - //if(options::stringEIT()) { - //TODO: Opt for normal forms - for( NodeIntMap::const_iterator itr_xr = d_pos_memberships.begin(); itr_xr != d_pos_memberships.end(); ++itr_xr ){ - bool spflag = false; - Node x = (*itr_xr).first; - Trace("regexp-debug") << "Checking Memberships for " << x << std::endl; - if(d_inter_index.find(x) == d_inter_index.end()) { - d_inter_index[x] = 0; - } - int cur_inter_idx = d_inter_index[x]; - unsigned n_pmem = (*itr_xr).second; - Assert( getNumMemberships( x, true )==n_pmem ); - if( cur_inter_idx != (int)n_pmem ) { - if( n_pmem == 1) { - d_inter_cache[x] = getMembership( x, true, 0 ); - d_inter_index[x] = 1; - Trace("regexp-debug") << "... only one choice " << std::endl; - } else if(n_pmem > 1) { - Node r; - if(d_inter_cache.find(x) != d_inter_cache.end()) { - r = d_inter_cache[x]; - } - if(r.isNull()) { - r = getMembership( x, true, 0 ); - cur_inter_idx = 1; - } - - unsigned k_start = cur_inter_idx; - Trace("regexp-debug") << "... staring from : " << cur_inter_idx << ", we have " << n_pmem << std::endl; - for(unsigned k = k_start; k vec_nodes; - for( unsigned kk=0; kk<=k; kk++ ){ - Node rr = getMembership( x, true, kk ); - Node n = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, rr); - vec_nodes.push_back( n ); - } - Node conc; - sendInference(vec_nodes, conc, "INTERSECT CONFLICT", true); - addedLemma = true; - break; - } - if(d_conflict) { - break; - } - } - //updates - if(!d_conflict && !spflag) { - d_inter_cache[x] = r; - d_inter_index[x] = (int)n_pmem; - } - } - } - } - //} - - Trace("regexp-debug") << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma << std::endl; - if(!addedLemma) { - NodeManager* nm = NodeManager::currentNM(); - for( unsigned i=0; i rnfexp; - - if (!x.isConst()) - { - x = getNormalString(x, rnfexp); - changed = true; - } - if (!d_regexp_opr.checkConstRegExp(r)) - { - r = getNormalSymRegExp(r, rnfexp); - changed = true; - } - Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " - << x << " IN " << r << std::endl; - if (changed) - { - Node tmp = - Rewriter::rewrite(nm->mkNode(kind::STRING_IN_REGEXP, x, r)); - if (!polarity) - { - tmp = tmp.negate(); - } - if (tmp == d_true) - { - d_regexp_ccached.insert(assertion); - continue; - } - else if (tmp == d_false) - { - Node antec = mkRegExpAntec(assertion, mkExplain(rnfexp)); - Node conc = Node::null(); - sendLemma(antec, conc, "REGEXP NF Conflict"); - addedLemma = true; - break; - } - } - - if( polarity ) { - flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); - } else { - if(! options::stringExp()) { - throw LogicException("Strings Incomplete (due to Negative Membership) by default, try --strings-exp option."); - } - } - if(flag) { - //check if the term is atomic - Node xr = getRepresentative( x ); - //Trace("strings-regexp") << xr << " is rep of " << x << std::endl; - //Assert( d_normal_forms.find( xr )!=d_normal_forms.end() ); - Trace("strings-regexp") - << "Unroll/simplify membership of atomic term " << xr - << std::endl; - // if so, do simple unrolling - std::vector nvec; - - if (nvec.empty()) - { - d_regexp_opr.simplify(atom, nvec, polarity); - } - Node antec = assertion; - if (d_regexp_ant.find(assertion) != d_regexp_ant.end()) - { - antec = d_regexp_ant[assertion]; - for (std::vector::const_iterator itr = nvec.begin(); - itr < nvec.end(); - itr++) - { - if (itr->getKind() == kind::STRING_IN_REGEXP) - { - if (d_regexp_ant.find(*itr) == d_regexp_ant.end()) - { - d_regexp_ant[*itr] = antec; - } - } - } - } - antec = NodeManager::currentNM()->mkNode( - kind::AND, antec, mkExplain(rnfexp)); - Node conc = nvec.size() == 1 - ? nvec[0] - : NodeManager::currentNM()->mkNode(kind::AND, nvec); - conc = Rewriter::rewrite(conc); - sendLemma(antec, conc, "REGEXP_Unfold"); - addedLemma = true; - if (changed) - { - cprocessed.push_back(assertion); - } - else - { - processed.push_back(assertion); - } - // d_regexp_ucached[assertion] = true; - } - } - if(d_conflict) { - break; - } - } - } - if( addedLemma ) { - if( !d_conflict ){ - for( unsigned i=0; i &nf_exp ) { - - Node antnf = mkExplain(nf_exp); - - if(areEqual(x, d_emptyString)) { - Node exp; - switch(d_regexp_opr.delta(r, exp)) { - case 0: { - Node antec = mkRegExpAntec(atom, x.eqNode(d_emptyString)); - antec = NodeManager::currentNM()->mkNode(kind::AND, antec, antnf); - sendLemma(antec, exp, "RegExp Delta"); - addedLemma = true; - d_regexp_ccached.insert(atom); - return false; - } - case 1: { - d_regexp_ccached.insert(atom); - break; - } - case 2: { - Node antec = mkRegExpAntec(atom, x.eqNode(d_emptyString)); - antec = NodeManager::currentNM()->mkNode(kind::AND, antec, antnf); - Node conc = Node::null(); - sendLemma(antec, conc, "RegExp Delta CONFLICT"); - addedLemma = true; - d_regexp_ccached.insert(atom); - return false; - } - default: - //Impossible - break; - } - } else { - /*Node xr = getRepresentative( x ); - if(x != xr) { - Node n = NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, xr, r); - Node nn = Rewriter::rewrite( n ); - if(nn == d_true) { - d_regexp_ccached.insert(atom); - return false; - } else if(nn == d_false) { - Node antec = mkRegExpAntec(atom, x.eqNode(xr)); - Node conc = Node::null(); - sendLemma(antec, conc, "RegExp Delta CONFLICT"); - addedLemma = true; - d_regexp_ccached.insert(atom); - return false; - } - }*/ - Node sREant = mkRegExpAntec(atom, d_true); - sREant = NodeManager::currentNM()->mkNode(kind::AND, sREant, antnf); - if(deriveRegExp( x, r, sREant )) { - addedLemma = true; - d_regexp_ccached.insert(atom); - return false; - } - } - return true; -} - -CVC4::String TheoryStrings::getHeadConst( Node x ) { - if( x.isConst() ) { - return x.getConst< String >(); - } else if( x.getKind() == kind::STRING_CONCAT ) { - if( x[0].isConst() ) { - return x[0].getConst< String >(); - } else { - return d_emptyString.getConst< String >(); - } - } else { - return d_emptyString.getConst< String >(); - } -} - -bool TheoryStrings::deriveRegExp( Node x, Node r, Node ant ) { - // TODO cstr in vre - Assert(x != d_emptyString); - Trace("regexp-derive") << "TheoryStrings::deriveRegExp: x=" << x << ", r= " << r << std::endl; - //if(x.isConst()) { - // Node n = NodeManager::currentNM()->mkNode( kind::STRING_IN_REGEXP, x, r ); - // Node r = Rewriter::rewrite( n ); - // if(n != r) { - // sendLemma(ant, r, "REGEXP REWRITE"); - // return true; - // } - //} - CVC4::String s = getHeadConst( x ); - if( !s.isEmptyString() && d_regexp_opr.checkConstRegExp( r ) ) { - Node conc = Node::null(); - Node dc = r; - bool flag = true; - for(unsigned i=0; i vec_nodes; - for(unsigned int i=1; imkNode( kind::STRING_IN_REGEXP, left, dc ); - - /*std::vector< Node > sdc; - d_regexp_opr.simplify(conc, sdc, true); - if(sdc.size() == 1) { - conc = sdc[0]; - } else { - conc = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, conc)); - }*/ - } - } - sendLemma(ant, conc, "RegExp-Derive"); - return true; - } else { - return false; - } -} - -void TheoryStrings::addMembership(Node assertion) { - bool polarity = assertion.getKind() != kind::NOT; - TNode atom = polarity ? assertion : assertion[0]; - Node x = atom[0]; - Node r = atom[1]; - if(polarity) { - int index = 0; - NodeIntMap::const_iterator it = d_pos_memberships.find( x ); - if (it != d_pos_memberships.end()) - { - index = (*it).second; - for( int k=0; kmkNode(kind::STRING_IN_REGEXP, x, r2); - }*/ - int index = 0; - NodeIntMap::const_iterator it = d_neg_memberships.find( x ); - if (it != d_neg_memberships.end()) - { - index = (*it).second; - for( int k=0; k& nf_exp ){ - if( !x.isConst() ){ - Node xr = getRepresentative( x ); - if( d_normal_forms.find( xr ) != d_normal_forms.end() ){ - Node ret = mkConcat( d_normal_forms[xr] ); - nf_exp.insert( nf_exp.end(), d_normal_forms_exp[xr].begin(), d_normal_forms_exp[xr].end() ); - addToExplanation( x, d_normal_forms_base[xr], nf_exp ); - Trace("strings-debug") << "Term: " << x << " has a normal form " << ret << std::endl; - return ret; - } else { - if(x.getKind() == kind::STRING_CONCAT) { - std::vector< Node > vec_nodes; - for(unsigned i=0; i &nf_exp) { - Node ret = r; - switch( r.getKind() ) { - case kind::REGEXP_EMPTY: - case kind::REGEXP_SIGMA: - break; - case kind::STRING_TO_REGEXP: { - if(!r[0].isConst()) { - Node tmp = getNormalString( r[0], nf_exp ); - if(tmp != r[0]) { - ret = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, tmp); - } - } - break; - } - case kind::REGEXP_CONCAT: - case kind::REGEXP_UNION: - case kind::REGEXP_INTER: - case kind::REGEXP_STAR: - { - std::vector< Node > vec_nodes; - for (const Node& cr : r) - { - vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); - } - ret = Rewriter::rewrite( - NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); - break; - } - //case kind::REGEXP_PLUS: - //case kind::REGEXP_OPT: - //case kind::REGEXP_RANGE: - default: { - Trace("strings-error") << "Unsupported term: " << r << " in normalization SymRegExp." << std::endl; - Assert( false ); - //return Node::null(); - } - } - return ret; -} - /** run the given inference step */ void TheoryStrings::runInferStep(InferStep s, int effort) { diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 6eb1f38b4..13f6a45e9 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -26,6 +26,7 @@ #include "theory/decision_manager.h" #include "theory/strings/regexp_elim.h" #include "theory/strings/regexp_operation.h" +#include "theory/strings/regexp_solver.h" #include "theory/strings/skolem_cache.h" #include "theory/strings/theory_strings_preprocess.h" #include "theory/theory.h" @@ -235,27 +236,63 @@ class TheoryStrings : public Theory { } };/* class TheoryStrings::NotifyClass */ -private: + //--------------------------- equality engine + /** + * Get the representative of t in the equality engine of this class, or t + * itself if it is not registered as a term. + */ + Node getRepresentative(Node t); + /** Is t registered as a term in the equality engine of this class? */ + bool hasTerm(Node a); + /** + * Are a and b equal according to the equality engine of this class? Also + * returns true if a and b are identical. + */ + bool areEqual(Node a, Node b); + /** + * Are a and b disequal according to the equality engine of this class? Also + * returns true if the representative of a and b are distinct constants. + */ + bool areDisequal(Node a, Node b); + //--------------------------- end equality engine + + //--------------------------- helper functions + /** get length with explanation + * + * If possible, this returns an arithmetic term that exists in the current + * context that is equal to the length of te, or otherwise returns the + * length of t. It adds to exp literals that hold in the current context that + * explain why that term is equal to the length of t. For example, if + * we have assertions: + * len( x ) = 5 ^ z = x ^ x = y, + * then getLengthExp( z, exp, y ) returns len( x ) and adds { z = x } to + * exp. On the other hand, getLengthExp( z, exp, x ) returns len( x ) and + * adds nothing to exp. + */ + Node getLengthExp(Node t, std::vector& exp, Node te); + /** shorthand for getLengthExp(t, exp, t) */ + Node getLength(Node t, std::vector& exp); + /** get normal string + * + * This method returns the node that is equivalent to the normal form of x, + * and adds the corresponding explanation to nf_exp. + * + * For example, if x = y ++ z is an assertion in the current context, then + * this method returns the term y ++ z and adds x = y ++ z to nf_exp. + */ + Node getNormalString(Node x, std::vector& nf_exp); + //-------------------------- end helper functions + + private: // Constants Node d_emptyString; - Node d_emptyRegexp; Node d_true; Node d_false; Node d_zero; Node d_one; Node d_neg_one; + /** the cardinality of the alphabet */ unsigned d_card_size; - // Helper functions - Node getRepresentative( Node t ); - bool hasTerm( Node a ); - bool areEqual( Node a, Node b ); - bool areDisequal( Node a, Node b ); - bool areCareDisequal( TNode x, TNode y ); - // t is representative, te = t, add lt = te to explanation exp - Node getLengthExp( Node t, std::vector< Node >& exp, Node te ); - Node getLength( Node t, std::vector< Node >& exp ); - -private: /** The notify class */ NotifyClass d_notify; /** Equaltity engine */ @@ -607,6 +644,11 @@ private: protected: /** compute care graph */ void computeCareGraph() override; + /** + * Are x and y shared terms that are not equal? This is used for constructing + * the care graph in the above function. + */ + bool areCareDisequal(TNode x, TNode y); // do pending merges void assertPendingFact(Node atom, bool polarity, Node exp); @@ -641,6 +683,7 @@ private: */ void registerTerm(Node n, int effort); //-------------------------------------send inferences + public: /** send internal inferences * * This is called when we have inferred exp => conc, where exp is a set @@ -652,7 +695,7 @@ private: * sendInference below in that it does not introduce any new non-constant * terms to the state. * - * The argument c is a string identifying the reason for the interference. + * The argument c is a string identifying the reason for the inference. * This string is used for debugging purposes. * * Return true if the inference is complete, in the sense that we infer @@ -661,17 +704,72 @@ private: * to the criteria mentioned above. */ bool sendInternalInference(std::vector& exp, Node conc, const char* c); - // send lemma + /** send inference + * + * This function should be called when ( exp ^ exp_n ) => eq. The set exp + * contains literals that are explainable by this class, i.e. those that + * hold in the equality engine of this class. On the other hand, the set + * exp_n ("explanations new") contain nodes that are not explainable by this + * class. This method may call sendInfer or sendLemma. Overall, the result + * of this method is one of the following: + * + * [1] (No-op) Do nothing if eq is true, + * + * [2] (Infer) Indicate that eq should be added to the equality engine of this + * class with explanation EXPLAIN(exp), where EXPLAIN returns the + * explanation of the node in exp in terms of the literals asserted to this + * class, + * + * [3] (Lemma) Indicate that the lemma ( EXPLAIN(exp) ^ exp_n ) => eq should + * be sent on the output channel of this class, or + * + * [4] (Conflict) Immediately report a conflict EXPLAIN(exp) on the output + * channel of this class. + * + * Determining which case to apply depends on the form of eq and whether + * exp_n is empty. In particular, lemmas must be used whenever exp_n is + * non-empty, conflicts are used when exp_n is empty and eq is false. + * + * The argument c is a string identifying the reason for inference, used for + * debugging. + * + * If the flag asLemma is true, then this method will send a lemma instead + * of an inference whenever applicable. + */ void sendInference(std::vector& exp, std::vector& exp_n, Node eq, const char* c, bool asLemma = false); + /** same as above, but where exp_n is empty */ void sendInference(std::vector& exp, Node eq, const char* c, bool asLemma = false); + /** + * Are we in conflict? This returns true if this theory has called its output + * channel's conflict method in the current SAT context. + */ + bool inConflict() const { return d_conflict; } + + protected: + /** + * Indicates that ant => conc should be sent on the output channel of this + * class. This will either trigger an immediate call to the conflict + * method of the output channel of this class of conc is false, or adds the + * above lemma to the lemma cache d_lemma_cache, which may be flushed + * later within the current call to TheoryStrings::check. + * + * The argument c is a string identifying the reason for inference, used for + * debugging. + */ void sendLemma(Node ant, Node conc, const char* c); + /** + * Indicates that conc should be added to the equality engine of this class + * with explanation eq_exp. It must be the case that eq_exp is a (conjunction + * of) literals that each are explainable, i.e. they already hold in the + * equality engine of this class. + */ void sendInfer(Node eq_exp, Node eq, const char* c); bool sendSplit(Node a, Node b, const char* c, bool preq = true); //-------------------------------------end send inferences @@ -685,6 +783,8 @@ private: /** mkExplain **/ Node mkExplain(std::vector& a); Node mkExplain(std::vector& a, std::vector& an); + + protected: /** mkAnd **/ Node mkAnd(std::vector& a); /** get concat vector */ @@ -716,39 +816,11 @@ private: // Symbolic Regular Expression private: - // regular expression memberships - NodeList d_regexp_memberships; - NodeSet d_regexp_ucached; - NodeSet d_regexp_ccached; - // stored assertions - NodeIntMap d_pos_memberships; - std::map< Node, std::vector< Node > > d_pos_memberships_data; - NodeIntMap d_neg_memberships; - std::map< Node, std::vector< Node > > d_neg_memberships_data; - unsigned getNumMemberships( Node n, bool isPos ); - Node getMembership( Node n, bool isPos, unsigned i ); - // semi normal forms for symbolic expression - std::map< Node, Node > d_nf_regexps; - std::map< Node, std::vector< Node > > d_nf_regexps_exp; - // intersection - NodeNodeMap d_inter_cache; - NodeIntMap d_inter_index; - // processed memberships - NodeSet d_processed_memberships; - // antecedant for why regexp membership must be true - NodeNodeMap d_regexp_ant; - /** regular expression operation module */ - RegExpOpr d_regexp_opr; + /** regular expression solver module */ + RegExpSolver d_regexp_solver; /** regular expression elimination module */ RegExpElimination d_regexp_elim; - CVC4::String getHeadConst( Node x ); - bool deriveRegExp( Node x, Node r, Node ant ); - void addMembership(Node assertion); - Node getNormalString(Node x, std::vector &nf_exp); - Node getNormalSymRegExp(Node r, std::vector &nf_exp); - - // Finite Model Finding private: NodeSet d_input_vars; -- 2.30.2