From: Andrew Reynolds Date: Mon, 24 May 2021 14:39:56 +0000 (-0500) Subject: Better formalization of regular expression unfolding skolems (#6602) X-Git-Tag: cvc5-1.0.0~1705 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b00667a21984dc3f6b960a1a2d22845c7096bc35;p=cvc5.git Better formalization of regular expression unfolding skolems (#6602) This replaces our previous formalization of RE unfolding skolems with a more explicit one that is amenable to external proof conversion. It adds a few associated utility methods to SkolemManager required for LFSC proof conversion for RE_UNFOLD_POS. It also changes the order of equalities in the RE_UNFOLD_POS rule, which simplifies LFSC proof checking. --- diff --git a/src/expr/skolem_manager.cpp b/src/expr/skolem_manager.cpp index 4004bf6fe..8e1c5e54c 100644 --- a/src/expr/skolem_manager.cpp +++ b/src/expr/skolem_manager.cpp @@ -53,6 +53,7 @@ const char* toString(SkolemFunId id) case SkolemFunId::SELECTOR_WRONG: return "SELECTOR_WRONG"; case SkolemFunId::SHARED_SELECTOR: return "SHARED_SELECTOR"; case SkolemFunId::SEQ_NTH_OOB: return "SEQ_NTH_OOB"; + case SkolemFunId::RE_UNFOLD_POS_COMPONENT: return "RE_UNFOLD_POS_COMPONENT"; default: return "?"; } } @@ -206,11 +207,37 @@ Node SkolemManager::mkSkolemFunction(SkolemFunId id, ss << "SKOLEM_FUN_" << id; Node k = nm->mkSkolem(ss.str(), tn, "an internal skolem function", flags); d_skolemFuns[key] = k; + d_skolemFunMap[k] = key; return k; } return it->second; } +Node SkolemManager::mkSkolemFunction(SkolemFunId id, + TypeNode tn, + const std::vector& cacheVals, + int flags) +{ + Assert(cacheVals.size() > 1); + Node cacheVal = NodeManager::currentNM()->mkNode(SEXPR, cacheVals); + return mkSkolemFunction(id, tn, cacheVal, flags); +} + +bool SkolemManager::isSkolemFunction(Node k, + SkolemFunId& id, + Node& cacheVal) const +{ + std::map>::const_iterator it = + d_skolemFunMap.find(k); + if (it == d_skolemFunMap.end()) + { + return false; + } + id = std::get<0>(it->second); + cacheVal = std::get<2>(it->second); + return true; +} + Node SkolemManager::mkDummySkolem(const std::string& prefix, const TypeNode& type, const std::string& comment, diff --git a/src/expr/skolem_manager.h b/src/expr/skolem_manager.h index d62153941..a7d35d155 100644 --- a/src/expr/skolem_manager.h +++ b/src/expr/skolem_manager.h @@ -29,6 +29,7 @@ class ProofGenerator; /** Skolem function identifier */ enum class SkolemFunId { + NONE, /** an uninterpreted function f s.t. f(x) = x / 0.0 (real division) */ DIV_BY_ZERO, /** an uninterpreted function f s.t. f(x) = x / 0 (integer division) */ @@ -43,6 +44,13 @@ enum class SkolemFunId SHARED_SELECTOR, /** an application of seq.nth that is out of bounds */ SEQ_NTH_OOB, + /** + * Regular expression unfold component: if (str.in_re t R), where R is + * (re.++ r0 ... rn), then the RE_UNFOLD_POS_COMPONENT{t,R,i} is a string + * skolem ki such that t = (str.++ k0 ... kn) and (str.in_re k0 r0) for + * i = 0, ..., n. + */ + RE_UNFOLD_POS_COMPONENT, }; /** Converts a skolem function name to a string. */ const char* toString(SkolemFunId id); @@ -235,6 +243,16 @@ class SkolemManager TypeNode tn, Node cacheVal = Node::null(), int flags = NodeManager::SKOLEM_DEFAULT); + /** Same as above, with multiple cache values */ + Node mkSkolemFunction(SkolemFunId id, + TypeNode tn, + const std::vector& cacheVals, + int flags = NodeManager::SKOLEM_DEFAULT); + /** + * Is k a skolem function? Returns true if k was generated by the above call. + * Updates the arguments to the values used when constructing it. + */ + bool isSkolemFunction(Node k, SkolemFunId& id, Node& cacheVal) const; /** * Create a skolem constant with the given name, type, and comment. This * should only be used if the definition of the skolem does not matter. @@ -281,10 +299,10 @@ class SkolemManager static Node getOriginalForm(Node n); private: - /** - * Cached of skolem functions for mkSkolemFunction above. - */ + /** Cache of skolem functions for mkSkolemFunction above. */ std::map, Node> d_skolemFuns; + /** Backwards mapping of above */ + std::map> d_skolemFunMap; /** * Mapping from witness terms to proof generators. */ diff --git a/src/theory/strings/infer_proof_cons.cpp b/src/theory/strings/infer_proof_cons.cpp index 8ee29f712..29a4187ec 100644 --- a/src/theory/strings/infer_proof_cons.cpp +++ b/src/theory/strings/infer_proof_cons.cpp @@ -712,11 +712,11 @@ void InferProofCons::convert(InferenceId infer, } else if (eunf.getKind() == AND) { - // equality is the last conjunct + // equality is the first conjunct std::vector childrenAE; childrenAE.push_back(eunf); std::vector argsAE; - argsAE.push_back(nm->mkConst(Rational(eunf.getNumChildren() - 1))); + argsAE.push_back(nm->mkConst(Rational(0))); Node eunfAE = psb.tryStep(PfRule::AND_ELIM, childrenAE, argsAE); Trace("strings-ipc-prefix") << "--- and elim to " << eunfAE << std::endl; diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index bf4c20f85..96351cda9 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -1017,16 +1017,7 @@ Node RegExpOpr::reduceRegExpPos(Node mem, { std::vector nvec; std::vector cc; - // get the (valid) existential for this membership - Node eform = getExistsForRegExpConcatMem(mem); SkolemManager* sm = nm->getSkolemManager(); - // Notice that this rule does not introduce witness terms, instead it - // uses skolems in the conclusion of the proof rule directly. Thus, - // the existential eform does not need to be explicitly justified by a - // proof here, since it is only being used as an intermediate formula in - // this inference. Hence we do not pass a proof generator to mkSkolemize. - sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem"); - Assert(newSkolems.size() == r.getNumChildren()); // Look up skolems for each of the components. If sc has optimizations // enabled, this will return arguments of str.to_re. for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i) @@ -1034,17 +1025,22 @@ Node RegExpOpr::reduceRegExpPos(Node mem, if (r[i].getKind() == STRING_TO_REGEXP) { // optimization, just take the body - newSkolems[i] = r[i][0]; + newSkolems.push_back(r[i][0]); } else { + Node ivalue = nm->mkConst(Rational(i)); + Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT, + s.getType(), + {mem[0], mem[1], ivalue}); + newSkolems.push_back(sk); nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i])); } } - // (str.in_re x (re.++ R1 .... Rn)) => - // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn))) + // (str.in_re x (re.++ R0 .... Rn)) => + // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) ) Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems)); - nvec.push_back(lem); + nvec.insert(nvec.begin(), lem); conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); } else if (k == REGEXP_STAR) @@ -1574,50 +1570,6 @@ bool RegExpOpr::regExpIncludes(Node r1, Node r2) return result; } -/** - * Associating formulas with their "exists form", or an existentially - * quantified formula that is equivalent to it. This is currently used - * for regular expression memberships in the method below. - */ -struct ExistsFormAttributeId -{ -}; -typedef expr::Attribute ExistsFormAttribute; - -Node RegExpOpr::getExistsForRegExpConcatMem(Node mem) -{ - // get or make the exists form of the membership - ExistsFormAttribute efa; - if (mem.hasAttribute(efa)) - { - // already computed - return mem.getAttribute(efa); - } - Assert(mem.getKind() == STRING_IN_REGEXP); - Node x = mem[0]; - Node r = mem[1]; - Assert(r.getKind() == REGEXP_CONCAT); - NodeManager* nm = NodeManager::currentNM(); - TypeNode xtn = x.getType(); - std::vector vars; - std::vector mems; - for (const Node& rc : r) - { - Node v = nm->mkBoundVar(xtn); - vars.push_back(v); - mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc)); - } - Node sconcat = nm->mkNode(STRING_CONCAT, vars); - Node eq = x.eqNode(sconcat); - mems.insert(mems.begin(), eq); - Node bvl = nm->mkNode(BOUND_VAR_LIST, vars); - Node ebody = nm->mkNode(AND, mems); - Node eform = nm->mkNode(EXISTS, bvl, ebody); - mem.setAttribute(efa, eform); - Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl; - return eform; -} - } // namespace strings } // namespace theory } // namespace cvc5 diff --git a/src/theory/strings/regexp_operation.h b/src/theory/strings/regexp_operation.h index a20e9a0a9..04b5a999d 100644 --- a/src/theory/strings/regexp_operation.h +++ b/src/theory/strings/regexp_operation.h @@ -190,18 +190,6 @@ class RegExpOpr { bool regExpIncludes(Node r1, Node r2); private: - /** - * Given a regular expression membership of the form: - * (str.in_re x (re.++ R1 ... Rn)) - * This returns the valid existentially quantified formula: - * (exists ((x1 String) ... (xn String)) - * (=> (str.in_re x (re.++ R1 ... Rn)) - * (and (= x (str.++ x1 ... xn)) - * (str.in_re x1 R1) ... (str.in_re xn Rn)))) - * Moreover, this formula is cached per regular expression membership via - * an attribute, meaning it is always the same for a given membership mem. - */ - static Node getExistsForRegExpConcatMem(Node mem); /** pointer to the skolem cache used by this class */ SkolemCache* d_sc; };