From 5a795677fd0e9663508664009a67abf6857b0ac9 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 9 Mar 2022 04:14:39 -0600 Subject: [PATCH] Add REGEXP_ALL kind to API (#8264) Previously was immediately converted to (re.* re.allchar) upon construction. --- src/api/cpp/cvc5.cpp | 12 +++++++----- src/api/cpp/cvc5_kind.h | 10 ++++++++++ src/printer/smt2/smt2_printer.cpp | 1 + src/theory/strings/kinds | 2 ++ src/theory/strings/rewrites.cpp | 1 + src/theory/strings/rewrites.h | 1 + src/theory/strings/sequences_rewriter.cpp | 13 +++++++++++++ src/theory/strings/sequences_rewriter.h | 6 ++++++ src/theory/strings/theory_strings_utils.cpp | 10 +++++----- 9 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/api/cpp/cvc5.cpp b/src/api/cpp/cvc5.cpp index 0dd86f814..8f5fc6566 100644 --- a/src/api/cpp/cvc5.cpp +++ b/src/api/cpp/cvc5.cpp @@ -355,6 +355,7 @@ const static std::unordered_map s_kinds{ {REGEXP_REPEAT, cvc5::Kind::REGEXP_REPEAT}, {REGEXP_LOOP, cvc5::Kind::REGEXP_LOOP}, {REGEXP_NONE, cvc5::Kind::REGEXP_NONE}, + {REGEXP_ALL, cvc5::Kind::REGEXP_ALL}, {REGEXP_ALLCHAR, cvc5::Kind::REGEXP_ALLCHAR}, {REGEXP_COMPLEMENT, cvc5::Kind::REGEXP_COMPLEMENT}, // maps to the same kind as the string versions @@ -670,6 +671,7 @@ const static std::unordered_map {cvc5::Kind::REGEXP_LOOP, REGEXP_LOOP}, {cvc5::Kind::REGEXP_LOOP_OP, REGEXP_LOOP}, {cvc5::Kind::REGEXP_NONE, REGEXP_NONE}, + {cvc5::Kind::REGEXP_ALL, REGEXP_ALL}, {cvc5::Kind::REGEXP_ALLCHAR, REGEXP_ALLCHAR}, {cvc5::Kind::REGEXP_COMPLEMENT, REGEXP_COMPLEMENT}, {cvc5::Kind::CONST_SEQUENCE, CONST_SEQUENCE}, @@ -5160,13 +5162,14 @@ Sort Solver::mkTupleSortHelper(const std::vector& sorts) const Term Solver::mkTermFromKind(Kind kind) const { CVC5_API_KIND_CHECK_EXPECTED(kind == PI || kind == REGEXP_NONE + || kind == REGEXP_ALL || kind == REGEXP_ALLCHAR || kind == SEP_EMP, kind) - << "PI, REGEXP_NONE, REGEXP_ALLCHAR or SEP_EMP"; + << "PI, REGEXP_NONE, REGEXP_ALL, REGEXP_ALLCHAR or SEP_EMP"; //////// all checks before this line Node res; cvc5::Kind k = extToIntKind(kind); - if (kind == REGEXP_NONE || kind == REGEXP_ALLCHAR) + if (kind == REGEXP_NONE || kind == REGEXP_ALL || kind == REGEXP_ALLCHAR) { Assert(isDefinedIntKind(k)); res = d_nodeMgr->mkNode(k, std::vector()); @@ -5906,9 +5909,8 @@ Term Solver::mkRegexpAll() const { CVC5_API_TRY_CATCH_BEGIN; //////// all checks before this line - Node res = d_nodeMgr->mkNode( - cvc5::kind::REGEXP_STAR, - d_nodeMgr->mkNode(cvc5::kind::REGEXP_ALLCHAR, std::vector())); + Node res = + d_nodeMgr->mkNode(cvc5::kind::REGEXP_ALL, std::vector()); (void)res.getType(true); /* kick off type checking */ return Term(this, res); //////// diff --git a/src/api/cpp/cvc5_kind.h b/src/api/cpp/cvc5_kind.h index 89a7732a8..24b1b5ed7 100644 --- a/src/api/cpp/cvc5_kind.h +++ b/src/api/cpp/cvc5_kind.h @@ -3087,6 +3087,16 @@ enum Kind : int32_t * - `Solver::mkTerm(Kind kind) const` */ REGEXP_NONE, + /** + * Regexp all. + * + * Parameters: none + * + * Create with: + * - `Solver::mkRegexpAll() const` + * - `Solver::mkTerm(Kind kind) const` + */ + REGEXP_ALL, /** * Regexp all characters. * diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index 1ca7893c0..72d303be8 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -1246,6 +1246,7 @@ std::string Smt2Printer::smtKindString(Kind k, Variant v) case kind::STRING_IN_REGEXP: return "str.in_re"; case kind::STRING_TO_REGEXP: return "str.to_re"; case kind::REGEXP_NONE: return "re.none"; + case kind::REGEXP_ALL: return "re.all"; case kind::REGEXP_ALLCHAR: return "re.allchar"; case kind::REGEXP_CONCAT: return "re.++"; case kind::REGEXP_UNION: return "re.union"; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 0a17eed02..ff226202b 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -112,6 +112,7 @@ operator REGEXP_RANGE 2 "regexp range" operator REGEXP_COMPLEMENT 1 "regexp complement" operator REGEXP_NONE 0 "regexp empty" +operator REGEXP_ALL 0 "regexp all" operator REGEXP_ALLCHAR 0 "regexp all characters" constant REGEXP_REPEAT_OP \ @@ -152,6 +153,7 @@ typerule REGEXP_COMPLEMENT "SimpleTypeRule" typerule STRING_TO_REGEXP ::cvc5::theory::strings::StringToRegExpTypeRule typerule STRING_IN_REGEXP "SimpleTypeRule" typerule REGEXP_NONE "SimpleTypeRule" +typerule REGEXP_ALL "SimpleTypeRule" typerule REGEXP_ALLCHAR "SimpleTypeRule" # we return isConst for some regular expressions, including all that we enumerate diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 98420faaa..5ef742e9d 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -73,6 +73,7 @@ const char* toString(Rewrite r) case Rewrite::INDEXOF_RE_INVALID_INDEX: return "INDEXOF_RE_INVALID_INDEX"; case Rewrite::INDEXOF_RE_MAX_INDEX: return "INDEXOF_RE_MAX_INDEX"; case Rewrite::ITOS_EVAL: return "ITOS_EVAL"; + case Rewrite::RE_ALL_ELIM: return "RE_ALL_ELIM"; case Rewrite::RE_AND_EMPTY: return "RE_AND_EMPTY"; case Rewrite::RE_ANDOR_FLATTEN: return "RE_ANDOR_FLATTEN"; case Rewrite::RE_ANDOR_INC_CONFLICT: return "RE_ANDOR_INC_CONFLICT"; diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index de8af0b4b..ea41450e9 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -78,6 +78,7 @@ enum class Rewrite : uint32_t INDEXOF_RE_INVALID_INDEX, INDEXOF_RE_MAX_INDEX, ITOS_EVAL, + RE_ALL_ELIM, RE_AND_EMPTY, RE_ANDOR_FLATTEN, RE_ANDOR_INC_CONFLICT, diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index cf2df4734..606241c0a 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -757,6 +757,15 @@ Node SequencesRewriter::rewriteConcat(Node node) return node; } +Node SequencesRewriter::rewriteAllRegExp(TNode node) +{ + Assert(node.getKind() == kind::REGEXP_ALL); + NodeManager* nm = NodeManager::currentNM(); + // re.all ----> (re.* re.allchar) + Node ret = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_ALLCHAR)); + return returnRewrite(node, ret, Rewrite::RE_ALL_ELIM); +} + Node SequencesRewriter::rewriteConcatRegExp(TNode node) { Assert(node.getKind() == kind::REGEXP_CONCAT); @@ -1655,6 +1664,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteMembership(node); } + else if (nk == REGEXP_ALL) + { + retNode = rewriteAllRegExp(node); + } else if (nk == REGEXP_CONCAT) { retNode = rewriteConcatRegExp(node); diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 850acfb2a..f988a9460 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -40,6 +40,12 @@ class SequencesRewriter : public TheoryRewriter StringsEntail& getStringsEntail(); protected: + /** rewrite regular expression all + * + * This is the entry point for post-rewriting applications of re.all. + * Returns the rewritten form of node. + */ + Node rewriteAllRegExp(TNode node); /** rewrite regular expression concatenation * * This is the entry point for post-rewriting applications of re.++. diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index 3c6562aae..9deecc921 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -356,11 +356,11 @@ bool isStringKind(Kind k) bool isRegExpKind(Kind k) { - return k == REGEXP_NONE || k == REGEXP_ALLCHAR || k == STRING_TO_REGEXP - || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER - || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT - || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV - || k == REGEXP_COMPLEMENT; + return k == REGEXP_NONE || k == REGEXP_ALL || k == REGEXP_ALLCHAR + || k == STRING_TO_REGEXP || k == REGEXP_CONCAT || k == REGEXP_UNION + || k == REGEXP_INTER || k == REGEXP_STAR || k == REGEXP_PLUS + || k == REGEXP_OPT || k == REGEXP_RANGE || k == REGEXP_LOOP + || k == REGEXP_RV || k == REGEXP_COMPLEMENT; } TypeNode getOwnerStringType(Node n) -- 2.30.2