From: Andrew Reynolds Date: Mon, 30 Mar 2020 21:29:23 +0000 (-0500) Subject: Support indexed operators re.loop and re.^ (#4167) X-Git-Tag: cvc5-1.0.0~3424 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6838885b1250e0abbb1b8d56e6b400a5d7f3ca95;p=cvc5.git Support indexed operators re.loop and re.^ (#4167) Towards support for the strings standard. This modifies our interface so that we accept the SMT-LIB standard versions of re.loop and re.^. This means re.loop no longer accepts 3 arguments but 1 (with 2 indices). This means we no longer accept re.loop with only a lower bound and no upper bound on the number of repetitions. Also fixes #4161. --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2ea305bc7..912df8b82 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -966,6 +966,7 @@ install(FILES util/proof.h util/rational_cln_imp.h util/rational_gmp_imp.h + util/regexp.h util/resource_manager.h util/result.h util/sexpr.h diff --git a/src/api/cvc4cpp.cpp b/src/api/cvc4cpp.cpp index f563e83f5..fa727088e 100644 --- a/src/api/cvc4cpp.cpp +++ b/src/api/cvc4cpp.cpp @@ -277,6 +277,7 @@ const static std::unordered_map s_kinds{ {REGEXP_PLUS, CVC4::Kind::REGEXP_PLUS}, {REGEXP_OPT, CVC4::Kind::REGEXP_OPT}, {REGEXP_RANGE, CVC4::Kind::REGEXP_RANGE}, + {REGEXP_REPEAT, CVC4::Kind::REGEXP_REPEAT}, {REGEXP_LOOP, CVC4::Kind::REGEXP_LOOP}, {REGEXP_EMPTY, CVC4::Kind::REGEXP_EMPTY}, {REGEXP_SIGMA, CVC4::Kind::REGEXP_SIGMA}, @@ -544,6 +545,7 @@ const static std::unordered_map {CVC4::Kind::REGEXP_PLUS, REGEXP_PLUS}, {CVC4::Kind::REGEXP_OPT, REGEXP_OPT}, {CVC4::Kind::REGEXP_RANGE, REGEXP_RANGE}, + {CVC4::Kind::REGEXP_REPEAT, REGEXP_REPEAT}, {CVC4::Kind::REGEXP_LOOP, REGEXP_LOOP}, {CVC4::Kind::REGEXP_EMPTY, REGEXP_EMPTY}, {CVC4::Kind::REGEXP_SIGMA, REGEXP_SIGMA}, @@ -3490,6 +3492,11 @@ Op Solver::mkOp(Kind kind, uint32_t arg) const kind, *mkValHelper(CVC4::TupleUpdate(arg)).d_expr.get()); break; + case REGEXP_REPEAT: + res = Op(kind, + *mkValHelper(CVC4::RegExpRepeat(arg)) + .d_expr.get()); + break; default: CVC4_API_KIND_CHECK_EXPECTED(false, kind) << "operator kind with uint32_t argument"; @@ -3550,6 +3557,11 @@ Op Solver::mkOp(Kind kind, uint32_t arg1, uint32_t arg2) const CVC4::FloatingPointToFPGeneric(arg1, arg2)) .d_expr.get()); break; + case REGEXP_LOOP: + res = Op(kind, + *mkValHelper(CVC4::RegExpLoop(arg1, arg2)) + .d_expr.get()); + break; default: CVC4_API_KIND_CHECK_EXPECTED(false, kind) << "operator kind with two uint32_t arguments"; diff --git a/src/api/cvc4cppkind.h b/src/api/cvc4cppkind.h index d399ad616..f8e1fb90c 100644 --- a/src/api/cvc4cppkind.h +++ b/src/api/cvc4cppkind.h @@ -2175,15 +2175,37 @@ enum CVC4_PUBLIC Kind : int32_t */ REGEXP_RANGE, /** - * Regexp loop. - * Parameters: 2 (3) - * -[1]: Term of sort RegExp - * -[2]: Lower bound for the number of repetitions of the first argument - * -[3]: Upper bound for the number of repetitions of the first argument + * Operator for regular expression repeat. + * Parameters: 1 + * -[1]: The number of repetitions * Create with: - * mkTerm(Kind kind, Term child1, Term child2) - * mkTerm(Kind kind, Term child1, Term child2, Term child3) - * mkTerm(Kind kind, const std::vector& children) + * mkOp(Kind kind, uint32_t param) + * + * Apply regular expression loop. + * Parameters: 2 + * -[1]: Op of kind REGEXP_REPEAT + * -[2]: Term of regular expression sort + * Create with: + * mkTerm(Op op, Term child) + * mkTerm(Op op, const std::vector& children) + */ + REGEXP_REPEAT, + /** + * Operator for regular expression loop, from lower bound to upper bound + * number of repetitions. + * Parameters: 2 + * -[1]: The lower bound + * -[2]: The upper bound + * Create with: + * mkOp(Kind kind, uint32_t param, uint32_t param) + * + * Apply regular expression loop. + * Parameters: 2 + * -[1]: Op of kind REGEXP_LOOP + * -[2]: Term of regular expression sort + * Create with: + * mkTerm(Op op, Term child) + * mkTerm(Op op, const std::vector& children) */ REGEXP_LOOP, /** diff --git a/src/bindings/java/CMakeLists.txt b/src/bindings/java/CMakeLists.txt index 344387ed9..c6034d0aa 100644 --- a/src/bindings/java/CMakeLists.txt +++ b/src/bindings/java/CMakeLists.txt @@ -112,6 +112,8 @@ set(gen_java_files ${CMAKE_CURRENT_BINARY_DIR}/RecordUpdate.java ${CMAKE_CURRENT_BINARY_DIR}/RecordUpdateHashFunction.java ${CMAKE_CURRENT_BINARY_DIR}/RecoverableModalException.java + ${CMAKE_CURRENT_BINARY_DIR}/RegExpLoop.java + ${CMAKE_CURRENT_BINARY_DIR}/RegExpRepeat.java ${CMAKE_CURRENT_BINARY_DIR}/RegExpType.java ${CMAKE_CURRENT_BINARY_DIR}/Result.java ${CMAKE_CURRENT_BINARY_DIR}/RoundingMode.java diff --git a/src/cvc4.i b/src/cvc4.i index 9dcff7f8e..42713ce40 100644 --- a/src/cvc4.i +++ b/src/cvc4.i @@ -274,6 +274,7 @@ std::set CVC4::JavaInputStreamAdapter::s_adapters; %include "util/cardinality.i" %include "util/hash.i" %include "util/proof.i" +%include "util/regexp.i" %include "util/result.i" %include "util/sexpr.i" %include "util/statistics.i" diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g index 8b3b96cfd..7b3e1c4de 100644 --- a/src/parser/cvc/Cvc.g +++ b/src/parser/cvc/Cvc.g @@ -2074,8 +2074,11 @@ stringTerm[CVC4::api::Term& f] { f = MK_TERM(CVC4::api::REGEXP_OPT, f); } | REGEXP_RANGE_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_TERM(CVC4::api::REGEXP_RANGE, f, f2); } - | REGEXP_LOOP_TOK LPAREN formula[f] COMMA formula[f2] COMMA formula[f3] RPAREN - { f = MK_TERM(CVC4::api::REGEXP_LOOP, f, f2, f3); } + | REGEXP_LOOP_TOK LPAREN formula[f] COMMA lo=numeral COMMA hi=numeral RPAREN + { + api::Op lop = SOLVER->mkOp(CVC4::api::REGEXP_LOOP, lo, hi); + f = MK_TERM(lop, f); + } | REGEXP_COMPLEMENT_TOK LPAREN formula[f] RPAREN { f = MK_TERM(CVC4::api::REGEXP_COMPLEMENT, f); } | REGEXP_EMPTY_TOK diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 7ba882f24..3233ee7e8 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -193,8 +193,9 @@ void Smt2::addStringOperators() { addOperator(api::REGEXP_STAR, "re.*"); addOperator(api::REGEXP_PLUS, "re.+"); addOperator(api::REGEXP_OPT, "re.opt"); + addIndexedOperator(api::REGEXP_REPEAT, api::REGEXP_REPEAT, "re.^"); + addIndexedOperator(api::REGEXP_LOOP, api::REGEXP_LOOP, "re.loop"); addOperator(api::REGEXP_RANGE, "re.range"); - addOperator(api::REGEXP_LOOP, "re.loop"); addOperator(api::REGEXP_COMPLEMENT, "re.comp"); addOperator(api::REGEXP_DIFF, "re.diff"); addOperator(api::STRING_LT, "str.<"); diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 3f7abdb7c..06f05a8af 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -68,12 +68,25 @@ operator REGEXP_STAR 1 "regexp *" operator REGEXP_PLUS 1 "regexp +" operator REGEXP_OPT 1 "regexp ?" operator REGEXP_RANGE 2 "regexp range" -operator REGEXP_LOOP 2:3 "regexp loop" operator REGEXP_COMPLEMENT 1 "regexp complement" operator REGEXP_EMPTY 0 "regexp empty" operator REGEXP_SIGMA 0 "regexp all characters" +constant REGEXP_REPEAT_OP \ + ::CVC4::RegExpRepeat \ + ::CVC4::RegExpRepeatHashFunction \ + "util/regexp.h" \ + "operator for regular expression repeat; payload is an instance of the CVC4::RegExpRepeat class" +parameterized REGEXP_REPEAT REGEXP_REPEAT_OP 1 "regular expression repeat; first parameter is a REGEXP_REPEAT_OP, second is a regular expression term" + +constant REGEXP_LOOP_OP \ + ::CVC4::RegExpLoop \ + ::CVC4::RegExpLoopHashFunction \ + "util/regexp.h" \ + "operator for regular expression loop; payload is an instance of the CVC4::RegExpLoop class" +parameterized REGEXP_LOOP REGEXP_LOOP_OP 1 "regular expression loop; first parameter is a REGEXP_LOOP_OP, second is a regular expression term" + #internal operator REGEXP_RV 1 "regexp rv (internal use only)" typerule REGEXP_RV "SimpleTypeRule" @@ -88,7 +101,10 @@ typerule REGEXP_STAR "SimpleTypeRule" typerule REGEXP_PLUS "SimpleTypeRule" typerule REGEXP_OPT "SimpleTypeRule" typerule REGEXP_RANGE ::CVC4::theory::strings::RegExpRangeTypeRule -typerule REGEXP_LOOP "SimpleTypeRule>" +typerule REGEXP_REPEAT_OP "SimpleTypeRule" +typerule REGEXP_REPEAT "SimpleTypeRule" +typerule REGEXP_LOOP_OP "SimpleTypeRule" +typerule REGEXP_LOOP "SimpleTypeRule" typerule REGEXP_COMPLEMENT "SimpleTypeRule" typerule STRING_TO_REGEXP "SimpleTypeRule" typerule STRING_IN_REGEXP "SimpleTypeRule" diff --git a/src/theory/strings/regexp_operation.cpp b/src/theory/strings/regexp_operation.cpp index 9a2091eac..6453e1909 100644 --- a/src/theory/strings/regexp_operation.cpp +++ b/src/theory/strings/regexp_operation.cpp @@ -93,15 +93,7 @@ RegExpConstType RegExpOpr::getRegExpConstType(Node r) { d_constCache[cur] = RE_C_UNKNOWN; visit.push_back(cur); - if (ck == REGEXP_LOOP) - { - // only add the first child of loop - visit.push_back(cur[0]); - } - else - { - visit.insert(visit.end(), cur.begin(), cur.end()); - } + visit.insert(visit.end(), cur.begin(), cur.end()); } } else if (it->second == RE_C_UNKNOWN) @@ -260,7 +252,9 @@ int RegExpOpr::delta( Node r, Node &exp ) { break; } case kind::REGEXP_LOOP: { - if(r[1] == d_zero) { + uint32_t lo = utils::getLoopMinOccurrences(r); + if (lo == 0) + { ret = 1; } else { ret = delta(r[0], exp); @@ -501,18 +495,18 @@ int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) { break; } case kind::REGEXP_LOOP: { - if(r[1] == r[2] && r[1] == d_zero) { + uint32_t l = utils::getLoopMinOccurrences(r); + uint32_t u = utils::getLoopMaxOccurrences(r); + if (l == u && l == 0) + { ret = 2; //retNode = d_emptyRegexp; } else { Node dc; ret = derivativeS(r[0], c, dc); if(dc==d_emptyRegexp) { - unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); - unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); - Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], - NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), - NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); + Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); + Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); } else { retNode = d_emptyRegexp; @@ -686,16 +680,16 @@ Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) { break; } case kind::REGEXP_LOOP: { - if(r[1] == r[2] && r[1] == d_zero) { + uint32_t l = utils::getLoopMinOccurrences(r); + uint32_t u = utils::getLoopMaxOccurrences(r); + if (l == u || l == 0) + { retNode = d_emptyRegexp; } else { Node dc = derivativeSingle(r[0], c); if(dc != d_emptyRegexp) { - unsigned l = r[1].getConst().getNumerator().toUnsignedInt(); - unsigned u = r[2].getConst().getNumerator().toUnsignedInt(); - Node r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_LOOP, r[0], - NodeManager::currentNM()->mkConst(CVC4::Rational(l==0? 0 : (l-1))), - NodeManager::currentNM()->mkConst(CVC4::Rational(u-1))); + Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); + Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); } else { retNode = d_emptyRegexp; diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 861d99135..a86c9599f 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1192,61 +1192,40 @@ Node SequencesRewriter::rewriteLoopRegExp(TNode node) { return returnRewrite(node, r, Rewrite::RE_LOOP_STAR); } - TNode n1 = node[1]; NodeManager* nm = NodeManager::currentNM(); CVC4::Rational rMaxInt(String::maxSize()); - AlwaysAssert(n1.isConst()) << "re.loop contains non-constant integer (1)."; - AlwaysAssert(n1.getConst().sgn() >= 0) - << "Negative integer in string REGEXP_LOOP (1)"; - Assert(n1.getConst() <= rMaxInt) - << "Exceeded UINT32_MAX in string REGEXP_LOOP (1)"; - uint32_t l = n1.getConst().getNumerator().toUnsignedInt(); + uint32_t l = utils::getLoopMinOccurrences(node); std::vector vec_nodes; for (unsigned i = 0; i < l; i++) { vec_nodes.push_back(r); } - if (node.getNumChildren() == 3) + Node n = + vec_nodes.size() == 0 + ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))) + : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes); + uint32_t u = utils::getLoopMaxOccurrences(node); + if (u < l) { - TNode n2 = Rewriter::rewrite(node[2]); - Node n = - vec_nodes.size() == 0 - ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))) - : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes); - AlwaysAssert(n2.isConst()) << "re.loop contains non-constant integer (2)."; - AlwaysAssert(n2.getConst().sgn() >= 0) - << "Negative integer in string REGEXP_LOOP (2)"; - Assert(n2.getConst() <= rMaxInt) - << "Exceeded UINT32_MAX in string REGEXP_LOOP (2)"; - uint32_t u = n2.getConst().getNumerator().toUnsignedInt(); - if (u <= l) - { - retNode = n; - } - else - { - std::vector vec2; - vec2.push_back(n); - TypeNode rtype = nm->regExpType(); - for (unsigned j = l; j < u; j++) - { - vec_nodes.push_back(r); - n = utils::mkConcat(vec_nodes, rtype); - vec2.push_back(n); - } - retNode = nm->mkNode(REGEXP_UNION, vec2); - } + std::vector nvec; + retNode = nm->mkNode(REGEXP_EMPTY, nvec); + } + else if (u == l) + { + retNode = n; } else { - Node rest = nm->mkNode(REGEXP_STAR, r); - retNode = vec_nodes.size() == 0 - ? rest - : vec_nodes.size() == 1 - ? nm->mkNode(REGEXP_CONCAT, r, rest) - : nm->mkNode(REGEXP_CONCAT, - nm->mkNode(REGEXP_CONCAT, vec_nodes), - rest); + std::vector vec2; + vec2.push_back(n); + TypeNode rtype = nm->regExpType(); + for (uint32_t j = l; j < u; j++) + { + vec_nodes.push_back(r); + n = utils::mkConcat(vec_nodes, rtype); + vec2.push_back(n); + } + retNode = nm->mkNode(REGEXP_UNION, vec2); } Trace("strings-lp") << "Strings::lp " << node << " => " << retNode << std::endl; @@ -1963,6 +1942,13 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteLoopRegExp(node); } + else if (nk == REGEXP_REPEAT) + { + // ((_ re.^ n) R) --> ((_ re.loop n n) R) + unsigned r = utils::getRepeatAmount(node); + Node lop = nm->mkConst(RegExpLoop(r, r)); + retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]); + } Trace("strings-postrewrite") << "Strings::postRewrite returning " << retNode << std::endl; diff --git a/src/theory/strings/theory_strings_utils.cpp b/src/theory/strings/theory_strings_utils.cpp index a3f6f4255..3b4c757f2 100644 --- a/src/theory/strings/theory_strings_utils.cpp +++ b/src/theory/strings/theory_strings_utils.cpp @@ -293,6 +293,24 @@ TypeNode getOwnerStringType(Node n) return tn; } +unsigned getRepeatAmount(TNode node) +{ + Assert(node.getKind() == REGEXP_REPEAT); + return node.getOperator().getConst().d_repeatAmount; +} + +unsigned getLoopMaxOccurrences(TNode node) +{ + Assert(node.getKind() == REGEXP_LOOP); + return node.getOperator().getConst().d_loopMaxOcc; +} + +unsigned getLoopMinOccurrences(TNode node) +{ + Assert(node.getKind() == REGEXP_LOOP); + return node.getOperator().getConst().d_loopMinOcc; +} + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/theory/strings/theory_strings_utils.h b/src/theory/strings/theory_strings_utils.h index 578c224df..846d3d563 100644 --- a/src/theory/strings/theory_strings_utils.h +++ b/src/theory/strings/theory_strings_utils.h @@ -153,6 +153,14 @@ bool isStringKind(Kind k); */ TypeNode getOwnerStringType(Node n); +/* Get the number of repetitions for a regexp repeat node */ +unsigned getRepeatAmount(TNode node); + +/* Get the maximum occurrences of given regexp loop node. */ +unsigned getLoopMaxOccurrences(TNode node); +/* Get the minimum occurrences of given regexp loop node. */ +unsigned getLoopMinOccurrences(TNode node); + } // namespace utils } // namespace strings } // namespace theory diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 6895dc01a..eba5fb8c9 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -29,6 +29,8 @@ libcvc4_add_sources( resource_manager.h result.cpp result.h + regexp.cpp + regexp.h safe_print.cpp safe_print.h sampler.cpp diff --git a/src/util/regexp.cpp b/src/util/regexp.cpp new file mode 100644 index 000000000..ca6547134 --- /dev/null +++ b/src/util/regexp.cpp @@ -0,0 +1,57 @@ +/********************* */ +/*! \file regexp.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of data structures for regular expression operators. + **/ + +#include "util/regexp.h" + +#include + +namespace CVC4 { + +RegExpRepeat::RegExpRepeat(uint32_t repeatAmount) : d_repeatAmount(repeatAmount) {} + +bool RegExpRepeat::operator==(const RegExpRepeat& r) const +{ + return d_repeatAmount == r.d_repeatAmount; +} + +RegExpLoop::RegExpLoop(uint32_t l, uint32_t h) : d_loopMinOcc(l), d_loopMaxOcc(h) {} + +bool RegExpLoop::operator==(const RegExpLoop& r) const +{ + return d_loopMinOcc == r.d_loopMinOcc + && d_loopMaxOcc == r.d_loopMaxOcc; +} + +size_t RegExpRepeatHashFunction::operator()(const RegExpRepeat& r) const +{ + return r.d_repeatAmount; +} + +size_t RegExpLoopHashFunction::operator()(const RegExpLoop& r) const +{ + return r.d_loopMinOcc + r.d_loopMaxOcc; +} + +std::ostream& operator<<(std::ostream& os, const RegExpRepeat& r) +{ + return os << r.d_repeatAmount; +} + +std::ostream& operator<<(std::ostream& os, const RegExpLoop& r) +{ + return os << "[" << r.d_loopMinOcc << ".." << r.d_loopMaxOcc << "]"; +} + +} // namespace CVC4 + diff --git a/src/util/regexp.h b/src/util/regexp.h new file mode 100644 index 000000000..aaba9e4db --- /dev/null +++ b/src/util/regexp.h @@ -0,0 +1,75 @@ +/********************* */ +/*! \file regexp.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Data structures for regular expression operators. + **/ + +#include "cvc4_public.h" + +#ifndef CVC4__UTIL__REGEXP_H +#define CVC4__UTIL__REGEXP_H + +#include +#include + +namespace CVC4 { + +struct CVC4_PUBLIC RegExpRepeat +{ + RegExpRepeat(uint32_t repeatAmount); + + bool operator==(const RegExpRepeat& r) const; + /** The amount of repetitions of the regular expression */ + uint32_t d_repeatAmount; +}; + +struct CVC4_PUBLIC RegExpLoop +{ + RegExpLoop(uint32_t l, uint32_t h); + + bool operator==(const RegExpLoop& r) const; + /** The minimum number of repetitions of the regular expression */ + uint32_t d_loopMinOcc; + /** The maximum number of repetitions of the regular expression */ + uint32_t d_loopMaxOcc; +}; + +/* ----------------------------------------------------------------------- + ** Hash Function structs + * ----------------------------------------------------------------------- */ + +/* + * Hash function for the RegExpRepeat constants. + */ +struct CVC4_PUBLIC RegExpRepeatHashFunction +{ + size_t operator()(const RegExpRepeat& r) const; +}; + +/** + * Hash function for the RegExpLoop objects. + */ +struct CVC4_PUBLIC RegExpLoopHashFunction +{ + size_t operator()(const RegExpLoop& r) const; +}; + +/* ----------------------------------------------------------------------- + ** Output stream + * ----------------------------------------------------------------------- */ + +std::ostream& operator<<(std::ostream& os, const RegExpRepeat& bv) CVC4_PUBLIC; + +std::ostream& operator<<(std::ostream& os, const RegExpLoop& bv) CVC4_PUBLIC; + +} // namespace CVC4 + +#endif /* CVC4__UTIL__REGEXP_H */ diff --git a/src/util/regexp.i b/src/util/regexp.i new file mode 100644 index 000000000..775e778f7 --- /dev/null +++ b/src/util/regexp.i @@ -0,0 +1,12 @@ +%{ +#include "util/regexp.h" +%} + +%rename(equals) CVC4::RegExpRepeat::operator==(const RegExpRepeat&) const; + +%rename(equals) CVC4::RegExpLoop::operator==(const RegExpLoop&) const; + +%ignore CVC4::operator<<(std::ostream&, const RegExpRepeat&); +%ignore CVC4::operator<<(std::ostream&, const RegExpLoop&); + +%include "util/regexp.h" diff --git a/test/regress/CMakeLists.txt b/test/regress/CMakeLists.txt index d843eb5ed..b9835d919 100644 --- a/test/regress/CMakeLists.txt +++ b/test/regress/CMakeLists.txt @@ -942,6 +942,7 @@ set(regress_0_tests regress0/strings/large-model.smt2 regress0/strings/leadingzero001.smt2 regress0/strings/loop001.smt2 + regress0/strings/loop-wrong-sem.smt2 regress0/strings/model001.smt2 regress0/strings/model-code-point.smt2 regress0/strings/model-friendly.smt2 diff --git a/test/regress/regress0/strings/bug002.smt2 b/test/regress/regress0/strings/bug002.smt2 index fd60089fd..5bf21ebb9 100644 --- a/test/regress/regress0/strings/bug002.smt2 +++ b/test/regress/regress0/strings/bug002.smt2 @@ -4,7 +4,7 @@ (set-info :status sat) ; regex = [\*-,\t\*-\|](.{6,}()?)+ -(define-fun strinre ((?s String)) Bool (str.in.re ?s (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") (re.union re.nostr (re.range "*" ",") (str.to.re "\t") (re.range "*" "|") ) (re.+ (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") (re.loop re.allchar 6 ) (re.opt (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") ) ) ) ) ) ) ) ) ) ) +(define-fun strinre ((?s String)) Bool (str.in.re ?s (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") (re.union re.nostr (re.range "*" ",") (str.to.re "\t") (re.range "*" "|") ) (re.+ (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") ((_ re.^ 6) re.allchar) (re.opt (re.union re.nostr (re.++ (str.to.re "") (str.to.re "") ) ) ) ) ) ) ) ) ) ) (assert (not (strinre "6O\1\127\n?"))) (check-sat) diff --git a/test/regress/regress0/strings/loop-wrong-sem.smt2 b/test/regress/regress0/strings/loop-wrong-sem.smt2 new file mode 100644 index 000000000..d0dd3fcb2 --- /dev/null +++ b/test/regress/regress0/strings/loop-wrong-sem.smt2 @@ -0,0 +1,4 @@ +(set-logic ALL) +(set-info :status unsat) +(assert (str.in.re "" ((_ re.loop 1 0) (str.to.re "")))) +(check-sat) diff --git a/test/regress/regress1/strings/bug686dd.smt2 b/test/regress/regress1/strings/bug686dd.smt2 index 0cb9fac26..b5c9457ff 100644 --- a/test/regress/regress1/strings/bug686dd.smt2 +++ b/test/regress/regress1/strings/bug686dd.smt2 @@ -8,7 +8,7 @@ (declare-fun root6 () T) (assert (and -(str.in.re root5 (re.loop (re.range "0" "9") 4 4) ) -(str.in.re (TCb root6) (re.loop (re.range "0" "9") 4 4) ) +(str.in.re root5 ((_ re.loop 4 4) (re.range "0" "9")) ) +(str.in.re (TCb root6) ((_ re.loop 4 4) (re.range "0" "9")) ) ) ) (check-sat) diff --git a/test/regress/regress1/strings/pierre150331.smt2 b/test/regress/regress1/strings/pierre150331.smt2 index add60d534..e04db8e9a 100644 --- a/test/regress/regress1/strings/pierre150331.smt2 +++ b/test/regress/regress1/strings/pierre150331.smt2 @@ -6,7 +6,7 @@ (define-fun stringEval ((?s String)) Bool (str.in.re ?s (re.union (str.to.re "H") -(re.++ (re.loop (str.to.re "{") 2 2 ) (re.loop (re.union re.nostr (re.range "" "]") (re.range "" "^") ) 2 4 ) ) ) ) ) +(re.++ ((_ re.loop 2 2) (str.to.re "{") ) ((_ re.loop 2 4) (re.union re.nostr (re.range "" "]") (re.range "" "^") ) ) ) ) ) ) (declare-fun s0() String) (declare-fun s1() String) (declare-fun s2() String) diff --git a/test/regress/regress1/strings/reloop.smt2 b/test/regress/regress1/strings/reloop.smt2 index 22537b957..6230d1656 100644 --- a/test/regress/regress1/strings/reloop.smt2 +++ b/test/regress/regress1/strings/reloop.smt2 @@ -8,11 +8,11 @@ (declare-fun z () String) (declare-fun w () String) -(assert (str.in.re x (re.loop (str.to.re "a") 5))) -(assert (str.in.re y (re.loop (str.to.re "b") 2 5))) -(assert (str.in.re z (re.loop (str.to.re "c") 5))) +(assert (str.in.re x ((_ re.^ 5) (str.to.re "a")))) +(assert (str.in.re y ((_ re.loop 2 5) (str.to.re "b")))) +(assert (str.in.re z ((_ re.loop 5 15) (str.to.re "c")))) (assert (> (str.len z) 7)) -(assert (str.in.re w (re.loop (str.to.re "b") 2 7))) +(assert (str.in.re w ((_ re.loop 2 7) (str.to.re "b")))) (assert (> (str.len w) 2)) (assert (< (str.len w) 5)) diff --git a/test/regress/regress2/strings/range-perf.smt2 b/test/regress/regress2/strings/range-perf.smt2 index 62ec10711..33960e124 100644 --- a/test/regress/regress2/strings/range-perf.smt2 +++ b/test/regress/regress2/strings/range-perf.smt2 @@ -2,6 +2,6 @@ ; EXPECT: sat (set-logic QF_SLIA) (declare-const x String) -(assert (str.in.re x (re.loop (re.range "0" "9") 12 12))) +(assert (str.in.re x ((_ re.loop 12 12) (re.range "0" "9")))) (assert (str.in.re x (re.++ (re.* re.allchar) (str.to.re "01") (re.* re.allchar)))) (check-sat)