From ba659fb1b8bc2f110616ec113892f63f210a5ebb Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Sat, 30 May 2020 13:10:54 -0500 Subject: [PATCH] Add the sequence type (#4539) This adds basic infrastructure for the sequence type, including its type rules, type enumerator and extensions to type functions. The next step will be to finalize the support in TheoryStrings for sequences and then add front end support for sequences in the API/parsers. --- src/bindings/java/CMakeLists.txt | 1 + src/expr/node_manager.cpp | 11 +++ src/expr/node_manager.h | 5 +- src/expr/type.cpp | 16 ++++ src/expr/type.h | 25 +++++- src/expr/type_node.cpp | 12 ++- src/expr/type_node.h | 10 +++ src/theory/strings/kinds | 20 +++++ src/theory/strings/rewrites.cpp | 1 + src/theory/strings/rewrites.h | 3 +- src/theory/strings/sequences_rewriter.cpp | 17 ++++ src/theory/strings/sequences_rewriter.h | 6 ++ .../strings/theory_strings_type_rules.h | 50 +++++++++++ src/theory/strings/type_enumerator.cpp | 83 +++++++++++++++++++ src/theory/strings/type_enumerator.h | 43 ++++++++++ 15 files changed, 296 insertions(+), 7 deletions(-) diff --git a/src/bindings/java/CMakeLists.txt b/src/bindings/java/CMakeLists.txt index 2cecf15e9..4e1b96af9 100644 --- a/src/bindings/java/CMakeLists.txt +++ b/src/bindings/java/CMakeLists.txt @@ -138,6 +138,7 @@ set(gen_java_files ${CMAKE_CURRENT_BINARY_DIR}/SWIGTYPE_p_std__vectorT_std__pairT_CVC4__Expr_CVC4__Expr_t_t.java ${CMAKE_CURRENT_BINARY_DIR}/SWIGTYPE_p_std__vectorT_std__vectorT_std__string_t_t.java ${CMAKE_CURRENT_BINARY_DIR}/SelectorType.java + ${CMAKE_CURRENT_BINARY_DIR}/SequenceType.java ${CMAKE_CURRENT_BINARY_DIR}/SetType.java ${CMAKE_CURRENT_BINARY_DIR}/SmtEngine.java ${CMAKE_CURRENT_BINARY_DIR}/SortConstructorType.java diff --git a/src/expr/node_manager.cpp b/src/expr/node_manager.cpp index feec9b782..7bc98f65d 100644 --- a/src/expr/node_manager.cpp +++ b/src/expr/node_manager.cpp @@ -495,6 +495,17 @@ Node NodeManager::mkSkolem(const std::string& prefix, const TypeNode& type, cons return n; } +TypeNode NodeManager::mkSequenceType(TypeNode elementType) +{ + CheckArgument( + !elementType.isNull(), elementType, "unexpected NULL element type"); + CheckArgument(elementType.isFirstClass(), + elementType, + "cannot store types that are not first-class in sequences. Try " + "option --uf-ho."); + return mkTypeNode(kind::SEQUENCE_TYPE, elementType); +} + TypeNode NodeManager::mkConstructorType(const DatatypeConstructor& constructor, TypeNode range) { vector sorts; diff --git a/src/expr/node_manager.h b/src/expr/node_manager.h index aea49d979..45d3d5b7d 100644 --- a/src/expr/node_manager.h +++ b/src/expr/node_manager.h @@ -875,9 +875,12 @@ public: /** Make the type of arrays with the given parameterization */ inline TypeNode mkArrayType(TypeNode indexType, TypeNode constituentType); - /** Make the type of arrays with the given parameterization */ + /** Make the type of set with the given parameterization */ inline TypeNode mkSetType(TypeNode elementType); + /** Make the type of sequences with the given parameterization */ + TypeNode mkSequenceType(TypeNode elementType); + /** Make a type representing a constructor with the given parameterization */ TypeNode mkConstructorType(const DatatypeConstructor& constructor, TypeNode range); /** diff --git a/src/expr/type.cpp b/src/expr/type.cpp index 031dcb3f0..2067beef5 100644 --- a/src/expr/type.cpp +++ b/src/expr/type.cpp @@ -353,6 +353,12 @@ bool Type::isSet() const { return d_typeNode->isSet(); } +bool Type::isSequence() const +{ + NodeManagerScope nms(d_nodeManager); + return d_typeNode->isSequence(); +} + /** Is this a sort kind */ bool Type::isSort() const { NodeManagerScope nms(d_nodeManager); @@ -516,6 +522,11 @@ SetType::SetType(const Type& t) : Type(t) PrettyCheckArgument(isNull() || isSet(), this); } +SequenceType::SequenceType(const Type& t) : Type(t) +{ + PrettyCheckArgument(isNull() || isSequence(), this); +} + SortType::SortType(const Type& t) : Type(t) { PrettyCheckArgument(isNull() || isSort(), this); @@ -550,6 +561,11 @@ Type SetType::getElementType() const { return makeType(d_typeNode->getSetElementType()); } +Type SequenceType::getElementType() const +{ + return makeType(d_typeNode->getSequenceElementType()); +} + DatatypeType ConstructorType::getRangeType() const { return DatatypeType(makeType(d_typeNode->getConstructorRangeType())); } diff --git a/src/expr/type.h b/src/expr/type.h index 529c40930..0cdf55626 100644 --- a/src/expr/type.h +++ b/src/expr/type.h @@ -373,7 +373,13 @@ protected: */ bool isSet() const; - /** + /** + * Is this a Sequence type? + * @return true if the type is a Sequence type + */ + bool isSequence() const; + + /** * Is this a datatype type? * @return true if the type is a datatype type */ @@ -515,15 +521,26 @@ class CVC4_PUBLIC ArrayType : public Type { Type getConstituentType() const; };/* class ArrayType */ -/** Class encapsulating an set type. */ +/** Class encapsulating a set type. */ class CVC4_PUBLIC SetType : public Type { public: /** Construct from the base type */ SetType(const Type& type = Type()); - /** Get the index type */ + /** Get the element type */ + Type getElementType() const; +}; /* class SetType */ + +/** Class encapsulating a sequence type. */ +class CVC4_PUBLIC SequenceType : public Type +{ + public: + /** Construct from the base type */ + SequenceType(const Type& type = Type()); + + /** Get the element type */ Type getElementType() const; -};/* class SetType */ +}; /* class SetType */ /** Class encapsulating a user-defined sort. */ class CVC4_PUBLIC SortType : public Type { diff --git a/src/expr/type_node.cpp b/src/expr/type_node.cpp index 110db6162..e191be0c2 100644 --- a/src/expr/type_node.cpp +++ b/src/expr/type_node.cpp @@ -122,7 +122,7 @@ bool TypeNode::isFiniteInternal(bool usortFinite) { ret = true; } - else if (isString() || isRegExp() || isReal()) + else if (isString() || isRegExp() || isSequence() || isReal()) { ret = false; } @@ -245,6 +245,10 @@ bool TypeNode::isClosedEnumerable() { ret = getSetElementType().isClosedEnumerable(); } + else if (isSequence()) + { + ret = getSequenceElementType().isClosedEnumerable(); + } else if (isDatatype()) { // avoid infinite loops: initially set to true @@ -353,6 +357,12 @@ bool TypeNode::isComparableTo(TypeNode t) const { return false; } +TypeNode TypeNode::getSequenceElementType() const +{ + Assert(isSequence()); + return (*this)[0]; +} + TypeNode TypeNode::getBaseType() const { TypeNode realt = NodeManager::currentNM()->realType(); if (isSubtypeOf(realt)) { diff --git a/src/expr/type_node.h b/src/expr/type_node.h index 70392fb01..c9771bd3d 100644 --- a/src/expr/type_node.h +++ b/src/expr/type_node.h @@ -518,6 +518,9 @@ public: /** Is this a Set type? */ bool isSet() const; + /** Is this a Sequence type? */ + bool isSequence() const; + /** Get the index type (for array types) */ TypeNode getArrayIndexType() const; @@ -536,6 +539,8 @@ public: /** Get the element type (for set types) */ TypeNode getSetElementType() const; + /** Get the element type (for sequence types) */ + TypeNode getSequenceElementType() const; /** * Is this a function type? Function-like things (e.g. datatype * selectors) that aren't actually functions are NOT considered @@ -964,6 +969,11 @@ inline bool TypeNode::isSet() const { return getKind() == kind::SET_TYPE; } +inline bool TypeNode::isSequence() const +{ + return getKind() == kind::SEQUENCE_TYPE; +} + inline TypeNode TypeNode::getSetElementType() const { Assert(isSet()); return (*this)[0]; diff --git a/src/theory/strings/kinds b/src/theory/strings/kinds index 27ffe5d26..800847ffe 100644 --- a/src/theory/strings/kinds +++ b/src/theory/strings/kinds @@ -58,12 +58,27 @@ constant CONST_STRING \ "util/string.h" \ "a string of characters" +# the type +operator SEQUENCE_TYPE 1 "seuence type, takes as parameter the type of the elements" +cardinality SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceProperties::computeCardinality(%TYPE%)" \ + "theory/strings/theory_strings_type_rules.h" +well-founded SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceProperties::isWellFounded(%TYPE%)" \ + "::CVC4::theory::strings::SequenceProperties::mkGroundTerm(%TYPE%)" \ + "theory/strings/theory_strings_type_rules.h" +enumerator SEQUENCE_TYPE \ + "::CVC4::theory::strings::SequenceEnumerator" \ + "theory/strings/type_enumerator.h" + constant CONST_SEQUENCE \ ::CVC4::ExprSequence \ ::CVC4::ExprSequenceHashFunction \ "expr/expr_sequence.h" \ "a sequence of characters" +operator SEQ_UNIT 1 "a sequence of length one" + # equal equal / less than / output operator STRING_TO_REGEXP 1 "convert string to regexp" operator REGEXP_CONCAT 2: "regexp concat" @@ -144,4 +159,9 @@ typerule STRING_FROM_CODE "SimpleTypeRule" typerule STRING_TOUPPER "SimpleTypeRule" typerule STRING_TOLOWER "SimpleTypeRule" +### sequence specific operators + +typerule CONST_SEQUENCE ::CVC4::theory::strings::ConstSequenceTypeRule +typerule SEQ_UNIT ::CVC4::theory::strings::SeqUnitTypeRule + endtheory diff --git a/src/theory/strings/rewrites.cpp b/src/theory/strings/rewrites.cpp index 2953a2b3c..a4055c4f9 100644 --- a/src/theory/strings/rewrites.cpp +++ b/src/theory/strings/rewrites.cpp @@ -200,6 +200,7 @@ const char* toString(Rewrite r) case Rewrite::LEN_REPL_INV: return "LEN_REPL_INV"; case Rewrite::LEN_CONV_INV: return "LEN_CONV_INV"; case Rewrite::CHARAT_ELIM: return "CHARAT_ELIM"; + case Rewrite::SEQ_UNIT_EVAL: return "SEQ_UNIT_EVAL"; default: return "?"; } } diff --git a/src/theory/strings/rewrites.h b/src/theory/strings/rewrites.h index 7a315ebd3..96a3b65fd 100644 --- a/src/theory/strings/rewrites.h +++ b/src/theory/strings/rewrites.h @@ -202,7 +202,8 @@ enum class Rewrite : uint32_t LEN_CONCAT, LEN_REPL_INV, LEN_CONV_INV, - CHARAT_ELIM + CHARAT_ELIM, + SEQ_UNIT_EVAL }; /** diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 2d2ec0af0..55d58b860 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -1461,6 +1461,10 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node) { retNode = rewriteRepeatRegExp(node); } + else if (nk == SEQ_UNIT) + { + retNode = rewriteSeqUnit(node); + } Trace("sequences-postrewrite") << "Strings::SequencesRewriter::postRewrite returning " << retNode @@ -3095,6 +3099,19 @@ Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype) return res; } +Node SequencesRewriter::rewriteSeqUnit(Node node) +{ + NodeManager* nm = NodeManager::currentNM(); + if (node[0].isConst()) + { + std::vector seq; + seq.push_back(node[0].toExpr()); + TypeNode stype = nm->mkSequenceType(node[0].getType()); + Node ret = nm->mkConst(ExprSequence(stype.toType(), seq)); + return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL); + } + return node; +} Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r) { diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index 56b74f536..490dd8b3c 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -224,6 +224,12 @@ class SequencesRewriter : public TheoryRewriter * Returns the rewritten form of node. */ Node rewriteStringToCode(Node node); + /** rewrite seq.unit + * This is the entry point for post-rewriting terms n of the form + * seq.unit( t ) + * Returns the rewritten form of node. + */ + Node rewriteSeqUnit(Node node); /** length preserving rewrite * diff --git a/src/theory/strings/theory_strings_type_rules.h b/src/theory/strings/theory_strings_type_rules.h index 93a32f26e..3229e6631 100644 --- a/src/theory/strings/theory_strings_type_rules.h +++ b/src/theory/strings/theory_strings_type_rules.h @@ -20,6 +20,9 @@ #ifndef CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H #define CVC4__THEORY__STRINGS__THEORY_STRINGS_TYPE_RULES_H +#include "expr/expr_sequence.h" +#include "expr/sequence.h" + namespace CVC4 { namespace theory { namespace strings { @@ -318,6 +321,53 @@ public: } }; +class ConstSequenceTypeRule +{ + public: + static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + Assert(n.getKind() == kind::CONST_SEQUENCE); + return n.getConst().getSequence().getType(); + } +}; + +class SeqUnitTypeRule +{ + public: + static TypeNode computeType(NodeManager* nodeManager, + TNode n, + bool check) + { + return nodeManager->mkSequenceType(n[0].getType(check)); + } +}; + +/** Properties of the sequence type */ +struct SequenceProperties +{ + static Cardinality computeCardinality(TypeNode type) + { + Assert(type.getKind() == kind::SEQUENCE_TYPE); + return Cardinality::INTEGERS; + } + /** A sequence is well-founded if its element type is */ + static bool isWellFounded(TypeNode type) + { + return type[0].isWellFounded(); + } + /** Make ground term for sequence type (return the empty sequence) */ + static Node mkGroundTerm(TypeNode type) + { + Assert(type.isSequence()); + // empty sequence + std::vector seq; + return NodeManager::currentNM()->mkConst( + ExprSequence(SequenceType(type.toType()), seq)); + } +}; /* struct SequenceProperties */ + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ diff --git a/src/theory/strings/type_enumerator.cpp b/src/theory/strings/type_enumerator.cpp index d24206860..c25363e65 100644 --- a/src/theory/strings/type_enumerator.cpp +++ b/src/theory/strings/type_enumerator.cpp @@ -158,6 +158,67 @@ void StringEnumLen::mkCurr() d_curr = makeStandardModelConstant(d_witer->getData(), d_cardinality); } +SeqEnumLen::SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength) + : SEnumLen(tn, startLength) +{ + d_elementEnumerator.reset( + new TypeEnumerator(d_type.getSequenceElementType(), tep)); + mkCurr(); +} + +SeqEnumLen::SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength, + uint32_t endLength) + : SEnumLen(tn, startLength, endLength) +{ + d_elementEnumerator.reset( + new TypeEnumerator(d_type.getSequenceElementType(), tep)); + mkCurr(); +} + +SeqEnumLen::SeqEnumLen(const SeqEnumLen& wenum) + : SEnumLen(wenum), + d_elementEnumerator(new TypeEnumerator(*wenum.d_elementEnumerator)), + d_elementDomain(wenum.d_elementDomain) +{ +} + +bool SeqEnumLen::increment() +{ + if (!d_elementEnumerator->isFinished()) + { + // yet to establish domain + Assert(d_elementEnumerator != nullptr); + d_elementDomain.push_back((**d_elementEnumerator).toExpr()); + ++(*d_elementEnumerator); + } + // the current cardinality is the domain size of the element + if (!d_witer->increment(d_elementDomain.size())) + { + Assert(d_elementEnumerator->isFinished()); + d_curr = Node::null(); + return false; + } + mkCurr(); + return true; +} + +void SeqEnumLen::mkCurr() +{ + std::vector seq; + const std::vector& data = d_witer->getData(); + for (unsigned i : data) + { + seq.push_back(d_elementDomain[i]); + } + // make sequence from seq + d_curr = + NodeManager::currentNM()->mkConst(ExprSequence(d_type.toType(), seq)); +} + StringEnumerator::StringEnumerator(TypeNode type, TypeEnumeratorProperties* tep) : TypeEnumeratorBase(type), d_wenum(0, utils::getAlphabetCardinality()) @@ -182,6 +243,28 @@ StringEnumerator& StringEnumerator::operator++() bool StringEnumerator::isFinished() { return d_wenum.isFinished(); } +SequenceEnumerator::SequenceEnumerator(TypeNode type, + TypeEnumeratorProperties* tep) + : TypeEnumeratorBase(type), d_wenum(type, tep, 0) +{ +} + +SequenceEnumerator::SequenceEnumerator(const SequenceEnumerator& enumerator) + : TypeEnumeratorBase(enumerator.getType()), + d_wenum(enumerator.d_wenum) +{ +} + +Node SequenceEnumerator::operator*() { return d_wenum.getCurrent(); } + +SequenceEnumerator& SequenceEnumerator::operator++() +{ + d_wenum.increment(); + return *this; +} + +bool SequenceEnumerator::isFinished() { return d_wenum.isFinished(); } + } // namespace strings } // namespace theory } // namespace CVC4 diff --git a/src/theory/strings/type_enumerator.h b/src/theory/strings/type_enumerator.h index b379ce5c3..c82892624 100644 --- a/src/theory/strings/type_enumerator.h +++ b/src/theory/strings/type_enumerator.h @@ -136,6 +136,34 @@ class StringEnumLen : public SEnumLen void mkCurr(); }; +/** + * Enumerates sequence values for a given length. + */ +class SeqEnumLen : public SEnumLen +{ + public: + /** For sequences */ + SeqEnumLen(TypeNode tn, TypeEnumeratorProperties* tep, uint32_t startLength); + SeqEnumLen(TypeNode tn, + TypeEnumeratorProperties* tep, + uint32_t startLength, + uint32_t endLength); + /** copy constructor */ + SeqEnumLen(const SeqEnumLen& wenum); + /** destructor */ + ~SeqEnumLen() {} + /** increment */ + bool increment() override; + + private: + /** an enumerator for the elements' type */ + std::unique_ptr d_elementEnumerator; + /** The domain */ + std::vector d_elementDomain; + /** Make the current term from d_data */ + void mkCurr(); +}; + class StringEnumerator : public TypeEnumeratorBase { public: @@ -154,6 +182,21 @@ class StringEnumerator : public TypeEnumeratorBase StringEnumLen d_wenum; }; /* class StringEnumerator */ +class SequenceEnumerator : public TypeEnumeratorBase +{ + public: + SequenceEnumerator(TypeNode type, TypeEnumeratorProperties* tep = nullptr); + SequenceEnumerator(const SequenceEnumerator& enumerator); + ~SequenceEnumerator() {} + Node operator*() override; + SequenceEnumerator& operator++() override; + bool isFinished() override; + + private: + /** underlying sequence enumerator */ + SeqEnumLen d_wenum; +}; /* class SequenceEnumerator */ + }/* CVC4::theory::strings namespace */ }/* CVC4::theory namespace */ }/* CVC4 namespace */ -- 2.30.2