From 7e81d459952dc80811df83d0ac86fb7342b58000 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Sat, 23 May 2020 07:13:42 -0500 Subject: [PATCH] Add the sequence datatype (#4153) This class is the Node-level representation of a sequence. It is analogous to CVC4::String. --- src/expr/CMakeLists.txt | 2 + src/expr/sequence.cpp | 331 ++++++++++++++++++++++++++++++++++++++++ src/expr/sequence.h | 168 ++++++++++++++++++++ 3 files changed, 501 insertions(+) create mode 100644 src/expr/sequence.cpp create mode 100644 src/expr/sequence.h diff --git a/src/expr/CMakeLists.txt b/src/expr/CMakeLists.txt index 55b2c9baa..f2a4377d0 100644 --- a/src/expr/CMakeLists.txt +++ b/src/expr/CMakeLists.txt @@ -69,6 +69,8 @@ libcvc4_add_sources( dtype_selector.cpp record.cpp record.h + sequence.cpp + sequence.h sygus_datatype.cpp sygus_datatype.h uninterpreted_constant.cpp diff --git a/src/expr/sequence.cpp b/src/expr/sequence.cpp new file mode 100644 index 000000000..e42a67bbe --- /dev/null +++ b/src/expr/sequence.cpp @@ -0,0 +1,331 @@ +/********************* */ +/*! \file sequence.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Implementation of the sequence data type. + **/ + +#include "expr/sequence.h" + +using namespace std; + +namespace CVC4 { + +Sequence::Sequence(TypeNode t, const std::vector& s) : d_type(t), d_seq(s) +{ +} + +Sequence& Sequence::operator=(const Sequence& y) +{ + if (this != &y) + { + d_type = y.d_type; + d_seq = y.d_seq; + } + return *this; +} + +int Sequence::cmp(const Sequence& y) const +{ + Assert(d_type == y.d_type); + if (size() != y.size()) + { + return size() < y.size() ? -1 : 1; + } + for (size_t i = 0, sz = size(); i < sz; ++i) + { + if (d_seq[i] != y.d_seq[i]) + { + return d_seq[i] < y.d_seq[i] ? -1 : 1; + } + } + return 0; +} + +Sequence Sequence::concat(const Sequence& other) const +{ + Assert(d_type == other.d_type); + std::vector ret_vec(d_seq); + ret_vec.insert(ret_vec.end(), other.d_seq.begin(), other.d_seq.end()); + return Sequence(d_type, ret_vec); +} + +bool Sequence::strncmp(const Sequence& y, size_t n) const +{ + Assert(d_type == y.d_type); + size_t b = (size() >= y.size()) ? size() : y.size(); + size_t s = (size() <= y.size()) ? size() : y.size(); + if (n > s) + { + if (b != s) + { + return false; + } + n = s; + } + for (size_t i = 0; i < n; ++i) + { + if (d_seq[i] != y.d_seq[i]) + { + return false; + } + } + return true; +} + +bool Sequence::rstrncmp(const Sequence& y, size_t n) const +{ + Assert(d_type == y.d_type); + size_t b = (size() >= y.size()) ? size() : y.size(); + size_t s = (size() <= y.size()) ? size() : y.size(); + if (n > s) + { + if (b != s) + { + return false; + } + n = s; + } + for (size_t i = 0; i < n; ++i) + { + if (d_seq[size() - i - 1] != y.d_seq[y.size() - i - 1]) + { + return false; + } + } + return true; +} + +Node Sequence::front() const +{ + Assert(!d_seq.empty()); + return d_seq.front(); +} + +Node Sequence::back() const +{ + Assert(!d_seq.empty()); + return d_seq.back(); +} + +size_t Sequence::overlap(const Sequence& y) const +{ + Assert(d_type == y.d_type); + size_t i = size() < y.size() ? size() : y.size(); + for (; i > 0; i--) + { + Sequence s = suffix(i); + Sequence p = y.prefix(i); + if (s == p) + { + return i; + } + } + return i; +} + +size_t Sequence::roverlap(const Sequence& y) const +{ + Assert(d_type == y.d_type); + size_t i = size() < y.size() ? size() : y.size(); + for (; i > 0; i--) + { + Sequence s = prefix(i); + Sequence p = y.suffix(i); + if (s == p) + { + return i; + } + } + return i; +} + +bool Sequence::isRepeated() const +{ + if (size() > 1) + { + Node f = d_seq[0]; + for (unsigned i = 1, sz = size(); i < sz; ++i) + { + if (f != d_seq[i]) + { + return false; + } + } + } + return true; +} + +size_t Sequence::find(const Sequence& y, size_t start) const +{ + Assert(d_type == y.d_type); + if (size() < y.size() + start) + { + return std::string::npos; + } + if (y.empty()) + { + return start; + } + if (empty()) + { + return std::string::npos; + } + std::vector::const_iterator itr = std::search( + d_seq.begin() + start, d_seq.end(), y.d_seq.begin(), y.d_seq.end()); + if (itr != d_seq.end()) + { + return itr - d_seq.begin(); + } + return std::string::npos; +} + +size_t Sequence::rfind(const Sequence& y, size_t start) const +{ + Assert(d_type == y.d_type); + if (size() < y.size() + start) + { + return std::string::npos; + } + if (y.empty()) + { + return start; + } + if (empty()) + { + return std::string::npos; + } + std::vector::const_reverse_iterator itr = std::search( + d_seq.rbegin() + start, d_seq.rend(), y.d_seq.rbegin(), y.d_seq.rend()); + if (itr != d_seq.rend()) + { + return itr - d_seq.rbegin(); + } + return std::string::npos; +} + +bool Sequence::hasPrefix(const Sequence& y) const +{ + Assert(d_type == y.d_type); + size_t s = size(); + size_t ys = y.size(); + if (ys > s) + { + return false; + } + for (size_t i = 0; i < ys; i++) + { + if (d_seq[i] != y.d_seq[i]) + { + return false; + } + } + return true; +} + +bool Sequence::hasSuffix(const Sequence& y) const +{ + Assert(d_type == y.d_type); + size_t s = size(); + size_t ys = y.size(); + if (ys > s) + { + return false; + } + size_t idiff = s - ys; + for (size_t i = 0; i < ys; i++) + { + if (d_seq[i + idiff] != y.d_seq[i]) + { + return false; + } + } + return true; +} + +Sequence Sequence::replace(const Sequence& s, const Sequence& t) const +{ + Assert(d_type == s.d_type); + Assert(d_type == t.d_type); + size_t ret = find(s); + if (ret != std::string::npos) + { + std::vector vec; + vec.insert(vec.begin(), d_seq.begin(), d_seq.begin() + ret); + vec.insert(vec.end(), t.d_seq.begin(), t.d_seq.end()); + vec.insert(vec.end(), d_seq.begin() + ret + s.size(), d_seq.end()); + return Sequence(d_type, vec); + } + return *this; +} + +Sequence Sequence::substr(size_t i) const +{ + Assert(i >= 0); + Assert(i <= size()); + std::vector ret_vec; + std::vector::const_iterator itr = d_seq.begin() + i; + ret_vec.insert(ret_vec.end(), itr, d_seq.end()); + return Sequence(d_type, ret_vec); +} + +Sequence Sequence::substr(size_t i, size_t j) const +{ + Assert(i >= 0); + Assert(j >= 0); + Assert(i + j <= size()); + std::vector ret_vec; + std::vector::const_iterator itr = d_seq.begin() + i; + ret_vec.insert(ret_vec.end(), itr, itr + j); + return Sequence(d_type, ret_vec); +} + +bool Sequence::noOverlapWith(const Sequence& y) const +{ + Assert(d_type == y.d_type); + return y.find(*this) == std::string::npos + && this->find(y) == std::string::npos && this->overlap(y) == 0 + && y.overlap(*this) == 0; +} + +size_t Sequence::maxSize() { return std::numeric_limits::max(); } + +std::ostream& operator<<(std::ostream& os, const Sequence& s) +{ + const std::vector& vec = s.getVec(); + std::stringstream ss; + if (vec.empty()) + { + ss << "(as seq.empty " << s.getType() << ")"; + } + else + { + ss << "(seq.++"; + for (const Node& n : vec) + { + ss << " " << n; + } + ss << ")"; + } + return os << ss.str(); +} + +size_t SequenceHashFunction::operator()(const Sequence& s) const +{ + size_t ret = 0; + const std::vector& vec = s.getVec(); + for (const Node& n : vec) + { + ret = fnv1a::fnv1a_64(ret, NodeHashFunction()(n)); + } + return ret; +} + +} // namespace CVC4 diff --git a/src/expr/sequence.h b/src/expr/sequence.h new file mode 100644 index 000000000..833e79441 --- /dev/null +++ b/src/expr/sequence.h @@ -0,0 +1,168 @@ +/********************* */ +/*! \file sequence.h + ** \verbatim + ** Top contributors (to current version): + ** Andrew Reynolds + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2019 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief The sequence data type. + **/ + +#include "cvc4_private.h" + +#ifndef CVC4__EXPR__SEQUENCE_H +#define CVC4__EXPR__SEQUENCE_H + +#include +#include "expr/node.h" + +namespace CVC4 { + +class ExprSequence; + +/** The CVC4 sequence class + * + * This data structure is the domain of values for the sequence type. + */ +class Sequence +{ + public: + /** constructors for Sequence + * + * Internally, a CVC4::Sequence is represented by a vector of Nodes (d_seq), + * where each Node in this vector must be a constant. + */ + Sequence() = default; + explicit Sequence(TypeNode t, const std::vector& s); + + Sequence& operator=(const Sequence& y); + + Sequence concat(const Sequence& other) const; + + bool operator==(const Sequence& y) const { return cmp(y) == 0; } + bool operator!=(const Sequence& y) const { return cmp(y) != 0; } + bool operator<(const Sequence& y) const { return cmp(y) < 0; } + bool operator>(const Sequence& y) const { return cmp(y) > 0; } + bool operator<=(const Sequence& y) const { return cmp(y) <= 0; } + bool operator>=(const Sequence& y) const { return cmp(y) >= 0; } + + bool strncmp(const Sequence& y, size_t n) const; + bool rstrncmp(const Sequence& y, size_t n) const; + + /** is this the empty sequence? */ + bool empty() const { return d_seq.empty(); } + /** is less than or equal to sequence y */ + bool isLeq(const Sequence& y) const; + /** Return the length of the sequence */ + size_t size() const { return d_seq.size(); } + + /** Return true if this sequence is a repetition of the same element */ + bool isRepeated() const; + + /** + * Return the first position y occurs in this sequence, or std::string::npos + * otherwise. + */ + size_t find(const Sequence& y, size_t start = 0) const; + /** + * Return the first position y occurs in this sequence searching from the end, + * or std::string::npos otherwise. + */ + size_t rfind(const Sequence& y, size_t start = 0) const; + /** Returns true if y is a prefix of this */ + bool hasPrefix(const Sequence& y) const; + /** Returns true if y is a suffix of this */ + bool hasSuffix(const Sequence& y) const; + /** Replace the first occurrence of s in this sequence with t */ + Sequence replace(const Sequence& s, const Sequence& t) const; + /** Return the subsequence of this sequence starting at index i */ + Sequence substr(size_t i) const; + /** + * Return the subsequence of this sequence starting at index i with size at + * most j. + */ + Sequence substr(size_t i, size_t j) const; + /** Return the prefix of this sequence of size at most i */ + Sequence prefix(size_t i) const { return substr(0, i); } + /** Return the suffix of this sequence of size at most i */ + Sequence suffix(size_t i) const { return substr(size() - i, i); } + + /** + * Checks if there is any overlap between this sequence and another sequence. + * This corresponds to checking whether one sequence contains the other and + * whether a subsequence of one is a prefix of the other and vice-versa. + * + * @param y The other sequence + * @return True if there is an overlap, false otherwise + */ + bool noOverlapWith(const Sequence& y) const; + + /** sequence overlap + * + * if overlap returns m>0, + * then the maximal suffix of this sequence that is a prefix of y is of length + * m. + * + * For example, if x is "abcdef", then: + * x.overlap("defg") = 3 + * x.overlap("ab") = 0 + * x.overlap("d") = 0 + * x.overlap("bcdefdef") = 5 + */ + size_t overlap(const Sequence& y) const; + /** sequence reverse overlap + * + * if roverlap returns m>0, + * then the maximal prefix of this sequence that is a suffix of y is of length + * m. + * + * For example, if x is "abcdef", then: + * x.roverlap("aaabc") = 3 + * x.roverlap("def") = 0 + * x.roverlap("d") = 0 + * x.roverlap("defabcde") = 5 + * + * Notice that x.overlap(y) = y.roverlap(x) + */ + size_t roverlap(const Sequence& y) const; + + /** get type */ + TypeNode getType() const { return d_type; } + /** get the internal Node representation of this sequence */ + const std::vector& getVec() const { return d_seq; } + /** get the internal node value of the first element in this sequence */ + Node front() const; + /** get the internal node value of the last element in this sequence */ + Node back() const; + /** + * Returns the maximum sequence length representable by this class. + * Corresponds to the maximum size of d_seq. + */ + static size_t maxSize(); + + private: + /** + * Returns a negative number if *this < y, 0 if *this and y are equal and a + * positive number if *this > y. + */ + int cmp(const Sequence& y) const; + /** The element type of the sequence */ + TypeNode d_type; + /** The data of the sequence */ + std::vector d_seq; +}; + +struct SequenceHashFunction +{ + size_t operator()(const Sequence& s) const; +}; + +std::ostream& operator<<(std::ostream& os, const Sequence& s); + +} // namespace CVC4 + +#endif /* CVC4__EXPR__SEQUENCE_H */ -- 2.30.2