From 29364b41e638c581fcbd7b152007eb95c91e0eab Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Fri, 3 Dec 2021 13:36:16 -0800 Subject: [PATCH] Faster hasing for `cvc5::String` (#7742) Previously, our hashing algorithm for cvc5::String would create an std::string representation of the string and then call std::hash on the result. Creating the temporary std::string is quite expensive and can be avoided by computing the hash directly on the elements of the vector of characters in cvc5::String. This change improves our solving time on an industrial benchmark from about 59s to 55s. --- src/expr/sequence.cpp | 4 ++-- src/util/hash.h | 5 ++++- src/util/string.cpp | 15 +++++++++++++++ src/util/string.h | 11 +++++++---- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/expr/sequence.cpp b/src/expr/sequence.cpp index 6b15f33e0..82c83e0ea 100644 --- a/src/expr/sequence.cpp +++ b/src/expr/sequence.cpp @@ -371,13 +371,13 @@ std::ostream& operator<<(std::ostream& os, const Sequence& s) size_t SequenceHashFunction::operator()(const Sequence& s) const { - size_t ret = 0; + uint64_t ret = fnv1a::offsetBasis; const std::vector& vec = s.getVec(); for (const Node& n : vec) { ret = fnv1a::fnv1a_64(ret, std::hash()(n)); } - return ret; + return static_cast(ret); } } // namespace cvc5 diff --git a/src/util/hash.h b/src/util/hash.h index db280984b..ef8bf4a2a 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -43,12 +43,15 @@ namespace cvc5 { namespace fnv1a { +constexpr uint64_t offsetBasis = 14695981039346656037U; + /** * FNV-1a hash algorithm for 64-bit numbers. * * More details here: http://www.isthe.com/chongo/tech/comp/fnv/index.html */ -inline uint64_t fnv1a_64(uint64_t v, uint64_t hash = 14695981039346656037U) { +inline uint64_t fnv1a_64(uint64_t v, uint64_t hash = offsetBasis) +{ hash ^= v; // Compute (hash * 1099511628211) return hash + (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + diff --git a/src/util/string.cpp b/src/util/string.cpp index 50b0a7199..2235ac5ee 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -23,6 +23,7 @@ #include "base/check.h" #include "base/exception.h" +#include "util/hash.h" using namespace std; @@ -519,6 +520,20 @@ Rational String::toNumber() const return Rational(toString()); } +namespace strings { + +size_t StringHashFunction::operator()(const ::cvc5::String& s) const +{ + uint64_t ret = fnv1a::offsetBasis; + for (unsigned c : s.d_str) + { + ret = fnv1a::fnv1a_64(c, ret); + } + return static_cast(ret); +} + +} // namespace strings + std::ostream &operator<<(std::ostream &os, const String &s) { return os << "\"" << s.toString() << "\""; } diff --git a/src/util/string.h b/src/util/string.h index 3f6384082..017c60fb8 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -26,6 +26,10 @@ namespace cvc5 { +namespace strings { +struct StringHashFunction; +} + /** The cvc5 string class * * This data structure is the domain of values for the string type. It can also @@ -33,6 +37,8 @@ namespace cvc5 { */ class String { + friend strings::StringHashFunction; + public: /** * This is the cardinality of the alphabet that is representable by this @@ -267,10 +273,7 @@ namespace strings { struct StringHashFunction { - size_t operator()(const ::cvc5::String& s) const - { - return std::hash()(s.toString()); - } + size_t operator()(const ::cvc5::String& s) const; }; /* struct StringHashFunction */ } // namespace strings -- 2.30.2