From 987cc25110bd87a1657ff6555c3e8a479fd3daaa Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 5 Dec 2007 22:56:51 +0000 Subject: [PATCH] Rework Stringpool to not compute the hash code twice when adding a new string. --- gold/stringpool.cc | 214 +++++++++++++++++++++++++++------------------ gold/stringpool.h | 90 ++++++++++++------- 2 files changed, 184 insertions(+), 120 deletions(-) diff --git a/gold/stringpool.cc b/gold/stringpool.cc index 19698e24c0b..0ac1dcb24c9 100644 --- a/gold/stringpool.cc +++ b/gold/stringpool.cc @@ -80,13 +80,12 @@ Stringpool_template::string_length(const char* p) return strlen(p); } -// Equality comparison function. +// Compare two strings of arbitrary character type for equality. template bool -Stringpool_template::Stringpool_eq::operator()( - const Stringpool_char* s1, - const Stringpool_char* s2) const +Stringpool_template::string_equal(const Stringpool_char* s1, + const Stringpool_char* s2) { while (*s1 != 0) if (*s1++ != *s2++) @@ -94,62 +93,69 @@ Stringpool_template::Stringpool_eq::operator()( return *s2 == 0; } -// Specialize equality comparison for char. +// Specialize string_equal for char. template<> -bool -Stringpool_template::Stringpool_eq::operator()(const char* s1, - const char* s2) const +inline bool +Stringpool_template::string_equal(const char* s1, const char* s2) { return strcmp(s1, s2) == 0; } -// Hash function. +// Equality comparison function for the hash table. + +template +inline bool +Stringpool_template::Stringpool_eq::operator()( + const Hashkey& h1, + const Hashkey& h2) const +{ + return (h1.hash_code == h2.hash_code + && h1.length == h2.length + && memcmp(h1.string, h2.string, + h1.length * sizeof(Stringpool_char)) == 0); +} + +// Hash function. The length is in characters, not bytes. template size_t -Stringpool_template::Stringpool_hash::operator()( - const Stringpool_char* s) const +Stringpool_template::string_hash(const Stringpool_char* s, + size_t length) { // Fowler/Noll/Vo (FNV) hash (type FNV-1a). + const unsigned char* p = reinterpret_cast(s); if (sizeof(size_t) > 4) { size_t result = static_cast(14695981039346656037ULL); - while (*s != 0) + for (size_t i = 0; i < length * sizeof(Stringpool_char); ++i) { - const char* p = reinterpret_cast(s); - for (size_t i = 0; i < sizeof(Stringpool_char); ++i) - { - result ^= (size_t) *p++; - result *= 1099511628211ULL; - } - ++s; + result ^= static_cast(*p++); + result *= 1099511628211ULL; } return result; } else { size_t result = 2166136261UL; - while (*s != 0) + for (size_t i = 0; i < length * sizeof(Stringpool_char); ++i) { - const char* p = reinterpret_cast(s); - for (size_t i = 0; i < sizeof(Stringpool_char); ++i) - { - result ^= (size_t) *p++; - result *= 16777619UL; - } - ++s; + result ^= static_cast(*p++); + result *= 16777619UL; } return result; } } -// Add a string to the list of canonical strings. Return a pointer to -// the canonical string. If PKEY is not NULL, set *PKEY to the key. +// Add the string S to the list of canonical strings. Return a +// pointer to the canonical string. If PKEY is not NULL, set *PKEY to +// the key. LENGTH is the length of S in characters. Note that S may +// not be NUL terminated. template const Stringpool_char* Stringpool_template::add_string(const Stringpool_char* s, + size_t len, Key* pkey) { // We are in trouble if we've already computed the string offsets. @@ -162,7 +168,9 @@ Stringpool_template::add_string(const Stringpool_char* s, const size_t key_mult = 1024; gold_assert(key_mult >= buffer_size); - size_t len = (string_length(s) + 1) * sizeof(Stringpool_char); + // Convert len to the number of bytes we need to allocate, including + // the null character. + len = (len + 1) * sizeof(Stringpool_char); size_t alc; bool front = true; @@ -181,7 +189,9 @@ Stringpool_template::add_string(const Stringpool_char* s, else { char* ret = psd->data + psd->len; - memcpy(ret, s, len); + memcpy(ret, s, len - sizeof(Stringpool_char)); + memset(ret + len - sizeof(Stringpool_char), 0, + sizeof(Stringpool_char)); if (pkey != NULL) *pkey = psd->index * key_mult + psd->len; @@ -194,7 +204,9 @@ Stringpool_template::add_string(const Stringpool_char* s, Stringdata *psd = reinterpret_cast(new char[alc]); psd->alc = alc - sizeof(Stringdata); - memcpy(psd->data, s, len); + memcpy(psd->data, s, len - sizeof(Stringpool_char)); + memset(psd->data + len - sizeof(Stringpool_char), 0, + sizeof(Stringpool_char)); psd->len = len; psd->index = this->next_index_; ++this->next_index_; @@ -217,42 +229,39 @@ const Stringpool_char* Stringpool_template::add(const Stringpool_char* s, bool copy, Key* pkey) { - // FIXME: This will look up the entry twice in the hash table. The - // problem is that we can't insert S before we canonicalize it. I - // don't think there is a way to handle this correctly with - // unordered_map, so this should be replaced with custom code to do - // what we need, which is to return the empty slot. + typedef std::pair Insert_type; - typename String_set_type::const_iterator p = this->string_set_.find(s); - if (p != this->string_set_.end()) + if (!copy) { - if (pkey != NULL) - *pkey = p->second.first; - return p->first; - } + // When we don't need to copy the string, we can call insert + // directly. - Key k; - const Stringpool_char* ret; - if (copy) - ret = this->add_string(s, &k); - else - { - ret = s; - k = this->next_uncopied_key_; - --this->next_uncopied_key_; - } + const Key k = this->next_uncopied_key_; + const off_t ozero = 0; + std::pair element(Hashkey(s), + std::make_pair(k, ozero)); - const off_t ozero = 0; - std::pair element(ret, - std::make_pair(k, ozero)); - std::pair ins = - this->string_set_.insert(element); - gold_assert(ins.second); + Insert_type ins = this->string_set_.insert(element); - if (pkey != NULL) - *pkey = k; + typename String_set_type::const_iterator p = ins.first; + + if (ins.second) + { + // We just added the string. The key value has now been + // used. + --this->next_uncopied_key_; + } + else + { + gold_assert(k != p->second.first); + } + + if (pkey != NULL) + *pkey = p->second.first; + return p->first.string; + } - return ret; + return this->add_prefix(s, string_length(s), pkey); } // Add a prefix of a string to a string pool. @@ -263,10 +272,36 @@ Stringpool_template::add_prefix(const Stringpool_char* s, size_t len, Key* pkey) { - // FIXME: This implementation should be rewritten when we rewrite - // the hash table to avoid copying. - std::basic_string st(s, len); - return this->add(st.c_str(), true, pkey); + // When adding an entry, this will look it up twice in the hash + // table. The problem is that we can't insert S before we + // canonicalize it by copying it into the canonical list. The hash + // code will only be computed once, so this isn't all that + // expensive. + + Hashkey hk(s, len); + typename String_set_type::const_iterator p = this->string_set_.find(hk); + if (p != this->string_set_.end()) + { + if (pkey != NULL) + *pkey = p->second.first; + return p->first.string; + } + + Key k; + hk.string = this->add_string(s, len, &k); + // The contents of the string stay the same, so we don't need to + // adjust hk.hash_code or hk.length. + + const off_t ozero = 0; + std::pair element(hk, std::make_pair(k, ozero)); + + typedef std::pair Insert_type; + Insert_type ins = this->string_set_.insert(element); + gold_assert(ins.second); + + if (pkey != NULL) + *pkey = k; + return hk.string; } template @@ -274,14 +309,15 @@ const Stringpool_char* Stringpool_template::find(const Stringpool_char* s, Key* pkey) const { - typename String_set_type::const_iterator p = this->string_set_.find(s); + Hashkey hk(s); + typename String_set_type::const_iterator p = this->string_set_.find(hk); if (p == this->string_set_.end()) return NULL; if (pkey != NULL) *pkey = p->second.first; - return p->first; + return p->first.string; } // Comparison routine used when sorting into an ELF strtab. We want @@ -301,10 +337,12 @@ Stringpool_template::Stringpool_sort_comparison::operator()( const Stringpool_sort_info& sort_info1, const Stringpool_sort_info& sort_info2) const { - const Stringpool_char* s1 = sort_info1.it->first; - const Stringpool_char* s2 = sort_info2.it->first; - const size_t len1 = sort_info1.string_length; - const size_t len2 = sort_info2.string_length; + const Hashkey& h1(sort_info1->first); + const Hashkey& h2(sort_info2->first); + const Stringpool_char* s1 = h1.string; + const Stringpool_char* s2 = h2.string; + const size_t len1 = h1.length; + const size_t len2 = h2.length; const size_t minlen = len1 < len2 ? len1 : len2; const Stringpool_char* p1 = s1 + len1 - 1; const Stringpool_char* p2 = s2 + len2 - 1; @@ -359,12 +397,12 @@ Stringpool_template::set_string_offsets() curr != this->string_set_.end(); curr++) { - if (this->zero_null_ && curr->first[0] == 0) + if (this->zero_null_ && curr->first.string[0] == 0) curr->second.second = 0; else { curr->second.second = offset; - offset += (string_length(curr->first) + 1) * charsize; + offset += (curr->first.length + 1) * charsize; } } } @@ -378,7 +416,7 @@ Stringpool_template::set_string_offsets() for (typename String_set_type::iterator p = this->string_set_.begin(); p != this->string_set_.end(); ++p) - v.push_back(Stringpool_sort_info(p, string_length(p->first))); + v.push_back(Stringpool_sort_info(p)); std::sort(v.begin(), v.end(), Stringpool_sort_comparison()); @@ -387,19 +425,21 @@ Stringpool_template::set_string_offsets() curr != v.end(); last = curr++) { - if (this->zero_null_ && curr->it->first[0] == 0) - curr->it->second.second = 0; + if (this->zero_null_ && (*curr)->first.string[0] == 0) + (*curr)->second.second = 0; else if (last != v.end() - && is_suffix(curr->it->first, curr->string_length, - last->it->first, last->string_length)) - curr->it->second.second = (last->it->second.second - + ((last->string_length - - curr->string_length) - * charsize)); + && is_suffix((*curr)->first.string, + (*curr)->first.length, + (*last)->first.string, + (*last)->first.length)) + (*curr)->second.second = ((*last)->second.second + + (((*last)->first.length + - (*curr)->first.length) + * charsize)); else { - curr->it->second.second = offset; - offset += (curr->string_length + 1) * charsize; + (*curr)->second.second = offset; + offset += ((*curr)->first.length + 1) * charsize; } } } @@ -439,9 +479,9 @@ Stringpool_template::write_to_buffer(unsigned char* buffer, p != this->string_set_.end(); ++p) { - const int len = (string_length(p->first) + 1) * sizeof(Stringpool_char); + const int len = (p->first.length + 1) * sizeof(Stringpool_char); gold_assert(p->second.second + len <= this->strtab_size_); - memcpy(buffer + p->second.second, p->first, len); + memcpy(buffer + p->second.second, p->first.string, len); } } diff --git a/gold/stringpool.h b/gold/stringpool.h index c5a3baf884b..929b4d1bc1f 100644 --- a/gold/stringpool.h +++ b/gold/stringpool.h @@ -161,6 +161,15 @@ class Stringpool_template static size_t string_length(const Stringpool_char*); + // Return whether two strings are equal. + static bool + string_equal(const Stringpool_char*, const Stringpool_char*); + + // Compute a hash code for a string. LENGTH is the length of the + // string in characters. + static size_t + string_hash(const Stringpool_char*, size_t length); + // We store the actual data in a list of these buffers. struct Stringdata { @@ -176,55 +185,70 @@ class Stringpool_template // Copy a string into the buffers, returning a canonical string. const Stringpool_char* - add_string(const Stringpool_char*, Key*); + add_string(const Stringpool_char*, size_t, Key*); + + // Return whether s1 is a suffix of s2. + static bool + is_suffix(const Stringpool_char* s1, size_t len1, + const Stringpool_char* s2, size_t len2); + + // The hash table key includes the string, the length of the string, + // and the hash code for the string. We put the hash code + // explicitly into the key so that we can do a find()/insert() + // sequence without having to recompute the hash. Computing the + // hash code is a significant user of CPU time in the linker. + struct Hashkey + { + const Stringpool_char* string; + // Length is in characters, not bytes. + size_t length; + size_t hash_code; + + // This goes in an STL container, so we need a default + // constructor. + Hashkey() + : string(NULL), length(0), hash_code(0) + { } + + // Note that these constructors are relatively expensive, because + // they compute the hash code. + Hashkey(const Stringpool_char* s) + : string(s), length(string_length(s)), hash_code(string_hash(s, length)) + { } + + Hashkey(const Stringpool_char* s, size_t len) + : string(s), length(len), hash_code(string_hash(s, len)) + { } + }; - // Hash function. + // Hash function. This is trivial, since we have already computed + // the hash. struct Stringpool_hash { size_t - operator()(const Stringpool_char*) const; + operator()(const Hashkey& hk) const + { return hk.hash_code; } }; // Equality comparison function for hash table. struct Stringpool_eq { bool - operator()(const Stringpool_char* p1, const Stringpool_char* p2) const; + operator()(const Hashkey&, const Hashkey&) const; }; - // Return whether s1 is a suffix of s2. - static bool - is_suffix(const Stringpool_char* s1, size_t len1, - const Stringpool_char* s2, size_t len2); - - // The hash table is a map from string names to a pair of Key and - // string table offsets. We only use the offsets if we turn this - // into an string table section. + // The hash table is a map from strings to a pair of Key and string + // table offsets. We only use the offsets if we turn this into an + // string table section. - typedef std::pair Val; + typedef std::pair Hashval; -#ifdef HAVE_TR1_UNORDERED_SET - typedef Unordered_map >, - true> String_set_type; -#else - typedef Unordered_map String_set_type; -#endif - // Comparison routine used when sorting into a string table. We - // store string-sizes in the sort-vector so we don't have to - // recompute them log(n) times as we sort. - struct Stringpool_sort_info - { - typename String_set_type::iterator it; - size_t string_length; - Stringpool_sort_info(typename String_set_type::iterator i, size_t s) - : it(i), string_length(s) - { } - }; + // Comparison routine used when sorting into a string table. + + typedef typename String_set_type::iterator Stringpool_sort_info; struct Stringpool_sort_comparison { -- 2.30.2