From 42c2c69462fd83db2e0532ee57c44091bc1032f9 Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Sat, 17 Apr 2021 13:56:36 -0600 Subject: [PATCH] Handle unaligned mapping of .gdb_index The .gdb_index was designed such that all data would be aligned. Unfortunately, we neglected to require this alignment in the objcopy instructions in the manual. As a result, in many cases, a .gdb_index in the wild will not be properly aligned by mmap. This yields undefined behavior, which is PR gdb/23743. This patch fixes the bug by always assuming that the mapping is unaligned, and using extract_unsigned_integer when needed. A new helper class is introduced to make this less painful. gdb/ChangeLog 2021-04-17 Tom Tromey PR gdb/23743: * dwarf2/read.c (class offset_view): New. (struct symbol_table_slot): Remove. (struct mapped_index) : Change type. : New methods. : Rewrite. (read_gdb_index_from_buffer): Update. (struct dw2_symtab_iterator) : Change type. (dw2_symtab_iter_init_common, dw2_symtab_iter_init) (dw2_symtab_iter_next, dw2_expand_marked_cus): Update. * dwarf2/index-write.c (class data_buf) : Remove. : New methods. (write_hash_table, add_address_entry, write_gdbindex_1) (write_debug_names): Update. * dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove. --- gdb/ChangeLog | 19 ++++++ gdb/dwarf2/index-common.h | 21 ++---- gdb/dwarf2/index-write.c | 41 ++++++------ gdb/dwarf2/read.c | 132 +++++++++++++++++++++++++------------- 4 files changed, 132 insertions(+), 81 deletions(-) diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 803597532a7..678ad2772f3 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,22 @@ +2021-04-17 Tom Tromey + + PR gdb/23743: + * dwarf2/read.c (class offset_view): New. + (struct symbol_table_slot): Remove. + (struct mapped_index) : Change type. + : New methods. + : + Rewrite. + (read_gdb_index_from_buffer): Update. + (struct dw2_symtab_iterator) : Change type. + (dw2_symtab_iter_init_common, dw2_symtab_iter_init) + (dw2_symtab_iter_next, dw2_expand_marked_cus): Update. + * dwarf2/index-write.c (class data_buf) : Remove. + : New methods. + (write_hash_table, add_address_entry, write_gdbindex_1) + (write_debug_names): Update. + * dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove. + 2021-04-17 Tom Tromey * dwarf2/index-write.c (write_psymtabs_to_index): Check diff --git a/gdb/dwarf2/index-common.h b/gdb/dwarf2/index-common.h index fa762c400fa..86180747533 100644 --- a/gdb/dwarf2/index-common.h +++ b/gdb/dwarf2/index-common.h @@ -29,28 +29,15 @@ architecture-independent. */ typedef uint32_t offset_type; -#if WORDS_BIGENDIAN - -/* Convert VALUE between big- and little-endian. */ +/* Unpack a 32-bit little-endian value. */ static inline offset_type -byte_swap (offset_type value) +gdb_index_unpack (const gdb_byte *value) { - offset_type result; - - result = (value & 0xff) << 24; - result |= (value & 0xff00) << 8; - result |= (value & 0xff0000) >> 8; - result |= (value & 0xff000000) >> 24; - return result; + return (offset_type) extract_unsigned_integer (value, sizeof (offset_type), + BFD_ENDIAN_LITTLE); } -#define MAYBE_SWAP(V) byte_swap (V) - -#else -#define MAYBE_SWAP(V) static_cast (V) -#endif /* WORDS_BIGENDIAN */ - /* The hash function for strings in the mapped index. This is the same as SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the implementation. This is necessary because the hash function is tied to the diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c index 6cfe415c5a2..e27e1e8cf7d 100644 --- a/gdb/dwarf2/index-write.c +++ b/gdb/dwarf2/index-write.c @@ -94,13 +94,10 @@ file_write (FILE *file, const std::vector &vec) class data_buf { public: - /* Copy DATA to the end of the buffer. */ - template - void append_data (const T &data) + /* Copy ARRAY to the end of the buffer. */ + void append_array (gdb::array_view array) { - std::copy (reinterpret_cast (&data), - reinterpret_cast (&data + 1), - grow (sizeof (data))); + std::copy (array.begin (), array.end (), grow (array.size ())); } /* Copy CSTR (a zero-terminated string) to the end of buffer. The @@ -120,7 +117,7 @@ public: input >>= 7; if (input) output |= 0x80; - append_data (output); + m_vec.push_back (output); if (input == 0) break; } @@ -133,6 +130,12 @@ public: ::store_unsigned_integer (grow (len), len, byte_order, val); } + /* Copy VALUE to the end of the buffer, little-endian. */ + void append_offset (offset_type value) + { + append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value); + } + /* Return the size of the buffer. */ size_t size () const { @@ -371,9 +374,9 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) symbol_hash_table.emplace (entry.cu_indices, cpool.size ()); entry.index_offset = cpool.size (); - cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ())); + cpool.append_offset (entry.cu_indices.size ()); for (const auto index : entry.cu_indices) - cpool.append_data (MAYBE_SWAP (index)); + cpool.append_offset (index); } } @@ -399,8 +402,8 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool) vec_off = 0; } - output.append_data (MAYBE_SWAP (str_off)); - output.append_data (MAYBE_SWAP (vec_off)); + output.append_offset (str_off); + output.append_offset (vec_off); } } @@ -434,7 +437,7 @@ add_address_entry (data_buf &addr_vec, { addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start); addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end); - addr_vec.append_data (MAYBE_SWAP (cu_index)); + addr_vec.append_offset (cu_index); } /* Worker function for traversing an addrmap to build the address table. */ @@ -1374,26 +1377,26 @@ write_gdbindex_1 (FILE *out_file, offset_type total_len = size_of_header; /* The version number. */ - contents.append_data (MAYBE_SWAP (8)); + contents.append_offset (8); /* The offset of the CU list from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); + contents.append_offset (total_len); total_len += cu_list.size (); /* The offset of the types CU list from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); + contents.append_offset (total_len); total_len += types_cu_list.size (); /* The offset of the address table from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); + contents.append_offset (total_len); total_len += addr_vec.size (); /* The offset of the symbol table from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); + contents.append_offset (total_len); total_len += symtab_vec.size (); /* The offset of the constant pool from the start of the file. */ - contents.append_data (MAYBE_SWAP (total_len)); + contents.append_offset (total_len); total_len += constant_pool.size (); gdb_assert (contents.size () == size_of_header); @@ -1611,7 +1614,7 @@ write_debug_names (dwarf2_per_objfile *per_objfile, string. This value is rounded up to a multiple of 4. */ static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, ""); header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation)); - header.append_data (dwarf5_gdb_augmentation); + header.append_array (dwarf5_gdb_augmentation); gdb_assert (header.size () == bytes_of_header); diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c index 22a5f333b78..40aeb298c14 100644 --- a/gdb/dwarf2/read.c +++ b/gdb/dwarf2/read.c @@ -223,17 +223,50 @@ protected: ~mapped_index_base() = default; }; +/* This is a view into the index that converts from bytes to an + offset_type, and allows indexing. Unaligned bytes are specifically + allowed here, and handled via unpacking. */ + +class offset_view +{ +public: + offset_view () = default; + + explicit offset_view (gdb::array_view bytes) + : m_bytes (bytes) + { + } + + /* Extract the INDEXth offset_type from the array. */ + offset_type operator[] (size_t index) const + { + const gdb_byte *bytes = &m_bytes[index * sizeof (offset_type)]; + return (offset_type) extract_unsigned_integer (bytes, + sizeof (offset_type), + BFD_ENDIAN_LITTLE); + } + + /* Return the number of offset_types in this array. */ + size_t size () const + { + return m_bytes.size () / sizeof (offset_type); + } + + /* Return true if this view is empty. */ + bool empty () const + { + return m_bytes.empty (); + } + +private: + /* The underlying bytes. */ + gdb::array_view m_bytes; +}; + /* A description of the mapped index. The file format is described in a comment by the code that writes the index. */ struct mapped_index final : public mapped_index_base { - /* A slot/bucket in the symbol table hash. */ - struct symbol_table_slot - { - const offset_type name; - const offset_type vec; - }; - /* Index data format version. */ int version = 0; @@ -241,25 +274,42 @@ struct mapped_index final : public mapped_index_base gdb::array_view address_table; /* The symbol table, implemented as a hash table. */ - gdb::array_view symbol_table; + offset_view symbol_table; /* A pointer to the constant pool. */ - const char *constant_pool = nullptr; + gdb::array_view constant_pool; + + /* Return the index into the constant pool of the name of the IDXth + symbol in the symbol table. */ + offset_type symbol_name_index (offset_type idx) const + { + return symbol_table[2 * idx]; + } + + /* Return the index into the constant pool of the CU vector of the + IDXth symbol in the symbol table. */ + offset_type symbol_vec_index (offset_type idx) const + { + return symbol_table[2 * idx + 1]; + } bool symbol_name_slot_invalid (offset_type idx) const override { - const auto &bucket = this->symbol_table[idx]; - return bucket.name == 0 && bucket.vec == 0; + return (symbol_name_index (idx) == 0 + && symbol_vec_index (idx) == 0); } /* Convenience method to get at the name of the symbol at IDX in the symbol table. */ const char *symbol_name_at (offset_type idx, dwarf2_per_objfile *per_objfile) const override - { return this->constant_pool + MAYBE_SWAP (this->symbol_table[idx].name); } + { + return (const char *) (this->constant_pool.data () + + symbol_name_index (idx)); + } size_t symbol_name_count () const override - { return this->symbol_table.size (); } + { return this->symbol_table.size () / 2; } }; /* A description of the mapped .debug_names. @@ -2950,9 +3000,10 @@ read_gdb_index_from_buffer (const char *filename, offset_type *types_list_elements) { const gdb_byte *addr = &buffer[0]; + offset_view metadata (buffer); /* Version check. */ - offset_type version = MAYBE_SWAP (*(offset_type *) addr); + offset_type version = metadata[0]; /* Versions earlier than 3 emitted every copy of a psymbol. This causes the index to behave very poorly for certain requests. Version 3 contained incomplete addrmap. So, it seems better to just ignore such @@ -3005,35 +3056,29 @@ to use the section anyway."), map->version = version; - offset_type *metadata = (offset_type *) (addr + sizeof (offset_type)); - - int i = 0; - *cu_list = addr + MAYBE_SWAP (metadata[i]); - *cu_list_elements = ((MAYBE_SWAP (metadata[i + 1]) - MAYBE_SWAP (metadata[i])) - / 8); + int i = 1; + *cu_list = addr + metadata[i]; + *cu_list_elements = (metadata[i + 1] - metadata[i]) / 8; ++i; - *types_list = addr + MAYBE_SWAP (metadata[i]); - *types_list_elements = ((MAYBE_SWAP (metadata[i + 1]) - - MAYBE_SWAP (metadata[i])) - / 8); + *types_list = addr + metadata[i]; + *types_list_elements = (metadata[i + 1] - metadata[i]) / 8; ++i; - const gdb_byte *address_table = addr + MAYBE_SWAP (metadata[i]); - const gdb_byte *address_table_end = addr + MAYBE_SWAP (metadata[i + 1]); + const gdb_byte *address_table = addr + metadata[i]; + const gdb_byte *address_table_end = addr + metadata[i + 1]; map->address_table = gdb::array_view (address_table, address_table_end); ++i; - const gdb_byte *symbol_table = addr + MAYBE_SWAP (metadata[i]); - const gdb_byte *symbol_table_end = addr + MAYBE_SWAP (metadata[i + 1]); + const gdb_byte *symbol_table = addr + metadata[i]; + const gdb_byte *symbol_table_end = addr + metadata[i + 1]; map->symbol_table - = gdb::array_view - ((mapped_index::symbol_table_slot *) symbol_table, - (mapped_index::symbol_table_slot *) symbol_table_end); + = offset_view (gdb::array_view (symbol_table, + symbol_table_end)); ++i; - map->constant_pool = (char *) (addr + MAYBE_SWAP (metadata[i])); + map->constant_pool = buffer.slice (metadata[i]); return 1; } @@ -3317,7 +3362,7 @@ struct dw2_symtab_iterator domain_enum domain; /* The list of CUs from the index entry of the symbol, or NULL if not found. */ - offset_type *vec; + offset_view vec; /* The next element in VEC to look at. */ int next; /* The number of elements in VEC, or zero if there is no match. */ @@ -3342,7 +3387,7 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter, iter->domain = domain; iter->next = 0; iter->global_seen = 0; - iter->vec = NULL; + iter->vec = {}; iter->length = 0; mapped_index *index = per_objfile->per_bfd->index_table.get (); @@ -3351,11 +3396,10 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter, return; gdb_assert (!index->symbol_name_slot_invalid (namei)); - const auto &bucket = index->symbol_table[namei]; + offset_type vec_idx = index->symbol_vec_index (namei); - iter->vec = (offset_type *) (index->constant_pool - + MAYBE_SWAP (bucket.vec)); - iter->length = MAYBE_SWAP (*iter->vec); + iter->vec = offset_view (index->constant_pool.slice (vec_idx)); + iter->length = iter->vec[0]; } /* Return the next matching CU or NULL if there are no more. */ @@ -3367,8 +3411,7 @@ dw2_symtab_iter_next (struct dw2_symtab_iterator *iter) for ( ; iter->next < iter->length; ++iter->next) { - offset_type cu_index_and_attrs = - MAYBE_SWAP (iter->vec[iter->next + 1]); + offset_type cu_index_and_attrs = iter->vec[iter->next + 1]; offset_type cu_index = GDB_INDEX_CU_VALUE (cu_index_and_attrs); gdb_index_symbol_kind symbol_kind = GDB_INDEX_SYMBOL_KIND_VALUE (cu_index_and_attrs); @@ -4387,16 +4430,15 @@ dw2_expand_marked_cus block_search_flags search_flags, search_domain kind) { - offset_type *vec, vec_len, vec_idx; + offset_type vec_len, vec_idx; bool global_seen = false; mapped_index &index = *per_objfile->per_bfd->index_table; - vec = (offset_type *) (index.constant_pool - + MAYBE_SWAP (index.symbol_table[idx].vec)); - vec_len = MAYBE_SWAP (vec[0]); + offset_view vec (index.constant_pool.slice (index.symbol_vec_index (idx))); + vec_len = vec[0]; for (vec_idx = 0; vec_idx < vec_len; ++vec_idx) { - offset_type cu_index_and_attrs = MAYBE_SWAP (vec[vec_idx + 1]); + offset_type cu_index_and_attrs = vec[vec_idx + 1]; /* This value is only valid for index versions >= 7. */ int is_static = GDB_INDEX_SYMBOL_STATIC_VALUE (cu_index_and_attrs); gdb_index_symbol_kind symbol_kind = -- 2.30.2