The new DWARF indexer
author Tom Tromey <tom@tromey.com>
Sat, 22 May 2021 13:51:24 +0000 (07:51 -0600)
committer Tom Tromey <tom@tromey.com>
Tue, 12 Apr 2022 15:31:16 +0000 (09:31 -0600)
This patch adds the code to index DWARF.  This is just the scanner; it
reads the DWARF and constructs the index, but nothing calls it yet.

The indexer is split into two parts: a storage object and an indexer
object.  This is done to support the parallelization of this code -- a
future patch will create a single storage object per thread.

gdb/dwarf2/read.c
gdb/dwarf2/read.h

index 0c7a4de7c7def26913ce0ccc3c3ab07162577102..2a3f3e718761cdf7799f689da29864c4150bf42d 100644 (file)
@@ -89,6 +89,8 @@
 #include "gdbsupport/pathstuff.h"
 #include "count-one-bits.h"
 #include <unordered_set>
+#include "dwarf2/abbrev-cache.h"
+#include "cooked-index.h"
 
 /* When == 1, print basic high level tracing messages.
    When > 1, be more verbose.
@@ -800,6 +802,8 @@ public:
 
   DISABLE_COPY_AND_ASSIGN (cutu_reader);
 
+  cutu_reader (cutu_reader &&) = default;
+
   const gdb_byte *info_ptr = nullptr;
   struct die_info *comp_unit_die = nullptr;
   bool dummy_p = false;
@@ -808,6 +812,13 @@ public:
      for dummy CUs.  */
   void keep ();
 
+  /* Give up the abbrev table held by this reader; the caller becomes
+     its owner and the holder member is left moved-from.  */
+  abbrev_table_up release_abbrev_table ()
+  {
+    return abbrev_table_up (std::move (m_abbrev_table_holder));
+  }
+
 private:
   void init_tu_and_read_dwo_dies (dwarf2_per_cu_data *this_cu,
                                  dwarf2_per_objfile *per_objfile,
@@ -7021,6 +7032,238 @@ create_partial_symtab (dwarf2_per_cu_data *per_cu,
 
   return pst;
 }
+\f
+
+/* An instance of this is created when scanning DWARF to create a
+   cooked index.  It holds the abbrev cache, the cutu_reader objects
+   that were preserved, the cooked_index under construction, and the
+   mutable addrmap that release installs into that index.  */
+
+class cooked_index_storage
+{
+public:
+
+  cooked_index_storage ()
+    : m_reader_hash (htab_create_alloc (10, hash_cutu_reader,
+                                       eq_cutu_reader,
+                                       htab_delete_entry<cutu_reader>,
+                                       xcalloc, xfree)),
+      m_index (new cooked_index),
+      m_addrmap_storage (),
+      m_addrmap (addrmap_create_mutable (&m_addrmap_storage))
+  {
+  }
+
+  DISABLE_COPY_AND_ASSIGN (cooked_index_storage);
+
+  /* Return the current abbrev cache.  The returned pointer refers to
+     a member of this object.  */
+  abbrev_cache *get_abbrev_cache ()
+  {
+    return &m_abbrev_cache;
+  }
+
+  /* Return the DIE reader corresponding to PER_CU.  If no such reader
+     has been registered, return NULL.  The lookup key is PER_CU's
+     index, which is also used as the hash value.  */
+  cutu_reader *get_reader (dwarf2_per_cu_data *per_cu)
+  {
+    int index = per_cu->index;
+    return (cutu_reader *) htab_find_with_hash (m_reader_hash.get (),
+                                               &index, index);
+  }
+
+  /* Preserve READER by storing it in the local hash table.  READER's
+     abbrev table is first moved into the abbrev cache.  The reader is
+     released into the table (which was created with
+     htab_delete_entry<cutu_reader> as its deletion function); the
+     returned pointer is non-owning.  Preserving a second reader for
+     the same CU index triggers the assertion below.  */
+  cutu_reader *preserve (std::unique_ptr<cutu_reader> reader)
+  {
+    m_abbrev_cache.add (reader->release_abbrev_table ());
+
+    int index = reader->cu->per_cu->index;
+    void **slot = htab_find_slot_with_hash (m_reader_hash.get (), &index,
+                                           index, INSERT);
+    gdb_assert (*slot == nullptr);
+    cutu_reader *result = reader.get ();
+    *slot = reader.release ();
+    return result;
+  }
+
+  /* Add an entry to the index.  The arguments describe the entry; see
+     cooked-index.h.  The new entry is returned.  This simply forwards
+     to the index being constructed.  */
+  const cooked_index_entry *add (sect_offset die_offset, enum dwarf_tag tag,
+                                cooked_index_flag flags,
+                                const char *name,
+                                const cooked_index_entry *parent_entry,
+                                dwarf2_per_cu_data *per_cu)
+  {
+    return m_index->add (die_offset, tag, flags, name, parent_entry, per_cu);
+  }
+
+  /* Install the current addrmap into the index being constructed,
+     then transfer ownership of the index to the caller.  m_index is
+     moved from afterwards, so add must not be called on this object
+     again.  */
+  std::unique_ptr<cooked_index> release ()
+  {
+    m_index->install_addrmap (m_addrmap);
+    return std::move (m_index);
+  }
+
+  /* Return the mutable addrmap that is currently being created.  */
+  addrmap *get_addrmap ()
+  {
+    return m_addrmap;
+  }
+
+private:
+
+  /* Hash function for a cutu_reader.  A is a stored cutu_reader; its
+     hash is the index of its CU, matching the hash value that
+     get_reader and preserve pass explicitly.  */
+  static hashval_t hash_cutu_reader (const void *a)
+  {
+    const cutu_reader *reader = (const cutu_reader *) a;
+    return reader->cu->per_cu->index;
+  }
+
+  /* Equality function for cutu_reader.  A is a stored cutu_reader,
+     while B is the lookup key: a pointer to the int CU index being
+     searched for.  */
+  static int eq_cutu_reader (const void *a, const void *b)
+  {
+    const cutu_reader *ra = (const cutu_reader *) a;
+    const int *rb = (const int *) b;
+    return ra->cu->per_cu->index == *rb;
+  }
+
+  /* The abbrev cache used by this indexer.  */
+  abbrev_cache m_abbrev_cache;
+  /* A hash table of cutu_reader objects, keyed by CU index.  */
+  htab_up m_reader_hash;
+  /* The index that is being constructed.  */
+  std::unique_ptr<cooked_index> m_index;
+
+  /* Storage for the writeable addrmap.  This is declared before
+     m_addrmap because the constructor creates the addrmap in this
+     obstack.  */
+  auto_obstack m_addrmap_storage;
+  /* A writeable addrmap being constructed by this scanner.  */
+  addrmap *m_addrmap;
+};
+
+/* An instance of this is created to index a CU.  Results are stored
+   through the cooked_index_storage passed to the constructor.  */
+
+class cooked_indexer
+{
+public:
+
+  cooked_indexer (cooked_index_storage *storage,
+                 dwarf2_per_cu_data *per_cu,
+                 enum language language)
+    : m_index_storage (storage),
+      m_per_cu (per_cu),
+      m_language (language),
+      m_obstack (),
+      m_die_range_map (addrmap_create_mutable (&m_obstack))
+  {
+  }
+
+  DISABLE_COPY_AND_ASSIGN (cooked_indexer);
+
+  /* Index the given CU.  Entries that were deferred during the scan
+     are added once the scan has finished.  */
+  void make_index (cutu_reader *reader);
+
+private:
+
+  /* A helper function to turn a section offset into an address that
+     can be used in an addrmap.  The offset itself is the value; when
+     IS_DWZ is true the most significant bit of the CORE_ADDR is set
+     as well, so a DWZ offset yields a different key than the same
+     offset with IS_DWZ false.  */
+  CORE_ADDR form_addr (sect_offset offset, bool is_dwz)
+  {
+    CORE_ADDR value = to_underlying (offset);
+    if (is_dwz)
+      value |= ((CORE_ADDR) 1) << (8 * sizeof (CORE_ADDR) - 1);
+    return value;
+  }
+
+  /* A helper function to scan the PC bounds of READER and record them
+     in the storage's addrmap.  */
+  void check_bounds (cutu_reader *reader);
+
+  /* Ensure that the indicated CU exists.  The cutu_reader for it is
+     returned.  FOR_SCANNING is true if the caller intends to scan all
+     the DIEs in the CU; when false, this use is assumed to be to look
+     up just a single DIE.  This can return nullptr, so callers must
+     check the result.  */
+  cutu_reader *ensure_cu_exists (cutu_reader *reader,
+                                dwarf2_per_objfile *per_objfile,
+                                sect_offset sect_off,
+                                bool is_dwz,
+                                bool for_scanning);
+
+  /* Index DIEs in the READER starting at INFO_PTR.  PARENT_ENTRY is
+     the entry for the enclosing scope (nullptr at top level).  FULLY
+     is true when a full scan must be done -- in some languages,
+     function scopes must be fully explored in order to find nested
+     functions.  This returns a pointer to just after the spot where
+     reading stopped.  */
+  const gdb_byte *index_dies (cutu_reader *reader,
+                             const gdb_byte *info_ptr,
+                             const cooked_index_entry *parent_entry,
+                             bool fully);
+
+  /* Scan the attributes for a given DIE and update the out
+     parameters.  Returns a pointer to the byte after the DIE.
+     SIBLING_OFFSET and MAYBE_DEFER may be nullptr; the other out
+     parameters are dereferenced unconditionally.  */
+  const gdb_byte *scan_attributes (dwarf2_per_cu_data *scanning_per_cu,
+                                  cutu_reader *reader,
+                                  const gdb_byte *watermark_ptr,
+                                  const gdb_byte *info_ptr,
+                                  const abbrev_info *abbrev,
+                                  const char **name,
+                                  const char **linkage_name,
+                                  cooked_index_flag *flags,
+                                  sect_offset *sibling_offset,
+                                  const cooked_index_entry **parent_entry,
+                                  CORE_ADDR *maybe_defer,
+                                  bool for_specification);
+
+  /* Handle DW_TAG_imported_unit, by scanning the DIE to find
+     DW_AT_import, and then scanning the referenced CU.  Returns a
+     pointer to the byte after the DIE.  */
+  const gdb_byte *index_imported_unit (cutu_reader *reader,
+                                      const gdb_byte *info_ptr,
+                                      const abbrev_info *abbrev);
+
+  /* Recursively read DIEs, recording the section offsets in
+     m_die_range_map and then calling index_dies.  Nothing is
+     recorded when PARENT_ENTRY is nullptr.  */
+  const gdb_byte *recurse (cutu_reader *reader,
+                          const gdb_byte *info_ptr,
+                          const cooked_index_entry *parent_entry,
+                          bool fully);
+
+  /* The storage object, where the results are kept.  */
+  cooked_index_storage *m_index_storage;
+  /* The CU that we are reading on behalf of.  This object might be
+     asked to index one CU but to treat the results as if they come
+     from some including CU; in this case the including CU would be
+     recorded here.  */
+  dwarf2_per_cu_data *m_per_cu;
+  /* The language that we're assuming when reading.  */
+  enum language m_language;
+
+  /* Temporary storage; m_die_range_map is allocated here.  */
+  auto_obstack m_obstack;
+  /* An addrmap that maps from section offsets (see the form_addr
+     method) to newly-created entries.  See m_deferred_entries to
+     understand this.  */
+  addrmap *m_die_range_map;
+
+  /* A single deferred entry.  DIE_OFFSET, NAME, TAG and FLAGS
+     describe the index entry to create later.  SPEC_OFFSET is the
+     form_addr-style key, as stored by scan_attributes, of the DIE
+     that this DIE refers to via its specification or origin
+     attribute.  */
+  struct deferred_entry
+  {
+    sect_offset die_offset;
+    const char *name;
+    CORE_ADDR spec_offset;
+    dwarf_tag tag;
+    cooked_index_flag flags;
+  };
+
+  /* The generated DWARF can sometimes have the declaration for a
+     method in a class (or perhaps namespace) scope, with the
+     definition appearing outside this scope... just one of the many
+     bad things about DWARF.  In order to handle this situation, we
+     defer certain entries until the end of scanning, at which point
+     we'll know the containing context of all the DIEs that we might
+     have scanned.  This vector stores these deferred entries.  */
+  std::vector<deferred_entry> m_deferred_entries;
+};
+
+
 
 /* DIE reader function for process_psymtab_comp_unit.  */
 
@@ -8398,7 +8641,8 @@ skip_one_die (const struct die_reader_specs *reader, const gdb_byte *info_ptr,
       /* We only handle DW_FORM_ref4 here.  */
       const gdb_byte *sibling_data = info_ptr + abbrev->sibling_offset;
       unsigned int offset = read_4_bytes (abfd, sibling_data);
-      const gdb_byte *sibling_ptr = buffer + offset;
+      const gdb_byte *sibling_ptr
+       = buffer + to_underlying (cu->header.sect_off) + offset;
       if (sibling_ptr >= info_ptr && sibling_ptr < reader->buffer_end)
        return sibling_ptr;
       /* Fall through to the slow way.  */
@@ -19069,6 +19313,573 @@ read_full_die (const struct die_reader_specs *reader,
 }
 \f
 
+void
+cooked_indexer::check_bounds (cutu_reader *reader)
+{
+  if (reader->cu->per_cu->addresses_seen)
+    return;
+
+  dwarf2_cu *cu = reader->cu;
+
+  CORE_ADDR best_lowpc = 0, best_highpc = 0;
+  /* Find the base address of the CU, then compute the PC bounds of
+     the CU DIE.  The storage's addrmap and the per-CU object are
+     passed along; presumably this lets the bounds reader record
+     `DW_AT_ranges' entries there directly.  */
+  dwarf2_find_base_address (reader->comp_unit_die, cu);
+  enum pc_bounds_kind cu_bounds_kind
+    = dwarf2_get_pc_bounds (reader->comp_unit_die, &best_lowpc, &best_highpc,
+                           cu, m_index_storage->get_addrmap (), cu->per_cu);
+  if (cu_bounds_kind == PC_BOUNDS_HIGH_LOW && best_lowpc < best_highpc)
+    {
+      struct objfile *objfile = cu->per_objfile->objfile;
+      CORE_ADDR baseaddr = objfile->text_section_offset ();
+      struct gdbarch *gdbarch = objfile->arch ();
+      CORE_ADDR low
+       = (gdbarch_adjust_dwarf2_addr (gdbarch, best_lowpc + baseaddr)
+          - baseaddr);
+      CORE_ADDR high
+       = (gdbarch_adjust_dwarf2_addr (gdbarch, best_highpc + baseaddr)
+          - baseaddr - 1);
+      /* Store the contiguous range.  The LOWPC < HIGHPC test above has
+        already rejected an empty range, which can occur for CUs with
+        no code.  HIGH was reduced by one above, so the end passed
+        here is the last address of the range.  */
+      addrmap_set_empty (m_index_storage->get_addrmap (), low, high,
+                        cu->per_cu);
+
+      cu->per_cu->addresses_seen = true;
+    }
+}
+
+/* Return true when a DIE whose tag is TAG may carry a linkage name,
+   and false for every other tag.  */
+
+static bool
+tag_can_have_linkage_name (enum dwarf_tag tag)
+{
+  /* Types are accepted alongside variables and functions because an
+     anonymous C++ type might have a name for linkage purposes.  */
+  return (tag == DW_TAG_class_type
+         || tag == DW_TAG_structure_type
+         || tag == DW_TAG_union_type
+         || tag == DW_TAG_variable
+         || tag == DW_TAG_subprogram);
+}
+
+cutu_reader *
+cooked_indexer::ensure_cu_exists (cutu_reader *reader,
+                                 dwarf2_per_objfile *per_objfile,
+                                 sect_offset sect_off, bool is_dwz,
+                                 bool for_scanning)
+{
+  /* Fast path: when SECT_OFF lies inside READER's own CU (same DWZ
+     setting, offset within the CU), READER itself is the answer.
+     Lookups for type unit references are always in the CU, and
+     cross-CU references will crash.  */
+  if (reader->cu->per_cu->is_dwz == is_dwz
+      && reader->cu->header.offset_in_cu_p (sect_off))
+    return reader;
+
+  dwarf2_per_cu_data *per_cu
+    = dwarf2_find_containing_comp_unit (sect_off, is_dwz,
+                                       per_objfile->per_bfd);
+
+  /* When scanning, we only want to visit a given CU a single time.
+     Doing this check here avoids self-imports as well.  A CU that is
+     already marked as scanned yields nullptr; otherwise it is marked
+     now, before any reader is created for it.  */
+  if (for_scanning)
+    {
+      if (per_cu->scanned)
+       return nullptr;
+      per_cu->scanned = true;
+    }
+  if (per_cu == m_per_cu)
+    return reader;
+
+  cutu_reader *result = m_index_storage->get_reader (per_cu);
+  if (result == nullptr)
+    {
+      cutu_reader new_reader (per_cu, per_objfile, nullptr, nullptr, false);
+
+      prepare_one_comp_unit (new_reader.cu, new_reader.comp_unit_die,
+                            language_minimal);
+      std::unique_ptr<cutu_reader> copy
+       (new cutu_reader (std::move (new_reader)));
+      result = m_index_storage->preserve (std::move (copy));
+    }
+
+  if (result->dummy_p || !result->comp_unit_die->has_children)
+    return nullptr;
+
+  if (for_scanning)
+    check_bounds (result);
+
+  return result;
+}
+
+const gdb_byte *
+cooked_indexer::scan_attributes (dwarf2_per_cu_data *scanning_per_cu,
+                                cutu_reader *reader,
+                                const gdb_byte *watermark_ptr,
+                                const gdb_byte *info_ptr,
+                                const abbrev_info *abbrev,
+                                const char **name,
+                                const char **linkage_name,
+                                cooked_index_flag *flags,
+                                sect_offset *sibling_offset,
+                                const cooked_index_entry **parent_entry,
+                                CORE_ADDR *maybe_defer,
+                                bool for_specification)
+{
+  bool origin_is_dwz = false;
+  bool is_declaration = false;
+  sect_offset origin_offset {};
+
+  gdb::optional<CORE_ADDR> low_pc;
+  gdb::optional<CORE_ADDR> high_pc;
+  bool high_pc_relative = false;
+
+  for (int i = 0; i < abbrev->num_attrs; ++i)
+    {
+      attribute attr;
+      info_ptr = read_attribute (reader, &attr, &abbrev->attrs[i], info_ptr);
+      if (attr.requires_reprocessing_p ())
+       read_attribute_reprocess (reader, &attr, abbrev->tag);
+
+      /* Record the data if it is of an attribute that the indexer
+        uses; all other attributes are read and ignored.  */
+      switch (attr.name)
+       {
+       case DW_AT_name:
+         switch (abbrev->tag)
+           {
+           case DW_TAG_compile_unit:
+           case DW_TAG_partial_unit:
+           case DW_TAG_type_unit:
+             /* Compilation units have a DW_AT_name that is a filename, not
+                a source language identifier.  */
+             break;
+
+           default:
+             if (*name == nullptr)
+               *name = attr.as_string ();
+             break;
+           }
+         break;
+
+       case DW_AT_linkage_name:
+       case DW_AT_MIPS_linkage_name:
+         /* Note that both forms of linkage name might appear.  We
+            assume they will be the same, and we only store the first
+            one we see.  */
+         if (*linkage_name == nullptr)
+           *linkage_name = attr.as_string ();
+         break;
+
+       case DW_AT_main_subprogram:
+         if (attr.as_boolean ())
+           *flags |= IS_MAIN;
+         break;
+
+       case DW_AT_declaration:
+         is_declaration = attr.as_boolean ();
+         break;
+
+       case DW_AT_sibling:
+         if (sibling_offset != nullptr)
+           *sibling_offset = attr.get_ref_die_offset ();
+         break;
+
+       case DW_AT_specification:
+       case DW_AT_abstract_origin:
+       case DW_AT_extension:
+         origin_offset = attr.get_ref_die_offset ();
+         origin_is_dwz = attr.form == DW_FORM_GNU_ref_alt;
+         break;
+
+       case DW_AT_external:
+         if (attr.as_boolean ())
+           *flags &= ~IS_STATIC;
+         break;
+
+       case DW_AT_enum_class:
+         if (attr.as_boolean ())
+           *flags |= IS_ENUM_CLASS;
+         break;
+
+       case DW_AT_low_pc:
+         low_pc = attr.as_address ();
+         break;
+
+       case DW_AT_high_pc:
+         high_pc = attr.as_address ();
+         if (reader->cu->header.version >= 4 && attr.form_is_constant ())
+           high_pc_relative = true;
+         break;
+
+       case DW_AT_location:
+         if (!scanning_per_cu->addresses_seen && attr.form_is_block ())
+           {
+             struct dwarf_block *locdesc = attr.as_block ();
+             CORE_ADDR addr = decode_locdesc (locdesc, reader->cu);
+             if (addr != 0
+                 || reader->cu->per_objfile->per_bfd->has_section_at_zero)
+               {
+                 low_pc = addr;
+                 /* For variables, we don't want to try decoding the
+                    type just to find the size -- for gdb's purposes
+                    we only need the address of a variable.  */
+                 high_pc = addr + 1;
+                 high_pc_relative = false;
+               }
+           }
+         break;
+
+       case DW_AT_ranges:
+         if (!scanning_per_cu->addresses_seen)
+           {
+             /* Offset in the .debug_ranges or .debug_rnglist section
+                (depending on DWARF version).  */
+             ULONGEST ranges_offset = attr.as_unsigned ();
+
+             /* See dwarf2_cu::gnu_ranges_base's doc for why we might
+                want to add this value.  */
+             ranges_offset += reader->cu->gnu_ranges_base;
+
+             CORE_ADDR lowpc, highpc;
+             dwarf2_ranges_read (ranges_offset, &lowpc, &highpc, reader->cu,
+                                 m_index_storage->get_addrmap (),
+                                 scanning_per_cu, abbrev->tag);
+           }
+         break;
+       }
+    }
+
+  /* We don't want to examine declarations, but if we found a
+     declaration when handling DW_AT_specification or the like, then
+     that is ok.  Similarly, we allow an external variable without a
+     location; those are resolved via minimal symbols.  */
+  if (is_declaration && !for_specification
+      && (abbrev->tag != DW_TAG_variable
+         || (*flags & IS_STATIC) != 0))
+    {
+      *linkage_name = nullptr;
+      *name = nullptr;
+    }
+  else if ((*name == nullptr
+           || (*linkage_name == nullptr
+               && tag_can_have_linkage_name (abbrev->tag))
+           || (*parent_entry == nullptr && m_language != language_c))
+          && origin_offset != sect_offset (0))
+    {
+      cutu_reader *new_reader
+       = ensure_cu_exists (reader, reader->cu->per_objfile, origin_offset,
+                           origin_is_dwz, false);
+      if (new_reader != nullptr)
+       {
+         const gdb_byte *new_info_ptr = (new_reader->buffer
+                                         + to_underlying (origin_offset));
+
+         if (new_reader->cu == reader->cu
+             && new_info_ptr > watermark_ptr
+             && maybe_defer != nullptr
+             && *parent_entry == nullptr)
+           *maybe_defer = form_addr (origin_offset, origin_is_dwz);
+         else if (*parent_entry == nullptr)
+           {
+             CORE_ADDR lookup = form_addr (origin_offset, origin_is_dwz);
+             *parent_entry
+               = (cooked_index_entry *) addrmap_find (m_die_range_map,
+                                                      lookup);
+           }
+
+         unsigned int bytes_read;
+         const abbrev_info *new_abbrev = peek_die_abbrev (*new_reader,
+                                                          new_info_ptr,
+                                                          &bytes_read);
+         new_info_ptr += bytes_read;
+         scan_attributes (scanning_per_cu, new_reader, new_info_ptr, new_info_ptr,
+                          new_abbrev, name, linkage_name, flags, nullptr,
+                          parent_entry, maybe_defer, true);
+       }
+    }
+
+  if (!for_specification)
+    {
+      if (m_language == language_ada
+         && *linkage_name == nullptr)
+       *linkage_name = *name;
+
+      if (!scanning_per_cu->addresses_seen
+         && low_pc.has_value ()
+         && (reader->cu->per_objfile->per_bfd->has_section_at_zero
+             || *low_pc != 0)
+         && high_pc.has_value ())
+       {
+         if (high_pc_relative)
+           high_pc = *high_pc + *low_pc;
+
+         if (*high_pc > *low_pc)
+           {
+             struct objfile *objfile = reader->cu->per_objfile->objfile;
+             CORE_ADDR baseaddr = objfile->text_section_offset ();
+             struct gdbarch *gdbarch = objfile->arch ();
+             CORE_ADDR lo
+               = (gdbarch_adjust_dwarf2_addr (gdbarch, *low_pc + baseaddr)
+                  - baseaddr);
+             CORE_ADDR hi
+               = (gdbarch_adjust_dwarf2_addr (gdbarch, *high_pc + baseaddr)
+                  - baseaddr);
+             addrmap_set_empty (m_index_storage->get_addrmap (), lo, hi - 1,
+                                scanning_per_cu);
+           }
+       }
+
+      if (abbrev->tag == DW_TAG_module || abbrev->tag == DW_TAG_namespace)
+       *flags &= ~IS_STATIC;
+
+      if (abbrev->tag == DW_TAG_namespace && *name == nullptr)
+       *name = "(anonymous namespace)";
+
+      if (m_language == language_cplus
+         && (abbrev->tag == DW_TAG_class_type
+             || abbrev->tag == DW_TAG_interface_type
+             || abbrev->tag == DW_TAG_structure_type
+             || abbrev->tag == DW_TAG_union_type
+             || abbrev->tag == DW_TAG_enumeration_type
+             || abbrev->tag == DW_TAG_enumerator))
+       *flags &= ~IS_STATIC;
+    }
+
+  return info_ptr;
+}
+
+const gdb_byte *
+cooked_indexer::index_imported_unit (cutu_reader *reader,
+                                    const gdb_byte *info_ptr,
+                                    const abbrev_info *abbrev)
+{
+  sect_offset sect_off {};
+  bool is_dwz = false;
+
+  for (int i = 0; i < abbrev->num_attrs; ++i)
+    {
+      /* Note that we never need to reprocess attributes here.  Only
+        DW_AT_import is examined; the other attributes are read just
+        to advance INFO_PTR past the DIE.  */
+      attribute attr;
+      info_ptr = read_attribute (reader, &attr, &abbrev->attrs[i], info_ptr);
+
+      if (attr.name == DW_AT_import)
+       {
+         sect_off = attr.get_ref_die_offset ();
+         is_dwz = (attr.form == DW_FORM_GNU_ref_alt
+                   || reader->cu->per_cu->is_dwz);
+       }
+    }
+
+  /* Did not find DW_AT_import: SECT_OFF still has its initial zero
+     value, so there is nothing to scan.  */
+  if (sect_off == sect_offset (0))
+    return info_ptr;
+
+  dwarf2_per_objfile *per_objfile = reader->cu->per_objfile;
+  cutu_reader *new_reader = ensure_cu_exists (reader, per_objfile, sect_off,
+                                             is_dwz, true);
+  if (new_reader != nullptr)
+    {
+      index_dies (new_reader, new_reader->info_ptr, nullptr, false);
+
+      reader->cu->add_dependence (new_reader->cu->per_cu);
+    }
+
+  return info_ptr;
+}
+
+const gdb_byte *
+cooked_indexer::recurse (cutu_reader *reader,
+                        const gdb_byte *info_ptr,
+                        const cooked_index_entry *parent_entry,
+                        bool fully)
+{
+  info_ptr = index_dies (reader, info_ptr, parent_entry, fully);
+
+  if (parent_entry != nullptr)
+    {
+      CORE_ADDR start = form_addr (parent_entry->die_offset,
+                                  reader->cu->per_cu->is_dwz);
+      CORE_ADDR end = form_addr (sect_offset (info_ptr - 1 - reader->buffer),
+                                reader->cu->per_cu->is_dwz);
+      addrmap_set_empty (m_die_range_map, start, end, (void *) parent_entry);
+    }
+
+  return info_ptr;
+}
+
+const gdb_byte *
+cooked_indexer::index_dies (cutu_reader *reader,
+                           const gdb_byte *info_ptr,
+                           const cooked_index_entry *parent_entry,
+                           bool fully)
+{
+  const gdb_byte *end_ptr = info_ptr + reader->cu->header.get_length ();
+
+  while (info_ptr < end_ptr)
+    {
+      sect_offset this_die = (sect_offset) (info_ptr - reader->buffer);
+      unsigned int bytes_read;
+      const abbrev_info *abbrev = peek_die_abbrev (*reader, info_ptr,
+                                                  &bytes_read);
+      info_ptr += bytes_read;
+      if (abbrev == nullptr)
+       break;
+
+      if (abbrev->tag == DW_TAG_imported_unit)
+       {
+         info_ptr = index_imported_unit (reader, info_ptr, abbrev);
+         continue;
+       }
+
+      if (!abbrev->interesting)
+       {
+         info_ptr = skip_one_die (reader, info_ptr, abbrev, !fully);
+         if (fully && abbrev->has_children)
+           info_ptr = index_dies (reader, info_ptr, parent_entry, fully);
+         continue;
+       }
+
+      const char *name = nullptr;
+      const char *linkage_name = nullptr;
+      CORE_ADDR defer = 0;
+      cooked_index_flag flags = IS_STATIC;
+      sect_offset sibling {};
+      const cooked_index_entry *this_parent_entry = parent_entry;
+      info_ptr = scan_attributes (reader->cu->per_cu, reader, info_ptr,
+                                 info_ptr, abbrev, &name, &linkage_name,
+                                 &flags, &sibling, &this_parent_entry,
+                                 &defer, false);
+
+      if (abbrev->tag == DW_TAG_namespace
+         && m_language == language_cplus
+         && strcmp (name, "::") == 0)
+       {
+         /* GCC 4.0 and 4.1 had a bug (PR c++/28460) where they
+            generated bogus DW_TAG_namespace DIEs with a name of "::"
+            for the global namespace.  Work around this problem
+            here.  */
+         name = nullptr;
+       }
+
+      const cooked_index_entry *this_entry = nullptr;
+      if (name != nullptr)
+       {
+         if (defer != 0)
+           m_deferred_entries.push_back ({
+               this_die, name, defer, abbrev->tag, flags
+             });
+         else
+           this_entry = m_index_storage->add (this_die, abbrev->tag, flags,
+                                              name, this_parent_entry,
+                                              m_per_cu);
+       }
+
+      if (linkage_name != nullptr)
+       {
+         /* We only want this to be "main" if it has a linkage name
+            but not an ordinary name.  */
+         if (name != nullptr)
+           flags = flags & ~IS_MAIN;
+         m_index_storage->add (this_die, abbrev->tag, flags | IS_LINKAGE,
+                               linkage_name, nullptr, m_per_cu);
+       }
+
+      if (abbrev->has_children)
+       {
+         switch (abbrev->tag)
+           {
+           case DW_TAG_class_type:
+           case DW_TAG_interface_type:
+           case DW_TAG_structure_type:
+           case DW_TAG_union_type:
+             if (m_language != language_c && this_entry != nullptr)
+               {
+                 info_ptr = recurse (reader, info_ptr, this_entry, fully);
+                 continue;
+               }
+             break;
+
+           case DW_TAG_enumeration_type:
+             /* We need to recurse even for an anonymous enumeration.
+                Which scope we record as the parent scope depends on
+                whether we're reading an "enum class".  If so, we use
+                the enum itself as the parent, yielding names like
+                "enum_class::enumerator"; otherwise we inject the
+                names into our own parent scope.  */
+             info_ptr = recurse (reader, info_ptr,
+                                 ((flags & IS_ENUM_CLASS) == 0)
+                                 ? parent_entry
+                                 : this_entry,
+                                 fully);
+             continue;
+
+           case DW_TAG_module:
+             if (this_entry == nullptr)
+               break;
+             /* FALLTHROUGH */
+           case DW_TAG_namespace:
+             /* We don't check THIS_ENTRY for a namespace, to handle
+                the ancient G++ workaround pointed out above.  */
+             info_ptr = recurse (reader, info_ptr, this_entry, fully);
+             continue;
+
+           case DW_TAG_subprogram:
+             if ((m_language == language_fortran
+                  || m_language == language_ada)
+                 && this_entry != nullptr)
+               {
+                 info_ptr = recurse (reader, info_ptr, this_entry, true);
+                 continue;
+               }
+             break;
+           }
+
+         if (sibling != sect_offset (0))
+           {
+             const gdb_byte *sibling_ptr
+               = reader->buffer + to_underlying (sibling);
+
+             if (sibling_ptr < info_ptr)
+               complaint (_("DW_AT_sibling points backwards"));
+             else if (sibling_ptr > reader->buffer_end)
+               reader->die_section->overflow_complaint ();
+             else
+               info_ptr = sibling_ptr;
+           }
+         else
+           info_ptr = skip_children (reader, info_ptr);
+       }
+    }
+
+  return info_ptr;
+}
+
+void
+cooked_indexer::make_index (cutu_reader *reader)
+{
+  check_bounds (reader);
+  find_file_and_directory (reader->comp_unit_die, reader->cu);
+  if (!reader->comp_unit_die->has_children)
+    return;
+  index_dies (reader, reader->info_ptr, nullptr, false);
+
+  for (const auto &entry : m_deferred_entries)
+    {
+      CORE_ADDR key = form_addr (entry.die_offset, m_per_cu->is_dwz);
+      cooked_index_entry *parent
+       = (cooked_index_entry *) addrmap_find (m_die_range_map, key);
+      m_index_storage->add (entry.die_offset, entry.tag, entry.flags,
+                           entry.name, parent, m_per_cu);
+    }
+}
+
 /* Returns nonzero if TAG represents a type that we might generate a partial
    symbol for.  */
 
index 58f6a673e69b96704c6f88c78adeb05a162e1905..06a35d59d28783be201f26e84046361717ba508c 100644 (file)
@@ -106,7 +106,8 @@ struct dwarf2_per_cu_data
       m_header_read_in (false),
       addresses_seen (false),
       unit_type {},
-      lang (language_unknown)
+      lang (language_unknown),
+      scanned (false)
   {
   }
 
@@ -165,6 +166,10 @@ struct dwarf2_per_cu_data
   /* The language of this CU.  */
   ENUM_BITFIELD (language) lang : LANGUAGE_BITS;
 
+  /* True if this CU has been scanned by the indexer; false if
+     not.  */
+  bool scanned : 1;
+
   /* Our index in the unshared "symtabs" vector.  */
   unsigned index = 0;