Parallelize DWARF indexing

author Tom Tromey <tom@tromey.com>

Sun, 18 Apr 2021 21:20:43 +0000 (15:20 -0600)

committer Tom Tromey <tom@tromey.com>

Tue, 12 Apr 2022 15:31:16 +0000 (09:31 -0600)
author Tom Tromey <tom@tromey.com>
Sun, 18 Apr 2021 21:20:43 +0000 (15:20 -0600)
committer Tom Tromey <tom@tromey.com>
Tue, 12 Apr 2022 15:31:16 +0000 (09:31 -0600)
diff --git a/gdb/dwarf2/cooked-index.c b/gdb/dwarf2/cooked-index.c

index 1b7e25d2350c2757eefd383ccfdc1e7f98219b78..e56a47520f86f8b5a2c028dca162d8a02673b1df 100644 (file)
--- a/gdb/dwarf2/cooked-index.c
+++ b/gdb/dwarf2/cooked-index.c
@@ -113,10 +113,41 @@ cooked_index::add (sect_offset die_offset, enum dwarf_tag tag,
    return result;
  }
  
+cooked_index_vector::cooked_index_vector (vec_type &&vec)
+  : m_vector (std::move (vec))
+{
+  finalize ();
+}
+
+/* See cooked-index.h.  */
+
+dwarf2_per_cu_data *
+cooked_index_vector::lookup (CORE_ADDR addr)
+{
+  for (const auto &index : m_vector)
+    {
+      dwarf2_per_cu_data *result = index->lookup (addr);
+      if (result != nullptr)
+       return result;
+    }
+  return nullptr;
+}
+
+/* See cooked-index.h.  */
+
+std::vector<addrmap *>
+cooked_index_vector::get_addrmaps ()
+{
+  std::vector<addrmap *> result;
+  for (const auto &index : m_vector)
+    result.push_back (index->m_addrmap);
+  return result;
+}
+
  /* See cooked-index.h.  */
  
-cooked_index::range
-cooked_index::find (gdb::string_view name, bool completing)
+cooked_index_vector::range
+cooked_index_vector::find (gdb::string_view name, bool completing)
  {
    auto lower = std::lower_bound (m_entries.begin (), m_entries.end (),
                                  name,
@@ -146,8 +177,8 @@ cooked_index::find (gdb::string_view name, bool completing)
  /* See cooked-index.h.  */
  
  gdb::unique_xmalloc_ptr<char>
-cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
-                                        htab_t gnat_entries)
+cooked_index_vector::handle_gnat_encoded_entry (cooked_index_entry *entry,
+                                               htab_t gnat_entries)
  {
    std::string canonical = ada_decode (entry->name, false, false);
    if (canonical.empty ())
@@ -170,9 +201,11 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
         {
           gdb::unique_xmalloc_ptr<char> new_name
             = make_unique_xstrndup (name.data (), name.length ());
-         last = create (entry->die_offset, DW_TAG_namespace,
-                        0, new_name.get (), parent,
-                        entry->per_cu);
+         /* It doesn't matter which obstack we allocate this on, so
+            we pick the most convenient one.  */
+         last = m_vector[0]->create (entry->die_offset, DW_TAG_namespace,
+                                     0, new_name.get (), parent,
+                                     entry->per_cu);
           last->canonical = last->name;
           m_names.push_back (std::move (new_name));
           *slot = last;
@@ -187,8 +220,28 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
  
  /* See cooked-index.h.  */
  
+const cooked_index_entry *
+cooked_index_vector::get_main () const
+{
+  const cooked_index_entry *result = nullptr;
+
+  for (const auto &index : m_vector)
+    {
+      const cooked_index_entry *entry = index->get_main ();
+      if (result == nullptr
+         || ((result->flags & IS_MAIN) == 0
+             && entry != nullptr
+             && (entry->flags & IS_MAIN) != 0))
+       result = entry;
+    }
+
+  return result;
+}
+
+/* See cooked-index.h.  */
+
  void
-cooked_index::finalize ()
+cooked_index_vector::finalize ()
  {
    auto hash_name_ptr = [] (const void *p)
      {
@@ -210,38 +263,26 @@ cooked_index::finalize ()
    htab_up seen_names (htab_create_alloc (10, hash_name_ptr, eq_name_ptr,
                                          nullptr, xcalloc, xfree));
  
-  htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
-                                          nullptr, xcalloc, xfree));
-
-  for (cooked_index_entry *entry : m_entries)
+  for (auto &index : m_vector)
      {
-      gdb_assert (entry->canonical == nullptr);
-      if ((entry->per_cu->lang != language_cplus
-          && entry->per_cu->lang != language_ada)
-         || (entry->flags & IS_LINKAGE) != 0)
-       entry->canonical = entry->name;
-      else
+      htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
+                                              nullptr, xcalloc, xfree));
+
+      std::vector<cooked_index_entry *> entries
+       = std::move (index->m_entries);
+      for (cooked_index_entry *entry : entries)
         {
-         if (entry->per_cu->lang == language_ada)
-           {
-             gdb::unique_xmalloc_ptr<char> canon_name
-               = handle_gnat_encoded_entry (entry, gnat_entries.get ());
-             if (canon_name == nullptr)
-               entry->canonical = entry->name;
-             else
-               {
-                 entry->canonical = canon_name.get ();
-                 m_names.push_back (std::move (canon_name));
-               }
-           }
+         gdb_assert (entry->canonical == nullptr);
+         if ((entry->per_cu->lang != language_cplus
+              && entry->per_cu->lang != language_ada)
+             || (entry->flags & IS_LINKAGE) != 0)
+           entry->canonical = entry->name;
           else
             {
-             void **slot = htab_find_slot (seen_names.get (), entry,
-                                           INSERT);
-             if (*slot == nullptr)
+             if (entry->per_cu->lang == language_ada)
                 {
                   gdb::unique_xmalloc_ptr<char> canon_name
-                   = cp_canonicalize_string (entry->name);
+                   = handle_gnat_encoded_entry (entry, gnat_entries.get ());
                   if (canon_name == nullptr)
                     entry->canonical = entry->name;
                   else
@@ -252,12 +293,39 @@ cooked_index::finalize ()
                 }
               else
                 {
-                 const cooked_index_entry *other
-                   = (const cooked_index_entry *) *slot;
-                 entry->canonical = other->canonical;
+                 void **slot = htab_find_slot (seen_names.get (), entry,
+                                               INSERT);
+                 if (*slot == nullptr)
+                   {
+                     gdb::unique_xmalloc_ptr<char> canon_name
+                       = cp_canonicalize_string (entry->name);
+                     if (canon_name == nullptr)
+                       entry->canonical = entry->name;
+                     else
+                       {
+                         entry->canonical = canon_name.get ();
+                         m_names.push_back (std::move (canon_name));
+                       }
+                   }
+                 else
+                   {
+                     const cooked_index_entry *other
+                       = (const cooked_index_entry *) *slot;
+                     entry->canonical = other->canonical;
+                   }
                 }
             }
         }
+
+      if (m_entries.empty ())
+       m_entries = std::move (entries);
+      else
+       {
+         size_t old_size = m_entries.size ();
+         m_entries.resize (m_entries.size () + entries.size ());
+         memcpy (m_entries.data () + old_size,
+                 entries.data (), entries.size () * sizeof (entries[0]));
+       }
      }
  
    m_names.shrink_to_fit ();
diff --git a/gdb/dwarf2/cooked-index.h b/gdb/dwarf2/cooked-index.h

index 0a38fc88e52bca428c19c4fae968a76ee5998063..188155190ec6110fe78ca943a4efa3daaca59732 100644 (file)
--- a/gdb/dwarf2/cooked-index.h
+++ b/gdb/dwarf2/cooked-index.h
@@ -30,6 +30,7 @@
  #include "gdbsupport/gdb_obstack.h"
  #include "addrmap.h"
  #include "gdbsupport/iterator-range.h"
+#include "gdbsupport/thread-pool.h"
  
  struct dwarf2_per_cu_data;
  
@@ -155,6 +156,8 @@ private:
    void write_scope (struct obstack *storage, const char *sep) const;
  };
  
+class cooked_index_vector;
+
  /* An index of interesting DIEs.  This is "cooked", in contrast to a
     mapped .debug_names or .gdb_index, which are "raw".  An entry in
     the index is of type cooked_index_entry.
@@ -178,13 +181,6 @@ public:
                                  const cooked_index_entry *parent_entry,
                                  dwarf2_per_cu_data *per_cu);
  
-  /* Return the entry that is believed to represent the program's
-     "main".  This will return NULL if no such entry is available.  */
-  const cooked_index_entry *get_main () const
-  {
-    return m_main;
-  }
-
    /* Install a new fixed addrmap from the given mutable addrmap.  */
    void install_addrmap (addrmap *map)
    {
@@ -192,6 +188,17 @@ public:
      m_addrmap = addrmap_create_fixed (map, &m_storage);
    }
  
+  friend class cooked_index_vector;
+
+private:
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const
+  {
+    return m_main;
+  }
+
    /* Look up ADDR in the address map, and return either the
       corresponding CU, or nullptr if the address could not be
       found.  */
@@ -200,10 +207,52 @@ public:
      return (dwarf2_per_cu_data *) addrmap_find (m_addrmap, addr);
    }
  
-  /* Finalize the index.  This should be called a single time, when
-     the index has been fully populated.  It enters all the entries
-     into the internal hash table.  */
-  void finalize ();
+  /* Create a new cooked_index_entry and register it with this object.
+     Entries are owned by this object.  The new item is returned.  */
+  cooked_index_entry *create (sect_offset die_offset,
+                             enum dwarf_tag tag,
+                             cooked_index_flag flags,
+                             const char *name,
+                             const cooked_index_entry *parent_entry,
+                             dwarf2_per_cu_data *per_cu)
+  {
+    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
+                                               name, parent_entry,
+                                               per_cu);
+  }
+
+  /* Storage for the entries.  */
+  auto_obstack m_storage;
+  /* List of all entries.  */
+  std::vector<cooked_index_entry *> m_entries;
+  /* If we found "main" or an entry with 'is_main' set, store it
+     here.  */
+  cooked_index_entry *m_main = nullptr;
+  /* When constructing the index, entries are stored on a linked list.
+     This member points to the head of that list.  Later, they are
+     entered into the hash table, at which point this is no longer
+     used.  */
+  cooked_index_entry *m_start = nullptr;
+  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
+     objects.  */
+  addrmap *m_addrmap = nullptr;
+};
+
+/* The main index of DIEs.  The parallel DIE indexers create
+   cooked_index objects.  Then, these are all handled to a
+   cooked_index_vector for storage and final indexing.  The index is
+   made by iterating over the entries previously created.  */
+
+class cooked_index_vector
+{
+public:
+
+  /* A convenience typedef for the vector that is contained in this
+     object.  */
+  typedef std::vector<std::unique_ptr<cooked_index>> vec_type;
+
+  explicit cooked_index_vector (vec_type &&vec);
+  DISABLE_COPY_AND_ASSIGN (cooked_index_vector);
  
    /* A simple range over part of m_entries.  */
    typedef iterator_range<std::vector<cooked_index_entry *>::iterator> range;
@@ -219,6 +268,19 @@ public:
      return { m_entries.begin (), m_entries.end () };
    }
  
+  /* Look up ADDR in the address map, and return either the
+     corresponding CU, or nullptr if the address could not be
+     found.  */
+  dwarf2_per_cu_data *lookup (CORE_ADDR addr);
+
+  /* Return a new vector of all the addrmaps used by all the indexes
+     held by this object.  */
+  std::vector<addrmap *> get_addrmaps ();
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const;
+
  private:
  
    /* GNAT only emits mangled ("encoded") names in the DWARF, and does
@@ -229,32 +291,20 @@ private:
    gdb::unique_xmalloc_ptr<char> handle_gnat_encoded_entry
         (cooked_index_entry *entry, htab_t gnat_entries);
  
-  /* Create a new cooked_index_entry and register it with this object.
-     Entries are owned by this object.  The new item is returned.  */
-  cooked_index_entry *create (sect_offset die_offset,
-                             enum dwarf_tag tag,
-                             cooked_index_flag flags,
-                             const char *name,
-                             const cooked_index_entry *parent_entry,
-                             dwarf2_per_cu_data *per_cu)
-  {
-    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
-                                               name, parent_entry,
-                                               per_cu);
-  }
+  /* Finalize the index.  This should be called a single time, when
+     the index has been fully populated.  It enters all the entries
+     into the internal hash table.  */
+  void finalize ();
  
-  /* Storage for the entries.  */
-  auto_obstack m_storage;
-  /* List of all entries.  */
+  /* The vector of cooked_index objects.  This is stored because the
+     entries are stored on the obstacks in those objects.  */
+  vec_type m_vector;
+
+  /* List of all entries.  This is sorted during finalization.  */
    std::vector<cooked_index_entry *> m_entries;
-  /* If we found "main" or an entry with 'is_main' set, store it
-     here.  */
-  cooked_index_entry *m_main = nullptr;
+
    /* Storage for canonical names.  */
    std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
-  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
-     objects.  */
-  addrmap *m_addrmap = nullptr;
  };
  
  #endif /* GDB_DWARF2_COOKED_INDEX_H */
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c

index 5998b7ffca7773e0577aee4b48e5a8ffa3df1b67..fce465e4dab8ecb28084a0f8efc961fcb3dc9784 100644 (file)
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -92,6 +92,8 @@
  #include "dwarf2/abbrev-cache.h"
  #include "cooked-index.h"
  #include "split-name.h"
+#include "gdbsupport/parallel-for.h"
+#include "gdbsupport/thread-pool.h"
  
  /* When == 1, print basic high level tracing messages.
     When > 1, be more verbose.
@@ -6536,6 +6538,14 @@ lookup_dwo_id (struct dwarf2_cu *cu, struct die_info* comp_unit_die)
  static struct dwo_unit *
  lookup_dwo_unit (dwarf2_cu *cu, die_info *comp_unit_die, const char *dwo_name)
  {
+#if CXX_STD_THREAD
+  /* We need a lock here both to handle the DWO hash table, and BFD,
+     which is not thread-safe.  */
+  static std::mutex dwo_lock;
+
+  std::lock_guard<std::mutex> guard (dwo_lock);
+#endif
+
    dwarf2_per_cu_data *per_cu = cu->per_cu;
    struct dwo_unit *dwo_unit;
    const char *comp_dir;
@@ -6683,9 +6693,14 @@ cutu_reader::cutu_reader (dwarf2_per_cu_data *this_cu,
      }
    else
      {
-      /* If an existing_cu is provided, a dwarf2_cu must not exist for this_cu
-        in per_objfile yet.  */
-      gdb_assert (per_objfile->get_cu (this_cu) == nullptr);
+      /* If an existing_cu is provided, a dwarf2_cu must not exist for
+        this_cu in per_objfile yet.  Here, CACHE doubles as a flag to
+        let us know that the CU is being scanned using the parallel
+        indexer.  This assert is avoided in this case because (1) it
+        is irrelevant, and (2) the get_cu method is not
+        thread-safe.  */
+      gdb_assert (cache != nullptr
+                 || per_objfile->get_cu (this_cu) == nullptr);
        m_new_cu.reset (new dwarf2_cu (this_cu, per_objfile));
        cu = m_new_cu.get ();
      }
@@ -7499,9 +7514,9 @@ process_psymtab_comp_unit (dwarf2_per_cu_data *this_cu,
      {
        if (per_objfile->per_bfd->using_index)
         {
-         if (!this_cu->scanned)
+         bool nope = false;
+         if (this_cu->scanned.compare_exchange_strong (nope, true))
             {
-             this_cu->scanned = true;
               prepare_one_comp_unit (reader.cu, reader.comp_unit_die,
                                      pretend_language);
               gdb_assert (storage != nullptr);
@@ -7869,6 +7884,7 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
      = per_bfd->using_index ? &index_storage : nullptr;
    create_all_comp_units (per_objfile);
    build_type_psymtabs (per_objfile, index_storage_ptr);
+  std::vector<std::unique_ptr<cooked_index>> indexes;
    if (per_bfd->using_index)
      {
        per_bfd->quick_file_names_table
@@ -7877,15 +7893,70 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
        if (!per_bfd->debug_aranges.empty ())
         read_addrmap_from_aranges (per_objfile, &per_bfd->debug_aranges,
                                    index_storage.get_addrmap ());
-    }
  
-  for (const auto &per_cu : per_bfd->all_comp_units)
+      {
+       /* Ensure that complaints are handled correctly.  */
+       complaint_interceptor complaint_handler;
+
+       using iter_type = typeof (per_bfd->all_comp_units.begin ());
+
+       /* Each thread returns a pair holding a cooked index, and a vector
+          of errors that should be printed.  The latter is done because
+          GDB's I/O system is not thread-safe.  run_on_main_thread could be
+          used, but that would mean the messages are printed after the
+          prompt, which looks weird.  */
+       using result_type = std::pair<std::unique_ptr<cooked_index>,
+                                     std::vector<gdb_exception>>;
+       std::vector<result_type> results
+         = gdb::parallel_for_each (1, per_bfd->all_comp_units.begin (),
+                                   per_bfd->all_comp_units.end (),
+                                   [=] (iter_type iter, iter_type end)
+                                   {
+                                     std::vector<gdb_exception> errors;
+                                     cooked_index_storage thread_storage;
+                                     for (; iter != end; ++iter)
+                                       {
+                                         dwarf2_per_cu_data *per_cu = iter->get ();
+                                         try
+                                           {
+                                             process_psymtab_comp_unit (per_cu, per_objfile,
+                                                                        false,
+                                                                        language_minimal,
+                                                                        &thread_storage);
+                                           }
+                                         catch (gdb_exception &except)
+                                           {
+                                             errors.push_back (std::move (except));
+                                           }
+                                       }
+                                     return result_type (thread_storage.release (),
+                                                         std::move (errors));
+                                   });
+
+       /* Only show a given exception a single time.  */
+       std::unordered_set<gdb_exception> seen_exceptions;
+       for (auto &one_result : results)
+         {
+           indexes.push_back (std::move (one_result.first));
+           for (auto &one_exc : one_result.second)
+             {
+               if (seen_exceptions.insert (one_exc).second)
+                 exception_print (gdb_stderr, one_exc);
+             }
+         }
+      }
+    }
+  else
      {
-      if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
-       /* In case a forward DW_TAG_imported_unit has read the CU already.  */
-       continue;
-      process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
-                                language_minimal, index_storage_ptr);
+      for (const auto &per_cu : per_bfd->all_comp_units)
+       {
+         if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
+           /* In case a forward DW_TAG_imported_unit has read the CU
+              already.  */
+           continue;
+         process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
+                                    language_minimal, nullptr);
+       }
      }
  
    /* This has to wait until we read the CUs, we need the list of DWOs.  */
@@ -7903,8 +7974,20 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
  
    if (per_bfd->using_index)
      {
-      per_bfd->cooked_index_table = index_storage.release ();
-      per_bfd->cooked_index_table->finalize ();
+      indexes.push_back (index_storage.release ());
+      /* Remove any NULL entries.  This might happen if parallel-for
+        decides to throttle the number of threads that were used.  */
+      indexes.erase
+       (std::remove_if (indexes.begin (),
+                        indexes.end (),
+                        [] (const std::unique_ptr<cooked_index> &entry)
+                        {
+                          return entry == nullptr;
+                        }),
+        indexes.end ());
+      indexes.shrink_to_fit ();
+      per_bfd->cooked_index_table.reset
+       (new cooked_index_vector (std::move (indexes)));
  
        const cooked_index_entry *main_entry
         = per_bfd->cooked_index_table->get_main ();
@@ -19463,9 +19546,9 @@ cooked_indexer::ensure_cu_exists (cutu_reader *reader,
       Doing this check here avoids self-imports as well.  */
    if (for_scanning)
      {
-      if (per_cu->scanned)
+      bool nope = false;
+      if (!per_cu->scanned.compare_exchange_strong (nope, true))
         return nullptr;
-      per_cu->scanned = true;
      }
    if (per_cu == m_per_cu)
      return reader;
diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h

index 4ee7de98c501b2e1af60a3b4f786294cb7e0645e..4d39c465420fd240dee0068ff7d7cc7a43c2f902 100644 (file)
--- a/gdb/dwarf2/read.h
+++ b/gdb/dwarf2/read.h
@@ -169,7 +169,7 @@ struct dwarf2_per_cu_data
  
    /* True if this CU has been scanned by the indexer; false if
       not.  */
-  bool scanned : 1;
+  std::atomic<bool> scanned;
  
    /* Our index in the unshared "symtabs" vector.  */
    unsigned index = 0;
@@ -457,7 +457,7 @@ public:
    std::unique_ptr<mapped_debug_names> debug_names_table;
  
    /* The cooked index, or NULL if not using one.  */
-  std::unique_ptr<cooked_index> cooked_index_table;
+  std::unique_ptr<cooked_index_vector> cooked_index_table;
  
    /* When using index_table, this keeps track of all quick_file_names entries.
       TUs typically share line table entries with a CU, so we maintain a
author	Tom Tromey <tom@tromey.com>
	Sun, 18 Apr 2021 21:20:43 +0000 (15:20 -0600)
committer	Tom Tromey <tom@tromey.com>
	Tue, 12 Apr 2022 15:31:16 +0000 (09:31 -0600)
gdb/dwarf2/cooked-index.c		patch \| blob \| history
gdb/dwarf2/cooked-index.h		patch \| blob \| history
gdb/dwarf2/read.c		patch \| blob \| history
gdb/dwarf2/read.h		patch \| blob \| history