Change .gdb_index de-duplication implementation

author Tom Tromey <tromey@adacore.com>

Thu, 22 Sep 2022 19:10:55 +0000 (13:10 -0600)

committer Tom Tromey <tromey@adacore.com>

Mon, 17 Oct 2022 14:01:39 +0000 (08:01 -0600)
author Tom Tromey <tromey@adacore.com>
Thu, 22 Sep 2022 19:10:55 +0000 (13:10 -0600)
committer Tom Tromey <tromey@adacore.com>
Mon, 17 Oct 2022 14:01:39 +0000 (08:01 -0600)
diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c

index 6b4052c346759515490883bdea6103210396355b..f592734addc58dad6596b37a1acfa266f8238940 100644 (file)
--- a/gdb/dwarf2/index-write.c
+++ b/gdb/dwarf2/index-write.c
@@ -176,6 +176,10 @@ struct symtab_index_entry
    /* A sorted vector of the indices of all the CUs that hold an object
       of this name.  */
    std::vector<offset_type> cu_indices;
+
+  /* Minimize CU_INDICES, sorting them and removing duplicates as
+     appropriate.  */
+  void minimize ();
  };
  
  /* The symbol table.  This is a power-of-2-sized hash table.  */
@@ -186,6 +190,13 @@ struct mapped_symtab
      data.resize (1024);
    }
  
+  /* Minimize each entry in the symbol table, removing duplicates.  */
+  void minimize ()
+  {
+    for (symtab_index_entry &item : data)
+      item.minimize ();
+  }
+
    offset_type n_elements = 0;
    std::vector<symtab_index_entry> data;
  
@@ -271,21 +282,36 @@ add_index_entry (struct mapped_symtab *symtab, const char *name,
    slot.cu_indices.push_back (cu_index_and_attrs);
  }
  
-/* Sort and remove duplicates of all symbols' cu_indices lists.  */
+/* See symtab_index_entry.  */
  
-static void
-uniquify_cu_indices (struct mapped_symtab *symtab)
+void
+symtab_index_entry::minimize ()
  {
-  for (auto &entry : symtab->data)
+  if (name == nullptr || cu_indices.empty ())
+    return;
+
+  std::sort (cu_indices.begin (), cu_indices.end ());
+  auto from = std::unique (cu_indices.begin (), cu_indices.end ());
+  cu_indices.erase (from, cu_indices.end ());
+
+  /* We don't want to enter a variable or type more than once, so
+     remove any such duplicates from the list as well.  When doing
+     this, we want to keep the entry from the first CU -- but this is
+     implicit due to the sort.  This choice is done because it's
+     similar to what gdb historically did for partial symbols.  */
+  std::unordered_set<offset_type> seen;
+  from = std::remove_if (cu_indices.begin (), cu_indices.end (),
+                        [&] (offset_type val)
      {
-      if (entry.name != NULL && !entry.cu_indices.empty ())
-       {
-         auto &cu_indices = entry.cu_indices;
-         std::sort (cu_indices.begin (), cu_indices.end ());
-         auto from = std::unique (cu_indices.begin (), cu_indices.end ());
-         cu_indices.erase (from, cu_indices.end ());
-       }
-    }
+      gdb_index_symbol_kind kind = GDB_INDEX_SYMBOL_KIND_VALUE (val);
+      if (kind != GDB_INDEX_SYMBOL_KIND_TYPE
+         && kind != GDB_INDEX_SYMBOL_KIND_VARIABLE)
+       return false;
+
+      val &= ~GDB_INDEX_CU_MASK;
+      return !seen.insert (val).second;
+    });
+  cu_indices.erase (from, cu_indices.end ());
  }
  
  /* A form of 'const char *' suitable for container keys.  Only the
@@ -1100,15 +1126,6 @@ write_cooked_index (cooked_index_vector *table,
                     const cu_index_map &cu_index_htab,
                     struct mapped_symtab *symtab)
  {
-  /* We track type names and only enter a given type once.  */
-  htab_up type_names (htab_create_alloc (10, htab_hash_string, htab_eq_string,
-                                        nullptr, xcalloc, xfree));
-  /* Same with variable names.  However, if a type and variable share
-     a name, we want both, which is why there are two hash tables
-     here.  */
-  htab_up var_names (htab_create_alloc (10, htab_hash_string, htab_eq_string,
-                                       nullptr, xcalloc, xfree));
-
    const char *main_for_ada = main_name ();
  
    for (const cooked_index_entry *entry : table->all_entries ())
@@ -1157,24 +1174,12 @@ write_cooked_index (cooked_index_vector *table,
        else if (entry->tag == DW_TAG_variable
                || entry->tag == DW_TAG_constant
                || entry->tag == DW_TAG_enumerator)
-       {
-         kind = GDB_INDEX_SYMBOL_KIND_VARIABLE;
-         void **slot = htab_find_slot (var_names.get (), name, INSERT);
-         if (*slot != nullptr)
-           continue;
-         *slot = (void *) name;
-       }
+       kind = GDB_INDEX_SYMBOL_KIND_VARIABLE;
        else if (entry->tag == DW_TAG_module
                || entry->tag == DW_TAG_common_block)
         kind = GDB_INDEX_SYMBOL_KIND_OTHER;
        else
-       {
-         kind = GDB_INDEX_SYMBOL_KIND_TYPE;
-         void **slot = htab_find_slot (type_names.get (), name, INSERT);
-         if (*slot != nullptr)
-           continue;
-         *slot = (void *) name;
-       }
+       kind = GDB_INDEX_SYMBOL_KIND_TYPE;
  
        add_index_entry (symtab, name, (entry->flags & IS_STATIC) != 0,
                        kind, it->second);
@@ -1252,7 +1257,7 @@ write_gdbindex (dwarf2_per_objfile *per_objfile,
  
    /* Now that we've processed all symbols we can shrink their cu_indices
       lists.  */
-  uniquify_cu_indices (&symtab);
+  symtab.minimize ();
  
    data_buf symtab_vec, constant_pool;
    if (symtab.n_elements == 0)
author	Tom Tromey <tromey@adacore.com>
	Thu, 22 Sep 2022 19:10:55 +0000 (13:10 -0600)
committer	Tom Tromey <tromey@adacore.com>
	Mon, 17 Oct 2022 14:01:39 +0000 (08:01 -0600)