gdb/bcache.c

   1 /* Implement a cached obstack.
   2    Written by Fred Fish <fnf@cygnus.com>
   3    Rewritten by Jim Blandy <jimb@cygnus.com>
   4
   5    Copyright (C) 1999, 2000, 2002, 2003, 2007, 2008, 2009, 2010
   6    Free Software Foundation, Inc.
   7
   8    This file is part of GDB.
   9
  10    This program is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 3 of the License, or
  13    (at your option) any later version.
  14
  15    This program is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  22
  23 #include "defs.h"
  24 #include "gdb_obstack.h"
  25 #include "bcache.h"
  26 #include "gdb_string.h"         /* For memcpy declaration */
  27 #include "gdb_assert.h"
  28
  29 #include <stddef.h>
  30 #include <stdlib.h>
  31
/* The type used to hold a single bcache string.  The user data is
   stored in d.data.  Since it can be any type, it needs to have the
   same alignment as the most strict alignment of any type on the host
   machine.  I don't know of any really correct way to do this in
   stock ANSI C, so just do it the same way obstack.h does.

   A bstring is allocated with BSTRING_SIZE (n) bytes, so the storage
   behind d.data extends past its declared one-element size to hold
   all N bytes of user data.  */

struct bstring
{
  /* Hash chain: the next bstring in the same bucket, or NULL.  */
  struct bstring *next;
  /* Assume the data length is no more than 64k.  A larger length is
     truncated when it is stored here.  */
  unsigned short length;
  /* The half hash hack.  This contains bits 16-31 of the hash value
     (the full hash shifted right by 16 and truncated to an unsigned
     short) and is used as a pre-check when comparing two strings and
     avoids the need to do length or memcmp calls.  It proves to be
     roughly 100% effective.  */
  unsigned short half_hash;

  union
  {
    /* First byte of the user data; the rest follows contiguously.  */
    char data[1];
    /* Not referenced in this file; present to give the union, and
       hence the user data, the alignment of a double.  */
    double dummy;
  }
  d;
};
  57
  58
/* The structure for a bcache itself.  bcache_xmalloc() allocates it
   zero-filled and installs the hash and compare functions.  The
   obstack is not set up there: bcache_full() initializes it lazily,
   on the first call made while total_count is still zero.  */

struct bcache
{
  /* All the bstrings are allocated here.  Initialized only once
     something has been cached (total_count != 0).  */
  struct obstack cache;

  /* How many hash buckets we're using.  Zero until the first
     expansion of the table.  */
  unsigned int num_buckets;

  /* Hash buckets.  This table is allocated using malloc, so when we
     grow the table we can return the old table to the system.  */
  struct bstring **bucket;

  /* Statistics.  total_count doubles as the "obstack has been
     initialized" indicator: it is zero exactly until the first
     string is cached.  */
  unsigned long unique_count;   /* number of unique strings */
  long total_count;     /* total number of strings cached, including dups */
  long unique_size;     /* size of unique strings, in bytes */
  long total_size;      /* total number of bytes cached, including dups */
  long structure_size;  /* total size of bcache, including infrastructure */
  /* Number of times that the hash table is expanded and hence
     re-built, and the corresponding number of times that a string is
     [re]hashed as part of entering it into the expanded table.  The
     total number of hashes can be computed by adding TOTAL_COUNT to
     expand_hash_count.  */
  unsigned long expand_count;
  unsigned long expand_hash_count;
  /* Number of times that the half-hash compare (the 16-bit
     half_hash pre-check) hit, but the corresponding combined
     length/data compare missed.  */
  unsigned long half_hash_miss_count;

  /* Hash function to be used for this bcache object.  Returns the
     hash of the LENGTH bytes at ADDR.  */
  unsigned long (*hash_function)(const void *addr, int length);

  /* Compare function to be used for this bcache object.  Returns
     nonzero when the two LENGTH-byte buffers are considered equal.  */
  int (*compare_function)(const void *, const void *, int length);
};
  99
 100 /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
 101  * and is better than the old one.
 102  */
 103 \f
 104 unsigned long
 105 hash(const void *addr, int length)
 106 {
 107   return hash_continue (addr, length, 0);
 108 }
 109
 110 /* Continue the calculation of the hash H at the given address.  */
 111
 112 unsigned long
 113 hash_continue (const void *addr, int length, unsigned long h)
 114 {
 115   const unsigned char *k, *e;
 116
 117   k = (const unsigned char *)addr;
 118   e = k+length;
 119   for (; k< e;++k)
 120     {
 121       h *=16777619;
 122       h ^= *k;
 123     }
 124   return (h);
 125 }
 126 \f
 127 /* Growing the bcache's hash table.  */
 128
 129 /* If the average chain length grows beyond this, then we want to
 130    resize our hash table.  */
 131 #define CHAIN_LENGTH_THRESHOLD (5)
 132
 133 static void
 134 expand_hash_table (struct bcache *bcache)
 135 {
 136   /* A table of good hash table sizes.  Whenever we grow, we pick the
 137      next larger size from this table.  sizes[i] is close to 1 << (i+10),
 138      so we roughly double the table size each time.  After we fall off
 139      the end of this table, we just double.  Don't laugh --- there have
 140      been executables sighted with a gigabyte of debug info.  */
 141   static unsigned long sizes[] = {
 142     1021, 2053, 4099, 8191, 16381, 32771,
 143     65537, 131071, 262144, 524287, 1048573, 2097143,
 144     4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
 145     268435459, 536870923, 1073741827, 2147483659UL
 146   };
 147   unsigned int new_num_buckets;
 148   struct bstring **new_buckets;
 149   unsigned int i;
 150
 151   /* Count the stats.  Every unique item needs to be re-hashed and
 152      re-entered.  */
 153   bcache->expand_count++;
 154   bcache->expand_hash_count += bcache->unique_count;
 155
 156   /* Find the next size.  */
 157   new_num_buckets = bcache->num_buckets * 2;
 158   for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
 159     if (sizes[i] > bcache->num_buckets)
 160       {
 161         new_num_buckets = sizes[i];
 162         break;
 163       }
 164
 165   /* Allocate the new table.  */
 166   {
 167     size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
 168
 169     new_buckets = (struct bstring **) xmalloc (new_size);
 170     memset (new_buckets, 0, new_size);
 171
 172     bcache->structure_size -= (bcache->num_buckets
 173                                * sizeof (bcache->bucket[0]));
 174     bcache->structure_size += new_size;
 175   }
 176
 177   /* Rehash all existing strings.  */
 178   for (i = 0; i < bcache->num_buckets; i++)
 179     {
 180       struct bstring *s, *next;
 181
 182       for (s = bcache->bucket[i]; s; s = next)
 183         {
 184           struct bstring **new_bucket;
 185           next = s->next;
 186
 187           new_bucket = &new_buckets[(hash (&s->d.data, s->length)
 188                                      % new_num_buckets)];
 189           s->next = *new_bucket;
 190           *new_bucket = s;
 191         }
 192     }
 193
 194   /* Plug in the new table.  */
 195   if (bcache->bucket)
 196     xfree (bcache->bucket);
 197   bcache->bucket = new_buckets;
 198   bcache->num_buckets = new_num_buckets;
 199 }
 200
 201 \f
 202 /* Looking up things in the bcache.  */
 203
 204 /* The number of bytes needed to allocate a struct bstring whose data
 205    is N bytes long.  */
 206 #define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
 207
 208 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
 209    never seen those bytes before, add a copy of them to BCACHE.  In
 210    either case, return a pointer to BCACHE's copy of that string.  */
 211 const void *
 212 bcache (const void *addr, int length, struct bcache *bcache)
 213 {
 214   return bcache_full (addr, length, bcache, NULL);
 215 }
 216
 217 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
 218    never seen those bytes before, add a copy of them to BCACHE.  In
 219    either case, return a pointer to BCACHE's copy of that string.  If
 220    optional ADDED is not NULL, return 1 in case of new entry or 0 if
 221    returning an old entry.  */
 222
 223 const void *
 224 bcache_full (const void *addr, int length, struct bcache *bcache, int *added)
 225 {
 226   unsigned long full_hash;
 227   unsigned short half_hash;
 228   int hash_index;
 229   struct bstring *s;
 230
 231   if (added)
 232     *added = 0;
 233
 234   /* Lazily initialize the obstack.  This can save quite a bit of
 235      memory in some cases.  */
 236   if (bcache->total_count == 0)
 237     {
 238       /* We could use obstack_specify_allocation here instead, but
 239          gdb_obstack.h specifies the allocation/deallocation
 240          functions.  */
 241       obstack_init (&bcache->cache);
 242     }
 243
 244   /* If our average chain length is too high, expand the hash table.  */
 245   if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
 246     expand_hash_table (bcache);
 247
 248   bcache->total_count++;
 249   bcache->total_size += length;
 250
 251   full_hash = bcache->hash_function (addr, length);
 252
 253   half_hash = (full_hash >> 16);
 254   hash_index = full_hash % bcache->num_buckets;
 255
 256   /* Search the hash bucket for a string identical to the caller's.
 257      As a short-circuit first compare the upper part of each hash
 258      values.  */
 259   for (s = bcache->bucket[hash_index]; s; s = s->next)
 260     {
 261       if (s->half_hash == half_hash)
 262         {
 263           if (s->length == length
 264               && bcache->compare_function (&s->d.data, addr, length))
 265             return &s->d.data;
 266           else
 267             bcache->half_hash_miss_count++;
 268         }
 269     }
 270
 271   /* The user's string isn't in the list.  Insert it after *ps.  */
 272   {
 273     struct bstring *new
 274       = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
 275
 276     memcpy (&new->d.data, addr, length);
 277     new->length = length;
 278     new->next = bcache->bucket[hash_index];
 279     new->half_hash = half_hash;
 280     bcache->bucket[hash_index] = new;
 281
 282     bcache->unique_count++;
 283     bcache->unique_size += length;
 284     bcache->structure_size += BSTRING_SIZE (length);
 285
 286     if (added)
 287       *added = 1;
 288
 289     return &new->d.data;
 290   }
 291 }
 292 \f
 293
 294 /* Compare the byte string at ADDR1 of lenght LENGHT to the
 295    string at ADDR2.  Return 1 if they are equal.  */
 296
 297 static int
 298 bcache_compare (const void *addr1, const void *addr2, int length)
 299 {
 300   return memcmp (addr1, addr2, length) == 0;
 301 }
 302
 303 /* Allocating and freeing bcaches.  */
 304
 305 /* Allocated a bcache.  HASH_FUNCTION and COMPARE_FUNCTION can be used
 306    to pass in custom hash, and compare functions to be used by this
 307    bcache. If HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION
 308    is NULL memcmp() is used.  */
 309
 310 struct bcache *
 311 bcache_xmalloc (unsigned long (*hash_function)(const void *, int length),
 312                 int (*compare_function)(const void *, const void *, int length))
 313 {
 314   /* Allocate the bcache pre-zeroed.  */
 315   struct bcache *b = XCALLOC (1, struct bcache);
 316
 317   if (hash_function)
 318     b->hash_function = hash_function;
 319   else
 320     b->hash_function = hash;
 321
 322   if (compare_function)
 323     b->compare_function = compare_function;
 324   else
 325     b->compare_function = bcache_compare;
 326   return b;
 327 }
 328
 329 /* Free all the storage associated with BCACHE.  */
 330 void
 331 bcache_xfree (struct bcache *bcache)
 332 {
 333   if (bcache == NULL)
 334     return;
 335   /* Only free the obstack if we actually initialized it.  */
 336   if (bcache->total_count > 0)
 337     obstack_free (&bcache->cache, 0);
 338   xfree (bcache->bucket);
 339   xfree (bcache);
 340 }
 341
 342
 343 \f
 344 /* Printing statistics.  */
 345
 346 static void
 347 print_percentage (int portion, int total)
 348 {
 349   if (total == 0)
 350     /* i18n: Like "Percentage of duplicates, by count: (not applicable)" */
 351     printf_filtered (_("(not applicable)\n"));
 352   else
 353     printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total));
 354 }
 355
 356
 357 /* Print statistics on BCACHE's memory usage and efficacity at
 358    eliminating duplication.  NAME should describe the kind of data
 359    BCACHE holds.  Statistics are printed using `printf_filtered' and
 360    its ilk.  */
 361 void
 362 print_bcache_statistics (struct bcache *c, char *type)
 363 {
 364   int occupied_buckets;
 365   int max_chain_length;
 366   int median_chain_length;
 367   int max_entry_size;
 368   int median_entry_size;
 369
 370   /* Count the number of occupied buckets, tally the various string
 371      lengths, and measure chain lengths.  */
 372   {
 373     unsigned int b;
 374     int *chain_length = XCALLOC (c->num_buckets + 1, int);
 375     int *entry_size = XCALLOC (c->unique_count + 1, int);
 376     int stringi = 0;
 377
 378     occupied_buckets = 0;
 379
 380     for (b = 0; b < c->num_buckets; b++)
 381       {
 382         struct bstring *s = c->bucket[b];
 383
 384         chain_length[b] = 0;
 385
 386         if (s)
 387           {
 388             occupied_buckets++;
 389
 390             while (s)
 391               {
 392                 gdb_assert (b < c->num_buckets);
 393                 chain_length[b]++;
 394                 gdb_assert (stringi < c->unique_count);
 395                 entry_size[stringi++] = s->length;
 396                 s = s->next;
 397               }
 398           }
 399       }
 400
 401     /* To compute the median, we need the set of chain lengths sorted.  */
 402     qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
 403            compare_positive_ints);
 404     qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
 405            compare_positive_ints);
 406
 407     if (c->num_buckets > 0)
 408       {
 409         max_chain_length = chain_length[c->num_buckets - 1];
 410         median_chain_length = chain_length[c->num_buckets / 2];
 411       }
 412     else
 413       {
 414         max_chain_length = 0;
 415         median_chain_length = 0;
 416       }
 417     if (c->unique_count > 0)
 418       {
 419         max_entry_size = entry_size[c->unique_count - 1];
 420         median_entry_size = entry_size[c->unique_count / 2];
 421       }
 422     else
 423       {
 424         max_entry_size = 0;
 425         median_entry_size = 0;
 426       }
 427
 428     xfree (chain_length);
 429     xfree (entry_size);
 430   }
 431
 432   printf_filtered (_("  Cached '%s' statistics:\n"), type);
 433   printf_filtered (_("    Total object count:  %ld\n"), c->total_count);
 434   printf_filtered (_("    Unique object count: %lu\n"), c->unique_count);
 435   printf_filtered (_("    Percentage of duplicates, by count: "));
 436   print_percentage (c->total_count - c->unique_count, c->total_count);
 437   printf_filtered ("\n");
 438
 439   printf_filtered (_("    Total object size:   %ld\n"), c->total_size);
 440   printf_filtered (_("    Unique object size:  %ld\n"), c->unique_size);
 441   printf_filtered (_("    Percentage of duplicates, by size:  "));
 442   print_percentage (c->total_size - c->unique_size, c->total_size);
 443   printf_filtered ("\n");
 444
 445   printf_filtered (_("    Max entry size:     %d\n"), max_entry_size);
 446   printf_filtered (_("    Average entry size: "));
 447   if (c->unique_count > 0)
 448     printf_filtered ("%ld\n", c->unique_size / c->unique_count);
 449   else
 450     /* i18n: "Average entry size: (not applicable)" */
 451     printf_filtered (_("(not applicable)\n"));
 452   printf_filtered (_("    Median entry size:  %d\n"), median_entry_size);
 453   printf_filtered ("\n");
 454
 455   printf_filtered (_("    Total memory used by bcache, including overhead: %ld\n"),
 456                    c->structure_size);
 457   printf_filtered (_("    Percentage memory overhead: "));
 458   print_percentage (c->structure_size - c->unique_size, c->unique_size);
 459   printf_filtered (_("    Net memory savings:         "));
 460   print_percentage (c->total_size - c->structure_size, c->total_size);
 461   printf_filtered ("\n");
 462
 463   printf_filtered (_("    Hash table size:           %3d\n"), c->num_buckets);
 464   printf_filtered (_("    Hash table expands:        %lu\n"),
 465                    c->expand_count);
 466   printf_filtered (_("    Hash table hashes:         %lu\n"),
 467                    c->total_count + c->expand_hash_count);
 468   printf_filtered (_("    Half hash misses:          %lu\n"),
 469                    c->half_hash_miss_count);
 470   printf_filtered (_("    Hash table population:     "));
 471   print_percentage (occupied_buckets, c->num_buckets);
 472   printf_filtered (_("    Median hash chain length:  %3d\n"),
 473                    median_chain_length);
 474   printf_filtered (_("    Average hash chain length: "));
 475   if (c->num_buckets > 0)
 476     printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
 477   else
 478     /* i18n: "Average hash chain length: (not applicable)" */
 479     printf_filtered (_("(not applicable)\n"));
 480   printf_filtered (_("    Maximum hash chain length: %3d\n"), max_chain_length);
 481   printf_filtered ("\n");
 482 }
 483
 484 int
 485 bcache_memory_used (struct bcache *bcache)
 486 {
 487   if (bcache->total_count == 0)
 488     return 0;
 489   return obstack_memory_used (&bcache->cache);
 490 }