gdb/bcache.c

   1 /* Implement a cached obstack.
   2    Written by Fred Fish <fnf@cygnus.com>
   3    Rewritten by Jim Blandy <jimb@cygnus.com>
   4
   5    Copyright (C) 1999-2014 Free Software Foundation, Inc.
   6
   7    This file is part of GDB.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  21
  22 #include "defs.h"
  23 #include "gdb_obstack.h"
  24 #include "bcache.h"
  25 #include <string.h>             /* For memcpy declaration */
  26 #include "gdb_assert.h"
  27
  28 #include <stddef.h>
  29
/* The type used to hold a single bcache string.  The user data is
   stored in d.data.  Since it can be any type, it needs to have the
   same alignment as the most strict alignment of any type on the host
   machine.  I don't know of any really correct way to do this in
   stock ANSI C, so just do it the same way obstack.h does.

   Only one byte of d.data is declared here.  Entries are allocated
   with BSTRING_SIZE (length) bytes, so the cached data runs on past
   the declared end of the array.  */

struct bstring
{
  /* Hash chain: the next entry in the same bucket, or NULL.  */
  struct bstring *next;
  /* Number of bytes of user data stored in d.data.  Assume the data
     length is no more than 64k; a larger length does not fit in this
     field.  */
  unsigned short length;
  /* The half hash hack.  This holds the entry's hash value shifted
     right by 16 bits and truncated to an unsigned short.  It is used
     as a pre-check when comparing two strings and avoids the need to
     do length or memcmp calls.  It proves to be roughly 100%
     effective.  */
  unsigned short half_hash;

  union
  {
    /* First byte of the cached user data.  */
    char data[1];
    /* Not referenced anywhere in this file; present to give the
       union (and hence the data) the alignment of a double.  */
    double dummy;
  }
  d;
};
  55
  56
/* The structure for a bcache itself.  The bcache is created, in
   bcache_xmalloc(), by allocating it filled with zeros and then
   installing the hash and compare functions.  The obstack is not
   set up at that point: bcache_full() initializes it lazily, the
   first time something is cached (while TOTAL_COUNT is still 0).  */

struct bcache
{
  /* All the bstrings are allocated here.  Only valid once
     TOTAL_COUNT is non-zero.  */
  struct obstack cache;

  /* How many hash buckets we're using.  Zero until the first
     expansion of the table.  */
  unsigned int num_buckets;

  /* Hash buckets.  This table is allocated using malloc, so when we
     grow the table we can return the old table to the system.  */
  struct bstring **bucket;

  /* Statistics.  */
  unsigned long unique_count;	/* number of unique strings */
  long total_count;	/* total number of strings cached, including dups */
  long unique_size;	/* size of unique strings, in bytes */
  long total_size;      /* total number of bytes cached, including dups */
  long structure_size;	/* total size of bcache, including infrastructure */
  /* Number of times that the hash table is expanded and hence
     re-built, and the corresponding number of times that a string is
     [re]hashed as part of entering it into the expanded table.  The
     total number of hashes can be computed by adding TOTAL_COUNT to
     expand_hash_count.  */
  unsigned long expand_count;
  unsigned long expand_hash_count;
  /* Number of times that the half-hash compare (of the stored
     HALF_HASH values) hit, but the corresponding combined
     length/data compare missed.  */
  unsigned long half_hash_miss_count;

  /* Hash function to be used for this bcache object.  */
  unsigned long (*hash_function)(const void *addr, int length);

  /* Compare function to be used for this bcache object.  Must return
     non-zero when the two LENGTH-byte objects are equal.  */
  int (*compare_function)(const void *, const void *, int length);
};
  97
  98 /* The old hash function was stolen from SDBM. This is what DB 3.0
  99    uses now, and is better than the old one.  */
 100 \f
 101 unsigned long
 102 hash(const void *addr, int length)
 103 {
 104   return hash_continue (addr, length, 0);
 105 }
 106
 107 /* Continue the calculation of the hash H at the given address.  */
 108
 109 unsigned long
 110 hash_continue (const void *addr, int length, unsigned long h)
 111 {
 112   const unsigned char *k, *e;
 113
 114   k = (const unsigned char *)addr;
 115   e = k+length;
 116   for (; k< e;++k)
 117     {
 118       h *=16777619;
 119       h ^= *k;
 120     }
 121   return (h);
 122 }
 123 \f
 124 /* Growing the bcache's hash table.  */
 125
/* If the average chain length grows beyond this, then we want to
   resize our hash table.  bcache_full expands the table whenever the
   number of unique entries has reached this many times the number of
   buckets.  */
#define CHAIN_LENGTH_THRESHOLD (5)
 129
 130 static void
 131 expand_hash_table (struct bcache *bcache)
 132 {
 133   /* A table of good hash table sizes.  Whenever we grow, we pick the
 134      next larger size from this table.  sizes[i] is close to 1 << (i+10),
 135      so we roughly double the table size each time.  After we fall off
 136      the end of this table, we just double.  Don't laugh --- there have
 137      been executables sighted with a gigabyte of debug info.  */
 138   static unsigned long sizes[] = {
 139     1021, 2053, 4099, 8191, 16381, 32771,
 140     65537, 131071, 262144, 524287, 1048573, 2097143,
 141     4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
 142     268435459, 536870923, 1073741827, 2147483659UL
 143   };
 144   unsigned int new_num_buckets;
 145   struct bstring **new_buckets;
 146   unsigned int i;
 147
 148   /* Count the stats.  Every unique item needs to be re-hashed and
 149      re-entered.  */
 150   bcache->expand_count++;
 151   bcache->expand_hash_count += bcache->unique_count;
 152
 153   /* Find the next size.  */
 154   new_num_buckets = bcache->num_buckets * 2;
 155   for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
 156     if (sizes[i] > bcache->num_buckets)
 157       {
 158         new_num_buckets = sizes[i];
 159         break;
 160       }
 161
 162   /* Allocate the new table.  */
 163   {
 164     size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
 165
 166     new_buckets = (struct bstring **) xmalloc (new_size);
 167     memset (new_buckets, 0, new_size);
 168
 169     bcache->structure_size -= (bcache->num_buckets
 170                                * sizeof (bcache->bucket[0]));
 171     bcache->structure_size += new_size;
 172   }
 173
 174   /* Rehash all existing strings.  */
 175   for (i = 0; i < bcache->num_buckets; i++)
 176     {
 177       struct bstring *s, *next;
 178
 179       for (s = bcache->bucket[i]; s; s = next)
 180         {
 181           struct bstring **new_bucket;
 182           next = s->next;
 183
 184           new_bucket = &new_buckets[(bcache->hash_function (&s->d.data,
 185                                                             s->length)
 186                                      % new_num_buckets)];
 187           s->next = *new_bucket;
 188           *new_bucket = s;
 189         }
 190     }
 191
 192   /* Plug in the new table.  */
 193   if (bcache->bucket)
 194     xfree (bcache->bucket);
 195   bcache->bucket = new_buckets;
 196   bcache->num_buckets = new_num_buckets;
 197 }
 198
 199 \f
 200 /* Looking up things in the bcache.  */
 201
/* The number of bytes needed to allocate a struct bstring whose data
   is N bytes long: the offset of the d.data member within the
   structure, plus the N bytes of data themselves.  */
#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
 205
 206 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
 207    never seen those bytes before, add a copy of them to BCACHE.  In
 208    either case, return a pointer to BCACHE's copy of that string.  */
 209 const void *
 210 bcache (const void *addr, int length, struct bcache *cache)
 211 {
 212   return bcache_full (addr, length, cache, NULL);
 213 }
 214
 215 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
 216    never seen those bytes before, add a copy of them to BCACHE.  In
 217    either case, return a pointer to BCACHE's copy of that string.  If
 218    optional ADDED is not NULL, return 1 in case of new entry or 0 if
 219    returning an old entry.  */
 220
 221 const void *
 222 bcache_full (const void *addr, int length, struct bcache *bcache, int *added)
 223 {
 224   unsigned long full_hash;
 225   unsigned short half_hash;
 226   int hash_index;
 227   struct bstring *s;
 228
 229   if (added)
 230     *added = 0;
 231
 232   /* Lazily initialize the obstack.  This can save quite a bit of
 233      memory in some cases.  */
 234   if (bcache->total_count == 0)
 235     {
 236       /* We could use obstack_specify_allocation here instead, but
 237          gdb_obstack.h specifies the allocation/deallocation
 238          functions.  */
 239       obstack_init (&bcache->cache);
 240     }
 241
 242   /* If our average chain length is too high, expand the hash table.  */
 243   if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
 244     expand_hash_table (bcache);
 245
 246   bcache->total_count++;
 247   bcache->total_size += length;
 248
 249   full_hash = bcache->hash_function (addr, length);
 250
 251   half_hash = (full_hash >> 16);
 252   hash_index = full_hash % bcache->num_buckets;
 253
 254   /* Search the hash bucket for a string identical to the caller's.
 255      As a short-circuit first compare the upper part of each hash
 256      values.  */
 257   for (s = bcache->bucket[hash_index]; s; s = s->next)
 258     {
 259       if (s->half_hash == half_hash)
 260         {
 261           if (s->length == length
 262               && bcache->compare_function (&s->d.data, addr, length))
 263             return &s->d.data;
 264           else
 265             bcache->half_hash_miss_count++;
 266         }
 267     }
 268
 269   /* The user's string isn't in the list.  Insert it after *ps.  */
 270   {
 271     struct bstring *new
 272       = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
 273
 274     memcpy (&new->d.data, addr, length);
 275     new->length = length;
 276     new->next = bcache->bucket[hash_index];
 277     new->half_hash = half_hash;
 278     bcache->bucket[hash_index] = new;
 279
 280     bcache->unique_count++;
 281     bcache->unique_size += length;
 282     bcache->structure_size += BSTRING_SIZE (length);
 283
 284     if (added)
 285       *added = 1;
 286
 287     return &new->d.data;
 288   }
 289 }
 290 \f
 291
 292 /* Compare the byte string at ADDR1 of lenght LENGHT to the
 293    string at ADDR2.  Return 1 if they are equal.  */
 294
 295 static int
 296 bcache_compare (const void *addr1, const void *addr2, int length)
 297 {
 298   return memcmp (addr1, addr2, length) == 0;
 299 }
 300
 301 /* Allocating and freeing bcaches.  */
 302
 303 /* Allocated a bcache.  HASH_FUNCTION and COMPARE_FUNCTION can be used
 304    to pass in custom hash, and compare functions to be used by this
 305    bcache.  If HASH_FUNCTION is NULL hash() is used and if
 306    COMPARE_FUNCTION is NULL memcmp() is used.  */
 307
 308 struct bcache *
 309 bcache_xmalloc (unsigned long (*hash_function)(const void *, int length),
 310                 int (*compare_function)(const void *,
 311                                         const void *,
 312                                         int length))
 313 {
 314   /* Allocate the bcache pre-zeroed.  */
 315   struct bcache *b = XCNEW (struct bcache);
 316
 317   if (hash_function)
 318     b->hash_function = hash_function;
 319   else
 320     b->hash_function = hash;
 321
 322   if (compare_function)
 323     b->compare_function = compare_function;
 324   else
 325     b->compare_function = bcache_compare;
 326   return b;
 327 }
 328
 329 /* Free all the storage associated with BCACHE.  */
 330 void
 331 bcache_xfree (struct bcache *bcache)
 332 {
 333   if (bcache == NULL)
 334     return;
 335   /* Only free the obstack if we actually initialized it.  */
 336   if (bcache->total_count > 0)
 337     obstack_free (&bcache->cache, 0);
 338   xfree (bcache->bucket);
 339   xfree (bcache);
 340 }
 341
 342
 343 \f
 344 /* Printing statistics.  */
 345
 346 static void
 347 print_percentage (int portion, int total)
 348 {
 349   if (total == 0)
 350     /* i18n: Like "Percentage of duplicates, by count: (not applicable)".  */
 351     printf_filtered (_("(not applicable)\n"));
 352   else
 353     printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total));
 354 }
 355
 356
 357 /* Print statistics on BCACHE's memory usage and efficacity at
 358    eliminating duplication.  NAME should describe the kind of data
 359    BCACHE holds.  Statistics are printed using `printf_filtered' and
 360    its ilk.  */
 361 void
 362 print_bcache_statistics (struct bcache *c, char *type)
 363 {
 364   int occupied_buckets;
 365   int max_chain_length;
 366   int median_chain_length;
 367   int max_entry_size;
 368   int median_entry_size;
 369
 370   /* Count the number of occupied buckets, tally the various string
 371      lengths, and measure chain lengths.  */
 372   {
 373     unsigned int b;
 374     int *chain_length = XCNEWVEC (int, c->num_buckets + 1);
 375     int *entry_size = XCNEWVEC (int, c->unique_count + 1);
 376     int stringi = 0;
 377
 378     occupied_buckets = 0;
 379
 380     for (b = 0; b < c->num_buckets; b++)
 381       {
 382         struct bstring *s = c->bucket[b];
 383
 384         chain_length[b] = 0;
 385
 386         if (s)
 387           {
 388             occupied_buckets++;
 389
 390             while (s)
 391               {
 392                 gdb_assert (b < c->num_buckets);
 393                 chain_length[b]++;
 394                 gdb_assert (stringi < c->unique_count);
 395                 entry_size[stringi++] = s->length;
 396                 s = s->next;
 397               }
 398           }
 399       }
 400
 401     /* To compute the median, we need the set of chain lengths
 402        sorted.  */
 403     qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
 404            compare_positive_ints);
 405     qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
 406            compare_positive_ints);
 407
 408     if (c->num_buckets > 0)
 409       {
 410         max_chain_length = chain_length[c->num_buckets - 1];
 411         median_chain_length = chain_length[c->num_buckets / 2];
 412       }
 413     else
 414       {
 415         max_chain_length = 0;
 416         median_chain_length = 0;
 417       }
 418     if (c->unique_count > 0)
 419       {
 420         max_entry_size = entry_size[c->unique_count - 1];
 421         median_entry_size = entry_size[c->unique_count / 2];
 422       }
 423     else
 424       {
 425         max_entry_size = 0;
 426         median_entry_size = 0;
 427       }
 428
 429     xfree (chain_length);
 430     xfree (entry_size);
 431   }
 432
 433   printf_filtered (_("  Cached '%s' statistics:\n"), type);
 434   printf_filtered (_("    Total object count:  %ld\n"), c->total_count);
 435   printf_filtered (_("    Unique object count: %lu\n"), c->unique_count);
 436   printf_filtered (_("    Percentage of duplicates, by count: "));
 437   print_percentage (c->total_count - c->unique_count, c->total_count);
 438   printf_filtered ("\n");
 439
 440   printf_filtered (_("    Total object size:   %ld\n"), c->total_size);
 441   printf_filtered (_("    Unique object size:  %ld\n"), c->unique_size);
 442   printf_filtered (_("    Percentage of duplicates, by size:  "));
 443   print_percentage (c->total_size - c->unique_size, c->total_size);
 444   printf_filtered ("\n");
 445
 446   printf_filtered (_("    Max entry size:     %d\n"), max_entry_size);
 447   printf_filtered (_("    Average entry size: "));
 448   if (c->unique_count > 0)
 449     printf_filtered ("%ld\n", c->unique_size / c->unique_count);
 450   else
 451     /* i18n: "Average entry size: (not applicable)".  */
 452     printf_filtered (_("(not applicable)\n"));
 453   printf_filtered (_("    Median entry size:  %d\n"), median_entry_size);
 454   printf_filtered ("\n");
 455
 456   printf_filtered (_("    \
 457 Total memory used by bcache, including overhead: %ld\n"),
 458                    c->structure_size);
 459   printf_filtered (_("    Percentage memory overhead: "));
 460   print_percentage (c->structure_size - c->unique_size, c->unique_size);
 461   printf_filtered (_("    Net memory savings:         "));
 462   print_percentage (c->total_size - c->structure_size, c->total_size);
 463   printf_filtered ("\n");
 464
 465   printf_filtered (_("    Hash table size:           %3d\n"),
 466                    c->num_buckets);
 467   printf_filtered (_("    Hash table expands:        %lu\n"),
 468                    c->expand_count);
 469   printf_filtered (_("    Hash table hashes:         %lu\n"),
 470                    c->total_count + c->expand_hash_count);
 471   printf_filtered (_("    Half hash misses:          %lu\n"),
 472                    c->half_hash_miss_count);
 473   printf_filtered (_("    Hash table population:     "));
 474   print_percentage (occupied_buckets, c->num_buckets);
 475   printf_filtered (_("    Median hash chain length:  %3d\n"),
 476                    median_chain_length);
 477   printf_filtered (_("    Average hash chain length: "));
 478   if (c->num_buckets > 0)
 479     printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
 480   else
 481     /* i18n: "Average hash chain length: (not applicable)".  */
 482     printf_filtered (_("(not applicable)\n"));
 483   printf_filtered (_("    Maximum hash chain length: %3d\n"),
 484                    max_chain_length);
 485   printf_filtered ("\n");
 486 }
 487
 488 int
 489 bcache_memory_used (struct bcache *bcache)
 490 {
 491   if (bcache->total_count == 0)
 492     return 0;
 493   return obstack_memory_used (&bcache->cache);
 494 }