GDB (API)
/home/stan/gdb/src/gdb/bcache.c
Go to the documentation of this file.
00001 /* Implement a cached obstack.
00002    Written by Fred Fish <fnf@cygnus.com>
00003    Rewritten by Jim Blandy <jimb@cygnus.com>
00004 
00005    Copyright (C) 1999-2013 Free Software Foundation, Inc.
00006 
00007    This file is part of GDB.
00008 
00009    This program is free software; you can redistribute it and/or modify
00010    it under the terms of the GNU General Public License as published by
00011    the Free Software Foundation; either version 3 of the License, or
00012    (at your option) any later version.
00013 
00014    This program is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017    GNU General Public License for more details.
00018 
00019    You should have received a copy of the GNU General Public License
00020    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
00021 
00022 #include "defs.h"
00023 #include "gdb_obstack.h"
00024 #include "bcache.h"
00025 #include "gdb_string.h"         /* For memcpy declaration */
00026 #include "gdb_assert.h"
00027 
00028 #include <stddef.h>
00029 #include <stdlib.h>
00030 
00031 /* The type used to hold a single bcache string.  The user data is
00032    stored in d.data.  Since it can be any type, it needs to have the
00033    same alignment as the most strict alignment of any type on the host
00034    machine.  I don't know of any really correct way to do this in
00035    stock ANSI C, so just do it the same way obstack.h does.  */
00036 
00037 struct bstring
00038 {
00039   /* Hash chain.  */
00040   struct bstring *next;
00041   /* Assume the data length is no more than 64k.  */
00042   unsigned short length;
00043   /* The half hash hack.  This contains the upper 16 bits of the hash
00044      value and is used as a pre-check when comparing two strings and
00045      avoids the need to do length or memcmp calls.  It proves to be
00046      roughly 100% effective.  */
00047   unsigned short half_hash;
00048 
00049   union
00050   {
00051     char data[1];
00052     double dummy;
00053   }
00054   d;
00055 };
00056 
00057 
00058 /* The structure for a bcache itself.  The bcache is initialized, in
00059    bcache_xmalloc(), by filling it with zeros and then setting the
00060    corresponding obstack's malloc() and free() methods.  */
00061 
00062 struct bcache
00063 {
00064   /* All the bstrings are allocated here.  */
00065   struct obstack cache;
00066 
00067   /* How many hash buckets we're using.  */
00068   unsigned int num_buckets;
00069   
00070   /* Hash buckets.  This table is allocated using malloc, so when we
00071      grow the table we can return the old table to the system.  */
00072   struct bstring **bucket;
00073 
00074   /* Statistics.  */
00075   unsigned long unique_count;   /* number of unique strings */
00076   long total_count;     /* total number of strings cached, including dups */
00077   long unique_size;     /* size of unique strings, in bytes */
00078   long total_size;      /* total number of bytes cached, including dups */
00079   long structure_size;  /* total size of bcache, including infrastructure */
00080   /* Number of times that the hash table is expanded and hence
00081      re-built, and the corresponding number of times that a string is
00082      [re]hashed as part of entering it into the expanded table.  The
00083      total number of hashes can be computed by adding TOTAL_COUNT to
00084      expand_hash_count.  */
00085   unsigned long expand_count;
00086   unsigned long expand_hash_count;
00087   /* Number of times that the half-hash compare hit (compare the upper
00088      16 bits of hash values) hit, but the corresponding combined
00089      length/data compare missed.  */
00090   unsigned long half_hash_miss_count;
00091 
00092   /* Hash function to be used for this bcache object.  */
00093   unsigned long (*hash_function)(const void *addr, int length);
00094 
00095   /* Compare function to be used for this bcache object.  */
00096   int (*compare_function)(const void *, const void *, int length);
00097 };
00098 
00099 /* The old hash function was stolen from SDBM. This is what DB 3.0
00100    uses now, and is better than the old one.  */
00101 
00102 unsigned long
00103 hash(const void *addr, int length)
00104 {
00105   return hash_continue (addr, length, 0);
00106 }
00107 
00108 /* Continue the calculation of the hash H at the given address.  */
00109 
00110 unsigned long
00111 hash_continue (const void *addr, int length, unsigned long h)
00112 {
00113   const unsigned char *k, *e;
00114 
00115   k = (const unsigned char *)addr;
00116   e = k+length;
00117   for (; k< e;++k)
00118     {
00119       h *=16777619;
00120       h ^= *k;
00121     }
00122   return (h);
00123 }
00124 
00125 /* Growing the bcache's hash table.  */
00126 
00127 /* If the average chain length grows beyond this, then we want to
00128    resize our hash table.  */
00129 #define CHAIN_LENGTH_THRESHOLD (5)
00130 
00131 static void
00132 expand_hash_table (struct bcache *bcache)
00133 {
00134   /* A table of good hash table sizes.  Whenever we grow, we pick the
00135      next larger size from this table.  sizes[i] is close to 1 << (i+10),
00136      so we roughly double the table size each time.  After we fall off 
00137      the end of this table, we just double.  Don't laugh --- there have
00138      been executables sighted with a gigabyte of debug info.  */
00139   static unsigned long sizes[] = { 
00140     1021, 2053, 4099, 8191, 16381, 32771,
00141     65537, 131071, 262144, 524287, 1048573, 2097143,
00142     4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
00143     268435459, 536870923, 1073741827, 2147483659UL
00144   };
00145   unsigned int new_num_buckets;
00146   struct bstring **new_buckets;
00147   unsigned int i;
00148 
00149   /* Count the stats.  Every unique item needs to be re-hashed and
00150      re-entered.  */
00151   bcache->expand_count++;
00152   bcache->expand_hash_count += bcache->unique_count;
00153 
00154   /* Find the next size.  */
00155   new_num_buckets = bcache->num_buckets * 2;
00156   for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
00157     if (sizes[i] > bcache->num_buckets)
00158       {
00159         new_num_buckets = sizes[i];
00160         break;
00161       }
00162 
00163   /* Allocate the new table.  */
00164   {
00165     size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
00166 
00167     new_buckets = (struct bstring **) xmalloc (new_size);
00168     memset (new_buckets, 0, new_size);
00169 
00170     bcache->structure_size -= (bcache->num_buckets
00171                                * sizeof (bcache->bucket[0]));
00172     bcache->structure_size += new_size;
00173   }
00174 
00175   /* Rehash all existing strings.  */
00176   for (i = 0; i < bcache->num_buckets; i++)
00177     {
00178       struct bstring *s, *next;
00179 
00180       for (s = bcache->bucket[i]; s; s = next)
00181         {
00182           struct bstring **new_bucket;
00183           next = s->next;
00184 
00185           new_bucket = &new_buckets[(bcache->hash_function (&s->d.data,
00186                                                             s->length)
00187                                      % new_num_buckets)];
00188           s->next = *new_bucket;
00189           *new_bucket = s;
00190         }
00191     }
00192 
00193   /* Plug in the new table.  */
00194   if (bcache->bucket)
00195     xfree (bcache->bucket);
00196   bcache->bucket = new_buckets;
00197   bcache->num_buckets = new_num_buckets;
00198 }
00199 
00200 
00201 /* Looking up things in the bcache.  */
00202 
00203 /* The number of bytes needed to allocate a struct bstring whose data
00204    is N bytes long.  */
00205 #define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
00206 
00207 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
00208    never seen those bytes before, add a copy of them to BCACHE.  In
00209    either case, return a pointer to BCACHE's copy of that string.  */
00210 const void *
00211 bcache (const void *addr, int length, struct bcache *cache)
00212 {
00213   return bcache_full (addr, length, cache, NULL);
00214 }
00215 
00216 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
00217    never seen those bytes before, add a copy of them to BCACHE.  In
00218    either case, return a pointer to BCACHE's copy of that string.  If
00219    optional ADDED is not NULL, return 1 in case of new entry or 0 if
00220    returning an old entry.  */
00221 
00222 const void *
00223 bcache_full (const void *addr, int length, struct bcache *bcache, int *added)
00224 {
00225   unsigned long full_hash;
00226   unsigned short half_hash;
00227   int hash_index;
00228   struct bstring *s;
00229 
00230   if (added)
00231     *added = 0;
00232 
00233   /* Lazily initialize the obstack.  This can save quite a bit of
00234      memory in some cases.  */
00235   if (bcache->total_count == 0)
00236     {
00237       /* We could use obstack_specify_allocation here instead, but
00238          gdb_obstack.h specifies the allocation/deallocation
00239          functions.  */
00240       obstack_init (&bcache->cache);
00241     }
00242 
00243   /* If our average chain length is too high, expand the hash table.  */
00244   if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
00245     expand_hash_table (bcache);
00246 
00247   bcache->total_count++;
00248   bcache->total_size += length;
00249 
00250   full_hash = bcache->hash_function (addr, length);
00251 
00252   half_hash = (full_hash >> 16);
00253   hash_index = full_hash % bcache->num_buckets;
00254 
00255   /* Search the hash bucket for a string identical to the caller's.
00256      As a short-circuit first compare the upper part of each hash
00257      values.  */
00258   for (s = bcache->bucket[hash_index]; s; s = s->next)
00259     {
00260       if (s->half_hash == half_hash)
00261         {
00262           if (s->length == length
00263               && bcache->compare_function (&s->d.data, addr, length))
00264             return &s->d.data;
00265           else
00266             bcache->half_hash_miss_count++;
00267         }
00268     }
00269 
00270   /* The user's string isn't in the list.  Insert it after *ps.  */
00271   {
00272     struct bstring *new
00273       = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
00274 
00275     memcpy (&new->d.data, addr, length);
00276     new->length = length;
00277     new->next = bcache->bucket[hash_index];
00278     new->half_hash = half_hash;
00279     bcache->bucket[hash_index] = new;
00280 
00281     bcache->unique_count++;
00282     bcache->unique_size += length;
00283     bcache->structure_size += BSTRING_SIZE (length);
00284 
00285     if (added)
00286       *added = 1;
00287 
00288     return &new->d.data;
00289   }
00290 }
00291 
00292 
00293 /* Compare the byte string at ADDR1 of lenght LENGHT to the
00294    string at ADDR2.  Return 1 if they are equal.  */
00295 
00296 static int
00297 bcache_compare (const void *addr1, const void *addr2, int length)
00298 {
00299   return memcmp (addr1, addr2, length) == 0;
00300 }
00301 
00302 /* Allocating and freeing bcaches.  */
00303 
00304 /* Allocated a bcache.  HASH_FUNCTION and COMPARE_FUNCTION can be used
00305    to pass in custom hash, and compare functions to be used by this
00306    bcache.  If HASH_FUNCTION is NULL hash() is used and if
00307    COMPARE_FUNCTION is NULL memcmp() is used.  */
00308 
00309 struct bcache *
00310 bcache_xmalloc (unsigned long (*hash_function)(const void *, int length),
00311                 int (*compare_function)(const void *, 
00312                                         const void *, 
00313                                         int length))
00314 {
00315   /* Allocate the bcache pre-zeroed.  */
00316   struct bcache *b = XCALLOC (1, struct bcache);
00317 
00318   if (hash_function)
00319     b->hash_function = hash_function;
00320   else
00321     b->hash_function = hash;
00322 
00323   if (compare_function)
00324     b->compare_function = compare_function;
00325   else
00326     b->compare_function = bcache_compare;
00327   return b;
00328 }
00329 
00330 /* Free all the storage associated with BCACHE.  */
00331 void
00332 bcache_xfree (struct bcache *bcache)
00333 {
00334   if (bcache == NULL)
00335     return;
00336   /* Only free the obstack if we actually initialized it.  */
00337   if (bcache->total_count > 0)
00338     obstack_free (&bcache->cache, 0);
00339   xfree (bcache->bucket);
00340   xfree (bcache);
00341 }
00342 
00343 
00344 
00345 /* Printing statistics.  */
00346 
00347 static void
00348 print_percentage (int portion, int total)
00349 {
00350   if (total == 0)
00351     /* i18n: Like "Percentage of duplicates, by count: (not applicable)".  */
00352     printf_filtered (_("(not applicable)\n"));
00353   else
00354     printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total));
00355 }
00356 
00357 
00358 /* Print statistics on BCACHE's memory usage and efficacity at
00359    eliminating duplication.  NAME should describe the kind of data
00360    BCACHE holds.  Statistics are printed using `printf_filtered' and
00361    its ilk.  */
00362 void
00363 print_bcache_statistics (struct bcache *c, char *type)
00364 {
00365   int occupied_buckets;
00366   int max_chain_length;
00367   int median_chain_length;
00368   int max_entry_size;
00369   int median_entry_size;
00370 
00371   /* Count the number of occupied buckets, tally the various string
00372      lengths, and measure chain lengths.  */
00373   {
00374     unsigned int b;
00375     int *chain_length = XCALLOC (c->num_buckets + 1, int);
00376     int *entry_size = XCALLOC (c->unique_count + 1, int);
00377     int stringi = 0;
00378 
00379     occupied_buckets = 0;
00380 
00381     for (b = 0; b < c->num_buckets; b++)
00382       {
00383         struct bstring *s = c->bucket[b];
00384 
00385         chain_length[b] = 0;
00386 
00387         if (s)
00388           {
00389             occupied_buckets++;
00390             
00391             while (s)
00392               {
00393                 gdb_assert (b < c->num_buckets);
00394                 chain_length[b]++;
00395                 gdb_assert (stringi < c->unique_count);
00396                 entry_size[stringi++] = s->length;
00397                 s = s->next;
00398               }
00399           }
00400       }
00401 
00402     /* To compute the median, we need the set of chain lengths
00403        sorted.  */
00404     qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
00405            compare_positive_ints);
00406     qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
00407            compare_positive_ints);
00408 
00409     if (c->num_buckets > 0)
00410       {
00411         max_chain_length = chain_length[c->num_buckets - 1];
00412         median_chain_length = chain_length[c->num_buckets / 2];
00413       }
00414     else
00415       {
00416         max_chain_length = 0;
00417         median_chain_length = 0;
00418       }
00419     if (c->unique_count > 0)
00420       {
00421         max_entry_size = entry_size[c->unique_count - 1];
00422         median_entry_size = entry_size[c->unique_count / 2];
00423       }
00424     else
00425       {
00426         max_entry_size = 0;
00427         median_entry_size = 0;
00428       }
00429 
00430     xfree (chain_length);
00431     xfree (entry_size);
00432   }
00433 
00434   printf_filtered (_("  Cached '%s' statistics:\n"), type);
00435   printf_filtered (_("    Total object count:  %ld\n"), c->total_count);
00436   printf_filtered (_("    Unique object count: %lu\n"), c->unique_count);
00437   printf_filtered (_("    Percentage of duplicates, by count: "));
00438   print_percentage (c->total_count - c->unique_count, c->total_count);
00439   printf_filtered ("\n");
00440 
00441   printf_filtered (_("    Total object size:   %ld\n"), c->total_size);
00442   printf_filtered (_("    Unique object size:  %ld\n"), c->unique_size);
00443   printf_filtered (_("    Percentage of duplicates, by size:  "));
00444   print_percentage (c->total_size - c->unique_size, c->total_size);
00445   printf_filtered ("\n");
00446 
00447   printf_filtered (_("    Max entry size:     %d\n"), max_entry_size);
00448   printf_filtered (_("    Average entry size: "));
00449   if (c->unique_count > 0)
00450     printf_filtered ("%ld\n", c->unique_size / c->unique_count);
00451   else
00452     /* i18n: "Average entry size: (not applicable)".  */
00453     printf_filtered (_("(not applicable)\n"));    
00454   printf_filtered (_("    Median entry size:  %d\n"), median_entry_size);
00455   printf_filtered ("\n");
00456 
00457   printf_filtered (_("    \
00458 Total memory used by bcache, including overhead: %ld\n"),
00459                    c->structure_size);
00460   printf_filtered (_("    Percentage memory overhead: "));
00461   print_percentage (c->structure_size - c->unique_size, c->unique_size);
00462   printf_filtered (_("    Net memory savings:         "));
00463   print_percentage (c->total_size - c->structure_size, c->total_size);
00464   printf_filtered ("\n");
00465 
00466   printf_filtered (_("    Hash table size:           %3d\n"), 
00467                    c->num_buckets);
00468   printf_filtered (_("    Hash table expands:        %lu\n"),
00469                    c->expand_count);
00470   printf_filtered (_("    Hash table hashes:         %lu\n"),
00471                    c->total_count + c->expand_hash_count);
00472   printf_filtered (_("    Half hash misses:          %lu\n"),
00473                    c->half_hash_miss_count);
00474   printf_filtered (_("    Hash table population:     "));
00475   print_percentage (occupied_buckets, c->num_buckets);
00476   printf_filtered (_("    Median hash chain length:  %3d\n"),
00477                    median_chain_length);
00478   printf_filtered (_("    Average hash chain length: "));
00479   if (c->num_buckets > 0)
00480     printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
00481   else
00482     /* i18n: "Average hash chain length: (not applicable)".  */
00483     printf_filtered (_("(not applicable)\n"));
00484   printf_filtered (_("    Maximum hash chain length: %3d\n"), 
00485                    max_chain_length);
00486   printf_filtered ("\n");
00487 }
00488 
00489 int
00490 bcache_memory_used (struct bcache *bcache)
00491 {
00492   if (bcache->total_count == 0)
00493     return 0;
00494   return obstack_memory_used (&bcache->cache);
00495 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines