GDB (API)
|
00001 /* Implement a cached obstack. 00002 Written by Fred Fish <fnf@cygnus.com> 00003 Rewritten by Jim Blandy <jimb@cygnus.com> 00004 00005 Copyright (C) 1999-2013 Free Software Foundation, Inc. 00006 00007 This file is part of GDB. 00008 00009 This program is free software; you can redistribute it and/or modify 00010 it under the terms of the GNU General Public License as published by 00011 the Free Software Foundation; either version 3 of the License, or 00012 (at your option) any later version. 00013 00014 This program is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 GNU General Public License for more details. 00018 00019 You should have received a copy of the GNU General Public License 00020 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 00021 00022 #include "defs.h" 00023 #include "gdb_obstack.h" 00024 #include "bcache.h" 00025 #include "gdb_string.h" /* For memcpy declaration */ 00026 #include "gdb_assert.h" 00027 00028 #include <stddef.h> 00029 #include <stdlib.h> 00030 00031 /* The type used to hold a single bcache string. The user data is 00032 stored in d.data. Since it can be any type, it needs to have the 00033 same alignment as the most strict alignment of any type on the host 00034 machine. I don't know of any really correct way to do this in 00035 stock ANSI C, so just do it the same way obstack.h does. */ 00036 00037 struct bstring 00038 { 00039 /* Hash chain. */ 00040 struct bstring *next; 00041 /* Assume the data length is no more than 64k. */ 00042 unsigned short length; 00043 /* The half hash hack. This contains the upper 16 bits of the hash 00044 value and is used as a pre-check when comparing two strings and 00045 avoids the need to do length or memcmp calls. It proves to be 00046 roughly 100% effective. */ 00047 unsigned short half_hash; 00048 00049 union 00050 { 00051 char data[1]; 00052 double dummy; 00053 } 00054 d; 00055 }; 00056 00057 00058 /* The structure for a bcache itself. The bcache is initialized, in 00059 bcache_xmalloc(), by filling it with zeros and then setting the 00060 corresponding obstack's malloc() and free() methods. */ 00061 00062 struct bcache 00063 { 00064 /* All the bstrings are allocated here. */ 00065 struct obstack cache; 00066 00067 /* How many hash buckets we're using. */ 00068 unsigned int num_buckets; 00069 00070 /* Hash buckets. This table is allocated using malloc, so when we 00071 grow the table we can return the old table to the system. */ 00072 struct bstring **bucket; 00073 00074 /* Statistics. */ 00075 unsigned long unique_count; /* number of unique strings */ 00076 long total_count; /* total number of strings cached, including dups */ 00077 long unique_size; /* size of unique strings, in bytes */ 00078 long total_size; /* total number of bytes cached, including dups */ 00079 long structure_size; /* total size of bcache, including infrastructure */ 00080 /* Number of times that the hash table is expanded and hence 00081 re-built, and the corresponding number of times that a string is 00082 [re]hashed as part of entering it into the expanded table. The 00083 total number of hashes can be computed by adding TOTAL_COUNT to 00084 expand_hash_count. */ 00085 unsigned long expand_count; 00086 unsigned long expand_hash_count; 00087 /* Number of times that the half-hash compare hit (compare the upper 00088 16 bits of hash values) hit, but the corresponding combined 00089 length/data compare missed. */ 00090 unsigned long half_hash_miss_count; 00091 00092 /* Hash function to be used for this bcache object. */ 00093 unsigned long (*hash_function)(const void *addr, int length); 00094 00095 /* Compare function to be used for this bcache object. */ 00096 int (*compare_function)(const void *, const void *, int length); 00097 }; 00098 00099 /* The old hash function was stolen from SDBM. This is what DB 3.0 00100 uses now, and is better than the old one. */ 00101 00102 unsigned long 00103 hash(const void *addr, int length) 00104 { 00105 return hash_continue (addr, length, 0); 00106 } 00107 00108 /* Continue the calculation of the hash H at the given address. */ 00109 00110 unsigned long 00111 hash_continue (const void *addr, int length, unsigned long h) 00112 { 00113 const unsigned char *k, *e; 00114 00115 k = (const unsigned char *)addr; 00116 e = k+length; 00117 for (; k< e;++k) 00118 { 00119 h *=16777619; 00120 h ^= *k; 00121 } 00122 return (h); 00123 } 00124 00125 /* Growing the bcache's hash table. */ 00126 00127 /* If the average chain length grows beyond this, then we want to 00128 resize our hash table. */ 00129 #define CHAIN_LENGTH_THRESHOLD (5) 00130 00131 static void 00132 expand_hash_table (struct bcache *bcache) 00133 { 00134 /* A table of good hash table sizes. Whenever we grow, we pick the 00135 next larger size from this table. sizes[i] is close to 1 << (i+10), 00136 so we roughly double the table size each time. After we fall off 00137 the end of this table, we just double. Don't laugh --- there have 00138 been executables sighted with a gigabyte of debug info. */ 00139 static unsigned long sizes[] = { 00140 1021, 2053, 4099, 8191, 16381, 32771, 00141 65537, 131071, 262144, 524287, 1048573, 2097143, 00142 4194301, 8388617, 16777213, 33554467, 67108859, 134217757, 00143 268435459, 536870923, 1073741827, 2147483659UL 00144 }; 00145 unsigned int new_num_buckets; 00146 struct bstring **new_buckets; 00147 unsigned int i; 00148 00149 /* Count the stats. Every unique item needs to be re-hashed and 00150 re-entered. */ 00151 bcache->expand_count++; 00152 bcache->expand_hash_count += bcache->unique_count; 00153 00154 /* Find the next size. */ 00155 new_num_buckets = bcache->num_buckets * 2; 00156 for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++) 00157 if (sizes[i] > bcache->num_buckets) 00158 { 00159 new_num_buckets = sizes[i]; 00160 break; 00161 } 00162 00163 /* Allocate the new table. */ 00164 { 00165 size_t new_size = new_num_buckets * sizeof (new_buckets[0]); 00166 00167 new_buckets = (struct bstring **) xmalloc (new_size); 00168 memset (new_buckets, 0, new_size); 00169 00170 bcache->structure_size -= (bcache->num_buckets 00171 * sizeof (bcache->bucket[0])); 00172 bcache->structure_size += new_size; 00173 } 00174 00175 /* Rehash all existing strings. */ 00176 for (i = 0; i < bcache->num_buckets; i++) 00177 { 00178 struct bstring *s, *next; 00179 00180 for (s = bcache->bucket[i]; s; s = next) 00181 { 00182 struct bstring **new_bucket; 00183 next = s->next; 00184 00185 new_bucket = &new_buckets[(bcache->hash_function (&s->d.data, 00186 s->length) 00187 % new_num_buckets)]; 00188 s->next = *new_bucket; 00189 *new_bucket = s; 00190 } 00191 } 00192 00193 /* Plug in the new table. */ 00194 if (bcache->bucket) 00195 xfree (bcache->bucket); 00196 bcache->bucket = new_buckets; 00197 bcache->num_buckets = new_num_buckets; 00198 } 00199 00200 00201 /* Looking up things in the bcache. */ 00202 00203 /* The number of bytes needed to allocate a struct bstring whose data 00204 is N bytes long. */ 00205 #define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n)) 00206 00207 /* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has 00208 never seen those bytes before, add a copy of them to BCACHE. In 00209 either case, return a pointer to BCACHE's copy of that string. */ 00210 const void * 00211 bcache (const void *addr, int length, struct bcache *cache) 00212 { 00213 return bcache_full (addr, length, cache, NULL); 00214 } 00215 00216 /* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has 00217 never seen those bytes before, add a copy of them to BCACHE. In 00218 either case, return a pointer to BCACHE's copy of that string. If 00219 optional ADDED is not NULL, return 1 in case of new entry or 0 if 00220 returning an old entry. */ 00221 00222 const void * 00223 bcache_full (const void *addr, int length, struct bcache *bcache, int *added) 00224 { 00225 unsigned long full_hash; 00226 unsigned short half_hash; 00227 int hash_index; 00228 struct bstring *s; 00229 00230 if (added) 00231 *added = 0; 00232 00233 /* Lazily initialize the obstack. This can save quite a bit of 00234 memory in some cases. */ 00235 if (bcache->total_count == 0) 00236 { 00237 /* We could use obstack_specify_allocation here instead, but 00238 gdb_obstack.h specifies the allocation/deallocation 00239 functions. */ 00240 obstack_init (&bcache->cache); 00241 } 00242 00243 /* If our average chain length is too high, expand the hash table. */ 00244 if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD) 00245 expand_hash_table (bcache); 00246 00247 bcache->total_count++; 00248 bcache->total_size += length; 00249 00250 full_hash = bcache->hash_function (addr, length); 00251 00252 half_hash = (full_hash >> 16); 00253 hash_index = full_hash % bcache->num_buckets; 00254 00255 /* Search the hash bucket for a string identical to the caller's. 00256 As a short-circuit first compare the upper part of each hash 00257 values. */ 00258 for (s = bcache->bucket[hash_index]; s; s = s->next) 00259 { 00260 if (s->half_hash == half_hash) 00261 { 00262 if (s->length == length 00263 && bcache->compare_function (&s->d.data, addr, length)) 00264 return &s->d.data; 00265 else 00266 bcache->half_hash_miss_count++; 00267 } 00268 } 00269 00270 /* The user's string isn't in the list. Insert it after *ps. */ 00271 { 00272 struct bstring *new 00273 = obstack_alloc (&bcache->cache, BSTRING_SIZE (length)); 00274 00275 memcpy (&new->d.data, addr, length); 00276 new->length = length; 00277 new->next = bcache->bucket[hash_index]; 00278 new->half_hash = half_hash; 00279 bcache->bucket[hash_index] = new; 00280 00281 bcache->unique_count++; 00282 bcache->unique_size += length; 00283 bcache->structure_size += BSTRING_SIZE (length); 00284 00285 if (added) 00286 *added = 1; 00287 00288 return &new->d.data; 00289 } 00290 } 00291 00292 00293 /* Compare the byte string at ADDR1 of lenght LENGHT to the 00294 string at ADDR2. Return 1 if they are equal. */ 00295 00296 static int 00297 bcache_compare (const void *addr1, const void *addr2, int length) 00298 { 00299 return memcmp (addr1, addr2, length) == 0; 00300 } 00301 00302 /* Allocating and freeing bcaches. */ 00303 00304 /* Allocated a bcache. HASH_FUNCTION and COMPARE_FUNCTION can be used 00305 to pass in custom hash, and compare functions to be used by this 00306 bcache. If HASH_FUNCTION is NULL hash() is used and if 00307 COMPARE_FUNCTION is NULL memcmp() is used. */ 00308 00309 struct bcache * 00310 bcache_xmalloc (unsigned long (*hash_function)(const void *, int length), 00311 int (*compare_function)(const void *, 00312 const void *, 00313 int length)) 00314 { 00315 /* Allocate the bcache pre-zeroed. */ 00316 struct bcache *b = XCALLOC (1, struct bcache); 00317 00318 if (hash_function) 00319 b->hash_function = hash_function; 00320 else 00321 b->hash_function = hash; 00322 00323 if (compare_function) 00324 b->compare_function = compare_function; 00325 else 00326 b->compare_function = bcache_compare; 00327 return b; 00328 } 00329 00330 /* Free all the storage associated with BCACHE. */ 00331 void 00332 bcache_xfree (struct bcache *bcache) 00333 { 00334 if (bcache == NULL) 00335 return; 00336 /* Only free the obstack if we actually initialized it. */ 00337 if (bcache->total_count > 0) 00338 obstack_free (&bcache->cache, 0); 00339 xfree (bcache->bucket); 00340 xfree (bcache); 00341 } 00342 00343 00344 00345 /* Printing statistics. */ 00346 00347 static void 00348 print_percentage (int portion, int total) 00349 { 00350 if (total == 0) 00351 /* i18n: Like "Percentage of duplicates, by count: (not applicable)". */ 00352 printf_filtered (_("(not applicable)\n")); 00353 else 00354 printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total)); 00355 } 00356 00357 00358 /* Print statistics on BCACHE's memory usage and efficacity at 00359 eliminating duplication. NAME should describe the kind of data 00360 BCACHE holds. Statistics are printed using `printf_filtered' and 00361 its ilk. */ 00362 void 00363 print_bcache_statistics (struct bcache *c, char *type) 00364 { 00365 int occupied_buckets; 00366 int max_chain_length; 00367 int median_chain_length; 00368 int max_entry_size; 00369 int median_entry_size; 00370 00371 /* Count the number of occupied buckets, tally the various string 00372 lengths, and measure chain lengths. */ 00373 { 00374 unsigned int b; 00375 int *chain_length = XCALLOC (c->num_buckets + 1, int); 00376 int *entry_size = XCALLOC (c->unique_count + 1, int); 00377 int stringi = 0; 00378 00379 occupied_buckets = 0; 00380 00381 for (b = 0; b < c->num_buckets; b++) 00382 { 00383 struct bstring *s = c->bucket[b]; 00384 00385 chain_length[b] = 0; 00386 00387 if (s) 00388 { 00389 occupied_buckets++; 00390 00391 while (s) 00392 { 00393 gdb_assert (b < c->num_buckets); 00394 chain_length[b]++; 00395 gdb_assert (stringi < c->unique_count); 00396 entry_size[stringi++] = s->length; 00397 s = s->next; 00398 } 00399 } 00400 } 00401 00402 /* To compute the median, we need the set of chain lengths 00403 sorted. */ 00404 qsort (chain_length, c->num_buckets, sizeof (chain_length[0]), 00405 compare_positive_ints); 00406 qsort (entry_size, c->unique_count, sizeof (entry_size[0]), 00407 compare_positive_ints); 00408 00409 if (c->num_buckets > 0) 00410 { 00411 max_chain_length = chain_length[c->num_buckets - 1]; 00412 median_chain_length = chain_length[c->num_buckets / 2]; 00413 } 00414 else 00415 { 00416 max_chain_length = 0; 00417 median_chain_length = 0; 00418 } 00419 if (c->unique_count > 0) 00420 { 00421 max_entry_size = entry_size[c->unique_count - 1]; 00422 median_entry_size = entry_size[c->unique_count / 2]; 00423 } 00424 else 00425 { 00426 max_entry_size = 0; 00427 median_entry_size = 0; 00428 } 00429 00430 xfree (chain_length); 00431 xfree (entry_size); 00432 } 00433 00434 printf_filtered (_(" Cached '%s' statistics:\n"), type); 00435 printf_filtered (_(" Total object count: %ld\n"), c->total_count); 00436 printf_filtered (_(" Unique object count: %lu\n"), c->unique_count); 00437 printf_filtered (_(" Percentage of duplicates, by count: ")); 00438 print_percentage (c->total_count - c->unique_count, c->total_count); 00439 printf_filtered ("\n"); 00440 00441 printf_filtered (_(" Total object size: %ld\n"), c->total_size); 00442 printf_filtered (_(" Unique object size: %ld\n"), c->unique_size); 00443 printf_filtered (_(" Percentage of duplicates, by size: ")); 00444 print_percentage (c->total_size - c->unique_size, c->total_size); 00445 printf_filtered ("\n"); 00446 00447 printf_filtered (_(" Max entry size: %d\n"), max_entry_size); 00448 printf_filtered (_(" Average entry size: ")); 00449 if (c->unique_count > 0) 00450 printf_filtered ("%ld\n", c->unique_size / c->unique_count); 00451 else 00452 /* i18n: "Average entry size: (not applicable)". */ 00453 printf_filtered (_("(not applicable)\n")); 00454 printf_filtered (_(" Median entry size: %d\n"), median_entry_size); 00455 printf_filtered ("\n"); 00456 00457 printf_filtered (_(" \ 00458 Total memory used by bcache, including overhead: %ld\n"), 00459 c->structure_size); 00460 printf_filtered (_(" Percentage memory overhead: ")); 00461 print_percentage (c->structure_size - c->unique_size, c->unique_size); 00462 printf_filtered (_(" Net memory savings: ")); 00463 print_percentage (c->total_size - c->structure_size, c->total_size); 00464 printf_filtered ("\n"); 00465 00466 printf_filtered (_(" Hash table size: %3d\n"), 00467 c->num_buckets); 00468 printf_filtered (_(" Hash table expands: %lu\n"), 00469 c->expand_count); 00470 printf_filtered (_(" Hash table hashes: %lu\n"), 00471 c->total_count + c->expand_hash_count); 00472 printf_filtered (_(" Half hash misses: %lu\n"), 00473 c->half_hash_miss_count); 00474 printf_filtered (_(" Hash table population: ")); 00475 print_percentage (occupied_buckets, c->num_buckets); 00476 printf_filtered (_(" Median hash chain length: %3d\n"), 00477 median_chain_length); 00478 printf_filtered (_(" Average hash chain length: ")); 00479 if (c->num_buckets > 0) 00480 printf_filtered ("%3lu\n", c->unique_count / c->num_buckets); 00481 else 00482 /* i18n: "Average hash chain length: (not applicable)". */ 00483 printf_filtered (_("(not applicable)\n")); 00484 printf_filtered (_(" Maximum hash chain length: %3d\n"), 00485 max_chain_length); 00486 printf_filtered ("\n"); 00487 } 00488 00489 int 00490 bcache_memory_used (struct bcache *bcache) 00491 { 00492 if (bcache->total_count == 0) 00493 return 0; 00494 return obstack_memory_used (&bcache->cache); 00495 }