1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : adhash.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include "gb_data.h" |
---|
12 | #include "gb_tune.h" |
---|
13 | #include "gb_hashindex.h" |
---|
14 | |
---|
15 | #include <arb_strbuf.h> |
---|
16 | #include <arb_sort.h> |
---|
17 | |
---|
18 | #include <climits> |
---|
19 | #include <cfloat> |
---|
20 | #include <cctype> |
---|
21 | |
---|
22 | |
---|
23 | struct gbs_hash_entry { |
---|
24 | char *key; |
---|
25 | long val; |
---|
26 | gbs_hash_entry *next; |
---|
27 | }; |
---|
28 | struct GB_HASH { |
---|
29 | size_t size; // size of hashtable |
---|
30 | size_t nelem; // number of elements inserted |
---|
31 | GB_CASE case_sens; |
---|
32 | gbs_hash_entry **entries; // the hash table (has 'size' entries) |
---|
33 | |
---|
34 | void (*freefun)(long val); // function to free hash values (see GBS_create_dynaval_hash) |
---|
35 | |
---|
36 | }; |
---|
37 | |
---|
38 | struct numhash_entry { |
---|
39 | long key; |
---|
40 | long val; |
---|
41 | numhash_entry *next; |
---|
42 | }; |
---|
43 | |
---|
44 | struct GB_NUMHASH { |
---|
45 | long size; // size of hashtable |
---|
46 | size_t nelem; // number of elements inserted |
---|
47 | numhash_entry **entries; |
---|
48 | }; |
---|
49 | |
---|
50 | // prime numbers |
---|
51 | |
---|
52 | #define KNOWN_PRIMES 279 |
---|
53 | static size_t sorted_primes[KNOWN_PRIMES] = { |
---|
54 | 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 47, 53, 59, 67, 71, 79, 89, 97, 103, 109, 127, 137, 149, 157, 167, 179, 191, 211, |
---|
55 | 223, 239, 257, 271, 293, 311, 331, 349, 373, 397, 419, 443, 467, 499, 541, 571, 607, 641, 677, 719, 757, 797, 839, 887, 937, |
---|
56 | 991, 1049, 1109, 1171, 1237, 1303, 1373, 1447, 1531, 1613, 1699, 1789, 1889, 1993, 2099, 2213, 2333, 2459, 2591, 2729, 2879, |
---|
57 | 3037, 3203, 3373, 3557, 3761, 3967, 4177, 4397, 4637, 4889, 5147, 5419, 5711, 6029, 6353, 6689, 7043, 7417, 7817, 8231, 8669, |
---|
58 | 9127, 9613, 10133, 10667, 11239, 11831, 12457, 13121, 13829, 14557, 15329, 16139, 16993, 17891, 18839, 19841, 20887, 21991, 23159, |
---|
59 | 24379, 25667, 27031, 28463, 29983, 31567, 33247, 35023, 36871, 38821, 40867, 43019, 45289, 47681, 50207, 52859, 55661, 58601, |
---|
60 | 61687, 64937, 68371, 71971, 75767, 79757, 83969, 88397, 93053, 97961, 103123, 108553, 114269, 120293, 126631, 133303, 140321, |
---|
61 | 147709, 155501, 163697, 172313, 181387, 190979, 201031, 211619, 222773, 234499, 246889, 259907, 273601, 288007, 303187, 319147, |
---|
62 | 335953, 353641, 372263, 391861, 412487, 434201, 457057, 481123, 506449, 533111, 561173, 590713, 621821, 654553, 689021, 725293, |
---|
63 | 763471, 803659, 845969, 890501, 937373, 986717, 1038671, 1093357, 1150909, 1211489, 1275269, 1342403, 1413077, 1487459, 1565747, |
---|
64 | 1648181, 1734937, 1826257, 1922383, 2023577, 2130101, 2242213, 2360243, 2484473, 2615243, 2752889, 2897789, 3050321, 3210871, |
---|
65 | 3379877, 3557773, 3745051, 3942209, 4149703, 4368113, 4598063, 4840103, 5094853, 5363011, 5645279, 5942399, 6255157, 6584377, |
---|
66 | 6930929, 7295719, 7679713, 8083919, 8509433, 8957309, 9428759, 9925021, 10447391, 10997279, 11576087, 12185359, 12826699, 13501819, |
---|
67 | 14212447, 14960471, 15747869, 16576727, 17449207, 18367597, 19334317, 20351927, 21423107, 22550639, 23737523, 24986867, 26301967, |
---|
68 | 27686291, 29143493, 30677363, 32291971, 33991597, 35780639, 37663841, 39646153, 41732809, 43929307, 46241389, 48675167, 51237019, |
---|
69 | 53933713, 56772371, 59760391, 62905681, 66216511, 69701591, 73370107, 77231711, 81296543, 85575313, 90079313, 94820347, 99810899 |
---|
70 | }; |
---|
71 | |
---|
72 | // define CALC_PRIMES only to expand the above table |
---|
73 | #if defined(DEBUG) |
---|
74 | // #define CALC_PRIMES |
---|
75 | #endif // DEBUG |
---|
76 | |
---|
77 | #ifdef CALC_PRIMES |
---|
78 | |
---|
79 | #define CALC_PRIMES_UP_TO 100000000U |
---|
80 | #define PRIME_UNDENSITY 20U // the higher, the less primes are stored |
---|
81 | |
---|
82 | #warning "please don't define CALC_PRIMES permanently" |
---|
83 | |
---|
84 | static unsigned char bit_val[8] = { 1, 2, 4, 8, 16, 32, 64, 128 }; |
---|
85 | |
---|
86 | static int bit_value(const unsigned char *eratosthenes, long num) { |
---|
87 | // 'num' is odd and lowest 'num' is 3 |
---|
88 | long bit_num = ((num-1) >> 1)-1; // 3->0 5->1 7->2 etc. |
---|
89 | long byte_num = bit_num >> 3; // div 8 |
---|
90 | char byte = eratosthenes[byte_num]; |
---|
91 | |
---|
92 | gb_assert(bit_num >= 0); |
---|
93 | gb_assert((num&1) == 1); // has to odd |
---|
94 | |
---|
95 | bit_num = bit_num & 7; |
---|
96 | |
---|
97 | return (byte & bit_val[bit_num]) ? 1 : 0; |
---|
98 | } |
---|
99 | static void set_bit_value(unsigned char *eratosthenes, long num, int val) { |
---|
100 | // 'num' is odd and lowest 'num' is 3; val is 0 or 1 |
---|
101 | long bit_num = ((num-1) >> 1)-1; // 3->0 5->1 7->2 etc. |
---|
102 | long byte_num = bit_num >> 3; // div 8 |
---|
103 | char byte = eratosthenes[byte_num]; |
---|
104 | |
---|
105 | gb_assert(bit_num >= 0); |
---|
106 | gb_assert((num&1) == 1); // has to odd |
---|
107 | |
---|
108 | bit_num = bit_num & 7; |
---|
109 | |
---|
110 | if (val) { |
---|
111 | byte |= bit_val[bit_num]; |
---|
112 | } |
---|
113 | else { |
---|
114 | byte &= (0xff - bit_val[bit_num]); |
---|
115 | } |
---|
116 | eratosthenes[byte_num] = byte; |
---|
117 | } |
---|
118 | |
---|
119 | static void calculate_primes_upto() { |
---|
120 | { |
---|
121 | size_t bits_needed = CALC_PRIMES_UP_TO/2+1; // only need bits for odd numbers |
---|
122 | size_t bytes_needed = (bits_needed/8)+1; |
---|
123 | unsigned char *eratosthenes = (unsigned char *)GB_calloc(bytes_needed, 1); // bit = 1 means "is not a prime" |
---|
124 | size_t prime_count = 0; |
---|
125 | size_t num; |
---|
126 | |
---|
127 | printf("eratosthenes' size = %zu\n", bytes_needed); |
---|
128 | GBK_dump_backtrace(stderr, "calculate_primes_upto"); |
---|
129 | |
---|
130 | if (!eratosthenes) { |
---|
131 | GB_internal_error("out of memory"); |
---|
132 | return; |
---|
133 | } |
---|
134 | |
---|
135 | for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) { |
---|
136 | if (bit_value(eratosthenes, num) == 0) { // is a prime number |
---|
137 | size_t num2; |
---|
138 | prime_count++; |
---|
139 | for (num2 = num*2; num2 <= CALC_PRIMES_UP_TO; num2 += num) { // with all multiples |
---|
140 | if ((num2&1) == 1) { // skip even numbers |
---|
141 | set_bit_value(eratosthenes, num2, 1); |
---|
142 | } |
---|
143 | } |
---|
144 | } |
---|
145 | // otherwise it is no prime and all multiples are already set to 1 |
---|
146 | } |
---|
147 | |
---|
148 | // thin out prime numbers (we don't need all of them) |
---|
149 | { |
---|
150 | size_t prime_count2 = 0; |
---|
151 | size_t last_prime = 1; |
---|
152 | size_t printed = 0; |
---|
153 | |
---|
154 | for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) { |
---|
155 | if (bit_value(eratosthenes, num) == 0) { // is a prime number |
---|
156 | size_t diff = num-last_prime; |
---|
157 | if ((diff*PRIME_UNDENSITY)<num) { |
---|
158 | set_bit_value(eratosthenes, num, 1); // delete unneeded prime |
---|
159 | } |
---|
160 | else { |
---|
161 | prime_count2++; // count needed primes |
---|
162 | last_prime = num; |
---|
163 | } |
---|
164 | } |
---|
165 | } |
---|
166 | |
---|
167 | printf("\nUsing %zu prime numbers up to %zu:\n\n", prime_count2, CALC_PRIMES_UP_TO); |
---|
168 | printf("#define KNOWN_PRIMES %zu\n", prime_count2); |
---|
169 | printf("static size_t sorted_primes[KNOWN_PRIMES] = {\n "); |
---|
170 | printed = 4; |
---|
171 | |
---|
172 | for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) { |
---|
173 | if (bit_value(eratosthenes, num) == 0) { // is a prime number |
---|
174 | if (printed>128) { |
---|
175 | printf("\n "); |
---|
176 | printed = 4; |
---|
177 | } |
---|
178 | |
---|
179 | if (num>INT_MAX) { |
---|
180 | printed += printf("%zuU, ", num); |
---|
181 | } |
---|
182 | else { |
---|
183 | printed += printf("%zu, ", num); |
---|
184 | } |
---|
185 | } |
---|
186 | } |
---|
187 | printf("\n};\n\n"); |
---|
188 | } |
---|
189 | |
---|
190 | free(eratosthenes); |
---|
191 | } |
---|
192 | fflush(stdout); |
---|
193 | exit(1); |
---|
194 | } |
---|
195 | |
---|
196 | #endif // CALC_PRIMES |
---|
197 | |
---|
198 | size_t gbs_get_a_prime(size_t above_or_equal_this) { |
---|
199 | // return a prime number above_or_equal_this |
---|
200 | // NOTE: it is not necessarily the next prime number, because we don't calculate all prime numbers! |
---|
201 | |
---|
202 | #if defined(CALC_PRIMES) |
---|
203 | calculate_primes_upto(); |
---|
204 | #endif // CALC_PRIMES |
---|
205 | |
---|
206 | if (sorted_primes[KNOWN_PRIMES-1] >= above_or_equal_this) { |
---|
207 | int l = 0, h = KNOWN_PRIMES-1; |
---|
208 | |
---|
209 | while (l < h) { |
---|
210 | int m = (l+h)/2; |
---|
211 | #if defined(DEBUG) && 0 |
---|
212 | printf("l=%-3i m=%-3i h=%-3i above_or_equal_this=%li sorted_primes[%i]=%li sorted_primes[%i]=%li sorted_primes[%i]=%li\n", |
---|
213 | l, m, h, above_or_equal_this, l, sorted_primes[l], m, sorted_primes[m], h, sorted_primes[h]); |
---|
214 | #endif // DEBUG |
---|
215 | gb_assert(l <= m); |
---|
216 | gb_assert(m <= h); |
---|
217 | if (sorted_primes[m] > above_or_equal_this) { |
---|
218 | h = m-1; |
---|
219 | } |
---|
220 | else { |
---|
221 | if (sorted_primes[m] < above_or_equal_this) { |
---|
222 | l = m+1; |
---|
223 | } |
---|
224 | else { |
---|
225 | h = l = m; |
---|
226 | } |
---|
227 | } |
---|
228 | } |
---|
229 | |
---|
230 | if (sorted_primes[l] < above_or_equal_this) { |
---|
231 | l++; // take next |
---|
232 | gb_assert(l<KNOWN_PRIMES); |
---|
233 | } |
---|
234 | |
---|
235 | gb_assert(sorted_primes[l] >= above_or_equal_this); |
---|
236 | gb_assert(l == 0 || sorted_primes[l-1] < above_or_equal_this); |
---|
237 | |
---|
238 | return sorted_primes[l]; |
---|
239 | } |
---|
240 | |
---|
241 | fprintf(stderr, "Warning: gbs_get_a_prime failed for value %zu (performance bleed)\n", above_or_equal_this); |
---|
242 | gb_assert(0); // add more primes to sorted_primes[] |
---|
243 | |
---|
244 | return above_or_equal_this; // NEED_NO_COV |
---|
245 | } |
---|
246 | |
---|
247 | // ----------------------------------------------- |
---|
248 | // Some Hash Procedures for [string,long] |
---|
249 | |
---|
250 | inline size_t hash_size(size_t estimated_elements) { |
---|
251 | size_t min_hash_size = 2*estimated_elements; // -> fill rate ~ 50% -> collisions unlikely |
---|
252 | size_t next_prime = gbs_get_a_prime(min_hash_size); // use next prime number |
---|
253 | |
---|
254 | return next_prime; |
---|
255 | } |
---|
256 | |
---|
257 | |
---|
258 | GB_HASH *GBS_create_hash(long estimated_elements, GB_CASE case_sens) { |
---|
259 | /*! Create a hash |
---|
260 | * @param estimated_elements estimated number of elements added to hash (if you add more elements, hash will still work, but get slow) |
---|
261 | * @param case_sens GB_IGNORE_CASE or GB_MIND_CASE |
---|
262 | * Uses linked lists to avoid collisions. |
---|
263 | */ |
---|
264 | GB_HASH *hs; |
---|
265 | long size = hash_size(estimated_elements); |
---|
266 | |
---|
267 | hs = (GB_HASH*)GB_calloc(sizeof(*hs), 1); |
---|
268 | hs->size = size; |
---|
269 | hs->nelem = 0; |
---|
270 | hs->case_sens = case_sens; |
---|
271 | hs->entries = (gbs_hash_entry **)GB_calloc(sizeof(gbs_hash_entry *), size); |
---|
272 | hs->freefun = NULL; |
---|
273 | |
---|
274 | return hs; |
---|
275 | } |
---|
276 | |
---|
277 | GB_HASH *GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void (*freefun)(long)) { |
---|
278 | //! like GBS_create_hash, but values stored in hash get freed using 'freefun' when hash gets destroyed |
---|
279 | GB_HASH *hs = GBS_create_hash(estimated_elements, case_sens); |
---|
280 | hs->freefun = freefun; |
---|
281 | return hs; |
---|
282 | } |
---|
283 | |
---|
284 | void GBS_dynaval_free(long val) { |
---|
285 | free((void*)val); |
---|
286 | } |
---|
287 | |
---|
288 | #if defined(DEBUG) |
---|
289 | inline void dump_access(const char *title, const GB_HASH *hs, double mean_access) { |
---|
290 | fprintf(stderr, |
---|
291 | "%s: size=%zu elements=%zu mean_access=%.2f hash-speed=%.1f%%\n", |
---|
292 | title, hs->size, hs->nelem, mean_access, 100.0/mean_access); |
---|
293 | } |
---|
294 | |
---|
295 | static double hash_mean_access_costs(const GB_HASH *hs) { |
---|
296 | /* returns the mean access costs of the hash [1.0 .. inf[ |
---|
297 | * 1.0 is optimal |
---|
298 | * 2.0 means: hash speed is 50% (1/2.0) |
---|
299 | */ |
---|
300 | double mean_access = 1.0; |
---|
301 | |
---|
302 | if (hs->nelem) { |
---|
303 | int strcmps_needed = 0; |
---|
304 | size_t pos; |
---|
305 | |
---|
306 | for (pos = 0; pos<hs->size; pos++) { |
---|
307 | int strcmps = 1; |
---|
308 | gbs_hash_entry *e; |
---|
309 | |
---|
310 | for (e = hs->entries[pos]; e; e = e->next) { |
---|
311 | strcmps_needed += strcmps++; |
---|
312 | } |
---|
313 | } |
---|
314 | |
---|
315 | mean_access = (double)strcmps_needed/hs->nelem; |
---|
316 | } |
---|
317 | return mean_access; |
---|
318 | } |
---|
319 | #endif // DEBUG |
---|
320 | |
---|
321 | |
---|
322 | void GBS_optimize_hash(const GB_HASH *hs) { |
---|
323 | if (hs->nelem > hs->size) { // hash is overfilled (Note: even 50% fillrate is slow) |
---|
324 | size_t new_size = gbs_get_a_prime(hs->nelem*3); |
---|
325 | |
---|
326 | #if defined(DEBUG) |
---|
327 | dump_access("Optimizing filled hash", hs, hash_mean_access_costs(hs)); |
---|
328 | #endif // DEBUG |
---|
329 | |
---|
330 | if (new_size>hs->size) { // avoid overflow |
---|
331 | gbs_hash_entry **new_entries = (gbs_hash_entry**)GB_calloc(sizeof(*new_entries), new_size); |
---|
332 | size_t pos; |
---|
333 | |
---|
334 | for (pos = 0; pos<hs->size; ++pos) { |
---|
335 | gbs_hash_entry *e; |
---|
336 | gbs_hash_entry *next; |
---|
337 | |
---|
338 | for (e = hs->entries[pos]; e; e = next) { |
---|
339 | long new_idx; |
---|
340 | next = e->next; |
---|
341 | |
---|
342 | GB_CALC_HASH_INDEX(e->key, new_idx, new_size, hs->case_sens); |
---|
343 | |
---|
344 | e->next = new_entries[new_idx]; |
---|
345 | new_entries[new_idx] = e; |
---|
346 | } |
---|
347 | } |
---|
348 | |
---|
349 | free(hs->entries); |
---|
350 | |
---|
351 | { |
---|
352 | GB_HASH *hs_mutable = const_cast<GB_HASH*>(hs); |
---|
353 | hs_mutable->size = new_size; |
---|
354 | hs_mutable->entries = new_entries; |
---|
355 | } |
---|
356 | } |
---|
357 | #if defined(DEBUG) |
---|
358 | dump_access("Optimized hash ", hs, hash_mean_access_costs(hs)); |
---|
359 | #endif // DEBUG |
---|
360 | |
---|
361 | } |
---|
362 | } |
---|
363 | |
---|
364 | static void gbs_hash_to_strstruct(const char *key, long val, void *cd_out) { |
---|
365 | const char *p; |
---|
366 | int c; |
---|
367 | GBS_strstruct *out = (GBS_strstruct*)cd_out; |
---|
368 | |
---|
369 | for (p = key; (c=*p); p++) { |
---|
370 | GBS_chrcat(out, c); |
---|
371 | if (c==':') GBS_chrcat(out, c); |
---|
372 | } |
---|
373 | GBS_chrcat(out, ':'); |
---|
374 | GBS_intcat(out, val); |
---|
375 | GBS_chrcat(out, ' '); |
---|
376 | } |
---|
377 | |
---|
378 | char *GBS_hashtab_2_string(const GB_HASH *hash) { |
---|
379 | GBS_strstruct *out = GBS_stropen(1024); |
---|
380 | GBS_hash_do_const_loop(hash, gbs_hash_to_strstruct, out); |
---|
381 | return GBS_strclose(out); |
---|
382 | } |
---|
383 | |
---|
384 | |
---|
385 | static gbs_hash_entry *find_hash_entry(const GB_HASH *hs, const char *key, size_t *index) { |
---|
386 | gbs_hash_entry *e; |
---|
387 | if (hs->case_sens == GB_IGNORE_CASE) { |
---|
388 | GB_CALC_HASH_INDEX_CASE_IGNORED(key, *index, hs->size); |
---|
389 | for (e=hs->entries[*index]; e; e=e->next) { |
---|
390 | if (!strcasecmp(e->key, key)) return e; |
---|
391 | } |
---|
392 | } |
---|
393 | else { |
---|
394 | GB_CALC_HASH_INDEX_CASE_SENSITIVE(key, *index, hs->size); |
---|
395 | for (e=hs->entries[*index]; e; e=e->next) { |
---|
396 | if (!strcmp(e->key, key)) return e; |
---|
397 | } |
---|
398 | } |
---|
399 | return 0; |
---|
400 | } |
---|
401 | |
---|
402 | long GBS_read_hash(const GB_HASH *hs, const char *key) { |
---|
403 | size_t i; |
---|
404 | gbs_hash_entry *e = find_hash_entry(hs, key, &i); |
---|
405 | |
---|
406 | return e ? e->val : 0; |
---|
407 | } |
---|
408 | |
---|
409 | static void delete_from_list(GB_HASH *hs, size_t i, gbs_hash_entry *e) { |
---|
410 | // delete the hash entry 'e' from list at index 'i' |
---|
411 | hs->nelem--; |
---|
412 | if (hs->entries[i] == e) { |
---|
413 | hs->entries[i] = e->next; |
---|
414 | } |
---|
415 | else { |
---|
416 | gbs_hash_entry *ee; |
---|
417 | for (ee = hs->entries[i]; ee->next != e; ee = ee->next) ; |
---|
418 | if (ee->next == e) { |
---|
419 | ee->next = e->next; |
---|
420 | } |
---|
421 | else { |
---|
422 | GB_internal_error("Database may be corrupt, hash tables error"); // NEED_NO_COV |
---|
423 | } |
---|
424 | } |
---|
425 | free(e->key); |
---|
426 | if (hs->freefun) hs->freefun(e->val); |
---|
427 | gbm_free_mem(e, sizeof(gbs_hash_entry), GBM_HASH_INDEX); |
---|
428 | } |
---|
429 | |
---|
430 | static long write_hash(GB_HASH *hs, char *key, bool copyKey, long val) { |
---|
431 | /* returns the old value (or 0 if key had no entry) |
---|
432 | * if 'copyKey' == false, 'key' will be freed (now or later) and may be invalid! |
---|
433 | * if 'copyKey' == true, 'key' will not be touched in any way! |
---|
434 | */ |
---|
435 | |
---|
436 | size_t i; |
---|
437 | gbs_hash_entry *e = find_hash_entry(hs, key, &i); |
---|
438 | long oldval = 0; |
---|
439 | |
---|
440 | if (e) { |
---|
441 | oldval = e->val; |
---|
442 | |
---|
443 | if (!val) delete_from_list(hs, i, e); // (val == 0 is not stored, cause 0 is the default value) |
---|
444 | else e->val = val; |
---|
445 | |
---|
446 | if (!copyKey) free(key); // already had an entry -> delete unused mem |
---|
447 | } |
---|
448 | else if (val != 0) { // don't store 0 |
---|
449 | // create new hash entry |
---|
450 | e = (gbs_hash_entry *)gbm_get_mem(sizeof(gbs_hash_entry), GBM_HASH_INDEX); |
---|
451 | e->next = hs->entries[i]; |
---|
452 | e->key = copyKey ? strdup(key) : key; |
---|
453 | e->val = val; |
---|
454 | |
---|
455 | hs->entries[i] = e; |
---|
456 | hs->nelem++; |
---|
457 | } |
---|
458 | else { |
---|
459 | if (!copyKey) free(key); // don't need an entry -> delete unused mem |
---|
460 | } |
---|
461 | return oldval; |
---|
462 | } |
---|
463 | |
---|
464 | long GBS_write_hash(GB_HASH *hs, const char *key, long val) { |
---|
465 | // returns the old value (or 0 if key had no entry) |
---|
466 | return write_hash(hs, (char*)key, true, val); |
---|
467 | } |
---|
468 | |
---|
469 | long GBS_write_hash_no_strdup(GB_HASH *hs, char *key, long val) { |
---|
470 | /* same as GBS_write_hash, but does no strdup. 'key' is freed later in GBS_free_hash, |
---|
471 | * so the user has to 'malloc' the string and give control to the hash. |
---|
472 | * Note: after calling this function 'key' may be invalid! |
---|
473 | */ |
---|
474 | return write_hash(hs, key, false, val); |
---|
475 | } |
---|
476 | |
---|
477 | long GBS_incr_hash(GB_HASH *hs, const char *key) { |
---|
478 | // returns new value |
---|
479 | size_t i; |
---|
480 | gbs_hash_entry *e = find_hash_entry(hs, key, &i); |
---|
481 | long result; |
---|
482 | |
---|
483 | if (e) { |
---|
484 | result = ++e->val; |
---|
485 | if (!result) delete_from_list(hs, i, e); |
---|
486 | } |
---|
487 | else { |
---|
488 | e = (gbs_hash_entry *)gbm_get_mem(sizeof(gbs_hash_entry), GBM_HASH_INDEX); |
---|
489 | e->next = hs->entries[i]; |
---|
490 | e->key = strdup(key); |
---|
491 | e->val = result = 1; |
---|
492 | |
---|
493 | hs->entries[i] = e; |
---|
494 | hs->nelem++; |
---|
495 | } |
---|
496 | return result; |
---|
497 | } |
---|
498 | |
---|
499 | #if defined(DEVEL_RALF) |
---|
500 | // #define DUMP_HASH_ENTRIES |
---|
501 | #endif // DEVEL_RALF |
---|
502 | |
---|
503 | static void GBS_erase_hash(GB_HASH *hs) { |
---|
504 | size_t hsize = hs->size; |
---|
505 | |
---|
506 | #if defined(DUMP_HASH_ENTRIES) |
---|
507 | for (size_t i = 0; i < hsize; i++) { |
---|
508 | printf("hash[%zu] =", i); |
---|
509 | for (gbs_hash_entry *e = hs->entries[i]; e; e = e->next) { |
---|
510 | printf(" '%s'", e->key); |
---|
511 | } |
---|
512 | printf("\n"); |
---|
513 | } |
---|
514 | #endif // DUMP_HASH_ENTRIES |
---|
515 | |
---|
516 | // check hash size |
---|
517 | if (hsize >= 10) { // ignore small hashes |
---|
518 | #if defined(DEBUG) |
---|
519 | double mean_access = hash_mean_access_costs(hs); |
---|
520 | if (mean_access > 1.5) { // every 2nd access is a collision - increase hash size? |
---|
521 | dump_access("hash-size-warning", hs, mean_access); |
---|
522 | #if defined(DEVEL_RALF) && !defined(UNIT_TESTS) |
---|
523 | gb_assert(mean_access<2.0); // hash with 50% speed or less |
---|
524 | #endif // DEVEL_RALF |
---|
525 | } |
---|
526 | #else |
---|
527 | if (hs->nelem >= (2*hsize)) { |
---|
528 | GB_warningf("Performance leak - very slow hash detected (elems=%zu, size=%zu)\n", hs->nelem, hs->size); |
---|
529 | GBK_dump_backtrace(stderr, "detected performance leak"); |
---|
530 | } |
---|
531 | #endif // DEBUG |
---|
532 | } |
---|
533 | |
---|
534 | for (size_t i = 0; i < hsize; i++) { |
---|
535 | for (gbs_hash_entry *e = hs->entries[i]; e; ) { |
---|
536 | free(e->key); |
---|
537 | if (hs->freefun) hs->freefun(e->val); |
---|
538 | |
---|
539 | gbs_hash_entry *next = e->next; |
---|
540 | gbm_free_mem(e, sizeof(gbs_hash_entry), GBM_HASH_INDEX); |
---|
541 | e = next; |
---|
542 | } |
---|
543 | hs->entries[i] = 0; |
---|
544 | } |
---|
545 | hs->nelem = 0; |
---|
546 | } |
---|
547 | |
---|
548 | void GBS_free_hash(GB_HASH *hs) { |
---|
549 | gb_assert(hs); |
---|
550 | GBS_erase_hash(hs); |
---|
551 | free(hs->entries); |
---|
552 | free(hs); |
---|
553 | } |
---|
554 | |
---|
555 | void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data) { |
---|
556 | size_t hsize = hs->size; |
---|
557 | for (size_t i=0; i<hsize; i++) { |
---|
558 | for (gbs_hash_entry *e = hs->entries[i]; e; ) { |
---|
559 | gbs_hash_entry *next = e->next; |
---|
560 | if (e->val) { |
---|
561 | e->val = func(e->key, e->val, client_data); |
---|
562 | if (!e->val) delete_from_list(hs, i, e); |
---|
563 | } |
---|
564 | e = next; |
---|
565 | } |
---|
566 | } |
---|
567 | } |
---|
568 | |
---|
569 | void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data) { |
---|
570 | size_t hsize = hs->size; |
---|
571 | for (size_t i=0; i<hsize; i++) { |
---|
572 | for (gbs_hash_entry *e = hs->entries[i]; e; ) { |
---|
573 | gbs_hash_entry *next = e->next; |
---|
574 | if (e->val) func(e->key, e->val, client_data); |
---|
575 | e = next; |
---|
576 | } |
---|
577 | } |
---|
578 | } |
---|
579 | |
---|
580 | #if defined(WARN_TODO) |
---|
581 | #warning rename GBS_hash_count_elems -> GBS_hash_elements |
---|
582 | #endif |
---|
583 | |
---|
584 | size_t GBS_hash_count_elems(const GB_HASH *hs) { |
---|
585 | #if defined(DEBUG) |
---|
586 | // @@@ old code, just left here to ensure hs->nelem is correct --ralf Mar/2010 |
---|
587 | size_t count = 0; |
---|
588 | size_t hsize = hs->size; |
---|
589 | for (size_t i = 0; i<hsize; ++i) { |
---|
590 | gbs_hash_entry *e; |
---|
591 | for (e=hs->entries[i]; e; e=e->next) { |
---|
592 | if (e->val) ++count; |
---|
593 | } |
---|
594 | } |
---|
595 | |
---|
596 | gb_assert(count == hs->nelem); |
---|
597 | #else |
---|
598 | size_t count = hs->nelem; |
---|
599 | #endif // DEBUG |
---|
600 | |
---|
601 | return count; |
---|
602 | } |
---|
603 | |
---|
604 | const char *GBS_hash_next_element_that(const GB_HASH *hs, const char *last_key, bool (*condition)(const char *key, long val, void *cd), void *cd) { |
---|
605 | /* Returns the key of the next element after 'last_key' matching 'condition' (i.e. where condition returns true). |
---|
606 | * If 'last_key' is NULL, the first matching element is returned. |
---|
607 | * Returns NULL if no (more) elements match the 'condition'. |
---|
608 | */ |
---|
609 | |
---|
610 | size_t size = hs->size; |
---|
611 | size_t i = 0; |
---|
612 | gbs_hash_entry *e = 0; |
---|
613 | |
---|
614 | if (last_key) { |
---|
615 | e = find_hash_entry(hs, last_key, &i); |
---|
616 | if (!e) return NULL; |
---|
617 | |
---|
618 | e = e->next; // use next entry after 'last_key' |
---|
619 | if (!e) i++; |
---|
620 | } |
---|
621 | |
---|
622 | for (; i<size && !e; ++i) e = hs->entries[i]; // search first/next entry |
---|
623 | |
---|
624 | while (e) { |
---|
625 | if ((*condition)(e->key, e->val, cd)) break; |
---|
626 | e = e->next; |
---|
627 | if (!e) { |
---|
628 | for (i++; i<size && !e; ++i) e = hs->entries[i]; |
---|
629 | } |
---|
630 | } |
---|
631 | |
---|
632 | return e ? e->key : NULL; |
---|
633 | } |
---|
634 | |
---|
635 | static int wrap_hashCompare4gb_sort(const void *v0, const void *v1, void *sorter) { |
---|
636 | const gbs_hash_entry *e0 = (const gbs_hash_entry*)v0; |
---|
637 | const gbs_hash_entry *e1 = (const gbs_hash_entry*)v1; |
---|
638 | |
---|
639 | return ((gbs_hash_compare_function)sorter)(e0->key, e0->val, e1->key, e1->val); |
---|
640 | } |
---|
641 | |
---|
642 | void GBS_hash_do_sorted_loop(GB_HASH *hs, gb_hash_loop_type func, gbs_hash_compare_function sorter, void *client_data) { |
---|
643 | size_t hsize = hs->size; |
---|
644 | gbs_hash_entry **mtab = (gbs_hash_entry **)GB_calloc(sizeof(void *), hs->nelem); |
---|
645 | |
---|
646 | size_t j = 0; |
---|
647 | for (size_t i = 0; i < hsize; i++) { |
---|
648 | for (gbs_hash_entry *e = hs->entries[i]; e; e = e->next) { |
---|
649 | if (e->val) { |
---|
650 | mtab[j++] = e; |
---|
651 | } |
---|
652 | } |
---|
653 | } |
---|
654 | |
---|
655 | GB_sort((void**)mtab, 0, j, wrap_hashCompare4gb_sort, (void*)sorter); |
---|
656 | |
---|
657 | for (size_t i = 0; i < j; i++) { |
---|
658 | long new_val = func(mtab[i]->key, mtab[i]->val, client_data); |
---|
659 | if (new_val != mtab[i]->val) GBS_write_hash(hs, mtab[i]->key, new_val); |
---|
660 | } |
---|
661 | |
---|
662 | free(mtab); |
---|
663 | } |
---|
664 | |
---|
665 | int GBS_HCF_sortedByKey(const char *k0, long /*v0*/, const char *k1, long /*v1*/) { |
---|
666 | return strcmp(k0, k1); |
---|
667 | } |
---|
668 | |
---|
669 | // --------------------------------------------- |
---|
670 | // Some Hash Procedures for [long,long] |
---|
671 | |
---|
672 | inline long gbs_numhash_index(long key, long size) { |
---|
673 | long x; |
---|
674 | x = (key * (long long)97)%size; // make one multiplier a (long long) to avoid |
---|
675 | if (x<0) x += size; // int overflow and abort if compiled with -ftrapv |
---|
676 | return x; |
---|
677 | } |
---|
678 | |
---|
679 | |
---|
680 | GB_NUMHASH *GBS_create_numhash(size_t estimated_elements) { |
---|
681 | size_t size = hash_size(estimated_elements); |
---|
682 | GB_NUMHASH *hs = (GB_NUMHASH *)GB_calloc(sizeof(*hs), 1); |
---|
683 | |
---|
684 | hs->size = size; |
---|
685 | hs->nelem = 0; |
---|
686 | hs->entries = (numhash_entry **)GB_calloc(sizeof(*(hs->entries)), (size_t)size); |
---|
687 | |
---|
688 | return hs; |
---|
689 | } |
---|
690 | |
---|
691 | long GBS_read_numhash(GB_NUMHASH *hs, long key) { |
---|
692 | size_t i = gbs_numhash_index(key, hs->size); |
---|
693 | for (numhash_entry *e = hs->entries[i]; e; e = e->next) { |
---|
694 | if (e->key==key) return e->val; |
---|
695 | } |
---|
696 | return 0; |
---|
697 | } |
---|
698 | |
---|
699 | long GBS_write_numhash(GB_NUMHASH *hs, long key, long val) { |
---|
700 | size_t i = gbs_numhash_index(key, hs->size); |
---|
701 | long oldval = 0; |
---|
702 | |
---|
703 | if (val == 0) { // erase |
---|
704 | numhash_entry **nextPtr = &(hs->entries[i]); |
---|
705 | |
---|
706 | for (numhash_entry *e = hs->entries[i]; e; e = e->next) { |
---|
707 | if (e->key == key) { |
---|
708 | *nextPtr = e->next; // unlink entry |
---|
709 | gbm_free_mem(e, sizeof(*e), GBM_HASH_INDEX); |
---|
710 | hs->nelem--; |
---|
711 | return 0; |
---|
712 | } |
---|
713 | nextPtr = &(e->next); |
---|
714 | } |
---|
715 | } |
---|
716 | else { |
---|
717 | for (numhash_entry *e=hs->entries[i]; e; e=e->next) { |
---|
718 | if (e->key==key) { |
---|
719 | oldval = e->val; gb_assert(oldval); |
---|
720 | e->val = val; |
---|
721 | break; |
---|
722 | } |
---|
723 | } |
---|
724 | |
---|
725 | if (!oldval) { |
---|
726 | numhash_entry *e = (numhash_entry *)gbm_get_mem(sizeof(*e), GBM_HASH_INDEX); |
---|
727 | |
---|
728 | e->next = hs->entries[i]; |
---|
729 | e->key = key; |
---|
730 | e->val = val; |
---|
731 | |
---|
732 | hs->nelem++; |
---|
733 | hs->entries[i] = e; |
---|
734 | } |
---|
735 | } |
---|
736 | return oldval; |
---|
737 | } |
---|
738 | |
---|
739 | static void GBS_erase_numhash(GB_NUMHASH *hs) { |
---|
740 | size_t hsize = hs->size; |
---|
741 | |
---|
742 | for (size_t i=0; i<hsize; i++) { |
---|
743 | for (numhash_entry *e = hs->entries[i]; e; ) { |
---|
744 | numhash_entry *next = e->next; |
---|
745 | |
---|
746 | gbm_free_mem(e, sizeof(*e), GBM_HASH_INDEX); |
---|
747 | e = next; |
---|
748 | } |
---|
749 | } |
---|
750 | |
---|
751 | hs->nelem = 0; |
---|
752 | } |
---|
753 | |
---|
754 | void GBS_free_numhash(GB_NUMHASH *hs) { |
---|
755 | GBS_erase_numhash(hs); |
---|
756 | free(hs->entries); |
---|
757 | free(hs); |
---|
758 | } |
---|
759 | |
---|
760 | // -------------------------------------------------------------------------------- |
---|
761 | |
---|
762 | #ifdef UNIT_TESTS |
---|
763 | |
---|
764 | #include <test_unit.h> |
---|
765 | |
---|
766 | // determine hash quality |
---|
767 | |
---|
768 | struct gbs_hash_statistic_summary { |
---|
769 | long count; // how many stats |
---|
770 | long min_size, max_size, sum_size; |
---|
771 | long min_nelem, max_nelem, sum_nelem; |
---|
772 | long min_collisions, max_collisions, sum_collisions; |
---|
773 | double min_fill_ratio, max_fill_ratio, sum_fill_ratio; |
---|
774 | double min_hash_quality, max_hash_quality, sum_hash_quality; |
---|
775 | |
---|
776 | void init() { |
---|
777 | count = 0; |
---|
778 | min_size = min_nelem = min_collisions = LONG_MAX; |
---|
779 | max_size = max_nelem = max_collisions = LONG_MIN; |
---|
780 | min_fill_ratio = min_hash_quality = DBL_MAX; |
---|
781 | max_fill_ratio = max_hash_quality = DBL_MIN; |
---|
782 | |
---|
783 | sum_size = sum_nelem = sum_collisions = 0; |
---|
784 | sum_fill_ratio = sum_hash_quality = 0.0; |
---|
785 | } |
---|
786 | }; |
---|
787 | |
---|
788 | class hash_statistic_manager : virtual Noncopyable { |
---|
789 | GB_HASH *stat_hash; |
---|
790 | public: |
---|
791 | hash_statistic_manager() : stat_hash(NULL) { } |
---|
792 | ~hash_statistic_manager() { |
---|
793 | if (stat_hash) GBS_free_hash(stat_hash); |
---|
794 | } |
---|
795 | |
---|
796 | gbs_hash_statistic_summary *get_stat_summary(const char *id) { |
---|
797 | if (!stat_hash) stat_hash = GBS_create_dynaval_hash(10, GB_MIND_CASE, GBS_dynaval_free); |
---|
798 | |
---|
799 | long found = GBS_read_hash(stat_hash, id); |
---|
800 | if (!found) { |
---|
801 | gbs_hash_statistic_summary *stat = (gbs_hash_statistic_summary*)GB_calloc(1, sizeof(*stat)); |
---|
802 | stat->init(); |
---|
803 | found = (long)stat; |
---|
804 | GBS_write_hash(stat_hash, id, found); |
---|
805 | } |
---|
806 | |
---|
807 | return (gbs_hash_statistic_summary*)found; |
---|
808 | } |
---|
809 | }; |
---|
810 | |
---|
811 | static void addto_hash_statistic_summary(gbs_hash_statistic_summary *stat, long size, long nelem, long collisions, double fill_ratio, double hash_quality) { |
---|
812 | stat->count++; |
---|
813 | |
---|
814 | if (stat->min_size > size) stat->min_size = size; |
---|
815 | if (stat->max_size < size) stat->max_size = size; |
---|
816 | |
---|
817 | if (stat->min_nelem > nelem) stat->min_nelem = nelem; |
---|
818 | if (stat->max_nelem < nelem) stat->max_nelem = nelem; |
---|
819 | |
---|
820 | if (stat->min_collisions > collisions) stat->min_collisions = collisions; |
---|
821 | if (stat->max_collisions < collisions) stat->max_collisions = collisions; |
---|
822 | |
---|
823 | if (stat->min_fill_ratio > fill_ratio) stat->min_fill_ratio = fill_ratio; |
---|
824 | if (stat->max_fill_ratio < fill_ratio) stat->max_fill_ratio = fill_ratio; |
---|
825 | |
---|
826 | if (stat->min_hash_quality > hash_quality) stat->min_hash_quality = hash_quality; |
---|
827 | if (stat->max_hash_quality < hash_quality) stat->max_hash_quality = hash_quality; |
---|
828 | |
---|
829 | stat->sum_size += size; |
---|
830 | stat->sum_nelem += nelem; |
---|
831 | stat->sum_collisions += collisions; |
---|
832 | stat->sum_fill_ratio += fill_ratio; |
---|
833 | stat->sum_hash_quality += hash_quality; |
---|
834 | } |
---|
835 | |
---|
836 | static hash_statistic_manager hash_stat_man; |
---|
837 | |
---|
838 | static void test_clear_hash_statistic_summary(const char *id) { |
---|
839 | hash_stat_man.get_stat_summary(id)->init(); |
---|
840 | } |
---|
841 | |
---|
842 | static void test_print_hash_statistic_summary(const char *id) { |
---|
843 | gbs_hash_statistic_summary *stat = hash_stat_man.get_stat_summary(id); |
---|
844 | long count = stat->count; |
---|
845 | printf("Statistic summary for %li hashes of type '%s':\n", count, id); |
---|
846 | printf("- size: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_size, stat->max_size, (double)stat->sum_size/count); |
---|
847 | printf("- nelem: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_nelem, stat->max_nelem, (double)stat->sum_nelem/count); |
---|
848 | printf("- fill_ratio: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_fill_ratio*100.0, stat->max_fill_ratio*100.0, (double)stat->sum_fill_ratio/count*100.0); |
---|
849 | printf("- collisions: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_collisions, stat->max_collisions, (double)stat->sum_collisions/count); |
---|
850 | printf("- hash_quality: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_hash_quality*100.0, stat->max_hash_quality*100.0, (double)stat->sum_hash_quality/count*100.0); |
---|
851 | } |
---|
852 | |
---|
853 | static void test_calc_hash_statistic(const GB_HASH *hs, const char *id, int print) { |
---|
854 | long queues = 0; |
---|
855 | long collisions; |
---|
856 | double fill_ratio = (double)hs->nelem/hs->size; |
---|
857 | double hash_quality; |
---|
858 | |
---|
859 | for (size_t i = 0; i < hs->size; i++) { |
---|
860 | if (hs->entries[i]) queues++; |
---|
861 | } |
---|
862 | collisions = hs->nelem - queues; |
---|
863 | |
---|
864 | hash_quality = (double)queues/hs->nelem; // no collisions means 100% quality |
---|
865 | |
---|
866 | if (print != 0) { |
---|
867 | printf("Statistic for hash '%s':\n", id); |
---|
868 | printf("- size = %zu\n", hs->size); |
---|
869 | printf("- elements = %zu (fill ratio = %4.1f%%)\n", hs->nelem, fill_ratio*100.0); |
---|
870 | printf("- collisions = %li (hash quality = %4.1f%%)\n", collisions, hash_quality*100.0); |
---|
871 | } |
---|
872 | |
---|
873 | addto_hash_statistic_summary(hash_stat_man.get_stat_summary(id), hs->size, hs->nelem, collisions, fill_ratio, hash_quality); |
---|
874 | } |
---|
875 | |
---|
876 | static long test_numhash_count_elems(GB_NUMHASH *hs) { |
---|
877 | return hs->nelem; |
---|
878 | } |
---|
879 | |
---|
880 | static long insert_into_hash(const char *key, long val, void *cl_toHash) { |
---|
881 | GB_HASH *toHash = (GB_HASH*)cl_toHash; |
---|
882 | GBS_write_hash(toHash, key, val); |
---|
883 | return val; |
---|
884 | } |
---|
885 | static long erase_from_hash(const char *key, long val, void *cl_fromHash) { |
---|
886 | GB_HASH *fromHash = (GB_HASH*)cl_fromHash; |
---|
887 | long val2 = GBS_read_hash(fromHash, key); |
---|
888 | |
---|
889 | if (val2 == val) { |
---|
890 | GBS_write_hash(fromHash, key, 0); |
---|
891 | } |
---|
892 | else { |
---|
893 | printf("value mismatch in hashes_are_equal(): key='%s' val: %li != %li\n", key, val2, val); // NEED_NO_COV |
---|
894 | } |
---|
895 | return val; |
---|
896 | } |
---|
897 | |
---|
898 | static bool hashes_are_equal(GB_HASH *h1, GB_HASH *h2) { |
---|
899 | size_t count1 = GBS_hash_count_elems(h1); |
---|
900 | size_t count2 = GBS_hash_count_elems(h2); |
---|
901 | |
---|
902 | bool equal = (count1 == count2); |
---|
903 | if (equal) { |
---|
904 | GB_HASH *copy = GBS_create_hash(count1, GB_MIND_CASE); |
---|
905 | |
---|
906 | GBS_hash_do_loop(h1, insert_into_hash, copy); |
---|
907 | GBS_hash_do_loop(h2, erase_from_hash, copy); |
---|
908 | |
---|
909 | equal = (GBS_hash_count_elems(copy) == 0); |
---|
910 | GBS_free_hash(copy); |
---|
911 | } |
---|
912 | return equal; |
---|
913 | } |
---|
914 | |
---|
915 | struct TestData : virtual Noncopyable { |
---|
916 | GB_HASH *mind; |
---|
917 | GB_HASH *ignore; |
---|
918 | GB_NUMHASH *num; |
---|
919 | |
---|
920 | TestData() { |
---|
921 | mind = GBS_create_hash(100, GB_MIND_CASE); |
---|
922 | ignore = GBS_create_hash(100, GB_IGNORE_CASE); |
---|
923 | num = GBS_create_numhash(100); |
---|
924 | } |
---|
925 | ~TestData() { |
---|
926 | GBS_free_numhash(num); |
---|
927 | GBS_free_hash(ignore); |
---|
928 | GBS_free_hash(mind); |
---|
929 | } |
---|
930 | |
---|
931 | void reset() { |
---|
932 | GBS_erase_hash(mind); |
---|
933 | GBS_erase_hash(ignore); |
---|
934 | GBS_erase_numhash(num); |
---|
935 | } |
---|
936 | |
---|
937 | GB_HASH *get_hash(bool case_sens) { |
---|
938 | return case_sens ? mind : ignore; |
---|
939 | } |
---|
940 | }; |
---|
941 | |
---|
942 | static TestData TEST; |
---|
943 | |
---|
944 | static size_t test_hash_count_value(GB_HASH *hs, long val) { |
---|
945 | size_t hsize = hs->size; |
---|
946 | size_t count = 0; |
---|
947 | |
---|
948 | gb_assert(val != 0); // counting zero values makes no sense (cause these are not stored in the hash) |
---|
949 | |
---|
950 | for (size_t i = 0; i<hsize; ++i) { |
---|
951 | for (gbs_hash_entry *e=hs->entries[i]; e; e=e->next) { |
---|
952 | if (e->val == val) { |
---|
953 | ++count; |
---|
954 | } |
---|
955 | } |
---|
956 | } |
---|
957 | |
---|
958 | return count; |
---|
959 | } |
---|
960 | |
---|
961 | void TEST_GBS_write_hash() { |
---|
962 | TEST.reset(); |
---|
963 | |
---|
964 | for (int case_sens = 0; case_sens <= 1; ++case_sens) { |
---|
965 | GB_HASH *hash = TEST.get_hash(case_sens); |
---|
966 | |
---|
967 | GBS_write_hash(hash, "foo", 1); |
---|
968 | TEST_EXPECT_EQUAL(GBS_hash_count_elems(hash), 1); |
---|
969 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1); |
---|
970 | |
---|
971 | GBS_write_hash(hash, "foo", 2); |
---|
972 | TEST_EXPECT_EQUAL(GBS_hash_count_elems(hash), 1); |
---|
973 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2); |
---|
974 | |
---|
975 | GBS_write_hash(hash, "foo", 0); |
---|
976 | TEST_EXPECT_ZERO(GBS_hash_count_elems(hash)); |
---|
977 | TEST_EXPECT_ZERO(GBS_read_hash(hash, "foo")); |
---|
978 | |
---|
979 | GBS_write_hash(hash, "foo", 1); |
---|
980 | GBS_write_hash(hash, "FOO", 2); |
---|
981 | GBS_write_hash(hash, "BAR", 1); |
---|
982 | GBS_write_hash(hash, "bar", 2); |
---|
983 | |
---|
984 | if (case_sens) { |
---|
985 | TEST_EXPECT_EQUAL(GBS_hash_count_elems(hash), 4); |
---|
986 | |
---|
987 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1); |
---|
988 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), 2); |
---|
989 | TEST_EXPECT_ZERO(GBS_read_hash(hash, "Foo")); |
---|
990 | |
---|
991 | TEST_EXPECT_EQUAL(test_hash_count_value(hash, 1), 2); |
---|
992 | TEST_EXPECT_EQUAL(test_hash_count_value(hash, 2), 2); |
---|
993 | } |
---|
994 | else { |
---|
995 | TEST_EXPECT_EQUAL(GBS_hash_count_elems(hash), 2); |
---|
996 | |
---|
997 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2); |
---|
998 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), 2); |
---|
999 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "Foo"), 2); |
---|
1000 | |
---|
1001 | TEST_EXPECT_ZERO(test_hash_count_value(hash, 1)); |
---|
1002 | TEST_EXPECT_EQUAL(test_hash_count_value(hash, 2), 2); |
---|
1003 | } |
---|
1004 | |
---|
1005 | if (case_sens) { |
---|
1006 | TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar")); |
---|
1007 | GBS_write_hash_no_strdup(hash, strdup("foobar"), 0); |
---|
1008 | TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar")); |
---|
1009 | GBS_write_hash_no_strdup(hash, strdup("foobar"), 3); |
---|
1010 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foobar"), 3); |
---|
1011 | GBS_write_hash_no_strdup(hash, strdup("foobar"), 0); |
---|
1012 | TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar")); |
---|
1013 | } |
---|
1014 | } |
---|
1015 | } |
---|
1016 | |
---|
1017 | void TEST_GBS_incr_hash() { |
---|
1018 | TEST.reset(); |
---|
1019 | |
---|
1020 | for (int case_sens = 0; case_sens <= 1; ++case_sens) { |
---|
1021 | GB_HASH *hash = TEST.get_hash(case_sens); |
---|
1022 | |
---|
1023 | GBS_incr_hash(hash, "foo"); |
---|
1024 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1); |
---|
1025 | |
---|
1026 | GBS_incr_hash(hash, "foo"); |
---|
1027 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2); |
---|
1028 | |
---|
1029 | GBS_incr_hash(hash, "FOO"); |
---|
1030 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), case_sens ? 2 : 3); |
---|
1031 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), case_sens ? 1 : 3); |
---|
1032 | } |
---|
1033 | } |
---|
1034 | |
---|
1035 | static void test_string_2_hashtab(GB_HASH *hash, char *data) { |
---|
1036 | // modifies data |
---|
1037 | char *p, *d, *dp; |
---|
1038 | int c; |
---|
1039 | char *nextp; |
---|
1040 | char *str; |
---|
1041 | int strlen; |
---|
1042 | long val; |
---|
1043 | |
---|
1044 | for (p = data; p; p = nextp) { |
---|
1045 | strlen = 0; |
---|
1046 | for (dp = p; (c = *dp); dp++) { |
---|
1047 | if (c==':') { |
---|
1048 | if (dp[1] == ':') dp++; |
---|
1049 | else break; |
---|
1050 | } |
---|
1051 | strlen++; |
---|
1052 | } |
---|
1053 | if (*dp) { |
---|
1054 | nextp = strchr(dp, ' '); |
---|
1055 | if (nextp) nextp++; |
---|
1056 | } |
---|
1057 | else break; |
---|
1058 | |
---|
1059 | str = (char *)GB_calloc(sizeof(char), strlen+1); |
---|
1060 | for (dp = p, d = str; (c = *dp); dp++) { |
---|
1061 | if (c==':') { |
---|
1062 | if (dp[1] == ':') { |
---|
1063 | *(d++) = c; |
---|
1064 | dp++; |
---|
1065 | } |
---|
1066 | else break; |
---|
1067 | } |
---|
1068 | else { |
---|
1069 | *(d++) = c; |
---|
1070 | } |
---|
1071 | } |
---|
1072 | val = atoi(dp+1); |
---|
1073 | GBS_write_hash_no_strdup(hash, str, val); |
---|
1074 | } |
---|
1075 | } |
---|
1076 | |
---|
1077 | void TEST_GBS_hashtab_2_string() { |
---|
1078 | TEST.reset(); |
---|
1079 | |
---|
1080 | for (int case_sens = 0; case_sens <= 1; ++case_sens) { |
---|
1081 | GB_HASH *hash = TEST.get_hash(case_sens); |
---|
1082 | |
---|
1083 | GBS_write_hash(hash, "foo", 1); |
---|
1084 | GBS_write_hash(hash, "bar", 2); |
---|
1085 | GBS_write_hash(hash, "FOO", 3); |
---|
1086 | GBS_write_hash(hash, "BAR", 4); |
---|
1087 | GBS_write_hash(hash, "foo:bar", 3); |
---|
1088 | GBS_write_hash(hash, "FOO:bar", 4); |
---|
1089 | } |
---|
1090 | for (int case_sens = 0; case_sens <= 1; ++case_sens) { |
---|
1091 | GB_HASH *hash = TEST.get_hash(case_sens); |
---|
1092 | |
---|
1093 | char *as_string = GBS_hashtab_2_string(hash); |
---|
1094 | TEST_REJECT_NULL(as_string); |
---|
1095 | |
---|
1096 | GB_HASH *hash2 = GBS_create_hash(1000, case_sens ? GB_MIND_CASE : GB_IGNORE_CASE); |
---|
1097 | test_string_2_hashtab(hash2, as_string); |
---|
1098 | TEST_EXPECT(hashes_are_equal(hash, hash2)); |
---|
1099 | TEST_EXPECT(hashes_are_equal(hash, hash2)); |
---|
1100 | |
---|
1101 | free(as_string); |
---|
1102 | GBS_free_hash(hash2); |
---|
1103 | } |
---|
1104 | |
---|
1105 | { |
---|
1106 | GB_HASH *hash = TEST.get_hash(true); |
---|
1107 | char *as_string = GBS_hashtab_2_string(hash); |
---|
1108 | |
---|
1109 | GB_HASH *hash2 = GBS_create_hash(21, GB_MIND_CASE); |
---|
1110 | GBS_hash_do_sorted_loop(hash, insert_into_hash, GBS_HCF_sortedByKey, hash2); |
---|
1111 | |
---|
1112 | GB_HASH *hash3 = GBS_create_hash(100, GB_MIND_CASE); |
---|
1113 | GBS_hash_do_sorted_loop(hash, insert_into_hash, GBS_HCF_sortedByKey, hash3); |
---|
1114 | |
---|
1115 | char *as_string2 = GBS_hashtab_2_string(hash2); |
---|
1116 | char *as_string3 = GBS_hashtab_2_string(hash3); |
---|
1117 | |
---|
1118 | TEST_EXPECT_EQUAL__BROKEN(as_string, as_string2, "FOO::bar:4 BAR:4 bar:2 foo:1 FOO:3 foo::bar:3 "); |
---|
1119 | TEST_EXPECT_EQUAL (as_string, as_string3); |
---|
1120 | |
---|
1121 | GBS_free_hash(hash3); |
---|
1122 | GBS_free_hash(hash2); |
---|
1123 | |
---|
1124 | free(as_string3); |
---|
1125 | free(as_string2); |
---|
1126 | free(as_string); |
---|
1127 | } |
---|
1128 | } |
---|
1129 | |
---|
1130 | inline long key2val(long key, int pass) { |
---|
1131 | long val; |
---|
1132 | switch (pass) { |
---|
1133 | case 1: |
---|
1134 | val = key/3; |
---|
1135 | break; |
---|
1136 | case 2: |
---|
1137 | val = key*17461; |
---|
1138 | break; |
---|
1139 | default : |
---|
1140 | val = LONG_MIN; |
---|
1141 | TEST_EXPECT(0); // NEED_NO_COV |
---|
1142 | break; |
---|
1143 | } |
---|
1144 | return val; |
---|
1145 | } |
---|
1146 | |
---|
1147 | void TEST_numhash() { |
---|
1148 | GB_NUMHASH *numhash = GBS_create_numhash(10); |
---|
1149 | GB_NUMHASH *numhash2 = GBS_create_numhash(10); |
---|
1150 | |
---|
1151 | const long LOW = -200; |
---|
1152 | const long HIGH = 200; |
---|
1153 | const long STEP = 17; |
---|
1154 | |
---|
1155 | long added = 0; |
---|
1156 | for (int pass = 1; pass <= 2; ++pass) { |
---|
1157 | added = 0; |
---|
1158 | for (long key = LOW; key <= HIGH; key += STEP) { |
---|
1159 | long val = key2val(key, pass); |
---|
1160 | GBS_write_numhash(numhash, key, val); |
---|
1161 | added++; |
---|
1162 | } |
---|
1163 | |
---|
1164 | TEST_EXPECT_EQUAL(test_numhash_count_elems(numhash), added); |
---|
1165 | |
---|
1166 | for (long key = LOW; key <= HIGH; key += STEP) { |
---|
1167 | TEST_EXPECT_EQUAL(key2val(key, pass), GBS_read_numhash(numhash, key)); |
---|
1168 | } |
---|
1169 | } |
---|
1170 | |
---|
1171 | TEST_EXPECT_ZERO(GBS_read_numhash(numhash, -4711)); // not-existing entry |
---|
1172 | |
---|
1173 | // erase by overwrite: |
---|
1174 | for (long key = LOW; key <= HIGH; key += STEP) { |
---|
1175 | GBS_write_numhash(numhash2, key, GBS_read_numhash(numhash, key)); // copy numhash->numhash2 |
---|
1176 | GBS_write_numhash(numhash, key, (long)NULL); |
---|
1177 | } |
---|
1178 | TEST_EXPECT_EQUAL(test_numhash_count_elems(numhash2), added); |
---|
1179 | TEST_EXPECT_ZERO(test_numhash_count_elems(numhash)); |
---|
1180 | |
---|
1181 | GBS_free_numhash(numhash2); // free filled hash |
---|
1182 | GBS_free_numhash(numhash); // free empty hash |
---|
1183 | } |
---|
1184 | |
---|
1185 | |
---|
1186 | static int freeCounter; |
---|
1187 | static void freeDynamicHashElem(long cl_ptr) { |
---|
1188 | GBS_dynaval_free(cl_ptr); |
---|
1189 | freeCounter++; |
---|
1190 | } |
---|
1191 | |
---|
1192 | void TEST_GBS_dynaval_hash() { |
---|
1193 | const int SIZE = 10; |
---|
1194 | const int ELEMS = 30; |
---|
1195 | |
---|
1196 | GB_HASH *dynahash = GBS_create_dynaval_hash(SIZE, GB_MIND_CASE, freeDynamicHashElem); |
---|
1197 | |
---|
1198 | for (int pass = 1; pass <= 2; ++pass) { |
---|
1199 | freeCounter = 0; |
---|
1200 | |
---|
1201 | for (int i = 0; i<ELEMS; ++i) { |
---|
1202 | char *val = GBS_global_string_copy("value %i", i); |
---|
1203 | char *oldval = (char*)GBS_write_hash(dynahash, GBS_global_string("key %i", i), (long)val); |
---|
1204 | free(oldval); |
---|
1205 | } |
---|
1206 | |
---|
1207 | TEST_EXPECT_ZERO(freeCounter); // overwriting values shall not automatically free them |
---|
1208 | } |
---|
1209 | |
---|
1210 | freeCounter = 0; |
---|
1211 | GBS_free_hash(dynahash); |
---|
1212 | TEST_EXPECT_EQUAL(freeCounter, ELEMS); |
---|
1213 | } |
---|
1214 | |
---|
1215 | void TEST_GBS_optimize_hash_and_stats() { |
---|
1216 | const int SIZE = 10; |
---|
1217 | const int FILL = 70; |
---|
1218 | |
---|
1219 | test_clear_hash_statistic_summary("test"); |
---|
1220 | for (int pass = 1; pass <= 3; ++pass) { |
---|
1221 | GB_HASH *hash = GBS_create_hash(SIZE, GB_MIND_CASE); |
---|
1222 | |
---|
1223 | for (int i = 1; i <= FILL; ++i) { |
---|
1224 | const char *key = GBS_global_string("%i", i); |
---|
1225 | GBS_write_hash(hash, key, i); |
---|
1226 | } |
---|
1227 | TEST_EXPECT(hash->nelem > hash->size); // ensure hash is overfilled! |
---|
1228 | |
---|
1229 | switch (pass) { |
---|
1230 | case 1: // nothing, only free overfilled hash below |
---|
1231 | break; |
---|
1232 | case 2: // test overwrite overfilled hash |
---|
1233 | for (int i = 1; i <= FILL; ++i) { |
---|
1234 | const char *key = GBS_global_string("%i", i); |
---|
1235 | |
---|
1236 | TEST_EXPECT_EQUAL(GBS_read_hash(hash, key), i); |
---|
1237 | GBS_write_hash(hash, key, 0); |
---|
1238 | TEST_EXPECT_ZERO(GBS_read_hash(hash, key)); |
---|
1239 | } |
---|
1240 | break; |
---|
1241 | case 3: // test optimize |
---|
1242 | GBS_optimize_hash(hash); |
---|
1243 | TEST_EXPECT_LESS_EQUAL(hash->nelem, hash->size); |
---|
1244 | break; |
---|
1245 | default : |
---|
1246 | TEST_EXPECT(0); // NEED_NO_COV |
---|
1247 | break; |
---|
1248 | } |
---|
1249 | |
---|
1250 | test_calc_hash_statistic(hash, "test", 1); |
---|
1251 | GBS_free_hash(hash); |
---|
1252 | } |
---|
1253 | |
---|
1254 | test_print_hash_statistic_summary("test"); |
---|
1255 | } |
---|
1256 | |
---|
1257 | static bool has_value(const char *, long val, void *cd) { return val == (long)cd; } |
---|
1258 | static bool has_value_greater(const char *, long val, void *cd) { return val > (long)cd; } |
---|
1259 | |
---|
1260 | void TEST_GBS_hash_next_element_that() { |
---|
1261 | TEST.reset(); |
---|
1262 | |
---|
1263 | for (int case_sens = 0; case_sens <= 1; ++case_sens) { |
---|
1264 | GB_HASH *hash = TEST.get_hash(case_sens); |
---|
1265 | |
---|
1266 | GBS_write_hash(hash, "foo", 0); |
---|
1267 | GBS_write_hash(hash, "bar", 1); |
---|
1268 | GBS_write_hash(hash, "foobar", 2); |
---|
1269 | GBS_write_hash(hash, "barfoo", 3); |
---|
1270 | |
---|
1271 | #define READ_REVERSE(value) GBS_hash_next_element_that(hash, NULL, has_value, (void*)value) |
---|
1272 | #define ASSERT_READ_REVERSE_RETURNS(value, expected) TEST_EXPECT_EQUAL((const char *)expected, READ_REVERSE(value)); |
---|
1273 | |
---|
1274 | ASSERT_READ_REVERSE_RETURNS(0, NULL); |
---|
1275 | ASSERT_READ_REVERSE_RETURNS(1, "bar"); |
---|
1276 | ASSERT_READ_REVERSE_RETURNS(2, "foobar"); |
---|
1277 | ASSERT_READ_REVERSE_RETURNS(3, "barfoo"); |
---|
1278 | ASSERT_READ_REVERSE_RETURNS(4, NULL); |
---|
1279 | |
---|
1280 | const char *key = NULL; |
---|
1281 | long sum = 0; |
---|
1282 | |
---|
1283 | for (int iter = 1; iter <= 3; ++iter) { |
---|
1284 | key = GBS_hash_next_element_that(hash, key, has_value_greater, (void*)1); |
---|
1285 | if (iter == 3) TEST_REJECT(key); |
---|
1286 | else { |
---|
1287 | TEST_REJECT_NULL(key); |
---|
1288 | sum += GBS_read_hash(hash, key); |
---|
1289 | } |
---|
1290 | } |
---|
1291 | TEST_EXPECT_EQUAL(sum, 5); // sum of all values > 1 |
---|
1292 | } |
---|
1293 | } |
---|
1294 | |
---|
1295 | const size_t MAX_PRIME = sorted_primes[KNOWN_PRIMES-1]; |
---|
1296 | |
---|
1297 | static size_t get_overflown_prime() { return gbs_get_a_prime(MAX_PRIME+1); } |
---|
1298 | #if defined(ASSERTION_USED) |
---|
1299 | static void detect_prime_overflow() { get_overflown_prime(); } |
---|
1300 | #endif // ASSERTION_USED |
---|
1301 | |
---|
1302 | void TEST_hash_specials() { |
---|
1303 | const size_t SOME_PRIME = 434201; |
---|
1304 | TEST_EXPECT_EQUAL(gbs_get_a_prime(SOME_PRIME), SOME_PRIME); |
---|
1305 | TEST_EXPECT_EQUAL(gbs_get_a_prime(MAX_PRIME), MAX_PRIME); |
---|
1306 | |
---|
1307 | #if defined(ASSERTION_USED) |
---|
1308 | TEST_EXPECT_CODE_ASSERTION_FAILS(detect_prime_overflow); |
---|
1309 | #else |
---|
1310 | TEST_EXPECT_EQUAL(get_overflown_prime(), MAX_PRIME+1); |
---|
1311 | #endif // ASSERTION_USED |
---|
1312 | } |
---|
1313 | |
---|
1314 | #endif // UNIT_TESTS |
---|
1315 | |
---|
1316 | |
---|
1317 | |
---|