1 | // ============================================================= // |
---|
2 | // // |
---|
3 | // File : PT_debug.cxx // |
---|
4 | // Purpose : debugging code // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in March 2011 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ============================================================= // |
---|
11 | |
---|
12 | #include "probe.h" |
---|
13 | #include "probe_tree.h" |
---|
14 | #include "pt_prototypes.h" |
---|
15 | |
---|
16 | #include <arb_misc.h> |
---|
17 | #include <arb_file.h> |
---|
18 | |
---|
19 | #if defined(CALCULATE_STATS_ON_QUERY) |
---|
20 | |
---|
21 | #define DEBUG_MAX_CHAIN_SIZE 10000000 |
---|
22 | #define DEBUG_MAX_CHAIN_NAME_DIST 99999 |
---|
23 | #define DEBUG_TREE_DEPTH (PT_POS_TREE_HEIGHT+1) |
---|
24 | |
---|
25 | struct PT_statistic { |
---|
26 | static bool show_for_index(int i, int imax) { |
---|
27 | // return true; // uncomment to show all |
---|
28 | if (i == imax) return true; // always show last index |
---|
29 | if (i<20) return true; |
---|
30 | if (i<100) return (i%10) == 9; |
---|
31 | if (i<1000) return (i%100) == 99; |
---|
32 | if (i<10000) return (i%1000) == 999; |
---|
33 | if (i<100000) return (i%10000) == 9999; |
---|
34 | if (i<1000000) return (i%100000) == 99999; |
---|
35 | if (i<10000000) return (i%1000000) == 999999; |
---|
36 | pt_assert(0); |
---|
37 | return true; |
---|
38 | } |
---|
39 | |
---|
40 | // nodes |
---|
41 | size_t nodes[DEBUG_TREE_DEPTH]; |
---|
42 | size_t nodes_mem[DEBUG_TREE_DEPTH]; |
---|
43 | |
---|
44 | size_t splits[DEBUG_TREE_DEPTH][PT_BASES]; |
---|
45 | |
---|
46 | // leafs |
---|
47 | size_t tips[DEBUG_TREE_DEPTH]; |
---|
48 | size_t tips_mem[DEBUG_TREE_DEPTH]; |
---|
49 | |
---|
50 | // chains |
---|
51 | size_t chains_at_depth[DEBUG_TREE_DEPTH]; |
---|
52 | size_t chains_at_depth_mem[DEBUG_TREE_DEPTH]; |
---|
53 | |
---|
54 | size_t chains_of_size[DEBUG_MAX_CHAIN_SIZE+1]; |
---|
55 | size_t chains_of_size_mem[DEBUG_MAX_CHAIN_SIZE+1]; |
---|
56 | |
---|
57 | size_t chaincount; |
---|
58 | |
---|
59 | size_t chain_name_dist[DEBUG_MAX_CHAIN_NAME_DIST+1]; |
---|
60 | |
---|
61 | struct chain_head_mem { |
---|
62 | size_t flag; |
---|
63 | size_t apos; |
---|
64 | size_t size; |
---|
65 | } chm; |
---|
66 | |
---|
67 | PT_statistic() { memset(this, 0, sizeof(*this)); } |
---|
68 | |
---|
69 | void analyse(POS_TREE2 *pt, int height) { |
---|
70 | pt_assert(height<DEBUG_TREE_DEPTH); |
---|
71 | switch (pt->get_type()) { |
---|
72 | case PT2_NODE: { |
---|
73 | int basecnt = 0; |
---|
74 | for (int i=PT_QU; i<PT_BASES; i++) { |
---|
75 | POS_TREE2 *pt_help = PT_read_son(pt, (PT_base)i); |
---|
76 | if (pt_help) { |
---|
77 | basecnt++; |
---|
78 | analyse(pt_help, height+1); |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|
82 | nodes[height]++; |
---|
83 | nodes_mem[height] += PT_node_size(pt); |
---|
84 | |
---|
85 | splits[height][basecnt]++; |
---|
86 | break; |
---|
87 | } |
---|
88 | case PT2_LEAF: |
---|
89 | tips[height]++; |
---|
90 | tips_mem[height] += PT_leaf_size(pt); |
---|
91 | break; |
---|
92 | |
---|
93 | case PT2_CHAIN: { |
---|
94 | size_t size = 1; |
---|
95 | ChainIteratorStage2 iter(pt); |
---|
96 | |
---|
97 | { |
---|
98 | chm.flag++; |
---|
99 | const char *ptr = ((char*)pt)+1; |
---|
100 | const char *next = ptr; |
---|
101 | |
---|
102 | next = next + (pt->flags&1 ? 4 : 2); chm.apos += (next-ptr); ptr = next; |
---|
103 | PT_read_compact_nat(next); chm.size += (next-ptr); ptr = next; |
---|
104 | } |
---|
105 | |
---|
106 | int lastName = iter.at().get_name(); |
---|
107 | while (++iter) { |
---|
108 | { |
---|
109 | int thisName = iter.at().get_name(); |
---|
110 | pt_assert(thisName >= lastName); |
---|
111 | |
---|
112 | int nameDist = thisName-lastName; |
---|
113 | if (nameDist>DEBUG_MAX_CHAIN_NAME_DIST) nameDist = DEBUG_MAX_CHAIN_NAME_DIST; |
---|
114 | ++chain_name_dist[nameDist]; |
---|
115 | lastName = thisName; |
---|
116 | } |
---|
117 | ++size; |
---|
118 | } |
---|
119 | |
---|
120 | if (size>DEBUG_MAX_CHAIN_SIZE) size = DEBUG_MAX_CHAIN_SIZE; |
---|
121 | |
---|
122 | size_t mem = iter.memptr()-(char*)pt; |
---|
123 | |
---|
124 | chains_of_size[size]++; |
---|
125 | chains_of_size_mem[size] += mem; |
---|
126 | chains_at_depth[height]++; |
---|
127 | chains_at_depth_mem[height] += mem; |
---|
128 | |
---|
129 | chaincount++; |
---|
130 | break; |
---|
131 | } |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | void printCountAndMem(size_t count, size_t mem) { |
---|
136 | printf("%10zu mem:%7s mean:%5.2f b", count, GBS_readable_size(mem, "b"), mem/double(count)); |
---|
137 | } |
---|
138 | void printCountPercentAndMem(size_t count, size_t all, size_t mem, size_t allmem) { |
---|
139 | printf("%10zu (=%5.2f%%) mem:%7s (=%5.2f%%) mean:%5.2f b", |
---|
140 | count, count*100.0/all, |
---|
141 | GBS_readable_size(mem, "b"), mem*100.0/allmem, |
---|
142 | mem/double(count)); |
---|
143 | } |
---|
144 | |
---|
145 | void LF() { putchar('\n'); } |
---|
146 | |
---|
147 | void dump(size_t indexsize) { |
---|
148 | size_t sum = 0; |
---|
149 | size_t mem = 0; |
---|
150 | size_t allmem = 0; |
---|
151 | |
---|
152 | for (int i=0; i<DEBUG_TREE_DEPTH; i++) { |
---|
153 | size_t k = nodes[i]; |
---|
154 | if (k) { |
---|
155 | sum += k; |
---|
156 | mem += nodes_mem[i]; |
---|
157 | printf("nodes at depth %2i:", i); printCountAndMem(k, nodes_mem[i]); |
---|
158 | for (int j=0; j<PT_BASES; j++) { |
---|
159 | k = splits[i][j]; |
---|
160 | printf(" [%i]=%-10zu", j, k); |
---|
161 | } |
---|
162 | LF(); |
---|
163 | } |
---|
164 | } |
---|
165 | fputs("nodes (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize); |
---|
166 | allmem += mem; |
---|
167 | |
---|
168 | mem = sum = 0; |
---|
169 | for (int i=0; i<DEBUG_TREE_DEPTH; i++) { |
---|
170 | size_t k = tips[i]; |
---|
171 | if (k) { |
---|
172 | sum += k; |
---|
173 | mem += tips_mem[i]; |
---|
174 | printf("tips at depth %2i:", i); printCountAndMem(k, tips_mem[i]); LF(); |
---|
175 | } |
---|
176 | } |
---|
177 | fputs("tips (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize); |
---|
178 | allmem += mem; |
---|
179 | |
---|
180 | mem = sum = 0; |
---|
181 | for (int i=0; i<DEBUG_TREE_DEPTH; i++) { |
---|
182 | size_t k = chains_at_depth[i]; |
---|
183 | if (k) { |
---|
184 | sum += k; |
---|
185 | mem += chains_at_depth_mem[i]; |
---|
186 | printf("chains at depth %2i:", i); printCountAndMem(k, chains_at_depth_mem[i]); LF(); |
---|
187 | } |
---|
188 | } |
---|
189 | fputs("chains (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize); |
---|
190 | allmem += mem; |
---|
191 | |
---|
192 | #if defined(ASSERTION_USED) |
---|
193 | size_t chain_mem1 = mem; |
---|
194 | #endif |
---|
195 | |
---|
196 | size_t chain_sum = 0; |
---|
197 | |
---|
198 | mem = sum = 0; |
---|
199 | for (int i=0; i <= DEBUG_MAX_CHAIN_SIZE; i++) { |
---|
200 | size_t k = chains_of_size[i]; |
---|
201 | if (k) { |
---|
202 | size_t e = i*k; |
---|
203 | |
---|
204 | chain_sum += k; |
---|
205 | sum += e; |
---|
206 | mem += chains_of_size_mem[i]; |
---|
207 | } |
---|
208 | } |
---|
209 | { |
---|
210 | int first_skipped = 0; |
---|
211 | |
---|
212 | size_t k_sum = 0; |
---|
213 | size_t e_sum = 0; |
---|
214 | size_t m_sum = 0; |
---|
215 | |
---|
216 | for (int i=0; i <= DEBUG_MAX_CHAIN_SIZE; i++) { |
---|
217 | bool show = show_for_index(i, DEBUG_MAX_CHAIN_SIZE); |
---|
218 | if (!show && !first_skipped) { |
---|
219 | first_skipped = i; |
---|
220 | pt_assert(first_skipped); // 0 has to be shown always |
---|
221 | } |
---|
222 | |
---|
223 | size_t k = chains_of_size[i]; |
---|
224 | if (k) { |
---|
225 | size_t e = i*k; |
---|
226 | |
---|
227 | k_sum += k; |
---|
228 | e_sum += e; |
---|
229 | m_sum += chains_of_size_mem[i]; |
---|
230 | } |
---|
231 | |
---|
232 | if (show) { |
---|
233 | if (k_sum) { |
---|
234 | if (first_skipped) printf("chain of size %7i-%7i", first_skipped, i); |
---|
235 | else printf("chain of size %15i", i); |
---|
236 | |
---|
237 | printf(" occur %10zu (%5.2f%%) entries:", k_sum, k_sum*100.0/chain_sum); |
---|
238 | printCountPercentAndMem(e_sum, sum, m_sum, mem); |
---|
239 | LF(); |
---|
240 | } |
---|
241 | |
---|
242 | first_skipped = 0; |
---|
243 | |
---|
244 | k_sum = 0; |
---|
245 | e_sum = 0; |
---|
246 | m_sum = 0; |
---|
247 | } |
---|
248 | } |
---|
249 | } |
---|
250 | fputs("ch.entries (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize); |
---|
251 | // Note: chains were already added to allmem above |
---|
252 | pt_assert(chain_mem1 == mem); // both chain-examinations should result in same mem size |
---|
253 | |
---|
254 | { |
---|
255 | size_t followup_chain_entries = 0; |
---|
256 | for (int i = 0; i <= DEBUG_MAX_CHAIN_NAME_DIST; ++i) { // LOOP_VECTORIZED |
---|
257 | followup_chain_entries += chain_name_dist[i]; |
---|
258 | } |
---|
259 | |
---|
260 | double pcsum = 0.0; |
---|
261 | int first_skipped = 0; |
---|
262 | size_t k_sum = 0; |
---|
263 | |
---|
264 | for (int i = 0; i <= DEBUG_MAX_CHAIN_NAME_DIST; ++i) { |
---|
265 | bool show = show_for_index(i, DEBUG_MAX_CHAIN_NAME_DIST); |
---|
266 | if (!show && !first_skipped) { |
---|
267 | first_skipped = i; |
---|
268 | pt_assert(first_skipped); // 0 has to be shown always |
---|
269 | } |
---|
270 | |
---|
271 | k_sum += chain_name_dist[i]; |
---|
272 | if (show) { |
---|
273 | if (k_sum) { |
---|
274 | if (first_skipped) printf("chain-entry-name-dist of %5i-%5i", first_skipped, i); |
---|
275 | else printf("chain-entry-name-dist of %11i", i); |
---|
276 | |
---|
277 | double pc = k_sum*100.0/followup_chain_entries; |
---|
278 | pcsum += pc; |
---|
279 | |
---|
280 | printf(" occurs %10zu (=%5.2f%%; sum=%5.2f%%)\n", k_sum, pc, pcsum); |
---|
281 | } |
---|
282 | |
---|
283 | first_skipped = 0; |
---|
284 | k_sum = 0; |
---|
285 | } |
---|
286 | } |
---|
287 | printf("overall followup-chain-entries: %zu\n", followup_chain_entries); |
---|
288 | } |
---|
289 | |
---|
290 | { |
---|
291 | size_t allhead = chm.flag+chm.apos+chm.size; |
---|
292 | printf("Chain header summary:\n" |
---|
293 | " flag %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.flag, "b"), chm.flag *100.0/mem, 1.0); |
---|
294 | printf(" apos %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.apos, "b"), chm.apos *100.0/mem, double(chm.apos) /chm.flag); |
---|
295 | printf(" size %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.size, "b"), chm.size *100.0/mem, double(chm.size) /chm.flag); |
---|
296 | printf(" all %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(allhead, "b"), allhead *100.0/mem, double(allhead) /chm.flag); |
---|
297 | } |
---|
298 | |
---|
299 | size_t known_other_mem = |
---|
300 | 1 + // dummy byte at start of file (avoids that address of a node is zero) |
---|
301 | 4 + // PT_SERVER_MAGIC |
---|
302 | 1 + // PT_SERVER_VERSION |
---|
303 | 2 + // info block size |
---|
304 | (psg.big_db ? 8 : 0) + // big pointer to root node (if final int is 0xffffffff) |
---|
305 | 4; // final int |
---|
306 | |
---|
307 | allmem += known_other_mem; |
---|
308 | |
---|
309 | printf("overall mem:%7s\n", GBS_readable_size(allmem, "b")); |
---|
310 | printf("indexsize: %7s\n", GBS_readable_size(indexsize, "b")); |
---|
311 | |
---|
312 | if (indexsize>allmem) { |
---|
313 | printf("(file-mem):%7s\n", GBS_readable_size(indexsize-allmem, "b")); |
---|
314 | } |
---|
315 | else if (indexsize<allmem) { |
---|
316 | printf("(mem-file):%7s\n", GBS_readable_size(allmem-indexsize, "b")); |
---|
317 | } |
---|
318 | else { |
---|
319 | puts("indexsize exactly matches counted memory"); |
---|
320 | } |
---|
321 | fflush_all(); |
---|
322 | } |
---|
323 | }; |
---|
324 | #endif |
---|
325 | |
---|
326 | void PT_dump_tree_statistics(const char *indexfilename) { |
---|
327 | #if defined(CALCULATE_STATS_ON_QUERY) |
---|
328 | // show various debug information about the tree |
---|
329 | PT_statistic *stat = new PT_statistic; |
---|
330 | stat->analyse(psg.TREE_ROOT2(), 0); |
---|
331 | |
---|
332 | size_t filesize = GB_size_of_file(indexfilename); |
---|
333 | stat->dump(filesize); |
---|
334 | |
---|
335 | delete stat; |
---|
336 | #endif |
---|
337 | } |
---|
338 | |
---|
339 | // -------------------------------------------------------------------------------- |
---|
340 | |
---|
341 | class PT_dump_loc { |
---|
342 | const char *prefix; |
---|
343 | FILE *out; |
---|
344 | public: |
---|
345 | PT_dump_loc(const char *Prefix, FILE *Out) : prefix(Prefix), out(Out) {} |
---|
346 | |
---|
347 | int operator()(const DataLoc& loc) { |
---|
348 | fprintf(out, "%s %i=%s@%i>%i\n", prefix, loc.get_name(), loc.get_pid().get_shortname(), loc.get_abs_pos(), loc.get_rel_pos()); |
---|
349 | return 0; |
---|
350 | } |
---|
351 | int operator()(const AbsLoc& loc) { |
---|
352 | fprintf(out, "%s %i=%s@%i\n", prefix, loc.get_name(), loc.get_pid().get_shortname(), loc.get_abs_pos()); |
---|
353 | return 0; |
---|
354 | } |
---|
355 | }; |
---|
356 | |
---|
357 | template <typename PT> inline void dump_POS_TREE_special(PT *pt, const char *prefix, FILE *out); |
---|
358 | template <> inline void dump_POS_TREE_special(POS_TREE1 *pt, const char *prefix, FILE *out) { |
---|
359 | if (pt->is_saved()) { |
---|
360 | fprintf(out, "{x} %s [saved]\n", prefix); |
---|
361 | } |
---|
362 | else { |
---|
363 | pt_assert(pt->get_type() == PT1_UNDEF); |
---|
364 | fprintf(out, "{?} %s [undefined]\n", prefix); |
---|
365 | } |
---|
366 | } |
---|
367 | template <> inline void dump_POS_TREE_special(POS_TREE2 *, const char *, FILE *) {} |
---|
368 | |
---|
369 | template <typename PT> void PT_dump_POS_TREE_recursive(PT *pt, const char *prefix, FILE *out) { |
---|
370 | if (pt->is_node()) { |
---|
371 | for (int b = PT_QU; b<PT_BASES; b++) { |
---|
372 | PT *son = PT_read_son<PT>(pt, PT_base(b)); |
---|
373 | if (son) { |
---|
374 | char *subPrefix = GBS_global_string_copy("%s%c", prefix, base_2_readable(b)); |
---|
375 | PT_dump_POS_TREE_recursive(son, subPrefix, out); |
---|
376 | free(subPrefix); |
---|
377 | } |
---|
378 | } |
---|
379 | } |
---|
380 | else if (pt->is_leaf()) { |
---|
381 | char *subPrefix = GBS_global_string_copy("{l} %s", prefix); |
---|
382 | PT_dump_loc dump_leaf(subPrefix, out); |
---|
383 | dump_leaf(DataLoc(pt)); |
---|
384 | free(subPrefix); |
---|
385 | } |
---|
386 | else if (pt->is_chain()) { |
---|
387 | char *subPrefix = GBS_global_string_copy("{c} %s", prefix); |
---|
388 | PT_dump_loc locDumper(subPrefix, out); |
---|
389 | PT_forwhole_chain(pt, locDumper); |
---|
390 | free(subPrefix); |
---|
391 | } |
---|
392 | else { |
---|
393 | dump_POS_TREE_special(pt, prefix, out); |
---|
394 | } |
---|
395 | } |
---|
396 | |
---|
397 | // -------------------------------------------------------------------------------- |
---|
398 | |
---|
399 | void PT_dump_POS_TREE(POS_TREE1 *IF_DEBUG(node), FILE *IF_DEBUG(out)) { |
---|
400 | // Debug function for all stages |
---|
401 | #if defined(DEBUG) |
---|
402 | if (!node) fputs("<zero node>\n", out); |
---|
403 | |
---|
404 | fprintf(out, "node father %p\n", node->get_father()); |
---|
405 | |
---|
406 | switch (node->get_type()) { |
---|
407 | case PT1_LEAF: { |
---|
408 | DataLoc loc(node); |
---|
409 | fprintf(out, "leaf %i:%i,%i\n", loc.get_name(), loc.get_rel_pos(), loc.get_abs_pos()); |
---|
410 | break; |
---|
411 | } |
---|
412 | case PT1_NODE: |
---|
413 | for (long i = 0; i < PT_BASES; i++) { |
---|
414 | fprintf(out, "%6li:0x%p\n", i, PT_read_son(node, (PT_base)i)); |
---|
415 | } |
---|
416 | break; |
---|
417 | case PT1_CHAIN: |
---|
418 | fputs("chain:\n", out); |
---|
419 | PTD_chain_print chainPrinter; |
---|
420 | PT_forwhole_chain(node, chainPrinter); |
---|
421 | break; |
---|
422 | case PT1_SAVED: |
---|
423 | fputs("saved:\n", out); |
---|
424 | break; |
---|
425 | case PT1_UNDEF: |
---|
426 | fputs("<invalid node type>\n", out); |
---|
427 | break; |
---|
428 | } |
---|
429 | fflush_all(); |
---|
430 | #endif |
---|
431 | } |
---|
432 | |
---|
433 | static void PT_dump_POS_TREE_to_file(const char *dumpfile) { |
---|
434 | FILE *dump = fopen(dumpfile, "wt"); |
---|
435 | if (!dump) { |
---|
436 | GBK_terminate(GB_IO_error("writing", dumpfile)); |
---|
437 | } |
---|
438 | if (psg.get_stage() == STAGE1) { |
---|
439 | PT_dump_POS_TREE_recursive(psg.TREE_ROOT1(), "", dump); |
---|
440 | } |
---|
441 | else { |
---|
442 | PT_dump_POS_TREE_recursive(psg.TREE_ROOT2(), "", dump); |
---|
443 | } |
---|
444 | fclose(dump); |
---|
445 | |
---|
446 | fprintf(stderr, "Note: index has been dumped to '%s'\n", dumpfile); |
---|
447 | } |
---|
448 | |
---|
449 | |
---|
450 | int PT_index_dump(const PT_main *main) { |
---|
451 | const char *outfile = main->dumpname; |
---|
452 | PT_dump_POS_TREE_to_file(outfile); |
---|
453 | return 0; |
---|
454 | } |
---|